From: Max Semenik
Date: Fri, 3 Nov 2017 02:35:11 +0000 (-0700)
Subject: Remove nbsp and similar characters from section IDs
X-Git-Tag: 1.31.0-rc.0~1585^2
X-Git-Url: http://git.cyclocoop.org/data/File:Denys_Savchenko_%27%27Pentecoste%27%27.jpg?a=commitdiff_plain;h=129067c907ea65f621ab64cdfff59fd2b28091e1;p=lhc%2Fweb%2Fwiklou.git
Remove nbsp and similar characters from section IDs
Bug: T90902
Change-Id: I71bdb7dd43c3e532287290e3c691d9739da45475
---
diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31
index 4bfcfcb5de..3688163f23 100644
--- a/RELEASE-NOTES-1.31
+++ b/RELEASE-NOTES-1.31
@@ -41,6 +41,7 @@ production.
* …
=== Bug fixes in 1.31 ===
+* (T90902) Non-breaking space in header ID breaks anchor
* …
=== Action API changes in 1.31 ===
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index f2e47dc36a..3548da9581 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4206,6 +4206,9 @@ class Parser {
# Decode HTML entities
$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
+
+ $safeHeadline = $this->normalizeSectionName( $safeHeadline );
+
$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
@@ -5767,6 +5770,8 @@ class Parser {
$text = $this->stripSectionName( $text );
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
$text = Sanitizer::decodeCharReferences( $text );
+ $text = $this->normalizeSectionName( $text );
+
return '#' . Sanitizer::escapeIdForLink( $text );
}
@@ -5786,6 +5791,7 @@ class Parser {
$text = $this->stripSectionName( $text );
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
$text = Sanitizer::decodeCharReferences( $text );
+ $text = $this->normalizeSectionName( $text );
if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
// ForAttribute() and ForLink() are the same for legacy encoding
@@ -5797,6 +5803,24 @@ class Parser {
return "#$id";
}
+ /**
+ * Normalize a section name the same way that code building links to this section would
+ *
+ * @param string $text Section name to normalize
+ * @return string Fragment reported by the title parser, or $text unchanged if the title is malformed
+ */
+ private function normalizeSectionName( $text ) {
+ # T90902: parse the name as a "#fragment" title so IDs get the same normalization as links
+ $titleParser = MediaWikiServices::getInstance()->getTitleParser();
+ try {
+
+ $parts = $titleParser->splitTitleString( "#$text" );
+ } catch ( MalformedTitleException $ex ) {
+ return $text;
+ }
+ return $parts['fragment'];
+ }
+
/**
* Strips a text string of wikitext for use in a section anchor
*
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 3c861ea10b..1204dbd715 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -29536,3 +29536,17 @@ wgFragmentMode=[ 'html5' ]
#啤酒
#啤酒
!! end
+
+!! test
+T90902: Normalize weird characters in section IDs
+!! config
+wgFragmentMode=[ 'html5', 'legacy' ]
+!! wikitext
+== Foo bar ==
+[[#Foo bar]]
+
+!! html/php
+Foo bar[edit]
+#Foo bar
+
+!! end