From: Fomafix Date: Mon, 2 May 2016 05:14:45 +0000 (+0000) Subject: Do not double decode HTML entities for IDs X-Git-Tag: 1.31.0-rc.0~2115^2 X-Git-Url: http://git.cyclocoop.org/%22.%20generer_url_ecrire%28%22sites_tous%22%2C%22%22%29.%20%22?a=commitdiff_plain;h=b6c895ddc5a2baeb04a542b6b318e33f350f4534;p=lhc%2Fweb%2Fwiklou.git Do not double decode HTML entities for IDs * in links (T103714) * in indicators (T104196) This change removes the automatic Sanitizer::decodeCharReferences from Sanitizer::escapeId and Sanitizer::escapeIdInternal. Where decoding of HTML entities is wanted, an explicit call to Sanitizer::decodeCharReferences is added. Explicitly decode HTML entities in non-local autocomments. (T104311) Bug: T103714 Bug: T104196 Bug: T104311 Change-Id: I88e8e2077e6f5eec2b232391f7818370894a62dc --- diff --git a/includes/Linker.php b/includes/Linker.php index dccd99c73e..41105752bb 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -1175,7 +1175,7 @@ class Linker { $sectionTitle = Title::newFromText( '#' . 
$section ); } else { $sectionTitle = Title::makeTitleSafe( $title->getNamespace(), - $title->getDBkey(), $section ); + $title->getDBkey(), Sanitizer::decodeCharReferences( $section ) ); } if ( $sectionTitle ) { $link = Linker::makeCommentLink( $sectionTitle, $wgLang->getArrow(), $wikiId, 'noclasses' ); diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 7d17cd1065..a7f963a435 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -1203,8 +1203,6 @@ class Sanitizer { global $wgExperimentalHtmlIds; $options = (array)$options; - $id = self::decodeCharReferences( $id ); - if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) { $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id ); $id = trim( $id, '_' ); @@ -1313,8 +1311,6 @@ class Sanitizer { * @return string */ private static function escapeIdInternal( $id, $mode ) { - $id = self::decodeCharReferences( $id ); - switch ( $mode ) { case 'html5': $id = str_replace( ' ', '_', $id ); diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index ff4936d035..e901f6f311 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -4204,6 +4204,8 @@ class Parser { # Save headline for section edit hint before it's escaped $headlineHint = $safeHeadline; + # Decode HTML entities + $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline ); $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK ); $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline ); $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY ); @@ -5764,6 +5766,7 @@ class Parser { # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); $text = Sanitizer::normalizeSectionNameWhitespace( $text ); + $text = Sanitizer::decodeCharReferences( $text ); return '#' . 
Sanitizer::escapeIdForLink( $text ); } @@ -5782,6 +5785,7 @@ class Parser { # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); $text = Sanitizer::normalizeSectionNameWhitespace( $text ); + $text = Sanitizer::decodeCharReferences( $text ); if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) { // ForAttribute() and ForLink() are the same for legacy encoding diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 00d2538c80..77854b7ade 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -29279,7 +29279,7 @@ Decoding of HTML entities in headings and links for IDs and link fragments (T103 [[#A&B&C&D&E]] !! html/php

A&B&C&D&E[edit]

-

#A&B&C&D&E +

#A&B&C&D&E

!! end diff --git a/tests/phpunit/includes/SanitizerTest.php b/tests/phpunit/includes/SanitizerTest.php index 7472fb933d..13fed56033 100644 --- a/tests/phpunit/includes/SanitizerTest.php +++ b/tests/phpunit/includes/SanitizerTest.php @@ -376,7 +376,7 @@ class SanitizerTest extends MediaWikiTestCase { [ '\'', '.27' ], [ '§', '.C2.A7' ], [ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ], - [ 'A&B&C&amp;D&amp;amp;E', 'A.26B.26C.26amp.3BD.26amp.3Bamp.3BE' ], + [ 'A&B&C&amp;D&amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ], ]; } @@ -453,10 +453,11 @@ class SanitizerTest extends MediaWikiTestCase { public function provideEscapeIdForStuff() { // Test inputs and outputs - $text = 'foo тест_#%!\'()[]:<>'; - $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E'; - $html5Encoded = 'foo_тест_#%!\'()[]:<>'; - $html5Experimental = 'foo_тест_!_()[]:<>'; + $text = 'foo тест_#%!\'()[]:<>&&&amp;'; + $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' . + '.26.26amp.3B.26amp.3Bamp.3B'; + $html5Encoded = 'foo_тест_#%!\'()[]:<>&&&amp;'; + $html5Experimental = 'foo_тест_!_()[]:<>_amp;_amp;amp;'; // Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode $legacy = [ 'legacy', 'legacy' ];