From: Fomafix Date: Thu, 16 Nov 2017 18:37:43 +0000 (+0100) Subject: Armor against French spaces detection in HTML attributes X-Git-Tag: 1.34.0-rc.0~4963^2 X-Git-Url: http://git.cyclocoop.org/%7B%7B%20url_for%28%27admin_vote_add%27%29%20%7D%7D?a=commitdiff_plain;h=a60dcdc2e346d0b0df621daee991cbd26729cd4b;p=lhc%2Fweb%2Fwiklou.git Armor against French spaces detection in HTML attributes This change also solves T13874 in a generic way. Bug: T5158 Change-Id: Id8cdb887182f346acab2d108836ce201626848af --- diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index a1b306434b..02708283bc 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1353,15 +1353,7 @@ class Parser { } # Clean up special characters, only run once, next-to-last before doBlockLevels - $fixtags = [ - # French spaces, last one Guillemet-left - # only if there is something before the space - '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ', - # french spaces, Guillemet-right - '/(\\302\\253) /' => '\\1 ', - '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, T13874. - ]; - $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text ); + $text = Sanitizer::armorFrenchSpaces( $text ); $text = $this->doBlockLevels( $text, $linestart ); diff --git a/includes/parser/Sanitizer.php b/includes/parser/Sanitizer.php index ff543dbb30..89a7c96124 100644 --- a/includes/parser/Sanitizer.php +++ b/includes/parser/Sanitizer.php @@ -1141,6 +1141,27 @@ class Sanitizer { return $encValue; } + /** + * Armor French spaces with a replacement character + * + * @since 1.32 + * @param string $text Text to armor + * @param string $space Space character for the French spaces, defaults to ' ' + * @return string Armored text + */ + public static function armorFrenchSpaces( $text, $space = ' ' ) { + // Replace $ with \$ and \ with \\ + $space = preg_replace( '#(?<!\\\\)(\\$|\\\\)#', '\\\\$1', $space ); + $fixtags = [ + # French spaces, last one Guillemet-left + # only if there is something before the space + '/(.) (?=\\?|:|;|!|%|\\302\\273)/' =>
"\\1$space", + # French spaces, Guillemet-right + '/(\\302\\253) /' => "\\1$space", + ]; + return preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text ); + } + /** * Encode an attribute value for HTML tags, with extra armoring * against further wiki processing. @@ -1168,6 +1189,9 @@ class Sanitizer { '__' => '__', ] ); + # Armor against French spaces detection (T5158) + $encValue = self::armorFrenchSpaces( $encValue, ' ' ); + # Stupid hack $encValue = preg_replace_callback( '/((?i)' . wfUrlProtocols() . ')/', diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index afddd78553..ce4df8597b 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -6589,7 +6589,7 @@ Element attributes with double ! should not be broken up by !! html/php -
hi
+
hi
!! html/parsoid @@ -6610,7 +6610,7 @@ parsoid=wt2html !! html/php - +
style="color: red !important;" data-contrived="put this herestyle="color: red !important;" data-contrived="put this here foo
@@ -18685,7 +18685,7 @@ Punctuation: CSS !important (T13874) !! wikitext
important
!! html -
important
+
important
!!end @@ -18694,7 +18694,7 @@ Punctuation: CSS ! important (T13874; with space after) !! wikitext
important
!! html -
important
+
important
!!end @@ -23954,10 +23954,10 @@ Play a bit with r67090 and T5158
 
 
!! html/php -
 
+
 
 
 
-
 
+
 
!! html/parsoid
 
@@ -23967,6 +23967,15 @@ Play a bit with r67090 and T5158 !! end +!! test +T5158: Test for French spaces in attributes +!! wikitext +
+!! html/php +


+

+!! end + !! test HTML5 data attributes !! wikitext