From: Brion Vibber Date: Mon, 27 Dec 2010 03:21:43 +0000 (+0000) Subject: * bug 26437: fix for Sanitizer::decodeCharReferences converting invalid hex character... X-Git-Tag: 1.31.0-rc.0~33070 X-Git-Url: https://git.cyclocoop.org/%27.WWW_URL.%27admin/?a=commitdiff_plain;h=c9f6f2acea3c5317e82d10c99ba305e48bada86f;p=lhc%2Fweb%2Fwiklou.git * bug 26437: fix for Sanitizer::decodeCharReferences converting invalid hex character references Patch by Umherirrender: https://bugzilla.wikimedia.org/attachment.cgi?id=7931&action=edit Also added a parser regression test case: "HTML Hex character encoding bogus encoding (bug 26437 regression check)" --- diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index fba015611a..ab67010734 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -31,8 +31,7 @@ define( 'MW_CHAR_REFS_REGEX', '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); - |&\#x([0-9A-Za-z]+); - |&\#X([0-9A-Za-z]+); + |&\#[xX]([0-9A-Fa-f]+); |(&)/x' ); /** @@ -1127,8 +1126,6 @@ class Sanitizer { $ret = Sanitizer::decCharReference( $matches[2] ); } elseif( $matches[3] != '' ) { $ret = Sanitizer::hexCharReference( $matches[3] ); - } elseif( $matches[4] != '' ) { - $ret = Sanitizer::hexCharReference( $matches[4] ); } if( is_null( $ret ) ) { return htmlspecialchars( $matches[0] ); @@ -1238,8 +1235,6 @@ class Sanitizer { return Sanitizer::decodeChar( intval( $matches[2] ) ); } elseif( $matches[3] != '' ) { return Sanitizer::decodeChar( hexdec( $matches[3] ) ); - } elseif( $matches[4] != '' ) { - return Sanitizer::decodeChar( hexdec( $matches[4] ) ); } # Last case should be an ampersand by itself return $matches[0]; diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 7b21f3f0f8..9ebcba3185 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -6764,6 +6764,24 @@ HTML Hex character encoding (spells the word "JavaScript")

!! end +!! test +HTML Hex character encoding bogus encoding (bug 26437 regression check) +!! input +&#xsee;&#XSEE; +!! result +

<p>&amp;#xsee;&amp;#XSEE; +</p>

+!! end + +!! test +HTML Hex character encoding mixed case +!! input +&#xEE;&#Xee; +!! result +

<p>&#xee;&#xee; +</p>

+!! end + !! test __FORCETOC__ override !! input