From c9f6f2acea3c5317e82d10c99ba305e48bada86f Mon Sep 17 00:00:00 2001
From: Brion Vibber
Date: Mon, 27 Dec 2010 03:21:43 +0000
Subject: [PATCH] * bug 26437: fix for Sanitizer::decodeCharReferences
converting invalid hex character references
Patch by Umherirrender: https://bugzilla.wikimedia.org/attachment.cgi?id=7931&action=edit
Also added a parser regression test case: "HTML Hex character encoding bogus encoding (bug 26437 regression check)"
---
includes/Sanitizer.php | 7 +------
tests/parser/parserTests.txt | 18 ++++++++++++++++++
2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index fba015611a..ab67010734 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -31,8 +31,7 @@
define( 'MW_CHAR_REFS_REGEX',
'/&([A-Za-z0-9\x80-\xff]+);
|&\#([0-9]+);
- |&\#x([0-9A-Za-z]+);
- |&\#X([0-9A-Za-z]+);
+ |&\#[xX]([0-9A-Fa-f]+);
|(&)/x' );
/**
@@ -1127,8 +1126,6 @@ class Sanitizer {
$ret = Sanitizer::decCharReference( $matches[2] );
} elseif( $matches[3] != '' ) {
$ret = Sanitizer::hexCharReference( $matches[3] );
- } elseif( $matches[4] != '' ) {
- $ret = Sanitizer::hexCharReference( $matches[4] );
}
if( is_null( $ret ) ) {
return htmlspecialchars( $matches[0] );
@@ -1238,8 +1235,6 @@ class Sanitizer {
return Sanitizer::decodeChar( intval( $matches[2] ) );
} elseif( $matches[3] != '' ) {
return Sanitizer::decodeChar( hexdec( $matches[3] ) );
- } elseif( $matches[4] != '' ) {
- return Sanitizer::decodeChar( hexdec( $matches[4] ) );
}
# Last case should be an ampersand by itself
return $matches[0];
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 7b21f3f0f8..9ebcba3185 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -6764,6 +6764,24 @@ HTML Hex character encoding (spells the word "JavaScript")
!! end
+!! test
+HTML Hex character encoding bogus encoding (bug 26437 regression check)
+!! input
+&#xsee;&#XSEE;
+!! result
+<p>&amp;#xsee;&amp;#XSEE;
+</p>
+!! end
+
+!! test
+HTML Hex character encoding mixed case
+!! input
+&#xEE;&#Xee;
+!! result
+<p>&#xee;&#xee;
+</p>
+!! end
+
!! test
__FORCETOC__ override
!! input
--
2.20.1