From: Brion Vibber Date: Sun, 14 Nov 2004 04:07:28 +0000 (+0000) Subject: Fix UTF-8 validation regression: well-formed but forbidden UTF-8 sequence followed... X-Git-Tag: 1.5.0alpha1~1339 X-Git-Url: https://git.cyclocoop.org/%242?a=commitdiff_plain;h=9535fc035b57fa548a94c08a8c67860fd87e8f53;p=lhc%2Fweb%2Fwiklou.git Fix UTF-8 validation regression: well-formed but forbidden UTF-8 sequence followed by bogus tail bytes --- diff --git a/includes/normal/CleanUpTest.php b/includes/normal/CleanUpTest.php index eed9c3fb86..7aa83c433b 100644 --- a/includes/normal/CleanUpTest.php +++ b/includes/normal/CleanUpTest.php @@ -207,7 +207,7 @@ class CleanUpTest extends PHPUnit_TestCase { bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ), bin2hex( $clean ), "Forbidden triplet $x should be rejected" ); - } elseif( $second < 0xc0 && $second < 0xc0 ) { + } elseif( $first > 0xc2 && $second < 0xc0 && $third < 0xc0 ) { $this->assertEquals( bin2hex( $head . UTF8_REPLACEMENT . $tail ), bin2hex( $clean ), @@ -278,6 +278,31 @@ class CleanUpTest extends PHPUnit_TestCase { bin2hex( $expect ), bin2hex( UtfNormal::cleanUp( $text ) ) ); } + + function testOverlongRegression() { + $text = "\x67" . + "\x1a" . # forbidden ascii + "\xea" . # bad head + "\xc1\xa6" . # overlong sequence + "\xad" . # bad tail + "\x1c" . # forbidden ascii + "\xb0" . # bad tail + "\x3c" . + "\x9e"; # bad tail + $expect = "\x67" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\x3c" . + "\xef\xbf\xbd"; + $this->assertEquals( + bin2hex( $expect ), + bin2hex( UtfNormal::cleanUp( $text ) ) ); + } + } diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php index 3c01b07341..1cbbd01b43 100644 --- a/includes/normal/UtfNormal.php +++ b/includes/normal/UtfNormal.php @@ -408,6 +408,7 @@ class UtfNormal { $replace[] = array( UTF8_REPLACEMENT, $base + $i + 1 - strlen( $sequence ), strlen( $sequence ) ); + $head = ''; continue; } }