From: Brion Vibber Date: Sun, 14 Nov 2004 21:36:43 +0000 (+0000) Subject: Fix regression in ICU-mode UTF-8 verification: U+FFFF is forbidden X-Git-Tag: 1.5.0alpha1~1331 X-Git-Url: https://git.cyclocoop.org/%242?a=commitdiff_plain;h=c6340de5b3e4ecb2b6196218faa20e796b21485a;p=lhc%2Fweb%2Fwiklou.git Fix regression in ICU-mode UTF-8 verification: U+FFFF is forbidden --- diff --git a/includes/normal/CleanUpTest.php b/includes/normal/CleanUpTest.php index badade6d1e..e9156abd80 100644 --- a/includes/normal/CleanUpTest.php +++ b/includes/normal/CleanUpTest.php @@ -330,6 +330,14 @@ class CleanUpTest extends PHPUnit_TestCase { bin2hex( $expect ), bin2hex( UtfNormal::cleanUp( $text ) ) ); } + + function testForbiddenRegression() { + $text = "\xef\xbf\xbf"; # U+FFFF, illegal char + $expect = "\xef\xbf\xbd"; + $this->assertEquals( + bin2hex( $expect ), + bin2hex( UtfNormal::cleanUp( $text ) ) ); + } } diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php index a4c095c904..62461d626c 100644 --- a/includes/normal/UtfNormal.php +++ b/includes/normal/UtfNormal.php @@ -132,11 +132,12 @@ class UtfNormal { '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', UTF8_REPLACEMENT, $string ); - $str = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string ); + $string = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string ); + $string = str_replace( UTF8_FFFF, UTF8_REPLACEMENT, $string ); # UnicodeString constructor fails if the string ends with a # head byte. Add a junk char at the end, we'll strip it off. - return rtrim( utf8_normalize( $str . "\x01", UNORM_NFC ), "\x01" ); + return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" ); } elseif( UtfNormal::quickIsNFCVerify( $string ) ) { # Side effect -- $string has had UTF-8 errors cleaned up. return $string;