From c6340de5b3e4ecb2b6196218faa20e796b21485a Mon Sep 17 00:00:00 2001
From: Brion Vibber
Date: Sun, 14 Nov 2004 21:36:43 +0000
Subject: [PATCH] Fix regression in ICU-mode UTF-8 verification: U+FFFF is forbidden

---
 includes/normal/CleanUpTest.php | 8 ++++++++
 includes/normal/UtfNormal.php   | 5 +++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/includes/normal/CleanUpTest.php b/includes/normal/CleanUpTest.php
index badade6d1e..e9156abd80 100644
--- a/includes/normal/CleanUpTest.php
+++ b/includes/normal/CleanUpTest.php
@@ -330,6 +330,14 @@ class CleanUpTest extends PHPUnit_TestCase {
 			bin2hex( $expect ),
 			bin2hex( UtfNormal::cleanUp( $text ) ) );
 	}
+
+	function testForbiddenRegression() {
+		$text = "\xef\xbf\xbf"; # U+FFFF, illegal char
+		$expect = "\xef\xbf\xbd";
+		$this->assertEquals(
+			bin2hex( $expect ),
+			bin2hex( UtfNormal::cleanUp( $text ) ) );
+	}
 }
 
 
diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php
index a4c095c904..62461d626c 100644
--- a/includes/normal/UtfNormal.php
+++ b/includes/normal/UtfNormal.php
@@ -132,11 +132,12 @@ class UtfNormal {
 				'/[\x00-\x08\x0b\x0c\x0e-\x1f]/',
 				UTF8_REPLACEMENT,
 				$string );
-			$str = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+			$string = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+			$string = str_replace( UTF8_FFFF, UTF8_REPLACEMENT, $string );
 
 			# UnicodeString constructor fails if the string ends with a
 			# head byte. Add a junk char at the end, we'll strip it off.
-			return rtrim( utf8_normalize( $str . "\x01", UNORM_NFC ), "\x01" );
+			return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" );
 		} elseif( UtfNormal::quickIsNFCVerify( $string ) ) {
 			# Side effect -- $string has had UTF-8 errors cleaned up.
 			return $string;
-- 
2.20.1