From: Brion Vibber <brion@users.mediawiki.org>
Date: Sun, 14 Nov 2004 21:36:43 +0000 (+0000)
Subject: Fix regression in ICU-mode UTF-8 verification: U+FFFF is forbidden
X-Git-Tag: 1.5.0alpha1~1331
X-Git-Url: https://git.cyclocoop.org/%242?a=commitdiff_plain;h=c6340de5b3e4ecb2b6196218faa20e796b21485a;p=lhc%2Fweb%2Fwiklou.git

Fix regression in ICU-mode UTF-8 verification: U+FFFF is forbidden
---

diff --git a/includes/normal/CleanUpTest.php b/includes/normal/CleanUpTest.php
index badade6d1e..e9156abd80 100644
--- a/includes/normal/CleanUpTest.php
+++ b/includes/normal/CleanUpTest.php
@@ -330,6 +330,14 @@ class CleanUpTest extends PHPUnit_TestCase {
 			bin2hex( $expect ),
 			bin2hex( UtfNormal::cleanUp( $text ) ) );
 	}
+
+	function testForbiddenRegression() { # regression check: cleanUp() must turn U+FFFF into U+FFFD
+		$text   = "\xef\xbf\xbf"; # UTF-8 byte sequence for U+FFFF, a forbidden (illegal) code point
+		$expect = "\xef\xbf\xbd"; # UTF-8 byte sequence for U+FFFD REPLACEMENT CHARACTER
+		$this->assertEquals(
+			bin2hex( $expect ), # both sides are compared as hex dumps of their bytes
+			bin2hex( UtfNormal::cleanUp( $text ) ) );
+	}
 }
 
 
diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php
index a4c095c904..62461d626c 100644
--- a/includes/normal/UtfNormal.php
+++ b/includes/normal/UtfNormal.php
@@ -132,11 +132,12 @@ class UtfNormal {
 				'/[\x00-\x08\x0b\x0c\x0e-\x1f]/',
 				UTF8_REPLACEMENT,
 				$string );
-			$str = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+			$string = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+			$string = str_replace( UTF8_FFFF, UTF8_REPLACEMENT, $string );
 			
 			# UnicodeString constructor fails if the string ends with a
 			# head byte. Add a junk char at the end, we'll strip it off.
-			return rtrim( utf8_normalize( $str . "\x01", UNORM_NFC ), "\x01" );
+			return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" );
 		} elseif( UtfNormal::quickIsNFCVerify( $string ) ) {
 			# Side effect -- $string has had UTF-8 errors cleaned up.
 			return $string;