bin2hex( $expect ),
bin2hex( UtfNormal::cleanUp( $text ) ) );
}
+
+ # Regression test: feeds the UTF-8 encoding of U+FFFF to
+ # UtfNormal::cleanUp() and asserts that the result is the three bytes
+ # EF BF BD. Input and expectation are written as raw byte escapes.
+ function testForbiddenRegression() {
+ $text = "\xef\xbf\xbf"; # U+FFFF, illegal char
+ $expect = "\xef\xbf\xbd"; # UTF-8 bytes of U+FFFD (replacement character)
+ # Both sides go through bin2hex(), so the assertion compares hex
+ # strings rather than the raw binary strings.
+ $this->assertEquals(
+ bin2hex( $expect ),
+ bin2hex( UtfNormal::cleanUp( $text ) ) );
+ }
}
'/[\x00-\x08\x0b\x0c\x0e-\x1f]/',
UTF8_REPLACEMENT,
$string );
- $str = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+ $string = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+ $string = str_replace( UTF8_FFFF, UTF8_REPLACEMENT, $string );
# UnicodeString constructor fails if the string ends with a
# head byte. Add a junk char at the end, we'll strip it off.
- return rtrim( utf8_normalize( $str . "\x01", UNORM_NFC ), "\x01" );
+ return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" );
} elseif( UtfNormal::quickIsNFCVerify( $string ) ) {
# Side effect -- $string has had UTF-8 errors cleaned up.
return $string;