*/
function XtestAllChars() {
$rep = UTF8_REPLACEMENT;
- for( $i = 0x0; $i < UNICODE_MAX; $i++ ) {
+ for ( $i = 0x0; $i < UNICODE_MAX; $i++ ) {
$char = codepointToUtf8( $i );
$clean = UtfNormal::cleanUp( $char );
$x = sprintf( "%04X", $i );
- if( $i % 0x1000 == 0 ) echo "U+$x\n";
- if( $i == 0x0009 ||
- $i == 0x000a ||
- $i == 0x000d ||
- ($i > 0x001f && $i < UNICODE_SURROGATE_FIRST) ||
- ($i > UNICODE_SURROGATE_LAST && $i < 0xfffe ) ||
- ($i > 0xffff && $i <= UNICODE_MAX ) ) {
- if( isset( UtfNormal::$utfCanonicalComp[$char] ) || isset( UtfNormal::$utfCanonicalDecomp[$char] ) ) {
- $comp = UtfNormal::NFC( $char );
+
+ if ( $i % 0x1000 == 0 ) {
+ echo "U+$x\n";
+ }
+
+ if ( $i == 0x0009 ||
+ $i == 0x000a ||
+ $i == 0x000d ||
+ ( $i > 0x001f && $i < UNICODE_SURROGATE_FIRST ) ||
+ ( $i > UNICODE_SURROGATE_LAST && $i < 0xfffe ) ||
+ ( $i > 0xffff && $i <= UNICODE_MAX )
+ ) {
+ if ( isset( UtfNormal::$utfCanonicalComp[$char] ) || isset( UtfNormal::$utfCanonicalDecomp[$char] ) ) {
+ $comp = UtfNormal::NFC( $char );
$this->assertEquals(
bin2hex( $comp ),
bin2hex( $clean ),
/** @todo document */
function doTestBytes( $head, $tail ) {
- for( $i = 0x0; $i < 256; $i++ ) {
+ for ( $i = 0x0; $i < 256; $i++ ) {
$char = $head . chr( $i ) . $tail;
$clean = UtfNormal::cleanUp( $char );
$x = sprintf( "%02X", $i );
- if( $i == 0x0009 ||
- $i == 0x000a ||
- $i == 0x000d ||
- ($i > 0x001f && $i < 0x80) ) {
+
+ if ( $i == 0x0009 ||
+ $i == 0x000a ||
+ $i == 0x000d ||
+ ( $i > 0x001f && $i < 0x80 )
+ ) {
$this->assertEquals(
bin2hex( $char ),
bin2hex( $clean ),
"ASCII byte $x should be intact" );
- if( $char != $clean ) return;
+ if ( $char != $clean ) {
+ return;
+ }
} else {
$norm = $head . UTF8_REPLACEMENT . $tail;
$this->assertEquals(
bin2hex( $norm ),
bin2hex( $clean ),
"Forbidden byte $x should be rejected" );
- if( $norm != $clean ) return;
+ if ( $norm != $clean ) {
+ return;
+ }
}
}
}
* @todo document
*/
function doTestDoubleBytes( $head, $tail ) {
- for( $first = 0xc0; $first < 0x100; $first+=2 ) {
- for( $second = 0x80; $second < 0x100; $second+=2 ) {
+ for ( $first = 0xc0; $first < 0x100; $first += 2 ) {
+ for ( $second = 0x80; $second < 0x100; $second += 2 ) {
$char = $head . chr( $first ) . chr( $second ) . $tail;
$clean = UtfNormal::cleanUp( $char );
$x = sprintf( "%02X,%02X", $first, $second );
- if( $first > 0xc1 &&
- $first < 0xe0 &&
- $second < 0xc0 ) {
- $norm = UtfNormal::NFC( $char );
+ if ( $first > 0xc1 &&
+ $first < 0xe0 &&
+ $second < 0xc0
+ ) {
+ $norm = UtfNormal::NFC( $char );
$this->assertEquals(
bin2hex( $norm ),
bin2hex( $clean ),
"Pair $x should be intact" );
- if( $norm != $clean ) return;
- } elseif( $first > 0xfd || $second > 0xbf ) {
+ if ( $norm != $clean ) {
+ return;
+ }
+ } elseif ( $first > 0xfd || $second > 0xbf ) {
# fe and ff are not legal head bytes -- expect two replacement chars
$norm = $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail;
$this->assertEquals(
bin2hex( $norm ),
bin2hex( $clean ),
"Forbidden pair $x should be rejected" );
- if( $norm != $clean ) return;
+ if ( $norm != $clean ) {
+ return;
+ }
} else {
$norm = $head . UTF8_REPLACEMENT . $tail;
$this->assertEquals(
bin2hex( $norm ),
bin2hex( $clean ),
"Forbidden pair $x should be rejected" );
- if( $norm != $clean ) return;
+ if ( $norm != $clean ) {
+ return;
+ }
}
}
}
/** @todo document */
function doTestTripleBytes( $head, $tail ) {
- for( $first = 0xc0; $first < 0x100; $first+=2 ) {
- for( $second = 0x80; $second < 0x100; $second+=2 ) {
+ for ( $first = 0xc0; $first < 0x100; $first += 2 ) {
+ for ( $second = 0x80; $second < 0x100; $second += 2 ) {
#for( $third = 0x80; $third < 0x100; $third++ ) {
- for( $third = 0x80; $third < 0x81; $third++ ) {
+ for ( $third = 0x80; $third < 0x81; $third++ ) {
$char = $head . chr( $first ) . chr( $second ) . chr( $third ) . $tail;
$clean = UtfNormal::cleanUp( $char );
$x = sprintf( "%02X,%02X,%02X", $first, $second, $third );
- if( $first >= 0xe0 &&
+
+ if ( $first >= 0xe0 &&
$first < 0xf0 &&
$second < 0xc0 &&
- $third < 0xc0 ) {
- if( $first == 0xe0 && $second < 0xa0 ) {
+ $third < 0xc0
+ ) {
+ if ( $first == 0xe0 && $second < 0xa0 ) {
$this->assertEquals(
bin2hex( $head . UTF8_REPLACEMENT . $tail ),
bin2hex( $clean ),
"Overlong triplet $x should be rejected" );
- } elseif( $first == 0xed &&
- ( chr( $first ) . chr( $second ) . chr( $third )) >= UTF8_SURROGATE_FIRST ) {
+ } elseif ( $first == 0xed &&
+ ( chr( $first ) . chr( $second ) . chr( $third ) ) >= UTF8_SURROGATE_FIRST
+ ) {
$this->assertEquals(
bin2hex( $head . UTF8_REPLACEMENT . $tail ),
bin2hex( $clean ),
bin2hex( $clean ),
"Triplet $x should be intact" );
}
- } elseif( $first > 0xc1 && $first < 0xe0 && $second < 0xc0 ) {
+ } elseif ( $first > 0xc1 && $first < 0xe0 && $second < 0xc0 ) {
$this->assertEquals(
bin2hex( UtfNormal::NFC( $head . chr( $first ) . chr( $second ) ) . UTF8_REPLACEMENT . $tail ),
bin2hex( $clean ),
"Valid 2-byte $x + broken tail" );
- } elseif( $second > 0xc1 && $second < 0xe0 && $third < 0xc0 ) {
+ } elseif ( $second > 0xc1 && $second < 0xe0 && $third < 0xc0 ) {
$this->assertEquals(
bin2hex( $head . UTF8_REPLACEMENT . UtfNormal::NFC( chr( $second ) . chr( $third ) . $tail ) ),
bin2hex( $clean ),
"Broken head + valid 2-byte $x" );
- } elseif( ( $first > 0xfd || $second > 0xfd ) &&
- ( ( $second > 0xbf && $third > 0xbf ) ||
- ( $second < 0xc0 && $third < 0xc0 ) ||
- ( $second > 0xfd ) ||
- ( $third > 0xfd ) ) ) {
+ } elseif ( ( $first > 0xfd || $second > 0xfd ) &&
+ ( ( $second > 0xbf && $third > 0xbf ) ||
+ ( $second < 0xc0 && $third < 0xc0 ) ||
+ ( $second > 0xfd ) ||
+ ( $third > 0xfd ) )
+ ) {
# fe and ff are not legal head bytes -- expect three replacement chars
$this->assertEquals(
bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ),
bin2hex( $clean ),
"Forbidden triplet $x should be rejected" );
- } elseif( $first > 0xc2 && $second < 0xc0 && $third < 0xc0 ) {
+ } elseif ( $first > 0xc2 && $second < 0xc0 && $third < 0xc0 ) {
$this->assertEquals(
bin2hex( $head . UTF8_REPLACEMENT . $tail ),
bin2hex( $clean ),
/** @todo document */
function testChunkRegression() {
# Check for regression against a chunking bug
- $text = "\x46\x55\xb8" .
- "\xdc\x96" .
- "\xee" .
- "\xe7" .
- "\x44" .
- "\xaa" .
- "\x2f\x25";
+ $text = "\x46\x55\xb8" .
+ "\xdc\x96" .
+ "\xee" .
+ "\xe7" .
+ "\x44" .
+ "\xaa" .
+ "\x2f\x25";
$expect = "\x46\x55\xef\xbf\xbd" .
- "\xdc\x96" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\x44" .
- "\xef\xbf\xbd" .
- "\x2f\x25";
+ "\xdc\x96" .
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\x44" .
+ "\xef\xbf\xbd" .
+ "\x2f\x25";
$this->assertEquals(
bin2hex( $expect ),
/** @todo document */
function testInterposeRegression() {
- $text = "\x4e\x30" .
- "\xb1" . # bad tail
- "\x3a" .
- "\x92" . # bad tail
- "\x62\x3a" .
- "\x84" . # bad tail
- "\x43" .
- "\xc6" . # bad head
- "\x3f" .
- "\x92" . # bad tail
- "\xad" . # bad tail
- "\x7d" .
- "\xd9\x95";
+ $text = "\x4e\x30" .
+ "\xb1" . # bad tail
+ "\x3a" .
+ "\x92" . # bad tail
+ "\x62\x3a" .
+ "\x84" . # bad tail
+ "\x43" .
+ "\xc6" . # bad head
+ "\x3f" .
+ "\x92" . # bad tail
+ "\xad" . # bad tail
+ "\x7d" .
+ "\xd9\x95";
$expect = "\x4e\x30" .
- "\xef\xbf\xbd" .
- "\x3a" .
- "\xef\xbf\xbd" .
- "\x62\x3a" .
- "\xef\xbf\xbd" .
- "\x43" .
- "\xef\xbf\xbd" .
- "\x3f" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\x7d" .
- "\xd9\x95";
+ "\xef\xbf\xbd" .
+ "\x3a" .
+ "\xef\xbf\xbd" .
+ "\x62\x3a" .
+ "\xef\xbf\xbd" .
+ "\x43" .
+ "\xef\xbf\xbd" .
+ "\x3f" .
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\x7d" .
+ "\xd9\x95";
$this->assertEquals(
bin2hex( $expect ),
/** @todo document */
function testOverlongRegression() {
- $text = "\x67" .
- "\x1a" . # forbidden ascii
- "\xea" . # bad head
- "\xc1\xa6" . # overlong sequence
- "\xad" . # bad tail
- "\x1c" . # forbidden ascii
- "\xb0" . # bad tail
- "\x3c" .
- "\x9e"; # bad tail
+ $text = "\x67" .
+ "\x1a" . # forbidden ascii
+ "\xea" . # bad head
+ "\xc1\xa6" . # overlong sequence
+ "\xad" . # bad tail
+ "\x1c" . # forbidden ascii
+ "\xb0" . # bad tail
+ "\x3c" .
+ "\x9e"; # bad tail
$expect = "\x67" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\x3c" .
- "\xef\xbf\xbd";
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\x3c" .
+ "\xef\xbf\xbd";
$this->assertEquals(
bin2hex( $expect ),
bin2hex( UtfNormal::cleanUp( $text ) ) );
/** @todo document */
function testSurrogateRegression() {
- $text = "\xed\xb4\x96" . # surrogate 0xDD16
- "\x83" . # bad tail
- "\xb4" . # bad tail
- "\xac"; # bad head
+ $text = "\xed\xb4\x96" . # surrogate 0xDD16
+ "\x83" . # bad tail
+ "\xb4" . # bad tail
+ "\xac"; # bad head
$expect = "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd";
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd";
$this->assertEquals(
bin2hex( $expect ),
bin2hex( UtfNormal::cleanUp( $text ) ) );
/** @todo document */
function testBomRegression() {
- $text = "\xef\xbf\xbe" . # U+FFFE, illegal char
- "\xb2" . # bad tail
- "\xef" . # bad head
- "\x59";
+ $text = "\xef\xbf\xbe" . # U+FFFE, illegal char
+ "\xb2" . # bad tail
+ "\xef" . # bad head
+ "\x59";
$expect = "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\xef\xbf\xbd" .
- "\x59";
+ "\xef\xbf\xbd" .
+ "\xef\xbf\xbd" .
+ "\x59";
$this->assertEquals(
bin2hex( $expect ),
bin2hex( UtfNormal::cleanUp( $text ) ) );
/** @todo document */
function testForbiddenRegression() {
- $text = "\xef\xbf\xbf"; # U+FFFF, illegal char
+ $text = "\xef\xbf\xbf"; # U+FFFF, illegal char
$expect = "\xef\xbf\xbd";
$this->assertEquals(
bin2hex( $expect ),
/** @todo document */
function testHangulRegression() {
$text = "\xed\x9c\xaf" . # Hangul char
- "\xe1\x87\x81"; # followed by another final jamo
- $expect = $text; # Should *not* change.
+ "\xe1\x87\x81"; # followed by another final jamo
+ $expect = $text; # Should *not* change.
$this->assertEquals(
bin2hex( $expect ),
bin2hex( UtfNormal::cleanUp( $text ) ) );