X-Git-Url: https://git.cyclocoop.org/%242?a=blobdiff_plain;f=tests%2Fphpunit%2Fincludes%2Fnormal%2FCleanUpTest.php;h=86fd95c28395cf264dd98f8f4c3cf6ef9c84f352;hb=49dfbc59d01f7485a50fce5b0e756124bb307699;hp=d5ad18d85693f53a825967ee840e0bef4d95b9f3;hpb=de04f37ef1748198716668c648ab3d2679650bcb;p=lhc%2Fweb%2Fwiklou.git diff --git a/tests/phpunit/includes/normal/CleanUpTest.php b/tests/phpunit/includes/normal/CleanUpTest.php index d5ad18d856..86fd95c283 100644 --- a/tests/phpunit/includes/normal/CleanUpTest.php +++ b/tests/phpunit/includes/normal/CleanUpTest.php @@ -65,19 +65,24 @@ class CleanUpTest extends MediaWikiTestCase { */ function XtestAllChars() { $rep = UTF8_REPLACEMENT; - for( $i = 0x0; $i < UNICODE_MAX; $i++ ) { + for ( $i = 0x0; $i < UNICODE_MAX; $i++ ) { $char = codepointToUtf8( $i ); $clean = UtfNormal::cleanUp( $char ); $x = sprintf( "%04X", $i ); - if( $i % 0x1000 == 0 ) echo "U+$x\n"; - if( $i == 0x0009 || - $i == 0x000a || - $i == 0x000d || - ($i > 0x001f && $i < UNICODE_SURROGATE_FIRST) || - ($i > UNICODE_SURROGATE_LAST && $i < 0xfffe ) || - ($i > 0xffff && $i <= UNICODE_MAX ) ) { - if( isset( UtfNormal::$utfCanonicalComp[$char] ) || isset( UtfNormal::$utfCanonicalDecomp[$char] ) ) { - $comp = UtfNormal::NFC( $char ); + + if ( $i % 0x1000 == 0 ) { + echo "U+$x\n"; + } + + if ( $i == 0x0009 || + $i == 0x000a || + $i == 0x000d || + ( $i > 0x001f && $i < UNICODE_SURROGATE_FIRST ) || + ( $i > UNICODE_SURROGATE_LAST && $i < 0xfffe ) || + ( $i > 0xffff && $i <= UNICODE_MAX ) + ) { + if ( isset( UtfNormal::$utfCanonicalComp[$char] ) || isset( UtfNormal::$utfCanonicalDecomp[$char] ) ) { + $comp = UtfNormal::NFC( $char ); $this->assertEquals( bin2hex( $comp ), bin2hex( $clean ), @@ -104,26 +109,32 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function doTestBytes( $head, $tail ) { - for( $i = 0x0; $i < 256; $i++ ) { + for ( $i = 0x0; $i < 256; $i++ ) { $char = $head . chr( $i ) . $tail; $clean = UtfNormal::cleanUp( $char ); $x = sprintf( "%02X", $i ); - if( $i == 0x0009 || - $i == 0x000a || - $i == 0x000d || - ($i > 0x001f && $i < 0x80) ) { + + if ( $i == 0x0009 || + $i == 0x000a || + $i == 0x000d || + ( $i > 0x001f && $i < 0x80 ) + ) { $this->assertEquals( bin2hex( $char ), bin2hex( $clean ), "ASCII byte $x should be intact" ); - if( $char != $clean ) return; + if ( $char != $clean ) { + return; + } } else { $norm = $head . UTF8_REPLACEMENT . $tail; $this->assertEquals( bin2hex( $norm ), bin2hex( $clean ), "Forbidden byte $x should be rejected" ); - if( $norm != $clean ) return; + if ( $norm != $clean ) { + return; + } } } } @@ -140,35 +151,42 @@ class CleanUpTest extends MediaWikiTestCase { * @todo document */ function doTestDoubleBytes( $head, $tail ) { - for( $first = 0xc0; $first < 0x100; $first+=2 ) { - for( $second = 0x80; $second < 0x100; $second+=2 ) { + for ( $first = 0xc0; $first < 0x100; $first += 2 ) { + for ( $second = 0x80; $second < 0x100; $second += 2 ) { $char = $head . chr( $first ) . chr( $second ) . $tail; $clean = UtfNormal::cleanUp( $char ); $x = sprintf( "%02X,%02X", $first, $second ); - if( $first > 0xc1 && - $first < 0xe0 && - $second < 0xc0 ) { - $norm = UtfNormal::NFC( $char ); + if ( $first > 0xc1 && + $first < 0xe0 && + $second < 0xc0 + ) { + $norm = UtfNormal::NFC( $char ); $this->assertEquals( bin2hex( $norm ), bin2hex( $clean ), "Pair $x should be intact" ); - if( $norm != $clean ) return; - } elseif( $first > 0xfd || $second > 0xbf ) { + if ( $norm != $clean ) { + return; + } + } elseif ( $first > 0xfd || $second > 0xbf ) { # fe and ff are not legal head bytes -- expect two replacement chars $norm = $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail; $this->assertEquals( bin2hex( $norm ), bin2hex( $clean ), "Forbidden pair $x should be rejected" ); - if( $norm != $clean ) return; + if ( $norm != $clean ) { + return; + } } else { $norm = $head . UTF8_REPLACEMENT . $tail; $this->assertEquals( bin2hex( $norm ), bin2hex( $clean ), "Forbidden pair $x should be rejected" ); - if( $norm != $clean ) return; + if ( $norm != $clean ) { + return; + } } } } @@ -184,24 +202,27 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function doTestTripleBytes( $head, $tail ) { - for( $first = 0xc0; $first < 0x100; $first+=2 ) { - for( $second = 0x80; $second < 0x100; $second+=2 ) { + for ( $first = 0xc0; $first < 0x100; $first += 2 ) { + for ( $second = 0x80; $second < 0x100; $second += 2 ) { #for( $third = 0x80; $third < 0x100; $third++ ) { - for( $third = 0x80; $third < 0x81; $third++ ) { + for ( $third = 0x80; $third < 0x81; $third++ ) { $char = $head . chr( $first ) . chr( $second ) . chr( $third ) . $tail; $clean = UtfNormal::cleanUp( $char ); $x = sprintf( "%02X,%02X,%02X", $first, $second, $third ); - if( $first >= 0xe0 && + + if ( $first >= 0xe0 && $first < 0xf0 && $second < 0xc0 && - $third < 0xc0 ) { - if( $first == 0xe0 && $second < 0xa0 ) { + $third < 0xc0 + ) { + if ( $first == 0xe0 && $second < 0xa0 ) { $this->assertEquals( bin2hex( $head . UTF8_REPLACEMENT . $tail ), bin2hex( $clean ), "Overlong triplet $x should be rejected" ); - } elseif( $first == 0xed && - ( chr( $first ) . chr( $second ) . chr( $third )) >= UTF8_SURROGATE_FIRST ) { + } elseif ( $first == 0xed && + ( chr( $first ) . chr( $second ) . chr( $third ) ) >= UTF8_SURROGATE_FIRST + ) { $this->assertEquals( bin2hex( $head . UTF8_REPLACEMENT . $tail ), bin2hex( $clean ), @@ -212,27 +233,28 @@ class CleanUpTest extends MediaWikiTestCase { bin2hex( $clean ), "Triplet $x should be intact" ); } - } elseif( $first > 0xc1 && $first < 0xe0 && $second < 0xc0 ) { + } elseif ( $first > 0xc1 && $first < 0xe0 && $second < 0xc0 ) { $this->assertEquals( bin2hex( UtfNormal::NFC( $head . chr( $first ) . chr( $second ) ) . UTF8_REPLACEMENT . $tail ), bin2hex( $clean ), "Valid 2-byte $x + broken tail" ); - } elseif( $second > 0xc1 && $second < 0xe0 && $third < 0xc0 ) { + } elseif ( $second > 0xc1 && $second < 0xe0 && $third < 0xc0 ) { $this->assertEquals( bin2hex( $head . UTF8_REPLACEMENT . UtfNormal::NFC( chr( $second ) . chr( $third ) . $tail ) ), bin2hex( $clean ), "Broken head + valid 2-byte $x" ); - } elseif( ( $first > 0xfd || $second > 0xfd ) && - ( ( $second > 0xbf && $third > 0xbf ) || - ( $second < 0xc0 && $third < 0xc0 ) || - ( $second > 0xfd ) || - ( $third > 0xfd ) ) ) { + } elseif ( ( $first > 0xfd || $second > 0xfd ) && + ( ( $second > 0xbf && $third > 0xbf ) || + ( $second < 0xc0 && $third < 0xc0 ) || + ( $second > 0xfd ) || + ( $third > 0xfd ) ) + ) { # fe and ff are not legal head bytes -- expect three replacement chars $this->assertEquals( bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ), bin2hex( $clean ), "Forbidden triplet $x should be rejected" ); - } elseif( $first > 0xc2 && $second < 0xc0 && $third < 0xc0 ) { + } elseif ( $first > 0xc2 && $second < 0xc0 && $third < 0xc0 ) { $this->assertEquals( bin2hex( $head . UTF8_REPLACEMENT . $tail ), bin2hex( $clean ), @@ -251,20 +273,20 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function testChunkRegression() { # Check for regression against a chunking bug - $text = "\x46\x55\xb8" . - "\xdc\x96" . - "\xee" . - "\xe7" . - "\x44" . - "\xaa" . - "\x2f\x25"; + $text = "\x46\x55\xb8" . + "\xdc\x96" . + "\xee" . + "\xe7" . + "\x44" . + "\xaa" . + "\x2f\x25"; $expect = "\x46\x55\xef\xbf\xbd" . - "\xdc\x96" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\x44" . - "\xef\xbf\xbd" . - "\x2f\x25"; + "\xdc\x96" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\x44" . + "\xef\xbf\xbd" . + "\x2f\x25"; $this->assertEquals( bin2hex( $expect ), @@ -273,33 +295,33 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function testInterposeRegression() { - $text = "\x4e\x30" . - "\xb1" . # bad tail - "\x3a" . - "\x92" . # bad tail - "\x62\x3a" . - "\x84" . # bad tail - "\x43" . - "\xc6" . # bad head - "\x3f" . - "\x92" . # bad tail - "\xad" . # bad tail - "\x7d" . - "\xd9\x95"; + $text = "\x4e\x30" . + "\xb1" . # bad tail + "\x3a" . + "\x92" . # bad tail + "\x62\x3a" . + "\x84" . # bad tail + "\x43" . + "\xc6" . # bad head + "\x3f" . + "\x92" . # bad tail + "\xad" . # bad tail + "\x7d" . + "\xd9\x95"; $expect = "\x4e\x30" . - "\xef\xbf\xbd" . - "\x3a" . - "\xef\xbf\xbd" . - "\x62\x3a" . - "\xef\xbf\xbd" . - "\x43" . - "\xef\xbf\xbd" . - "\x3f" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\x7d" . - "\xd9\x95"; + "\xef\xbf\xbd" . + "\x3a" . + "\xef\xbf\xbd" . + "\x62\x3a" . + "\xef\xbf\xbd" . + "\x43" . + "\xef\xbf\xbd" . + "\x3f" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\x7d" . + "\xd9\x95"; $this->assertEquals( bin2hex( $expect ), @@ -308,24 +330,24 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function testOverlongRegression() { - $text = "\x67" . - "\x1a" . # forbidden ascii - "\xea" . # bad head - "\xc1\xa6" . # overlong sequence - "\xad" . # bad tail - "\x1c" . # forbidden ascii - "\xb0" . # bad tail - "\x3c" . - "\x9e"; # bad tail + $text = "\x67" . + "\x1a" . # forbidden ascii + "\xea" . # bad head + "\xc1\xa6" . # overlong sequence + "\xad" . # bad tail + "\x1c" . # forbidden ascii + "\xb0" . # bad tail + "\x3c" . + "\x9e"; # bad tail $expect = "\x67" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\x3c" . - "\xef\xbf\xbd"; + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\x3c" . + "\xef\xbf\xbd"; $this->assertEquals( bin2hex( $expect ), bin2hex( UtfNormal::cleanUp( $text ) ) ); @@ -333,14 +355,14 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function testSurrogateRegression() { - $text = "\xed\xb4\x96" . # surrogate 0xDD16 - "\x83" . # bad tail - "\xb4" . # bad tail - "\xac"; # bad head + $text = "\xed\xb4\x96" . # surrogate 0xDD16 + "\x83" . # bad tail + "\xb4" . # bad tail + "\xac"; # bad head $expect = "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd"; + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\xef\xbf\xbd"; $this->assertEquals( bin2hex( $expect ), bin2hex( UtfNormal::cleanUp( $text ) ) ); @@ -348,14 +370,14 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function testBomRegression() { - $text = "\xef\xbf\xbe" . # U+FFFE, illegal char - "\xb2" . # bad tail - "\xef" . # bad head - "\x59"; + $text = "\xef\xbf\xbe" . # U+FFFE, illegal char + "\xb2" . # bad tail + "\xef" . # bad head + "\x59"; $expect = "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\xef\xbf\xbd" . - "\x59"; + "\xef\xbf\xbd" . + "\xef\xbf\xbd" . + "\x59"; $this->assertEquals( bin2hex( $expect ), bin2hex( UtfNormal::cleanUp( $text ) ) ); @@ -363,7 +385,7 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function testForbiddenRegression() { - $text = "\xef\xbf\xbf"; # U+FFFF, illegal char + $text = "\xef\xbf\xbf"; # U+FFFF, illegal char $expect = "\xef\xbf\xbd"; $this->assertEquals( bin2hex( $expect ), @@ -373,8 +395,8 @@ class CleanUpTest extends MediaWikiTestCase { /** @todo document */ function testHangulRegression() { $text = "\xed\x9c\xaf" . # Hangul char - "\xe1\x87\x81"; # followed by another final jamo - $expect = $text; # Should *not* change. + "\xe1\x87\x81"; # followed by another final jamo + $expect = $text; # Should *not* change. $this->assertEquals( bin2hex( $expect ), bin2hex( UtfNormal::cleanUp( $text ) ) );