Adding some extra tests for the cleanUp() function
[lhc/web/wiklou.git] / includes / normal / CleanUpTest.php
1 <?php
2
3 #ini_set( 'memory_limit', '40M' );
4
5 require_once( 'PHPUnit.php' );
6 require_once( 'UtfNormal.php' );
7
/**
 * Test case for UtfNormal::cleanUp().
 *
 * Feeds plain ASCII, Latin text, and systematically generated one-, two- and
 * three-byte sequences through UtfNormal::cleanUp() and compares the result
 * against the expected output. Most comparisons are made on bin2hex() dumps
 * so that a failure message shows the raw bytes involved.
 */
class CleanUpTest extends PHPUnit_TestCase {
	/**
	 * Constructor (named after the class); passes the test name on to
	 * PHPUnit_TestCase.
	 *
	 * @param string $name Name of the test to run
	 */
	function CleanUpTest( $name ) {
		$this->PHPUnit_TestCase( $name );
	}

	/** No fixture is needed; intentionally empty. */
	function setUp() {
	}

	/** No fixture is needed; intentionally empty. */
	function tearDown() {
	}

	/** Plain printable ASCII must come back unchanged. */
	function testAscii() {
		$text = 'This is plain ASCII text.';
		$this->assertEquals( $text, UtfNormal::cleanUp( $text ) );
	}

	/** A NUL byte must be replaced by U+FFFD (bytes EF BF BD). */
	function testNull() {
		$text   = "a \x00 null";
		$expect = "a \xef\xbf\xbd null";
		$this->assertEquals(
			bin2hex( $expect ),
			bin2hex( UtfNormal::cleanUp( $text ) ) );
	}

	/** Text containing a precomposed U+00E9 (bytes C3 A9) must come back unchanged. */
	function testLatin() {
		$text = "L'\xc3\xa9cole";
		$this->assertEquals( $text, UtfNormal::cleanUp( $text ) );
	}

	/** "e" followed by combining acute U+0301 (bytes CC 81) must be composed to U+00E9. */
	function testLatinNormal() {
		$text   = "L'e\xcc\x81cole";
		$expect = "L'\xc3\xa9cole";
		$this->assertEquals( $expect, UtfNormal::cleanUp( $text ) );
	}

	/**
	 * Run every code point from 0 up to (but not including) UNICODE_MAX
	 * through cleanUp() individually.
	 *
	 * This test is *very* expensive! The "X" name prefix presumably keeps the
	 * test runner from picking it up automatically -- TODO confirm.
	 */
	function XtestAllChars() {
		$rep = UTF8_REPLACEMENT;
		global $utfCanonicalComp, $utfCanonicalDecomp;
		# NOTE(review): the loop stops before UNICODE_MAX itself although the
		# range check below uses "<= UNICODE_MAX" -- confirm whether the last
		# code point is meant to be covered.
		for( $i = 0x0; $i < UNICODE_MAX; $i++ ) {
			$char  = codepointToUtf8( $i );
			$clean = UtfNormal::cleanUp( $char );
			$x     = sprintf( "%04X", $i );
			# Progress output every 0x1000 code points
			if( $i % 0x1000 == 0 ) echo "U+$x\n";
			# Code points this test expects cleanUp() to accept: tab, LF, CR,
			# and everything above the C0 controls except the surrogate range,
			# U+FDD0..U+FDEF, U+FFFE and U+FFFF.
			if( $i == 0x0009 ||
				$i == 0x000a ||
				$i == 0x000d ||
				($i > 0x001f && $i < UNICODE_SURROGATE_FIRST) ||
				($i > UNICODE_SURROGATE_LAST && $i < 0xfdd0 ) ||
				($i > 0xfdef && $i < 0xfffe ) ||
				($i > 0xffff && $i <= UNICODE_MAX ) ) {
				if( isset( $utfCanonicalComp[$char] ) || isset( $utfCanonicalDecomp[$char] ) ) {
					# Character appears in the normalization tables:
					# cleanUp() must agree with NFC()
					$comp = UtfNormal::NFC( $char );
					$this->assertEquals(
						bin2hex( $comp ),
						bin2hex( $clean ),
						"U+$x should be decomposed" );
				} else {
					$this->assertEquals(
						bin2hex( $char ),
						bin2hex( $clean ),
						"U+$x should be intact" );
				}
			} else {
				# Everything else must be turned into the replacement character
				$this->assertEquals( bin2hex( $rep ), bin2hex( $clean ), $x );
			}
		}
	}

	/** Check every single byte value, bare and with an ASCII 'x' before and/or after it. */
	function testAllBytes() {
		$this->doTestBytes( '', '' );
		$this->doTestBytes( 'x', '' );
		$this->doTestBytes( '', 'x' );
		$this->doTestBytes( 'x', 'x' );
	}

	/**
	 * Run each byte value 0x00..0xFF, wrapped in $head and $tail, through
	 * cleanUp(). Tab, LF, CR and 0x20..0x7F are expected to be left intact;
	 * every other byte is expected to become UTF8_REPLACEMENT.
	 *
	 * @param string $head Text placed before the byte under test
	 * @param string $tail Text placed after the byte under test
	 */
	function doTestBytes( $head, $tail ) {
		for( $i = 0x0; $i < 256; $i++ ) {
			$char  = $head . chr( $i ) . $tail;
			$clean = UtfNormal::cleanUp( $char );
			$x     = sprintf( "%02X", $i );
			if( $i == 0x0009 ||
				$i == 0x000a ||
				$i == 0x000d ||
				($i > 0x001f && $i < 0x80) ) {
				$this->assertEquals(
					bin2hex( $char ),
					bin2hex( $clean ),
					"ASCII byte $x should be intact" );
			} else {
				$this->assertEquals(
					bin2hex( $head . UTF8_REPLACEMENT . $tail ),
					bin2hex( $clean ),
					"Forbidden byte $x should be rejected" );
			}
		}
	}

	/** Check two-byte combinations, bare and with an ASCII 'x' before and/or after them. */
	function testDoubleBytes() {
		$this->doTestDoubleBytes( '', '' );
		$this->doTestDoubleBytes( 'x', '' );
		$this->doTestDoubleBytes( '', 'x' );
		$this->doTestDoubleBytes( 'x', 'x' );
	}

	/**
	 * Run every pair of a first byte 0xC0..0xFF and a second byte 0x80..0xFF,
	 * wrapped in $head and $tail, through cleanUp().
	 *
	 * @param string $head Text placed before the pair under test
	 * @param string $tail Text placed after the pair under test
	 */
	function doTestDoubleBytes( $head, $tail ) {
		for( $first = 0xc0; $first < 0x100; $first++ ) {
			for( $second = 0x80; $second < 0x100; $second++ ) {
				$char  = $head . chr( $first ) . chr( $second ) . $tail;
				$clean = UtfNormal::cleanUp( $char );
				$x     = sprintf( "%02X,%02X", $first, $second );
				if( $first > 0xc1 &&
					$first < 0xe0 &&
					$second < 0xc0 ) {
					# Head byte C2..DF followed by a byte in 80..BF:
					# result must match plain NFC normalization
					$this->assertEquals(
						bin2hex( UtfNormal::NFC( $char ) ),
						bin2hex( $clean ),
						"Pair $x should be intact" );
				} elseif( $first > 0xfd || $second > 0xbf ) {
					# fe and ff are not legal head bytes -- expect two replacement chars
					$this->assertEquals(
						bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ),
						bin2hex( $clean ),
						"Forbidden pair $x should be rejected" );
				} else {
					# Remaining pairs are expected to collapse into a single
					# replacement char
					$this->assertEquals(
						bin2hex( $head . UTF8_REPLACEMENT . $tail ),
						bin2hex( $clean ),
						"Forbidden pair $x should be rejected" );
				}
			}
		}
	}

	/**
	 * Check three-byte combinations. Only the bare variant is enabled; the
	 * variants with surrounding text are commented out.
	 */
	function testTripleBytes() {
		$this->doTestTripleBytes( '', '' );
		#$this->doTestTripleBytes( 'x', '' );
		#$this->doTestTripleBytes( '', 'x' );
		#$this->doTestTripleBytes( 'x', 'x' );
	}

	/**
	 * Run triplets of a first byte 0xC0..0xFF, a second byte 0x80..0xFF and a
	 * third byte, wrapped in $head and $tail, through cleanUp(). The third
	 * byte is currently restricted to 0x80; the loop over the full range is
	 * commented out.
	 *
	 * @param string $head Text placed before the triplet under test
	 * @param string $tail Text placed after the triplet under test
	 */
	function doTestTripleBytes( $head, $tail ) {
		for( $first = 0xc0; $first < 0x100; $first++ ) {
			for( $second = 0x80; $second < 0x100; $second++ ) {
				#for( $third = 0x80; $third < 0x100; $third++ ) {
				for( $third = 0x80; $third < 0x81; $third++ ) {
					$char  = $head . chr( $first ) . chr( $second ) . chr( $third ) . $tail;
					$clean = UtfNormal::cleanUp( $char );
					$x     = sprintf( "%02X,%02X,%02X", $first, $second, $third );
					if( $first >= 0xe0 &&
						$first < 0xf0 &&
						$second < 0xc0 &&
						$third < 0xc0 ) {
						# Head byte E0..EF with two bytes in 80..BF following
						if( $first == 0xe0 && $second < 0xa0 ) {
							# Expectation includes $head/$tail like every other
							# branch, so the wrapped variants can be enabled
							$this->assertEquals(
								bin2hex( $head . UTF8_REPLACEMENT . $tail ),
								bin2hex( $clean ),
								"Overlong triplet $x should be rejected" );
						} elseif( $first == 0xed &&
							( chr( $first ) . chr( $second ) . chr( $third )) >= UTF8_SURROGATE_FIRST ) {
							# Byte-wise string comparison against the first
							# surrogate's encoding
							$this->assertEquals(
								bin2hex( $head . UTF8_REPLACEMENT . $tail ),
								bin2hex( $clean ),
								"Surrogate triplet $x should be rejected" );
						} else {
							$this->assertEquals(
								bin2hex( UtfNormal::NFC( $char ) ),
								bin2hex( $clean ),
								"Triplet $x should be intact" );
						}
					} elseif( $first > 0xc1 && $first < 0xe0 && $second < 0xc0 ) {
						# First two bytes form a two-byte pair; the third is a stray byte
						$this->assertEquals(
							bin2hex( $head . UtfNormal::NFC( chr( $first ) . chr( $second ) ) . UTF8_REPLACEMENT . $tail ),
							bin2hex( $clean ),
							"Valid 2-byte $x + broken tail" );
					} elseif( $second > 0xc1 && $second < 0xe0 && $third < 0xc0 ) {
						# First byte is stray; the last two form a two-byte pair
						$this->assertEquals(
							bin2hex( $head . UTF8_REPLACEMENT . UtfNormal::NFC( chr( $second ) . chr( $third ) ) . $tail ),
							bin2hex( $clean ),
							"Broken head + valid 2-byte $x" );
					} elseif( $first > 0xfd && ( ( $second > 0xbf && $third > 0xbf ) || ($second < 0xc0 && $third < 0xc0 ) || ($second > 0xfd ) || ($third > 0xfd) ) ) {
						# fe and ff are not legal head bytes -- expect three replacement chars
						$this->assertEquals(
							bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ),
							bin2hex( $clean ),
							"Forbidden triplet $x should be rejected" );
					} elseif( $second < 0xc0 && $third < 0xc0 ) {
						# Both trailing bytes are in 80..BF: expect a single
						# replacement char. (This used to test $second twice;
						# with the third byte pinned to 0x80 the outcome is
						# the same, but the full third-byte loop needs the
						# $third check.)
						$this->assertEquals(
							bin2hex( $head . UTF8_REPLACEMENT . $tail ),
							bin2hex( $clean ),
							"Forbidden triplet $x should be rejected" );
					} else {
						$this->assertEquals(
							bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ),
							bin2hex( $clean ),
							"Forbidden triplet $x should be rejected" );
					}
				}
			}
		}
	}

}
211
212
# Build a suite from every test method of CleanUpTest, run it and print the
# textual result. Plain assignment is used instead of "=& new", which is
# deprecated as of PHP 5.3 and a parse error as of PHP 7.0.
$suite = new PHPUnit_TestSuite( 'CleanUpTest' );
$result = PHPUnit::run( $suite );
echo $result->toString();
216
217 ?>