f2e41bdc2676321f5553853307ddd8e86efe2a93
[lhc/web/wiklou.git] / tests / phpunit / includes / tidy / BalancerTest.php
1 <?php
2
/**
 * Feeds the test cases stored in html5lib-tests.json through
 * MediaWiki\Tidy\Balancer and compares the balanced output with the
 * expected HTML recorded for each case.
 */
class BalancerTest extends MediaWikiTestCase {
	/** @var MediaWiki\Tidy\Balancer Balancer instance created in setUp() */
	private $balancer;

	/**
	 * Anything that needs to happen before your tests should go here.
	 */
	protected function setUp() {
		// Be sure to call the parent setUp and tearDown functions.
		// This makes sure that all the various cleanup and restorations
		// happen as they should (including the restoration for setMwGlobals).
		parent::setUp();
		$this->balancer = new MediaWiki\Tidy\Balancer( [
			'strict' => false, /* not strict */
			'allowedHtmlElements' => null, /* no sanitization */
			'tidyCompat' => false, /* standard parser */
			'allowComments' => true, /* comment parsing */
		] );
	}

	/**
	 * Any cleanup you need to do should go here.
	 */
	protected function tearDown() {
		parent::tearDown();
	}

	/**
	 * Balance one input snippet and compare it with the expected HTML.
	 *
	 * @covers MediaWiki\Tidy\Balancer::balance
	 * @dataProvider provideBalancerTests
	 *
	 * @param string $description Label used as the assertion message
	 * @param string $input HTML handed to the balancer
	 * @param string $expected HTML the balancer is expected to produce
	 */
	public function testBalancer( $description, $input, $expected ) {
		$output = $this->balancer->balance( $input );

		// Ignore self-closing tags
		$output = preg_replace( '/\s*\/>/', '>', $output );

		// Both sides are HTML strings: compare them strictly so that
		// numeric-looking output can never be matched loosely.
		$this->assertSame( $expected, $output, $description );
	}

	/**
	 * Build the data sets for testBalancer() from html5lib-tests.json,
	 * leaving out the cases that use constructs the balancer does not
	 * support.
	 *
	 * @return array[] List of [ description, input, expected output ]
	 * @throws RuntimeException If the fixture cannot be read or decoded
	 */
	public static function provideBalancerTests() {
		// Get the tests from html5lib-tests.json
		$json = self::loadHtml5libTests();
		// Munge this slightly into the format phpunit expects
		// for providers, and filter out HTML constructs which
		// the balancer doesn't support.
		$tests = [];
		$start = '<html><head></head><body>';
		$end = '</body></html>';
		foreach ( $json as $filename => $cases ) {
			foreach ( $cases as $case ) {
				$html = $case['document']['html'];
				if (
					substr( $html, 0, strlen( $start ) ) !== $start ||
					substr( $html, -strlen( $end ) ) !== $end
				) {
					// Skip tests which involve stuff in the <head> or
					// weird doctypes.
					continue;
				}
				// We used to do this:
				// $html = substr( $html, strlen( $start ), -strlen( $end ) );
				// But now we use a different field in the test case,
				// which reports how domino would parse this case in a
				// no-quirks <body> context. (The original test case may
				// have had a different context, or relied on quirks mode.)
				$html = $case['document']['noQuirksBodyHtml'];
				// Normalize the case of the SVG foreignObject element name.
				$html = str_replace( 'foreignObject', 'foreignobject', $html );

				if (
					isset( $case['document']['props']['comment'] ) &&
					preg_match( ',<!--[^>]*<,', $html )
				) {
					// Skip tests which include HTML comments containing
					// the < character, which we don't support.
					continue;
				}
				if ( strpos( $case['data'], '<![CDATA[' ) !== false ) {
					// Skip tests involving <![CDATA[ ]]> quoting.
					continue;
				}
				if ( stripos( $case['data'], '<!DOCTYPE' ) !== false ) {
					// Skip tests involving doctypes.
					continue;
				}
				if ( preg_match( ',</?(html|head|body|frame|plaintext)>|<rdar:|<isindex,i', $case['data'] ) ) {
					// Skip tests involving some literal tags, which are
					// unsupported but don't show up in the expected output.
					continue;
				}
				if (
					isset( $case['document']['props']['tags']['iframe'] ) ||
					isset( $case['document']['props']['tags']['noembed'] ) ||
					isset( $case['document']['props']['tags']['noscript'] ) ||
					isset( $case['document']['props']['tags']['script'] ) ||
					isset( $case['document']['props']['tags']['svg script'] ) ||
					isset( $case['document']['props']['tags']['svg title'] ) ||
					isset( $case['document']['props']['tags']['title'] ) ||
					isset( $case['document']['props']['tags']['xmp'] )
				) {
					// Skip tests with unsupported tags which *do* show
					// up in the expected output.
					continue;
				}
				if (
					$filename === 'entities01.dat' ||
					$filename === 'entities02.dat' ||
					preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $case['data'] ) ||
					preg_match( '/^(&|&#|&#X|&#x|&#45|&x-test|&AMP)$/', $case['data'] )
				) {
					// Skip tests involving entity encoding.
					continue;
				}
				if (
					isset( $case['document']['props']['tagWithLt'] ) ||
					isset( $case['document']['props']['attrWithFunnyChar'] ) ||
					preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
					preg_match( ':</p<p>:', $case['data'] )
				) {
					// Skip tests with funny tag or attribute names,
					// which are really tests of the HTML tokenizer, not
					// the tree builder.
					continue;
				}
				if (
					stripos( $case['data'], 'encoding=" text/html "' ) !== false
				) {
					// The Sanitizer normalizes whitespace in attribute
					// values, which makes this test case invalid.
					continue;
				}
				if ( $filename === 'plain-text-unsafe.dat' ) {
					// Skip tests with ASCII null, etc.
					continue;
				}
				$tests[] = [
					$filename, # use better description?
					$case['data'],
					$html
				];
			}
		}
		return $tests;
	}

	/**
	 * Read and decode the html5lib-tests.json fixture located next to
	 * this file.
	 *
	 * Failing loudly here keeps a missing or corrupt fixture from
	 * silently turning into an empty data set.
	 *
	 * @return array Decoded fixture: lists of test cases keyed by file name
	 * @throws RuntimeException If the fixture cannot be read or decoded
	 */
	private static function loadHtml5libTests() {
		$path = __DIR__ . '/html5lib-tests.json';
		// Check readability first so a missing file raises our exception
		// instead of a bare PHP warning from file_get_contents().
		$raw = is_readable( $path ) ? file_get_contents( $path ) : false;
		if ( $raw === false ) {
			throw new RuntimeException( "Cannot read test fixture: $path" );
		}
		$json = json_decode( $raw, true );
		if ( !is_array( $json ) ) {
			throw new RuntimeException( "Test fixture is not valid JSON: $path" );
		}
		return $json;
	}
}