tests/phpunit/includes/tidy/BalancerTest.php

   1 <?php
   2
   3 class BalancerTest extends MediaWikiTestCase {
   4         private $balancer;
   5
   6         /**
   7          * Anything that needs to happen before your tests should go here.
   8          */
   9         protected function setUp() {
  10                 // Be sure to do call the parent setup and teardown functions.
  11                 // This makes sure that all the various cleanup and restorations
  12                 // happen as they should (including the restoration for setMwGlobals).
  13                 parent::setUp();
  14                 $this->balancer = new MediaWiki\Tidy\Balancer( [
  15                         'strict' => false, /* not strict */
  16                         'allowedHtmlElements' => null, /* no sanitization */
  17                         'tidyCompat' => false, /* standard parser */
  18                 ] );
  19         }
  20
  21         /**
  22          * Anything cleanup you need to do should go here.
  23          */
  24         protected function tearDown() {
  25                 parent::tearDown();
  26         }
  27
  28         /**
  29          * @covers Balancer::balance
  30          * @dataProvider provideBalancerTests
  31          */
  32         public function testBalancer( $description, $input, $expected ) {
  33                 $output = $this->balancer->balance( $input );
  34                 $this->assertEquals( $expected, $output, $description );
  35         }
  36
  37         public static function provideBalancerTests() {
  38                 // Get the tests from html5lib-tests.json
  39                 $json = json_decode( file_get_contents(
  40                         __DIR__ . '/html5lib-tests.json'
  41                 ), true );
  42                 // Munge this slightly into the format phpunit expects
  43                 // for providers, and filter out HTML constructs which
  44                 // the balancer doesn't support.
  45                 $tests = [];
  46                 $start = '<html><head></head><body>';
  47                 $end = '</body></html>';
  48                 foreach ( $json as $filename => $cases ) {
  49                         foreach ( $cases as $case ) {
  50                                 $html = $case['document']['html'];
  51                                 if (
  52                                         substr( $html, 0, strlen( $start ) ) !== $start ||
  53                                         substr( $html, -strlen( $end ) ) !== $end
  54                                 ) {
  55                                         // Skip tests which involve stuff in the <head> or
  56                                         // weird doctypes.
  57                                         continue;
  58                                 }
  59                                 // We used to do this:
  60                                 //   $html = substr( $html, strlen( $start ), -strlen( $end ) );
  61                                 // But now we use a different field in the test case,
  62                                 // which reports how domino would parse this case in a
  63                                 // no-quirks <body> context.  (The original test case may
  64                                 // have had a different context, or relied on quirks mode.)
  65                                 $html = $case['document']['noQuirksBodyHtml'];
  66                                 // Normalize case of SVG attributes.
  67                                 $html = str_replace( 'foreignObject', 'foreignobject', $html );
  68                                 // The Sanitizer sorts attributes.
  69                                 $html = preg_replace( '/(size="[^"]+") (id="[^"]+")/', '$2 $1', $html );
  70
  71                                 if ( isset( $case['document']['props']['comment'] ) ) {
  72                                         // Skip tests which include HTML comments, which
  73                                         // the balancer requires to have been stripped.
  74                                         continue;
  75                                 }
  76                                 if ( strpos( $case['data'], '<![CDATA[' ) !== false ) {
  77                                         // Skip tests involving <![CDATA[ ]]> quoting.
  78                                         continue;
  79                                 }
  80                                 if ( stripos( $case['data'], '<!DOCTYPE' ) !== false ) {
  81                                         // Skip tests involving doctypes.
  82                                         continue;
  83                                 }
  84                                 if ( preg_match( ',</?(html|head|body|frame|plaintext)>|<rdar:,i', $case['data'] ) ) {
  85                                         // Skip tests involving some literal tags, which are
  86                                         // unsupported but don't show up in the expected output.
  87                                         continue;
  88                                 }
  89                                 if (
  90                                         isset( $case['document']['props']['tags']['form'] ) ||
  91                                         isset( $case['document']['props']['tags']['iframe'] ) ||
  92                                         isset( $case['document']['props']['tags']['noembed'] ) ||
  93                                         isset( $case['document']['props']['tags']['noscript'] ) ||
  94                                         isset( $case['document']['props']['tags']['script'] ) ||
  95                                         isset( $case['document']['props']['tags']['select'] ) ||
  96                                         isset( $case['document']['props']['tags']['svg script'] ) ||
  97                                         isset( $case['document']['props']['tags']['svg title'] ) ||
  98                                         isset( $case['document']['props']['tags']['textarea'] ) ||
  99                                         isset( $case['document']['props']['tags']['title'] ) ||
 100                                         isset( $case['document']['props']['tags']['xmp'] )
 101                                 ) {
 102                                         // Skip tests with unsupported tags which *do* show
 103                                         // up in the expected output.
 104                                         continue;
 105                                 }
 106                                 if (
 107                                         $filename === 'entities01.dat' ||
 108                                         $filename === 'entities02.dat' ||
 109                                         preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $case['data'] ) ||
 110                                         preg_match( '/^(&|&#|&#X|&#x|&#45|&x-test|&AMP)$/', $case['data'] )
 111                                 ) {
 112                                         // Skip tests involving entity encoding.
 113                                         continue;
 114                                 }
 115                                 if (
 116                                         isset( $case['document']['props']['tagWithLt'] ) ||
 117                                         isset( $case['document']['props']['attrWithFunnyChar'] ) ||
 118                                         preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
 119                                         preg_match( ':</p<p>:', $case['data'] )
 120                                 ) {
 121                                         // Skip tests with funny tag or attribute names,
 122                                         // which are really tests of the HTML tokenizer, not
 123                                         // the tree builder.
 124                                         continue;
 125                                 }
 126                                 if (
 127                                         stripos( $case['data'], 'encoding=" text/html "' ) !== false
 128                                 ) {
 129                                         // The Sanitizer normalizes whitespace in attribute
 130                                         // values, which makes this test case invalid.
 131                                         continue;
 132                                 }
 133                                 if ( $filename === 'plain-text-unsafe.dat' ) {
 134                                         // Skip tests with ASCII null, etc.
 135                                         continue;
 136                                 }
 137                                 $tests[] = [
 138                                         $filename, # use better description?
 139                                         $case['data'],
 140                                         $html
 141                                 ];
 142                         }
 143                 }
 144                 return $tests;
 145         }
 146 }