From 019e8ce29d5e94597bf7f50a2bc6ba8614e2bbe9 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Sun, 18 May 2014 00:17:25 -0700 Subject: [PATCH] Run some parser tests with tidy. Note that the old parser tests helper function `tidy()` never actually did anything, since $wgUseTidy was forced to `false` in the parser test setup. Remove this unused code, and replace it with our new tidy support. Allows new parser test sections: 'html+tidy' denotes "tidied" HTML (open tags closed and other fixups to original wikitext markup) which should be applicable to any parser. 'html/php+tidy' is output specific to the PHP parser with tidy turned on. The Parsoid backend will use the 'html/parsoid' section if present, but if it is not present it will fall back first to the 'html+tidy' section and, if that is also missing, to the 'html' section. Note that 'tidy' has a large number of open bugs (see https://bugzilla.wikimedia.org/show_bug.cgi?id=2542 ) and so in some cases we deliberately do *not* use 'html+tidy' or 'html/php+tidy' clauses, in order to avoid documenting broken output. In these cases, there is no broken HTML in the PHP parser output, and so (in theory) the 'html' and 'html+tidy' sections would be identical (that is, if tidy didn't have bugs). 
Change-Id: Iba45f38774b221522dc3b6ae2d1312fb79f8f41f --- tests/TestsAutoLoader.php | 1 + tests/parser/parserTest.inc | 53 +- tests/parser/parserTests.txt | 649 ++++++++++++++++-- .../phpunit/includes/parser/NewParserTest.php | 50 +- tests/testHelpers.inc | 172 +++-- 5 files changed, 759 insertions(+), 166 deletions(-) diff --git a/tests/TestsAutoLoader.php b/tests/TestsAutoLoader.php index b56890badb..664e7f538b 100644 --- a/tests/TestsAutoLoader.php +++ b/tests/TestsAutoLoader.php @@ -35,6 +35,7 @@ $wgAutoloadClasses += array( 'TestRecorder' => "$testDir/testHelpers.inc", 'ITestRecorder' => "$testDir/testHelpers.inc", 'DjVuSupport' => "$testDir/testHelpers.inc", + 'TidySupport' => "$testDir/testHelpers.inc", # tests/phpunit 'MediaWikiTestCase' => "$testDir/phpunit/MediaWikiTestCase.php", diff --git a/tests/parser/parserTest.inc b/tests/parser/parserTest.inc index e76b9df455..a2a3867248 100644 --- a/tests/parser/parserTest.inc +++ b/tests/parser/parserTest.inc @@ -69,6 +69,11 @@ class ParserTest { */ private $djVuSupport; + /** + * @var TidySupport + */ + private $tidySupport; + private $maxFuzzTestLength = 300; private $fuzzSeed = 0; private $memoryLimit = 50; @@ -137,6 +142,10 @@ class ParserTest { $this->runParsoid = isset( $options['run-parsoid'] ); $this->djVuSupport = new DjVuSupport(); + $this->tidySupport = new TidySupport(); + if ( !$this->tidySupport->isEnabled() ) { + echo "Warning: tidy is not installed, skipping some tests\n"; + } $this->hooks = array(); $this->functionHooks = array(); @@ -611,6 +620,13 @@ class ParserTest { $output = $parser->parse( $input, $title, $options, true, true, 1337 ); $output->setTOCEnabled( !isset( $opts['notoc'] ) ); $out = $output->getText(); + if ( isset( $opts['tidy'] ) ) { + if ( !$this->tidySupport->isEnabled() ) { + return $this->showSkipped(); + } + $out = MWTidy::tidy( $out ); + $out = preg_replace( '/\s+$/', '', $out); + } if ( isset( $opts['showtitle'] ) ) { if ( $output->getTitleText() ) { @@ -621,20 +637,18 @@ 
class ParserTest { } if ( isset( $opts['ill'] ) ) { - $out = $this->tidy( implode( ' ', $output->getLanguageLinks() ) ); + $out = implode( ' ', $output->getLanguageLinks() ); } elseif ( isset( $opts['cat'] ) ) { $outputPage = $context->getOutput(); $outputPage->addCategoryLinks( $output->getCategories() ); $cats = $outputPage->getCategoryLinks(); if ( isset( $cats['normal'] ) ) { - $out = $this->tidy( implode( ' ', $cats['normal'] ) ); + $out = implode( ' ', $cats['normal'] ); } else { $out = ''; } } - - $result = $this->tidy( $result ); } $this->teardownGlobals(); @@ -770,6 +784,8 @@ class ParserTest { * @param string $config */ private function setupGlobals( $opts = '', $config = '' ) { + global $IP; + # Find out values for some special options. $lang = self::getOptionValue( 'language', $opts, 'en' ); @@ -832,7 +848,6 @@ class ParserTest { 'wgLocaltimezone' => 'UTC', 'wgAllowExternalImages' => self::getOptionValue( 'wgAllowExternalImages', $opts, true ), 'wgThumbLimits' => array( self::getOptionValue( 'thumbsize', $opts, 180 ) ), - 'wgUseTidy' => false, 'wgDefaultLanguageVariant' => $variant, 'wgVariantArticlePath' => false, 'wgGroupPermissions' => array( '*' => array( @@ -848,13 +863,22 @@ class ParserTest { 'wgLinkHolderBatchSize' => $linkHolderBatchSize, 'wgExperimentalHtmlIds' => false, 'wgExternalLinkTarget' => false, - 'wgAlwaysUseTidy' => false, 'wgHtml5' => true, 'wgWellFormedXml' => true, 'wgAllowMicrodataAttributes' => true, 'wgAdaptiveMessageCache' => true, 'wgDisableLangConversion' => false, 'wgDisableTitleConversion' => false, + // Tidy options. + // We always set 'wgUseTidy' to false when parsing, but certain + // test-running modes still use tidy if available, so ensure + // that the tidy-related options are all set to their defaults. + 'wgUseTidy' => false, + 'wgAlwaysUseTidy' => false, + 'wgDebugTidy' => false, + 'wgTidyConf' => $IP . 
'/includes/tidy.conf', + 'wgTidyOpts' => '', + 'wgTidyInternal' => $this->tidySupport->isInternal(), ); if ( $config ) { @@ -1564,23 +1588,6 @@ class ParserTest { return true; } - /** - * Run the "tidy" command on text if the $wgUseTidy - * global is true - * - * @param string $text The text to tidy - * @return string - */ - private function tidy( $text ) { - global $wgUseTidy; - - if ( $wgUseTidy ) { - $text = MWTidy::tidy( $text ); - } - - return $text; - } - private function wellFormed( $text ) { $html = Sanitizer::hackDocType() . diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 62e160be25..3a315e10d2 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -46,6 +46,12 @@ Main Page blah blah !! endarticle +!!article +Foo +!!text +FOO +!!endarticle + !!article Template:Foo !!text @@ -401,6 +407,13 @@ http://fr.wikipedia.org/wiki/🍺

!! end +# Note that the html+tidy output removes the spaces after the <li>, +# which is a bug (http://sourceforge.net/p/tidy/bugs/945/, etc). +# This is an issue for all tests with lists. We intentionally do +# *not* add html+tidy clauses for these, as we don't want to +# document/test the broken behavior. (Parsoid matches the non-tidy +# output in these cases.) + !! test Simple list !! wikitext @@ -757,7 +770,7 @@ Italics and bold: 5-quote opening sequence: (5,2+3) parsoid=wt2wt,html2wt !! wikitext '''''foo''''' -!! html/* +!! html

    foo

    !! end @@ -1037,7 +1050,16 @@ parsoid=wt2html,wt2wt !''a!!''b |''a||''b |} -!! html +!! html/php+tidy + + + + + + + +
    abab
    +!! html/parsoid @@ -1179,6 +1201,7 @@ Ruby markup (W3C-style)

    !! end +# There is a tidy bug here: http://sourceforge.net/p/tidy/bugs/946/ !! test Non-word characters don't terminate tag names (bug 17663, 40670, 52022) !! wikitext @@ -1539,6 +1562,10 @@ b a
    foo

    b

    +!! html+tidy +

    a

    +
    foo
    +

    b

    !! end !! test @@ -1551,6 +1578,12 @@ b a
    foo

    b

    +!! html+tidy +

    a

    +
    +

    foo

    +
    +

    b

    !! end !! test @@ -1563,6 +1596,11 @@ b
    foo
    a
    foo
    b
    foo
    +!! html+tidy +

    a

    +
    foo
    +

    b

    +
    foo
    !! end !! test @@ -1575,6 +1613,15 @@ b
    foo
    a
    foo
    b
    foo
    +!! html+tidy +

    a

    +
    +

    foo

    +
    +

    b

    +
    +

    foo

    +
    !! end !! test @@ -1593,6 +1640,13 @@ d e

    x
    foo
    z +!! html+tidy +
    foo
    +

    a

    +

    b c d e

    +

    x

    +
    foo
    +

    z

    !! end !! test @@ -1623,9 +1677,20 @@ b

    e
    +!! html+tidy +


    +

    a

    +

    b

    +
    a
    +

    b

    +
    b
    +

    d

    +


    +
    e
    !! end ## PHP parser emits output which is broken +## XXX The parsoid output doesn't match the tidy output. !! test Unclosed HTML p-tags should be handled properly !! wikitext @@ -1633,6 +1698,11 @@ Unclosed HTML p-tags should be handled properly a b +!! html/php+tidy +
    +

    foo</div>

    +

    a

    +b
    !! html/parsoid

    foo

    a

    @@ -1784,6 +1854,10 @@ Bug 15491: / in blockquote (2)
    Foo bar baz quux
    +!! html+tidy +
    +

    Foo

    +bar baz quux
    !! end !! test @@ -1909,6 +1983,12 @@ foo
    
     
    +!! html+tidy
    +

    a

    +
    +
    +foo
    +
    !! end !! test @@ -2494,10 +2574,11 @@ c a | b | c -!! html/parsoid +!! html/php
    a
     | b
    -| c
    +| c + !!end !!test @@ -2506,6 +2587,12 @@ c a | b | c +!! html/php +

    a +

    +
    | b
    +| c
    +
    !! html/parsoid

    a

    @@ -2526,7 +2613,19 @@ a
      c 
    foo
     foo 
     
    -!!end +!! html+tidy +

    a

    +

    foo

    +

    b

    +
    foo
    +

    c

    +
    +

    foo

    +
    +
    + foo 
    +
    +!! end !!test 3b. Indent-Pre and block tags (multi-line html) @@ -2538,6 +2637,12 @@ a
    b
    foo
    +!! html+tidy +
    +a foo
    +
    +

    b

    +
    foo
    !!end !!test @@ -2619,6 +2724,18 @@ File:foobar.jpg +!! html+tidy +

    a

    + !!end !! test @@ -3390,6 +3507,9 @@ Definition Lists: Nesting: Test 4 ## The Parsoid team believes the following three test exposes a ## bug in the PHP parser. (Parsoid team thinks the PHP parser is ## wrong to close the
    after the
    containing the
      .) +## It also exposes a "misfeature" in tidy, which doesn't like +##
      tags with a single
      child; it converts the
      into +## a
      in that case. (Parsoid leaves the
      alone!) !! test Definition Lists: Mixed Lists: Test 1 !! wikitext @@ -3401,6 +3521,22 @@ Definition Lists: Mixed Lists: Test 1
    • bar
    baz
    +!! html/php+tidy +
    +
    +
    +
    +
      +
    • foo
    • +
    • bar
    • +
    +
    +
    +
    +
    baz
    +
    +
    +
    !! html/parsoid
    @@ -3528,6 +3664,7 @@ Definition Lists: Mixed Lists: Test 10 # rules regarding dd/dt on the next two tests. Parsoid is more # consistent, and recognizes the shared nesting and keeps the # still-open tags around until the nesting is complete. +# (And tidy again converts
    to
    before 'bar'.) !! test Definition Lists: Mixed Lists: Test 11 @@ -3540,6 +3677,43 @@ Definition Lists: Mixed Lists: Test 11
    boo 
    baz
    +!! html/php+tidy +
      +
    • +
        +
      1. +
          +
        • +
            +
          1. +
            +
            foo 
            +
            +
              +
            • +
              +
              +
              +
              bar
              +
              +
              +
              +
            • +
            +
            +
            +
            +
            boo 
            +
            baz
            +
            +
          2. +
          +
        • +
        +
      2. +
      +
    • +
    !! html/parsoid
    • @@ -3571,6 +3745,7 @@ Definition Lists: Mixed Lists: Test 11 !! end +# Another case where tidy converts a
      to a
      (but Parsoid doesn't). !! test Definition Lists: Weird Ones: Test 1 !! wikitext @@ -3579,6 +3754,39 @@ Definition Lists: Weird Ones: Test 1
        1. foo 
          • bar (who uses this?)
    +!! html/php+tidy +
      +
    • +
        +
      1. +
        +
        foo 
        +
        +
          +
        • +
          +
          +
          +
          +
          +
          +
          +
          bar (who uses this?)
          +
          +
          +
          +
          +
          +
          +
          +
        • +
        +
        +
        +
      2. +
      +
    • +
    !! html/parsoid
    ab ab
    + + +
    Foo!! +
    + !! html/parsoid @@ -5286,6 +5508,14 @@ Table-cell after a comment-only-empty-line | b |} +!! html +
    Foo!!
    + + +
    a + b +
    + !! html/parsoid @@ -5312,9 +5542,14 @@ Wikitext table with html-syntax row
    afoo
    !! end +## Note that Parsoid output differs from PHP and PHP+tidy here. +## The lack of tags in the PHP output is arguably a bug in the +## PHP parser, which tidy then compounds by fostering the content +## entirely out of the table. Parsoid recognizes the table context +## and generates and wrappers as needed. Hopefully nobody +## depends on PHP's treatment of broken table markup! !! test Implicit after a |- -(PHP parser relies on Tidy to add the missing tags) !! options parsoid=wt2html,wt2wt !! wikitext @@ -5322,15 +5557,23 @@ parsoid=wt2html,wt2wt |- a |} -!! html +!! html/php + + +a +
    + +!! html/php+tidy +

    a

    +!! html/parsoid
    a
    !! end +# Again, Parsoid adds implicit s here, PHP and Tidy strip the b out. !! test -Pres should be recognized in an explicit context, but not in an implicit context -(PHP parser relies on Tidy to add the missing tags) +
     tags should be recognized in an explicit  context, but not in an implicit  context
     !! options
     parsoid=wt2html,wt2wt
     !! wikitext
    @@ -5341,7 +5584,28 @@ parsoid=wt2html,wt2wt
     |-
      b
     |}
    -!! html
    +!! html/php
    +
    +
    +
    +
    + b
    +
    +
    a
    +
    +
    + +!! html/php+tidy +

    b

    + + + + +
    +
    +a
    +
    +!! html/parsoid @@ -5350,9 +5614,9 @@ parsoid=wt2html,wt2wt
    a
    !! end +# PHP + Tidy strips the list out of the table; Parsoid wraps it. !! test Lists should be recognized in an implicit context -(PHP parser relies on Tidy to add the missing tags) !! options parsoid=wt2html,wt2wt !! wikitext @@ -5360,7 +5624,17 @@ parsoid=wt2html,wt2wt |- *a |} -!! html +!! html/php + + +
    • a
    +
    + +!! html/php+tidy +
      +
    • a
    • +
    +!! html/parsoid +
      @@ -5433,15 +5707,26 @@ parsoid=wt2html,wt2wt ! foo || bar || baz || quux |} +!! html/php + + + + + +
      foo bar + baz quux +
      + !! html/parsoid - - - -
      foo bar baz quux
      +
    foo bar + baz quux
    !! end +# PHP throws away the (semi-broken) "foo" class here; Parsoid +# preserves it. !!test Parsoid: Recover better from broken table attributes !!options @@ -5451,6 +5736,14 @@ parsoid=wt2html | class="bar" | foo |} +!!html/php+tidy + + + + +
    +

    foo

    +
    !!html/parsoid @@ -6028,6 +6321,8 @@ title=[[Bug462]] !! html/php

    Bug462 Bug462

    +!! html/php+tidy +

    Bug462 Bug462

    !! html/parsoid

    Bug462 Bug462

    !! end @@ -6161,6 +6456,9 @@ Purely hash wikilink title=[[User:test/123]] !! wikitext [[#a|b]] +!! html/php +

    b +

    !! html/parsoid

    b

    !! end @@ -6269,8 +6567,8 @@ Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved [http://wp.org ''foo''] !! html -

    bar -

    bar +

    bar +

    bar

    foo

    foo

    @@ -6290,6 +6588,9 @@ parsoid Link with angle bracket after anchor !! wikitext [[Foo#]] +!! html/php +

    Foo#<bar> +

    !! html/parsoid

    Foo#<bar>

    !! end @@ -6325,6 +6626,11 @@ Interwiki link encoding conversion (bug 1636) +!! html+tidy + !! end !! test @@ -6803,6 +7109,9 @@ Broken br tag sanitization !! end # TODO: Fix html2html mode (bug 51055)! +# This
    handling was added as part of bug 50831; but it +# differs from how PHP+tidy handles this. We should investigate +# this. !! test Parsoid: Broken br tag recognition !! options @@ -6811,6 +7120,9 @@ parsoid=wt2html

    +!! html/php+tidy +

    </br>

    +


    !! html/parsoid



    @@ -6938,6 +7250,9 @@ Horizontal ruler -- Supports content following dashes on same line !! html
    Foo +!! html+tidy +
    +

    Foo

    !! end ### @@ -7189,6 +7504,12 @@ Multiple list tags generated by templates +!! html+tidy +
      +
    • a
    • +
    • b
    • +
    • c
    • +
    !!end !!test @@ -7230,7 +7551,7 @@ Replacing whitespace with tabs still doesn't break the list (gerrit 78327) !!test Test the li-hack -(Cannot test this with PHP parser since it relies on Tidy for the hack) +(The PHP parser relies on Tidy for the hack) !!options parsoid=wt2html,wt2wt !! wikitext @@ -7243,19 +7564,15 @@ parsoid=wt2html,wt2wt
  • not a li-hack
  • -!! html +!! html+tidy
      -
    • foo
    • +
    • foo
    • li-hack
    • -
    • templated li-hack
    • -
    • -
    • li-hack with preceding comments
    • +
    • templated li-hack
    • +
    • unsupported li-hack with preceding comments
    -
      -
    • -
    • not a li-hack -
    • +
    • not a li-hack
    !!end @@ -7305,47 +7622,60 @@ parsoid !! test Unbalanced closing block tags break a list -(Parsoid-only since php parser generates broken html -- relies on Tidy to fix up) +(php parser relies on Tidy to fix up) !! wikitext
    *a
    *b
    -!! html/parsoid +!! html+tidy
      -
    • a -
    • -
    +
  • a
  • + +
    +
      -
    • b -
    • -
    +
  • b
  • + + !! end +# Parsoid fails this test, but it might be tricky to support properly. +# See bug 68395. !! test Unbalanced closing non-block tags don't break a list -(Parsoid-only since php parser generates broken html -- relies on Tidy to fix up) +(php parser relies on Tidy to fix up) !! wikitext *a *b +!! html/php+tidy +
      +
    • a
    • +
    • b
    • +
    !! html/parsoid -

    -

    +
    • a
    • b
    +
    !! end !! test Unclosed formatting tags that straddle lists are closed and reopened -(Parsoid-only since php parser generates broken html -- relies on Tidy to fix up) +(php parser relies on Tidy to fix up) !! wikitext # a # b +!! html/php+tidy +
      +
    1. a
    2. +
    3. b
    4. +
    !! html/parsoid
    1. a @@ -7355,23 +7685,31 @@ Unclosed formatting tags that straddle lists are closed and reopened
    !! end +# Parsoid fails this test, but it might be tricky to support properly. +# See bug 68395. !!test List embedded in a non-block tag -(Ugly Parsoid output -- worth fixing; Disabled for PHP parser since it relies on Tidy) +(Ugly Parsoid output -- worth fixing; PHP parser relies on Tidy) !! wikitext * foo +!! html/php+tidy +
      +
    • foo
    • +
    !! html/parsoid -

    • foo
    -

    !!end +# This is a bug in the PHP parser + tidy combination. +# (The
    tag gets parsed as text and html-escaped by PHP, +# and then fostered out of the table by tidy.) +# We believe the Parsoid output to be correct. !! test Table with missing opening tag !! options @@ -7381,6 +7719,13 @@ parsoid=wt2html,wt2wt
    foo
    +!! html/php+tidy +

    </tr>

    + + + + +
    foo
    !! html/parsoid @@ -7837,9 +8182,11 @@ Magic Word: {{PAGENAME}} with metacharacters title=[['foo & bar = baz']] !! wikitext ''{{PAGENAME}}'' -!! html +!! html/php

    'foo & bar = baz'

    +!! html+tidy +

    'foo & bar = baz'

    !! end !! test @@ -7848,9 +8195,11 @@ Magic Word: {{PAGENAME}} with metacharacters (bug 26781) title=[[*RFC 1234 http://example.com/]] !! wikitext {{PAGENAME}} -!! html +!! html/php

    *RFC 1234 http://example.com/

    +!! html+tidy +

    *RFC 1234 http://example.com/

    !! end !! test @@ -7870,9 +8219,11 @@ Magic Word: {{PAGENAMEE}} with metacharacters (bug 26781) title=[[*RFC 1234 http://example.com/]] !! wikitext {{PAGENAMEE}} -!! html +!! html/php

    *RFC_1234_http://example.com/

    +!! html+tidy +

    *RFC_1234_http://example.com/

    !! end !! test @@ -8376,9 +8727,16 @@ Template with thumb image (with link in description) !! wikitext {{paramtest| param =[[Image:noimage.png|thumb|[[no link|link]] [[no link|caption]]]]}} -!! html +!! html/php This is a test template with parameter +!! html+tidy +

    This is a test template with parameter

    + !! end !! article @@ -8580,8 +8938,8 @@ Template with targets containing wikilinks {{{{echo|[[foo}}]]}} !! html -

    {{foo}} -

    {{foo}} +

    {{foo}} +

    {{foo}}

    {{[[foo}}]]

    !! end @@ -9071,6 +9429,11 @@ Templates: 2. Inside a block tag
    Foo
    Foo
    +!! html+tidy +
    Foo
    +
    +

    Foo

    +
    !!end !!test @@ -9108,7 +9471,11 @@ Templates: P-wrapping: 1c. Templates on consecutive lines

    bar
    baz
    -!!end +!! html+tidy +

    Foo

    +

    bar

    +
    baz
    +!! end !!test Templates: P-wrapping: 1d. Template preceded by comment-only line @@ -9164,7 +9531,7 @@ Templates: Links: 1. Simple example !! wikitext {{echo|[[Foo|bar]]}} !! html -

    bar +

    bar

    !!end @@ -9173,7 +9540,7 @@ Templates: Links: 2. Generation of link href !! wikitext [[{{echo|Foo}}|bar]] !! html -

    bar +

    bar

    !!end @@ -9192,7 +9559,7 @@ Templates: Links: 3. Generation of part of a link href [[:Foo{{echo|bar}}|bar]] !! html -

    bar +

    bar

    Foobar

    Foobarbaz

    bar @@ -9215,7 +9582,7 @@ Templates: Links: 5. Generation of link text !! wikitext [[Foo|{{echo|bar}}]] !! html -

    bar +

    bar

    !!end @@ -9224,7 +9591,7 @@ Templates: Links: 5. Nested templates (only outermost template should be marked) !! wikitext {{echo|[[{{echo|Foo}}|bar]]}} !! html -

    bar +

    bar

    !!end @@ -9406,7 +9773,14 @@ Templates: Wiki Tables: 1a. Fostering of entire template content a
    -!!end +!! html+tidy +

    a

    + + + + +
    +!! end !!test Templates: Wiki Tables: 1b. Fostering of entire template content @@ -9424,7 +9798,16 @@ foo -!!end +!! html+tidy +
    +

    foo

    +
    + + + + +
    +!! end !!test Templates: Wiki Tables: 2. Fostering of partial template content @@ -9439,7 +9822,15 @@ a
    b
    -!!end +!! html+tidy +

    a

    +
    b
    + + + + +
    +!! end !!test Templates: Wiki Tables: 3. td-content via multiple templates @@ -9545,7 +9936,11 @@ a
    b{{echo|c
    d}}e !! html a
    bc
    de -!!end +!! html+tidy +

    a

    +
    bc
    +

    de

    +!! end !!test Templates: Ugly templates: 1. Navbox template parses badly leading to table misnesting @@ -10535,6 +10930,7 @@ Image with multiple attributes from the same template
    Caption text
    !! end +# Parsoid's output here is broken (incorrect p-wrapping); see bug 64901. !! test Image with link tails !! options @@ -10549,6 +10945,19 @@ thumbsize=220 123
    Foobar.jpg
    456 123
    Foobar.jpg
    456 +!! html/php+tidy +

    123Foobar.jpg456

    +

    123

    +
    Foobar.jpg
    +

    456 123

    +
    +
    Foobar.jpg +
    +
    +
    +
    +
    +

    456

    !! html/parsoid

    123456

    123
    456 @@ -12761,6 +13170,8 @@ I always thought é was a cute letter. !! html

    I always thought é was a cute letter.

    +!! html+tidy +

    I always thought é was a cute letter.

    !! end !! test @@ -12822,6 +13233,7 @@ Ensure that HTML adoption agency algorithm is properly implemented. !! end # This was bug 41545 in the PHP parser. +# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext @@ -12834,6 +13246,8 @@ Nesting of # The following cases were bug 51081 in the PHP parser. # Note that there are some other nestable tags (b, i, etc) which are # not covered; see bug 51081 for discussion. + +# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext @@ -12843,6 +13257,7 @@ Nesting of

    !! end +# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext @@ -12856,11 +13271,11 @@ Nesting of Nesting of !! wikitext XYZ -!! html -

    XYZ -

    +!! html+tidy +

    XYZ

    !! end +# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext @@ -12870,6 +13285,7 @@ Nesting of

    !! end +# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext @@ -12911,6 +13327,8 @@ fixme: doBlockLevels won't wrap this in a paragraph because it contains a div !! html Safe Link<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div> +!! html+tidy +

    Safe Link<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div>

    !! end !! test @@ -14056,6 +14474,17 @@ http://

    Contents

    +!! html+tidy +

    onmouseover=[edit]

    +

    http://

    +
    +
    +

    Contents

    +
    + +
    !! end !! test @@ -14069,6 +14498,13 @@ Fuzz testing: Parser14-table +!! html+tidy +

    a[edit]

    + + + + +
    !! end # Known to produce bogus xml (extra ) @@ -14090,6 +14526,15 @@ noxml +!! html+tidy + + + + + + + +
    https://
    !! end !! test @@ -14177,7 +14622,6 @@ Fuzz testing: Parser25 (bug 6055) !!test Fuzz testing: URL adjacent extension (with space, clean) -!! options !! wikitext http://example.com junk !! html @@ -14187,7 +14631,6 @@ http://example.com junk !!test Fuzz testing: URL adjacent extension (no space, dirty; nowiki) -!! options !! wikitext http://example.comjunk !! html @@ -14197,12 +14640,16 @@ http://example.comjunk !!test Fuzz testing: URL adjacent extension (no space, dirty; pre) -!! options !! wikitext http://example.com
    junk
    !! html http://example.com
    junk
    +!! html+tidy +

    http://example.com

    +
    +junk
    +
    !!end !!test @@ -15595,6 +16042,8 @@ parsoid=wt2html,wt2wt,html2html !! html/php

    JavaScript

    +!! html/php+tidy +

    JavaScript

    !! html/parsoid

    JavaScript

    !! end @@ -15619,6 +16068,8 @@ parsoid=wt2html,wt2wt,html2html !! html/php

    îî

    +!! html/php+tidy +

    îî

    !! html/parsoid

    îî

    !! end @@ -15640,6 +16091,8 @@ ISBN 978-0-1234-56 789 !! html

    ISBN 978-0-1234-56 789

    +!! html+tidy +

    ISBN 978-0-1234-56 789

    !! end !! test @@ -15711,6 +16164,8 @@ RFC 983 987 !! html

    RFC 983 987

    +!! html+tidy +

    RFC 983 987

    !! end !! test @@ -16928,6 +17383,10 @@ Line two
    Line one Line two
    +!! html+tidy +
    +

    Line one Line two

    +
    !! end !! test @@ -16943,6 +17402,10 @@ Line two

    Line two +!! html+tidy +
    +

    Line one

    +Line two
    !! end !! test @@ -16958,6 +17421,11 @@ Line two

    +!! html+tidy +
    +

    Line one

    +

    Line two

    +
    !! end !! test @@ -16975,6 +17443,11 @@ Line two

    +!! html+tidy +
    +

    Line one

    +

    Line two

    +
    !! end !! test @@ -17735,6 +18208,20 @@ __TOC__

    Quote
    [edit]

    +!! html+tidy +
    +
    +

    Contents

    +
    + +
    +

    +
    +

    Quote

    +
    +

    [edit]

    !! end !! test @@ -17777,6 +18264,22 @@ __TOC__

    Foo Bar[edit]

    Foo
    Bar
    [edit]

    +!! html+tidy +
    +
    +

    Contents

    +
    + +
    +

    Foo Bar[edit]

    +

    Foo

    +
    +

    Bar

    +
    +

    [edit]

    !! end !! test @@ -17959,7 +18462,7 @@ nowiki inside link inside heading (bug 18295) !! wikitext ==[[foo|xyz]]== !! html -

    xyz[edit]

    +

    xyz[edit]

    !! end @@ -20159,6 +20662,13 @@ Indented block & table {| |foo |} +!! html/php +
    foo
    + + +
    foo +
    + !! html/parsoid
    foo
    @@ -20173,6 +20683,13 @@ Indent and comment before table row |- | there |} +!! html/php +
    + + +
    there +
    + !! html/parsoid diff --git a/tests/phpunit/includes/parser/NewParserTest.php b/tests/phpunit/includes/parser/NewParserTest.php index 0499f882e7..750ada8338 100644 --- a/tests/phpunit/includes/parser/NewParserTest.php +++ b/tests/phpunit/includes/parser/NewParserTest.php @@ -36,6 +36,10 @@ class NewParserTest extends MediaWikiTestCase { * @var DjVuSupport */ private $djVuSupport; + /** + * @var TidySupport + */ + private $tidySupport; protected $file = false; @@ -95,8 +99,6 @@ class NewParserTest extends MediaWikiTestCase { $tmpGlobals['wgUseImageResize'] = true; $tmpGlobals['wgAllowExternalImages'] = true; $tmpGlobals['wgRawHtml'] = false; - $tmpGlobals['wgUseTidy'] = false; - $tmpGlobals['wgAlwaysUseTidy'] = false; $tmpGlobals['wgWellFormedXml'] = true; $tmpGlobals['wgAllowMicrodataAttributes'] = true; $tmpGlobals['wgExperimentalHtmlIds'] = false; @@ -153,8 +155,19 @@ class NewParserTest extends MediaWikiTestCase { # see https://gerrit.wikimedia.org/r/111390 $tmpGlobals['wgExtraInterlanguageLinkPrefixes'] = array( 'mul' ); - //DjVu support + // DjVu support $this->djVuSupport = new DjVuSupport(); + // Tidy support + $this->tidySupport = new TidySupport(); + // We always set 'wgUseTidy' to false when parsing, but certain + // test-running modes still use tidy if available, so ensure + // that the tidy-related options are all set to their defaults. + $tmpGlobals['wgUseTidy'] = false; + $tmpGlobals['wgAlwaysUseTidy'] = false; + $tmpGlobals['wgDebugTidy'] = false; + $tmpGlobals['wgTidyConf'] = $IP . 
'/includes/tidy.conf'; + $tmpGlobals['wgTidyOpts'] = ''; + $tmpGlobals['wgTidyInternal'] = $this->tidySupport->isInternal(); $this->setMwGlobals( $tmpGlobals ); @@ -735,6 +748,14 @@ class NewParserTest extends MediaWikiTestCase { $output = $parser->parse( $input, $title, $options, true, true, 1337 ); $output->setTOCEnabled( !isset( $opts['notoc'] ) ); $out = $output->getText(); + if ( isset( $opts['tidy'] ) ) { + if ( !$this->tidySupport->isEnabled() ) { + $this->markTestSkipped( "SKIPPED: tidy extension is not installed.\n" ); + } else { + $out = MWTidy::tidy( $out ); + $out = preg_replace( '/\s+$/', '', $out); + } + } if ( isset( $opts['showtitle'] ) ) { if ( $output->getTitleText() ) { @@ -745,21 +766,19 @@ class NewParserTest extends MediaWikiTestCase { } if ( isset( $opts['ill'] ) ) { - $out = $this->tidy( implode( ' ', $output->getLanguageLinks() ) ); + $out = implode( ' ', $output->getLanguageLinks() ); } elseif ( isset( $opts['cat'] ) ) { $outputPage = $context->getOutput(); $outputPage->addCategoryLinks( $output->getCategories() ); $cats = $outputPage->getCategoryLinks(); if ( isset( $cats['normal'] ) ) { - $out = $this->tidy( implode( ' ', $cats['normal'] ) ); + $out = implode( ' ', $cats['normal'] ); } else { $out = ''; } } $parser->mPreprocessor = null; - - $result = $this->tidy( $result ); } $this->teardownGlobals(); @@ -963,23 +982,6 @@ class NewParserTest extends MediaWikiTestCase { //Various "cleanup" functions - /** - * Run the "tidy" command on text if the $wgUseTidy - * global is true - * - * @param string $text The text to tidy - * @return string - */ - protected function tidy( $text ) { - global $wgUseTidy; - - if ( $wgUseTidy ) { - $text = MWTidy::tidy( $text ); - } - - return $text; - } - /** * Remove last character if it is a newline * @param string $s diff --git a/tests/testHelpers.inc b/tests/testHelpers.inc index 717c5f34d7..ea4d3c53d4 100644 --- a/tests/testHelpers.inc +++ b/tests/testHelpers.inc @@ -364,6 +364,10 @@ class 
TestFileIterator implements Iterator { private $sectionData = array(); private $lineNum; private $eof; + # Create a fake parser tests which never run anything unless + # asked to do so. This will avoid running hooks for a disabled test + private $delayedParserTest; + private $nextSubTest = 0; function __construct( $file, $parserTest ) { $this->file = $file; @@ -374,6 +378,7 @@ class TestFileIterator implements Iterator { } $this->parserTest = $parserTest; + $this->delayedParserTest = new DelayedParserTest(); $this->lineNum = $this->index = 0; } @@ -412,12 +417,71 @@ class TestFileIterator implements Iterator { return $this->eof != true; } + function setupCurrentTest() { + // "input" and "result" are old section names allowed + // for backwards-compatibility. + $input = $this->checkSection( array( 'wikitext', 'input' ), false ); + $result = $this->checkSection( array( 'html/php', 'html/*', 'html', 'result' ), false ); + // some tests have "with tidy" and "without tidy" variants + $tidy = $this->checkSection( array( 'html/php+tidy', 'html+tidy'), false ); + if ( $tidy != false ) { + if ( $this->nextSubTest == 0 ) { + if ( $result != false ) { + $this->nextSubTest = 1; // rerun non-tidy variant later + } + $result = $tidy; + } else { + $this->nextSubTest = 0; // go on to next test after this + $tidy = false; + } + } + + if ( !isset( $this->sectionData['options'] ) ) { + $this->sectionData['options'] = ''; + } + + if ( !isset( $this->sectionData['config'] ) ) { + $this->sectionData['config'] = ''; + } + + $isDisabled = preg_match( '/\\bdisabled\\b/i', $this->sectionData['options'] ) && !$this->parserTest->runDisabled; + $isParsoidOnly = preg_match( '/\\bparsoid\\b/i', $this->sectionData['options'] ) && $result == 'html' && !$this->parserTest->runParsoid; + $isFiltered = !preg_match( "/" . $this->parserTest->regex . 
"/i", $this->sectionData['test'] ); + if ( $input == false || $result == false || $isDisabled || $isParsoidOnly || $isFiltered ) { + # disabled test + return false; + } + + # We are really going to run the test, run pending hooks and hooks function + wfDebug( __METHOD__ . " unleashing delayed test for: {$this->sectionData['test']}" ); + $hooksResult = $this->delayedParserTest->unleash( $this->parserTest ); + if ( !$hooksResult ) { + # Some hook reported an issue. Abort. + throw new MWException( "Problem running hook" ); + } + + $this->test = array( + 'test' => ParserTest::chomp( $this->sectionData['test'] ), + 'input' => ParserTest::chomp( $this->sectionData[$input] ), + 'result' => ParserTest::chomp( $this->sectionData[$result] ), + 'options' => ParserTest::chomp( $this->sectionData['options'] ), + 'config' => ParserTest::chomp( $this->sectionData['config'] ), + ); + if ( $tidy != false ) { + $this->test['options'] .= " tidy"; + } + return true; + } + function readNextTest() { - $this->clearSection(); + # Run additional subtests of previous test + while ( $this->nextSubTest > 0 ) + if ( $this->setupCurrentTest() ) + return true; - # Create a fake parser tests which never run anything unless - # asked to do so. 
This will avoid running hooks for a disabled test - $delayedParserTest = new DelayedParserTest(); + $this->clearSection(); + # Reset hooks for the delayed test object + $this->delayedParserTest->reset(); while ( false !== ( $line = fgets( $this->fh ) ) ) { $this->lineNum++; @@ -446,7 +510,7 @@ class TestFileIterator implements Iterator { $line = trim( $line ); if ( $line ) { - $delayedParserTest->requireHook( $line ); + $this->delayedParserTest->requireHook( $line ); } } @@ -462,7 +526,7 @@ class TestFileIterator implements Iterator { $line = trim( $line ); if ( $line ) { - $delayedParserTest->requireFunctionHook( $line ); + $this->delayedParserTest->requireFunctionHook( $line ); } } @@ -489,52 +553,14 @@ class TestFileIterator implements Iterator { if ( $this->section == 'end' ) { $this->checkSection( 'test' ); - // "input" and "result" are old section names allowed - // for backwards-compatibility. - $input = $this->checkSection( array( 'wikitext', 'input' ), false ); - $result = $this->checkSection( array( 'html/php', 'html/*', 'html', 'result' ), false ); - - if ( !isset( $this->sectionData['options'] ) ) { - $this->sectionData['options'] = ''; - } - - if ( !isset( $this->sectionData['config'] ) ) { - $this->sectionData['config'] = ''; - } - - if ( $input == false || $result == false || - ( ( preg_match( '/\\bdisabled\\b/i', $this->sectionData['options'] ) - && !$this->parserTest->runDisabled ) - || ( preg_match( '/\\bparsoid\\b/i', $this->sectionData['options'] ) - && $result != 'html/php' && !$this->parserTest->runParsoid ) - || !preg_match( "/" . $this->parserTest->regex . "/i", $this->sectionData['test'] ) ) - ) { - # disabled test - $this->clearSection(); - - # Forget any pending hooks call since test is disabled - $delayedParserTest->reset(); - - continue; - } - - # We are really going to run the test, run pending hooks and hooks function - wfDebug( __METHOD__ . 
" unleashing delayed test for: {$this->sectionData['test']}" ); - $hooksResult = $delayedParserTest->unleash( $this->parserTest ); - if ( !$hooksResult ) { - # Some hook reported an issue. Abort. - return false; - } - - $this->test = array( - 'test' => ParserTest::chomp( $this->sectionData['test'] ), - 'input' => ParserTest::chomp( $this->sectionData[$input] ), - 'result' => ParserTest::chomp( $this->sectionData[$result] ), - 'options' => ParserTest::chomp( $this->sectionData['options'] ), - 'config' => ParserTest::chomp( $this->sectionData['config'] ), - ); - - return true; + do { + if ( $this->setupCurrentTest() ) + return true; + } while ( $this->nextSubTest > 0 ); + # go on to next test (since this was disabled) + $this->clearSection(); + $this->delayedParserTest->reset(); + continue; } if ( isset( $this->sectionData[$this->section] ) ) { @@ -732,7 +758,7 @@ class DjVuSupport { } /** - * Returns if the DjVu tools are usable + * Returns true if the DjVu tools are usable * * @return bool */ @@ -745,3 +771,43 @@ class DjVuSupport { && is_executable( $wgDjvuTxt ); } } + +/** + * Initialize and detect the tidy support + */ +class TidySupport { + private $internalTidy; + private $externalTidy; + + /** + * Determine if there is a usable tidy. + */ + public function __construct() { + global $wgTidyBin; + + $this->internalTidy = extension_loaded( 'tidy' ) && + class_exists( 'tidy' ); + + $this->externalTidy = is_executable( $wgTidyBin ) || + Installer::locateExecutableInDefaultPaths( array( $wgTidyBin ) ) + !== false; + } + + /** + * Returns true if we should use internal tidy. + * + * @return bool + */ + public function isInternal() { + return $this->internalTidy; + } + + /** + * Returns true if tidy is usable + * + * @return bool + */ + public function isEnabled() { + return $this->internalTidy || $this->externalTidy; + } +} -- 2.20.1