From 78c3f2f4b146dd714593d85a3b74d005bdd2eae6 Mon Sep 17 00:00:00 2001 From: Arlo Breault Date: Mon, 16 Mar 2015 10:26:52 -0800 Subject: [PATCH] Tidy up tidy usage * There's a branch path in the sanitizer that depends on $wgUseTidy, which means the test output differs from on wiki. * In general, we should set these variables to match the wiki behaviour in tests. * Exposes T92892, Sanitizer removes empty tags when tidy is disabled. * Tweaked tests for T19663 to use an extension tag to show that HTML5 tags with non-word characters make it through the parser intact (before being ultimately sanitized). Change-Id: I09c72fd739e11a8b757f37dc4c790758d782ad73 --- RELEASE-NOTES-1.25 | 2 + includes/parser/ParserOptions.php | 2 +- tests/parser/parserTest.inc | 17 +-- tests/parser/parserTests.txt | 121 +++++++++++------- tests/parser/parserTestsParserHook.php | 1 + .../phpunit/includes/parser/NewParserTest.php | 20 +-- 6 files changed, 101 insertions(+), 62 deletions(-) diff --git a/RELEASE-NOTES-1.25 b/RELEASE-NOTES-1.25 index 5e08efdd0e..88bfc15846 100644 --- a/RELEASE-NOTES-1.25 +++ b/RELEASE-NOTES-1.25 @@ -174,6 +174,8 @@ production. This requires the fa_sha1 field being populated. * Removed rel="archives" from the "View history" link, as it did not pass HTML validation. +* $wgUseTidy is now set when parserTests are run with the tidy option to match + output on wiki. === Action API changes in 1.25 === * (T67403) XML tag highlighting is now only performed for formats diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index b09fe76029..9e06ee2d64 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -25,7 +25,7 @@ * @brief Set options of the Parser * * All member variables are supposed to be private in theory, although in - * practise this is not the case. + * practice this is not the case. 
* * @ingroup Parser */ diff --git a/tests/parser/parserTest.inc b/tests/parser/parserTest.inc index 17769ad47e..e18c22b709 100644 --- a/tests/parser/parserTest.inc +++ b/tests/parser/parserTest.inc @@ -593,6 +593,14 @@ class ParserTest { } } + if ( isset( $opts['tidy'] ) ) { + if ( !$this->tidySupport->isEnabled() ) { + return $this->showSkipped(); + } else { + $options->setTidy( true ); + } + } + if ( isset( $opts['title'] ) ) { $titleText = $opts['title']; } else { @@ -624,10 +632,6 @@ class ParserTest { $output->setTOCEnabled( !isset( $opts['notoc'] ) ); $out = $output->getText(); if ( isset( $opts['tidy'] ) ) { - if ( !$this->tidySupport->isEnabled() ) { - return $this->showSkipped(); - } - $out = MWTidy::tidy( $out ); $out = preg_replace( '/\s+$/', '', $out ); } @@ -877,10 +881,7 @@ class ParserTest { 'wgDisableLangConversion' => false, 'wgDisableTitleConversion' => false, // Tidy options. - // We always set 'wgUseTidy' to false when parsing, but certain - // test-running modes still use tidy if available, so ensure - // that the tidy-related options are all set to their defaults. - 'wgUseTidy' => false, + 'wgUseTidy' => isset( $opts['tidy'] ), 'wgAlwaysUseTidy' => false, 'wgDebugTidy' => false, 'wgTidyConf' => $IP . '/includes/tidy.conf', diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 966b666897..53814c53e9 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -1209,6 +1209,8 @@ Ruby markup (W3C-style) !! test Non-word characters don't terminate tag names (bug 17663, 40670, 52022) !! wikitext +a + doesn't terminate doesn't terminate @@ -1219,7 +1221,8 @@ Non-word characters don't terminate tag names (bug 17663, 40670, 52022) !! html -

<b→> doesn't terminate </b→> +

<blockquote|>a</blockquote> +

<b→> doesn't terminate </b→>

<bä> doesn't terminate </bä>

<boo> doesn't terminate </boo>

<s.foo> doesn't terminate </s.foo> @@ -1228,9 +1231,13 @@ Non-word characters don't terminate tag names (bug 17663, 40670, 52022) !! end # There is a tidy bug here: http://sourceforge.net/p/tidy/bugs/946/ +# If the non-word-character tag made it through the sanitizer, tidy +# would munge it up. !! test Non-word characters don't terminate tag names + tidy !! wikitext +a + doesn't terminate doesn't terminate @@ -1241,6 +1248,7 @@ Non-word characters don't terminate tag names + tidy !! html+tidy +

<blockquote|>a

<b→> doesn't terminate </b→>

<bä> doesn't terminate </bä>

<boo> doesn't terminate </boo>

@@ -1248,16 +1256,33 @@ Non-word characters don't terminate tag names + tidy

<sub-ID#1>

!! end +### +### (see tests/parser/parserTestsParserHook.php for the extension) +### This checks that HTML5 tags (with non-word characters in the tag +### name) make it safely through the parser -- the Sanitizer will +### munge them later, as it should. +### +!! test +Non-word characters are valid in extension tags (T19663) +!! wikitext +tåg +!! html +
+'tåg'
+array (
+)
+
+ +!! end + !! test Isolated close tags should be treated as literal text (bug 52760) !! wikitext s -!! html -

</b> -

<s.foo>s</s> -

+!! html+tidy +

<s.foo>s

!! end ### @@ -1745,7 +1770,6 @@ b !! end ## PHP parser emits output which is broken -## XXX The parsoid output doesn't match the tidy output. !! test Unclosed HTML p-tags should be handled properly !! wikitext @@ -1755,9 +1779,10 @@ a b !! html/php+tidy
-

foo</div>

+

foo

+

a

-b +

b

!! html/parsoid

foo

a

@@ -7690,9 +7715,6 @@ Broken br tag sanitization !! end # TODO: Fix html2html mode (bug 51055)! -# This
handling was added as part of bug 50831; but it -# differs from how PHP+tidy handles this. We should investigate -# this. !! test Parsoid: Broken br tag recognition !! options @@ -7701,12 +7723,9 @@ parsoid=wt2html

-!! html/php+tidy -

</br>

+!! html+tidy +



-!! html/parsoid -


-


!! end !! test @@ -8303,10 +8322,6 @@ List embedded in a non-block tag !!end -# This is a bug in the PHP parser + tidy combination. -# (The tag gets parsed as text and html-escaped by PHP, -# and then fostered out of the table by tidy.) -# We believe the Parsoid output to be correct. !! test Table with missing opening tag !! options @@ -8316,14 +8331,7 @@ parsoid=wt2html,wt2wt foo -!! html/php+tidy -

</tr>

- - - - -
foo
-!! html/parsoid +!! html+tidy @@ -13413,7 +13421,7 @@ Handling of sections up to level 6 and beyond !! end !! test -TOC regression (bug 9764) +TOC regression (T11764) !! wikitext == title 1 == === title 1.1 === @@ -13585,7 +13593,7 @@ Link inside a section heading !! end !! test -TOC regression (bug 12077) +TOC regression (T14077) !! wikitext __TOC__ == title 1 == @@ -14210,16 +14218,17 @@ Media link with text !! end # FIXME: this is still bad HTML tag nesting +# FIXME: doBlockLevels won't wrap this in a paragraph because it contains a div !! test Media link with nasty text -fixme: doBlockLevels won't wrap this in a paragraph because it contains a div !! wikitext [[Media:Foobar.jpg|Safe Link
" onmouseover="alert(document.cookie)" onfoo="
]] !! html Safe Link<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div> !! html+tidy -

Safe Link<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div>

+

Safe Link

+
" onmouseover="alert(document.cookie)" onfoo="
!! end !! test @@ -15433,6 +15442,7 @@ http://

Contents

  • 1 onmouseover=
  • +

    !! end !! test @@ -19205,6 +19215,7 @@ __TOC__

    Quote
    [edit]

    !! html+tidy +

    Contents

    @@ -19213,6 +19224,7 @@ __TOC__
  • 1 Quote
  • +

    Quote

    @@ -19261,6 +19273,7 @@ __TOC__

    Foo
    Bar
    [edit]

    !! html+tidy +

    Contents

    @@ -19270,6 +19283,7 @@ __TOC__
  • 2 Foo Bar
  • +

    Foo Bar[edit]

    Foo

    @@ -19346,6 +19360,37 @@ __TOC__ !! end +# Note that the html output does not have the

    , but the +# html+tidy output *does*. This is because the empty

    is +# removed by the sanitizer, but only when tidy is *not* enabled (!). +!! test +Empty

    tag in TOC, removed by Sanitizer (T92892) +!! wikitext +__TOC__ +== x == +!! html +

    Contents

    + +
    + +

    x[edit]

    + +!! html+tidy +

    +
    +
    +

    Contents

    +
    + +
    +

    +

    x[edit]

    +!! end + !! article MediaWiki:Bug32057 !! text @@ -21892,18 +21937,6 @@ a>b

    !! end - -# This was a bug in the PHP parser (see bug 17663 and its dups, -# https://bugzilla.wikimedia.org/show_bug.cgi?id=17663) -!! test -Tag names followed by punctuation should not be recognized as tags -!! wikitext - text -!! html -

    <s.ome> text -

    -!! end - !! test HTML tag with necessary entities in attributes !! wikitext diff --git a/tests/parser/parserTestsParserHook.php b/tests/parser/parserTestsParserHook.php index c8b3e8974b..221fc79aba 100644 --- a/tests/parser/parserTestsParserHook.php +++ b/tests/parser/parserTestsParserHook.php @@ -29,6 +29,7 @@ class ParserTestParserHook { static function setup( &$parser ) { $parser->setHook( 'tag', array( __CLASS__, 'dumpHook' ) ); + $parser->setHook( 'tåg', array( __CLASS__, 'dumpHook' ) ); $parser->setHook( 'statictag', array( __CLASS__, 'staticTagHook' ) ); return true; } diff --git a/tests/phpunit/includes/parser/NewParserTest.php b/tests/phpunit/includes/parser/NewParserTest.php index 3ce3e1ff00..ccd7f96220 100644 --- a/tests/phpunit/includes/parser/NewParserTest.php +++ b/tests/phpunit/includes/parser/NewParserTest.php @@ -160,9 +160,6 @@ class NewParserTest extends MediaWikiTestCase { $this->djVuSupport = new DjVuSupport(); // Tidy support $this->tidySupport = new TidySupport(); - // We always set 'wgUseTidy' to false when parsing, but certain - // test-running modes still use tidy if available, so ensure - // that the tidy-related options are all set to their defaults. $tmpGlobals['wgUseTidy'] = false; $tmpGlobals['wgAlwaysUseTidy'] = false; $tmpGlobals['wgDebugTidy'] = false; @@ -419,6 +416,7 @@ class NewParserTest extends MediaWikiTestCase { 'wgMathDirectory' => $uploadDir . '/math', 'wgDefaultLanguageVariant' => $variant, 'wgLinkHolderBatchSize' => $linkHolderBatchSize, + 'wgUseTidy' => isset( $opts['tidy'] ), ); if ( $config ) { @@ -727,12 +725,21 @@ class NewParserTest extends MediaWikiTestCase { . 
"Current configuration is:\n\$wgTexvc = '$wgTexvc'" ); } } + if ( isset( $opts['djvu'] ) ) { if ( !$this->djVuSupport->isEnabled() ) { $this->markTestSkipped( "SKIPPED: djvu binaries do not exist or are not executable.\n" ); } } + if ( isset( $opts['tidy'] ) ) { + if ( !$this->tidySupport->isEnabled() ) { + $this->markTestSkipped( "SKIPPED: tidy extension is not installed.\n" ); + } else { + $options->setTidy( true ); + } + } + if ( isset( $opts['pst'] ) ) { $out = $parser->preSaveTransform( $input, $title, $user, $options ); } elseif ( isset( $opts['msg'] ) ) { @@ -753,12 +760,7 @@ class NewParserTest extends MediaWikiTestCase { $output->setTOCEnabled( !isset( $opts['notoc'] ) ); $out = $output->getText(); if ( isset( $opts['tidy'] ) ) { - if ( !$this->tidySupport->isEnabled() ) { - $this->markTestSkipped( "SKIPPED: tidy extension is not installed.\n" ); - } else { - $out = MWTidy::tidy( $out ); - $out = preg_replace( '/\s+$/', '', $out ); - } + $out = preg_replace( '/\s+$/', '', $out ); } if ( isset( $opts['showtitle'] ) ) { -- 2.20.1
    foo