From: Tim Starling Date: Wed, 9 Mar 2016 04:42:33 +0000 (+1100) Subject: New parserTests.php features X-Git-Tag: 1.31.0-rc.0~6426 X-Git-Url: https://git.cyclocoop.org/%27.%24link.%27?a=commitdiff_plain;h=507a6aece2707816375468589b7a7992c9ad9380;p=lhc%2Fweb%2Fwiklou.git New parserTests.php features Features to support T89331 analysis: * Support dwdiff for word-level diffing * Add --mark-ws feature which produces cleaner diffs when line breaks differ * Add optional normalization of parser test output, allowing significant differences to be separated from insignificant differences. Change-Id: I0e151caad1f8b2f97bf20b219f26f3101be82506 --- diff --git a/tests/TestsAutoLoader.php b/tests/TestsAutoLoader.php index 8b100a23e1..2bb1d2ef21 100644 --- a/tests/TestsAutoLoader.php +++ b/tests/TestsAutoLoader.php @@ -143,6 +143,7 @@ $wgAutoloadClasses += [ 'NewParserTest' => "$testDir/phpunit/includes/parser/NewParserTest.php", 'MediaWikiParserTest' => "$testDir/phpunit/includes/parser/MediaWikiParserTest.php", 'ParserTest' => "$testDir/parser/parserTest.inc", + 'ParserTestResultNormalizer' => "$testDir/parser/parserTest.inc", 'ParserTestParserHook' => "$testDir/parser/parserTestsParserHook.php", # tests/phpunit/includes/site diff --git a/tests/parser/parserTest.inc b/tests/parser/parserTest.inc index e519f59d0a..d602194b0e 100644 --- a/tests/parser/parserTest.inc +++ b/tests/parser/parserTest.inc @@ -82,6 +82,9 @@ class ParserTest { public $regex = ""; private $savedGlobals = []; + private $useDwdiff = false; + private $markWhitespace = false; + private $normalizationFunctions = []; /** * Sets terminal colorization and diff/quick modes depending on OS and @@ -116,6 +119,18 @@ class ParserTest { || isset( $options['compare'] ) ) ); // redundant output $this->showOutput = isset( $options['show-output'] ); + $this->useDwdiff = isset( $options['dwdiff'] ); + $this->markWhitespace = isset( $options['mark-ws'] ); + + if ( isset( $options['norm'] ) ) { + foreach ( explode( ',', 
$options['norm'] ) as $func ) { + if ( in_array( $func, [ 'removeTbody', 'trimWhitespace' ] ) ) { + $this->normalizationFunctions[] = $func; + } else { + echo "Warning: unknown normalization option \"$func\"\n"; + } + } + } if ( isset( $options['filter'] ) ) { $options['regex'] = $options['filter']; @@ -700,6 +715,11 @@ class ParserTest { $this->teardownGlobals(); + if ( count( $this->normalizationFunctions ) ) { + $result = ParserTestResultNormalizer::normalize( $result, $this->normalizationFunctions ); + $out = ParserTestResultNormalizer::normalize( $out, $this->normalizationFunctions ); + } + $testResult = new ParserTestResult( $desc ); $testResult->expected = $result; $testResult->actual = $out; @@ -1469,6 +1489,16 @@ class ParserTest { protected function quickDiff( $input, $output, $inFileTail = 'expected', $outFileTail = 'actual' ) { + if ( $this->markWhitespace ) { + $pairs = [ + "\n" => '¶', + ' ' => '·', + "\t" => '→' + ]; + $input = strtr( $input, $pairs ); + $output = strtr( $output, $pairs ); + } + # Windows, or at least the fc utility, is retarded $slash = wfIsWindows() ? '\\' : '/'; $prefix = wfTempDir() . "{$slash}mwParser-" . mt_rand(); @@ -1484,14 +1514,22 @@ class ParserTest { global $wgDiff3; // we assume that people with diff3 also have usual diff - $shellCommand = ( wfIsWindows() && !$wgDiff3 ) ? 'fc' : 'diff -au'; + if ( $this->useDwdiff ) { + $shellCommand = 'dwdiff -Pc'; + } else { + $shellCommand = ( wfIsWindows() && !$wgDiff3 ) ? 
'fc' : 'diff -au'; + } $diff = wfShellExec( "$shellCommand $shellInfile $shellOutfile" ); unlink( $infile ); unlink( $outfile ); - return $this->colorDiff( $diff ); + if ( $this->useDwdiff ) { + return $diff; + } else { + return $this->colorDiff( $diff ); + } } /** @@ -1699,3 +1737,84 @@ class ParserTest { return true; } } + +class ParserTestResultNormalizer { + protected $doc, $xpath, $invalid; + + public static function normalize( $text, $funcs ) { + $norm = new self( $text ); + if ( $norm->invalid ) { + return $text; + } + foreach ( $funcs as $func ) { + $norm->$func(); + } + return $norm->serialize(); + } + + protected function __construct( $text ) { + $this->doc = new DOMDocument( '1.0', 'utf-8' ); + + // Note: parsing a supposedly XHTML document with an XML parser is not + // guaranteed to give accurate results. For example, it may introduce + // differences in the number of line breaks in
<pre> tags.
+
+		MediaWiki\suppressWarnings();
+		if ( !$this->doc->loadXML( '<html><body>' . $text . '</body></html>' ) ) {
+			$this->invalid = true;
+		}
+		MediaWiki\restoreWarnings();
+		$this->xpath = new DOMXPath( $this->doc );
+		$this->body = $this->xpath->query( '//body' )->item( 0 );
+	}
+
+	protected function removeTbody() {
+		foreach ( $this->xpath->query( '//tbody' ) as $tbody ) {
+			while ( $tbody->firstChild ) {
+				$child = $tbody->firstChild;
+				$tbody->removeChild( $child );
+				$tbody->parentNode->insertBefore( $child, $tbody );
+			}
+			$tbody->parentNode->removeChild( $tbody );
+		}
+	}
+
+	/**
+	 * The point of this function is to produce a normalized DOM in which
+	 * Tidy's output matches the output of html5depurate. Tidy both trims
+	 * and pretty-prints, so this requires fairly aggressive treatment.
+	 *
+	 * In particular, note that Tidy converts <pre>x</pre> to <pre>\nx\n</pre>
, + * which theoretically affects display since the second line break is not + * ignored by compliant HTML parsers. + * + * This function also removes empty elements, as does Tidy. + */ + protected function trimWhitespace() { + foreach ( $this->xpath->query( '//text()' ) as $child ) { + if ( strtolower( $child->parentNode->nodeName ) === 'pre' ) { + // Just trim one line break from the start and end + if ( substr_compare( $child->data, "\n", 0 ) === 0 ) { + $child->data = substr( $child->data, 1 ); + } + if ( substr_compare( $child->data, "\n", -1 ) === 0 ) { + $child->data = substr( $child->data, 0, -1 ); + } + } else { + // Trim all whitespace + $child->data = trim( $child->data ); + } + if ( $child->data === '' ) { + $child->parentNode->removeChild( $child ); + } + } + } + + /** + * Serialize the XML DOM for comparison purposes. This does not generate HTML. + */ + protected function serialize() { + return strtr( $this->doc->saveXML( $this->body ), + [ '' => '', '' => '' ] ); + } +} diff --git a/tests/parserTests.php b/tests/parserTests.php index b3cb89ae02..5e15694c4c 100644 --- a/tests/parserTests.php +++ b/tests/parserTests.php @@ -27,8 +27,8 @@ define( 'MW_PARSER_TEST', true ); $options = [ 'quick', 'color', 'quiet', 'help', 'show-output', - 'record', 'run-disabled', 'run-parsoid' ]; -$optionsWithArgs = [ 'regex', 'filter', 'seed', 'setversion', 'file' ]; + 'record', 'run-disabled', 'run-parsoid', 'dwdiff', 'mark-ws' ]; +$optionsWithArgs = [ 'regex', 'filter', 'seed', 'setversion', 'file', 'norm' ]; require_once __DIR__ . '/../maintenance/commandLine.inc'; require_once __DIR__ . 
'/TestsAutoLoader.php'; @@ -54,9 +54,16 @@ Options: --keep-uploads Re-use the same upload directory for each test, don't delete it --fuzz Do a fuzz test instead of a normal test --seed Start the fuzz test from the specified seed - --help Show this help message --run-disabled run disabled tests --run-parsoid run parsoid tests (normally disabled) + --dwdiff Use dwdiff to display diff output + --mark-ws Mark whitespace in diffs by replacing it with symbols + --norm=<funcs> Apply a comma-separated list of normalization functions to + both the expected and actual output in order to resolve + irrelevant differences. The accepted normalization functions + are: removeTbody to remove <tbody> tags; and trimWhitespace + to trim whitespace from the start and end of text nodes. + --help Show this help message ENDS; exit( 0 );