Start removing the Latin-1 hacks. We're going pure UTF-8 for 1.5...
[lhc/web/wiklou.git] / maintenance / parserTests.php
1 <?php
2 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
20 /**
21 * @todo Make this more independent of the configuration (and if possible the database)
22 * @todo document
23 * @package MediaWiki
24 * @subpackage Maintenance
25 */
26
27 /** */
# Command-line switch names. They are declared before commandLine.inc is
# loaded; presumably that include reads them while parsing argv -- confirm
# against commandLine.inc.
$options = array(
	'quick',
	'color',
	'quiet',
	'help',
);
# Switch names listed separately as taking an argument (--regex <expression>).
$optionsWithArgs = array(
	'regex',
);
30
31 require_once( 'commandLine.inc' );
32 require_once( "$IP/includes/ObjectCache.php" );
33 require_once( "$IP/languages/LanguageUtf8.php" );
34
35 /**
36 * @package MediaWiki
37 * @subpackage Maintenance
38 */
39 class ParserTest {
40 /**
41 * boolean $color whether output should be colorized
42 * @access private
43 */
44 var $color;
45
46 /**
47 * boolean $lightcolor whether output should use light colors
48 * @access private
49 */
50 var $lightcolor;
51
/**
 * Constructor. Sets terminal colorization and diff/quick modes depending
 * on OS and command-line options (--color and --quick), and records the
 * --quiet and --regex settings on the instance.
 *
 * Reads the global $options array; writes $this->color, $this->lightcolor,
 * $this->showDiffs, $this->quiet and $this->regex.
 *
 * @access public
 */
function ParserTest() {
	global $options;

	# Only colorize output if stdout is a terminal.
	# posix_isatty() belongs to the optional POSIX extension; when it is not
	# loaded, fall back to plain output instead of dying on an undefined
	# function.
	$this->lightcolor = false;
	$this->color = !wfIsWindows()
		&& function_exists( 'posix_isatty' )
		&& posix_isatty( 1 );

	# An explicit --color setting overrides the terminal detection.
	if( isset( $options['color'] ) ) {
		switch( $options['color'] ) {
		case 'no':
			$this->color = false;
			break;
		case 'light':
			$this->lightcolor = true;
			# Fall through
		case 'yes':
		default:
			$this->color = true;
			break;
		}
	}

	# --quick suppresses the diff of expected vs. actual output.
	$this->showDiffs = !isset( $options['quick'] );

	$this->quiet = isset( $options['quiet'] );

	if( isset( $options['regex'] ) ) {
		$this->regex = $options['regex'];
	} else {
		# Matches anything
		$this->regex = '';
	}
}
91
/**
 * Strip one trailing newline ("\n") from a string, if there is one.
 * At most a single character is removed; any other string is returned
 * unchanged.
 *
 * @param string $s
 * @return string
 * @access private
 */
function chomp($s) {
	$endsWithNewline = ( substr( $s, -1 ) === "\n" );
	return $endsWithNewline ? substr( $s, 0, -1 ) : $s;
}
104
/**
 * Run a series of tests listed in the given text file.
 * Each test consists of a brief description, wikitext input,
 * and the expected HTML output.
 *
 * The file is read line by line. A line starting with "!!" followed by a
 * word opens the section of that name (lower-cased); the following lines
 * are accumulated as that section's text. "!! endarticle" hands the
 * collected 'article' and 'text' sections to addArticle(); "!! end" hands
 * the collected 'test', 'input', 'result' and optional 'options' sections
 * to runTest(). Tests whose options contain the word "disabled", or whose
 * description does not match $this->regex, are skipped and not counted.
 *
 * Prints status updates on stdout and counts up the total
 * number and percentage of passed tests. An unreadable file, a missing or
 * duplicated section, or a file without any runnable test ends the script
 * through die().
 *
 * @param string $filename
 * @return bool True if passed all tests, false if any tests failed.
 * @access public
 */
function runTestsFromFile( $filename ) {
	$infile = fopen( $filename, 'rt' );
	if( !$infile ) {
		# Report the file that was actually requested.
		die( "Couldn't open $filename\n" );
	}

	$data = array();
	$section = null;
	$success = 0;
	$total = 0;
	$n = 0;
	while( false !== ($line = fgets( $infile ) ) ) {
		$n++;
		if( preg_match( '/^!!\s*(\w+)/', $line, $matches ) ) {
			$section = strtolower( $matches[1] );
			if( $section == 'endarticle') {
				if( !isset( $data['text'] ) ) {
					die( "'endarticle' without 'text' at line $n\n" );
				}
				if( !isset( $data['article'] ) ) {
					die( "'endarticle' without 'article' at line $n\n" );
				}
				$this->addArticle($this->chomp($data['article']), $this->chomp($data['text']), $n);
				$data = array();
				$section = null;
				continue;
			}
			if( $section == 'end' ) {
				if( !isset( $data['test'] ) ) {
					die( "'end' without 'test' at line $n\n" );
				}
				if( !isset( $data['input'] ) ) {
					die( "'end' without 'input' at line $n\n" );
				}
				if( !isset( $data['result'] ) ) {
					die( "'end' without 'result' at line $n\n" );
				}
				if( !isset( $data['options'] ) ) {
					$data['options'] = '';
				}
				else {
					$data['options'] = $this->chomp( $data['options'] );
				}
				# The --regex value is interpolated verbatim between "/"
				# delimiters, so a literal "/" in it must be escaped by
				# the caller.
				if (preg_match('/\\bdisabled\\b/i', $data['options'])
					|| !preg_match("/{$this->regex}/i", $data['test'])) {
					# disabled test
					$data = array();
					$section = null;
					continue;
				}
				if( $this->runTest(
					$this->chomp( $data['test'] ),
					$this->chomp( $data['input'] ),
					$this->chomp( $data['result'] ),
					$this->chomp( $data['options'] ) ) ) {
					$success++;
				}
				$total++;
				$data = array();
				$section = null;
				continue;
			}
			if ( isset ($data[$section] ) ) {
				die ( "duplicate section '$section' at line $n\n" );
			}
			$data[$section] = '';
			continue;
		}
		# Lines seen before the first section marker are ignored.
		if( $section ) {
			$data[$section] .= $line;
		}
	}
	# Release the handle once the whole file has been consumed.
	fclose( $infile );

	if( $total > 0 ) {
		$ratio = IntVal( 100.0 * $success / $total );
		print $this->termColor( 1 ) . "\nPassed $success of $total tests ($ratio%) ";
		if( $success == $total ) {
			print $this->termColor( 32 ) . "PASSED!";
		} else {
			print $this->termColor( 31 ) . "FAILED!";
		}
		print $this->termReset() . "\n";
		return ($success == $total);
	} else {
		die( "No tests found.\n" );
	}
}
203
/**
 * Run a given wikitext input through a freshly-constructed wiki parser,
 * and compare the output against the expected results.
 * Prints status and explanatory messages to stdout.
 *
 * Words recognised in $opts by this method: "math" (enable TeX),
 * "title=[[...]]" (title to parse under, default 'Parser test'),
 * "pst" (compare the pre-save transform), "msg" (compare the message
 * transform), "ill" (compare the language links) and "cat" (compare the
 * category links). $opts is also passed on to setupGlobals().
 *
 * A test passes only if the output equals the expected text exactly and
 * wellFormed() accepts it.
 *
 * @param string $desc Test description, used in the status messages
 * @param string $input Wikitext to try rendering
 * @param string $result Expected output
 * @param string $opts Option words for this test
 * @return bool True if the test passed
 */
function runTest( $desc, $input, $result, $opts ) {
	if( !$this->quiet ) {
		$this->showTesting( $desc );
	}

	$this->setupGlobals($opts);

	$testUser =& new User();
	$parserOptions =& ParserOptions::newFromUser( $testUser );

	$wantsMath = preg_match('/\\bmath\\b/i', $opts);
	if ($wantsMath) {
		# XXX this should probably be done by the ParserOptions
		require_once('Math.php');

		$parserOptions->setUseTex(true);
	}

	# Default title unless the options name one explicitly.
	$titleText = 'Parser test';
	if (preg_match('/title=\[\[(.*)\]\]/', $opts, $titleMatch)) {
		$titleText = $titleMatch[1];
	}

	$wikiParser =& new Parser();
	$title =& Title::makeTitle( NS_MAIN, $titleText );

	if (preg_match('/\\bpst\\b/i', $opts)) {
		$out = $wikiParser->preSaveTransform( $input, $title, $testUser, $parserOptions );
	} elseif (preg_match('/\\bmsg\\b/i', $opts)) {
		$out = $wikiParser->transformMsg( $input, $parserOptions );
	} else {
		$parsed =& $wikiParser->parse( $input, $title, $parserOptions );
		$out = $parsed->getText();

		# "ill" / "cat" replace the rendered text with the collected links.
		if (preg_match('/\\bill\\b/i', $opts)) {
			$links = implode( ' ', $parsed->getLanguageLinks() );
			$out = $this->tidy( $links );
		} else if (preg_match('/\\bcat\\b/i', $opts)) {
			$links = implode( ' ', $parsed->getCategoryLinks() );
			$out = $this->tidy( $links );
		}

		$result = $this->tidy($result);
	}

	$this->teardownGlobals();

	$passed = ( $result === $out ) && $this->wellFormed( $out );
	if( !$passed ) {
		return $this->showFailure( $desc, $result, $out );
	}
	return $this->showSuccess( $desc );
}
265
/**
 * Set up the global variables for a consistent environment for each test.
 * Ideally this should replace the global configuration entirely.
 *
 * The previous value of every overridden global is kept in
 * $this->savedGlobals so that teardownGlobals() can put it back.
 *
 * @param string $opts Test option words; "subpage" turns on subpages
 *                     for namespace 0
 * @access private
 */
function setupGlobals($opts = '') {
	# Save the prefixed / quoted table names for later use when we make the
	# temporaries; this has to happen before wgDBprefix is replaced below.
	$db =& wfGetDB( DB_READ );
	$liveNames = array();
	foreach( $this->listTables() as $table ) {
		$liveNames[$table] = $db->tableName( $table );
	}
	$this->oldTableNames = $liveNames;

	# The dummy upload directory is only created when none is active.
	if( !isset( $this->uploadDir ) ) {
		$this->uploadDir = $this->setupUploadDir();
	}

	$subpagesEnabled = preg_match('/\\bsubpage\\b/i', $opts);

	$overrides = array(
		'wgServer' => 'http://localhost',
		'wgScript' => '/index.php',
		'wgScriptPath' => '/',
		'wgArticlePath' => '/wiki/$1',
		'wgUploadPath' => '/images',
		'wgUploadDirectory' => $this->uploadDir,
		'wgStyleSheetPath' => '/skins',
		'wgSitename' => 'MediaWiki',
		'wgLanguageCode' => 'en',
		'wgContLanguageCode' => 'en',
		'wgDBprefix' => 'parsertest',
		'wgDefaultUserOptions' => array(),

		'wgLoadBalancer' => LoadBalancer::newFromParams( $GLOBALS['wgDBservers'] ),
		'wgLang' => new LanguageUtf8(),
		'wgContLang' => new LanguageUtf8(),
		'wgNamespacesWithSubpages' => array( 0 => $subpagesEnabled ),
		'wgMaxTocLevel' => 999,
		'wgCapitalLinks' => true,
		'wgNoFollowLinks' => true,
	);

	# Swap each override in, remembering what it replaced.
	$this->savedGlobals = array();
	foreach( $overrides as $name => $value ) {
		$this->savedGlobals[$name] = $GLOBALS[$name];
		$GLOBALS[$name] = $value;
	}

	$GLOBALS['wgLoadBalancer']->loadMasterPos();
	$GLOBALS['wgMessageCache']->initialise( new BagOStuff(), false, 0, $GLOBALS['wgDBname'] );
	$this->setupDatabase();

	global $wgUser;
	$wgUser = new User();
}
318
# Names (without prefix) of the tables that get a temporary copy.
# Some of these probably aren't necessary
function listTables() {
	$tables = array(
		'user',
		'page',
		'revision',
		'text',
		'links',
		'brokenlinks',
		'imagelinks',
		'categorylinks',
		'linkscc',
		'site_stats',
		'hitcounter',
		'ipblocks',
		'image',
		'oldimage',
		'recentchanges',
		'watchlist',
		'math',
		'searchindex',
		'interwiki',
		'querycache',
		'objectcache',
		'group',
	);
	return $tables;
}
332
/**
 * Set up a temporary set of wiki tables to work with for the tests.
 * Currently this will only be done once per run, and any changes to
 * the db will be visible to later tests in the run.
 *
 * Nothing happens unless $wgDBprefix is exactly 'parsertest'. Each table
 * from listTables() is created as a TEMPORARY table modelled on the name
 * saved in $this->oldTableNames, then a few interwiki rows and one group
 * row are inserted.
 *
 * @access private
 */
function setupDatabase() {
	static $setupDB = false;
	global $wgDBprefix;

	# Make sure we don't mess with the live DB, and build the temporary
	# tables only once.
	if ( $setupDB || $wgDBprefix !== 'parsertest' ) {
		return;
	}

	$db =& wfGetDB( DB_MASTER );

	$tables = $this->listTables();

	# MySQL before 4.1 cannot do "CREATE TABLE ... LIKE". The version is
	# compared with version_compare(): a plain string comparison would sort
	# "10.x" before "4.1" and pick the legacy path for newer servers.
	$legacyMySQL = version_compare( $db->getServerVersion(), '4.1', '<' )
		&& stristr( $db->getSoftwareLink(), 'MySQL' );

	if ( !$legacyMySQL ) {
		# Database that supports CREATE TABLE ... LIKE
		global $wgDBtype;
		if( $wgDBtype == 'PostgreSQL' ) {
			$def = 'INCLUDING DEFAULTS';
		} else {
			$def = '';
		}
		foreach ( $tables as $tbl ) {
			$newTableName = $db->tableName( $tbl );
			$tableName = $this->oldTableNames[$tbl];
			$db->query("CREATE TEMPORARY TABLE $newTableName (LIKE $tableName $def)");
		}
	} else {
		# Hack for MySQL versions < 4.1, which don't support
		# "CREATE TABLE ... LIKE". Note that
		# "CREATE TEMPORARY TABLE ... SELECT * FROM ... LIMIT 0"
		# would not create the indexes we need....
		foreach ( $tables as $tbl ) {
			$res = $db->query("SHOW CREATE TABLE {$this->oldTableNames[$tbl]}");
			$row = $db->fetchRow($res);
			$create = $row[1];
			# Rewrite the statement so it creates the prefixed temporary table.
			$create_tmp = preg_replace('/CREATE TABLE `(.*?)`/', 'CREATE TEMPORARY TABLE `'
				. $wgDBprefix . $tbl .'`', $create);
			if ($create === $create_tmp) {
				# Couldn't do replacement
				die("could not create temporary table $tbl");
			}
			$db->query($create_tmp);
		}
	}

	# Hack: insert a few Wikipedia in-project interwiki prefixes,
	# for testing inter-language links
	$db->insert( 'interwiki', array(
		array( 'iw_prefix' => 'Wikipedia',
		       'iw_url' => 'http://en.wikipedia.org/wiki/$1',
		       'iw_local' => 0 ),
		array( 'iw_prefix' => 'MeatBall',
		       'iw_url' => 'http://www.usemod.com/cgi-bin/mb.pl?$1',
		       'iw_local' => 0 ),
		array( 'iw_prefix' => 'zh',
		       'iw_url' => 'http://zh.wikipedia.org/wiki/$1',
		       'iw_local' => 1 ),
		array( 'iw_prefix' => 'es',
		       'iw_url' => 'http://es.wikipedia.org/wiki/$1',
		       'iw_local' => 1 ),
		array( 'iw_prefix' => 'fr',
		       'iw_url' => 'http://fr.wikipedia.org/wiki/$1',
		       'iw_local' => 1 ),
		array( 'iw_prefix' => 'ru',
		       'iw_url' => 'http://ru.wikipedia.org/wiki/$1',
		       'iw_local' => 1 ),
		) );

	# Hack: initialize a group
	$db->insert( 'group', array(
		'group_id' => 1,
		'group_name' => 'Anonymous',
		'group_description' => 'Anonymous users',
		'group_rights' => 'read' ) );

	$setupDB = true;
}
416
/**
 * Create a dummy uploads directory which will contain a couple
 * of files in order to pass existence tests.
 *
 * The directory is /tmp/mwParser-<random>-images and receives the file
 * 3/3a/Foobar.jpg. If a directory or the file cannot be created the
 * script stops with a message, rather than carrying on and writing to an
 * invalid handle.
 *
 * @return string The directory
 * @access private
 */
function setupUploadDir() {
	$dir = "/tmp/mwParser-" . mt_rand() . "-images";
	$leaf = $dir . '/3/3a';

	# Parents first: mkdir() is called without the recursive flag.
	foreach( array( $dir, $dir . '/3', $leaf ) as $path ) {
		if( !mkdir( $path ) ) {
			die( "Couldn't create directory $path\n" );
		}
	}

	$file = $leaf . '/Foobar.jpg';
	$f = fopen( $file, 'wb' );
	if( !$f ) {
		die( "Couldn't create file $file\n" );
	}
	fwrite( $f, 'Dummy file' );
	fclose( $f );
	return $dir;
}
433
/**
 * Undo the per-test environment: write every value kept in
 * $this->savedGlobals back into $GLOBALS and, if a dummy upload
 * directory is active, remove it and forget it.
 *
 * @access private
 */
function teardownGlobals() {
	foreach( array_keys( $this->savedGlobals ) as $name ) {
		$GLOBALS[$name] = $this->savedGlobals[$name];
	}

	if( !isset( $this->uploadDir ) ) {
		return;
	}
	$this->teardownUploadDir( $this->uploadDir );
	unset( $this->uploadDir );
}
449
/**
 * Remove the dummy uploads directory: the Foobar.jpg placeholder, its two
 * parent directories, the thumb/ directories if they exist, and finally
 * the directory itself.
 *
 * @param string $dir The directory to remove
 * @access private
 */
function teardownUploadDir( $dir ) {
	unlink( $dir . '/3/3a/Foobar.jpg' );

	# Deepest first, so each directory is empty when it is removed.
	foreach( array( '/3/3a', '/3' ) as $sub ) {
		rmdir( $dir . $sub );
	}

	# The thumb directories may be absent; their warnings are suppressed.
	foreach( array( '/thumb/3/39', '/thumb/3', '/thumb' ) as $sub ) {
		@rmdir( $dir . $sub );
	}

	rmdir( "$dir" );
}
463
/**
 * Print the "Running test $desc... " lead-in, without a trailing newline.
 *
 * @param string $desc The test name
 * @access private
 */
function showTesting( $desc ) {
	echo 'Running test ', $desc, '... ';
}
471
/**
 * Report a passed test: prints "PASSED" (color code '1;32') followed by a
 * newline, unless quiet mode is on.
 *
 * @param string $desc The test name (not used in the message)
 * @return bool Always true
 * @access private
 */
function showSuccess( $desc ) {
	if( $this->quiet ) {
		return true;
	}
	$colorOn = $this->termColor( '1;32' );
	$colorOff = $this->termReset();
	print $colorOn . 'PASSED' . $colorOff . "\n";
	return true;
}
485
/**
 * Report a failed test: prints "FAILED!", then a diff of expected against
 * actual output when $this->showDiffs is set, then the XML error when the
 * actual output is not well-formed.
 *
 * @param string $desc The test name
 * @param string $result Expected HTML output
 * @param string $html Actual HTML output
 * @return bool Always false
 * @access private
 */
function showFailure( $desc, $result, $html ) {
	# In quiet mode the 'Running test' lead-in was held back in case the
	# test succeeded, so it has to be printed now.
	if( $this->quiet ) {
		$this->showTesting( $desc );
	}

	$colorOn = $this->termColor( '1;31' );
	$colorOff = $this->termReset();
	print $colorOn . 'FAILED!' . $colorOff . "\n";

	if( $this->showDiffs ) {
		print $this->quickDiff( $result, $html );
	}

	$isWellFormed = $this->wellFormed( $html );
	if( !$isWellFormed ) {
		print 'XML error: ' . $this->mXmlError . "\n";
	}
	return false;
}
511
/**
 * Diff two strings with the external 'diff -au' command and return the
 * (colorized) result. Both strings are written to temporary files under
 * /tmp, which are deleted again afterwards.
 * Requires writable /tmp directory and a 'diff' command in the PATH.
 *
 * @param string $input Text for the "-expected" side of the diff
 * @param string $output Text for the "-actual" side of the diff
 * @return string
 * @access private
 */
function quickDiff( $input, $output ) {
	$base = '/tmp/mwParser-' . mt_rand();
	$expectedFile = $base . '-expected';
	$actualFile = $base . '-actual';

	$this->dumpToFile( $input, $expectedFile );
	$this->dumpToFile( $output, $actualFile );

	$rawDiff = shell_exec( "diff -au $expectedFile $actualFile" );

	unlink( $expectedFile );
	unlink( $actualFile );

	return $this->colorDiff( $rawDiff );
}
536
/**
 * Write a string, followed by one newline, to the named file.
 * The file is opened in "wt" mode, so existing content is replaced.
 *
 * @param string $data
 * @param string $filename
 * @access private
 */
function dumpToFile( $data, $filename ) {
	$handle = fopen( $filename, "wt" );
	fputs( $handle, "$data\n" );
	fclose( $handle );
}
549
/**
 * Build the ANSI escape sequence that switches to the given text
 * attributes/color. In light-color mode the code is prefixed with "1;".
 * Returns an empty string if color output is disabled.
 *
 * @param string $color Semicolon-separated list of attribute/color codes
 * @return string
 * @access private
 */
function termColor( $color ) {
	if( !$this->color ) {
		return '';
	}
	$prefix = $this->lightcolor ? '1;' : '';
	return "\x1b[" . $prefix . $color . 'm';
}
565
/**
 * Build the ANSI escape sequence that restores the default text
 * attributes. Returns an empty string if color output is disabled.
 *
 * @return string
 * @access private
 */
function termReset() {
	if( $this->color ) {
		return "\x1b[0m";
	}
	return '';
}
576
/**
 * Colorize unified diff output if set for ANSI color output.
 * Every line starting with "-" is wrapped in color code 34 (blue) and
 * every line starting with "+" in color code 31 (red).
 *
 * @param string $text
 * @return string
 * @access private
 */
function colorDiff( $text ) {
	$reset = $this->termReset();

	$patterns = array(
		'/^(-.*)$/m',
		'/^(\+.*)$/m',
	);
	$replacements = array(
		$this->termColor( 34 ) . '$1' . $reset,
		$this->termColor( 31 ) . '$1' . $reset,
	);

	return preg_replace( $patterns, $replacements, $text );
}
592
/**
 * Insert a temporary test article. The test globals are set up for the
 * duration of the insert and torn down again afterwards. An invalid
 * title, or a title that already has an article ID, stops the script.
 *
 * @param string $name the title, including any prefix
 * @param string $text the article text
 * @param int $line the input line number, for reporting errors
 * @access private
 */
function addArticle($name, $text, $line) {
	$this->setupGlobals();

	$title = Title::newFromText( $name );
	if ( $title === null ) {
		die( "invalid title at line $line\n" );
	}

	# A non-zero article ID means the page already exists.
	if ( $title->getArticleID( GAID_FOR_UPDATE ) != 0 ) {
		die( "duplicate article at line $line\n" );
	}

	$article = new Article($title);
	$article->insertNewArticle($text, '', false, false );

	$this->teardownGlobals();
}
617
/**
 * Pass text through Parser::tidy() when the $wgUseTidy global is true;
 * otherwise return it untouched.
 *
 * @param string $text the text to tidy
 * @return string
 * @access private
 */
function tidy( $text ) {
	global $wgUseTidy;

	if ( !$wgUseTidy ) {
		return $text;
	}
	return Parser::tidy($text);
}
634
/**
 * Check that a fragment parses as XML once it is wrapped in an XHTML 1.0
 * Transitional doctype and an <html> root element.
 *
 * On failure, $this->mXmlError is set to the parser's error string, the
 * byte position and a fragment of the text around that position.
 *
 * @param string $text The fragment to check
 * @return bool True if the wrapped text parsed without error
 * @access private
 */
function wellFormed( $text ) {
	$doctype =
		'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" ' .
		'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
	$html = $doctype . '<html>' . $text . '</html>';

	$parser = xml_parser_create( "UTF-8" );

	# case folding violates XML standard, turn it off
	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

	$parsedOk = (bool)xml_parse( $parser, $html, true );
	if( !$parsedOk ) {
		$err = xml_error_string( xml_get_error_code( $parser ) );
		$position = xml_get_current_byte_index( $parser );
		$fragment = $this->extractFragment( $html, $position );
		$this->mXmlError = "$err at byte $position:\n$fragment";
	}

	xml_parser_free( $parser );
	return $parsedOk;
}
659
/**
 * Build a two-line excerpt of $text around a byte position: the first line
 * shows up to 10 bytes before the position, the byte at the position and
 * the 9 bytes after it, wrapped in "..."; the second line carries a "^"
 * under the byte at the position. Newlines in the excerpt are shown as
 * spaces so it stays on one line.
 *
 * @param string $text The full text
 * @param int $position Byte offset to highlight
 * @return string
 * @access private
 */
function extractFragment( $text, $position ) {
	$prefix = '...';
	$start = max( 0, $position - 10 );
	$before = $position - $start;
	$fragment = $prefix .
		$this->termColor( 34 ) .
		substr( $text, $start, $before ) .
		$this->termColor( 0 ) .
		$this->termColor( 31 ) .
		$this->termColor( 1 ) .
		substr( $text, $position, 1 ) .
		$this->termColor( 0 ) .
		$this->termColor( 34 ) .
		substr( $text, $position + 1, 9 ) .
		$this->termColor( 0 ) .
		'...';
	$display = str_replace( "\n", ' ', $fragment );
	# Pad by the width of the leading "..." plus the bytes shown before the
	# position, so the caret sits directly under the highlighted byte (the
	# escape sequences occupy no columns on a terminal).
	$caret = str_repeat( ' ', strlen( $prefix ) + $before ) .
		$this->termColor( 31 ) .
		'^' .
		$this->termColor( 0 );
	return "$display\n$caret";
}
683
684 }
685
# --help: print the usage summary and exit before any test is set up.
if( isset( $options['help'] ) ) {
	print <<<USAGE
MediaWiki $wgVersion parser test suite
Usage: php parserTests.php [--quick] [--quiet] [--color[=(yes|no|light)]]
[--regex <expression>] [--help]
Options:
--quick Suppress diff output of failed tests
--quiet Suppress notification of passed tests (shows only failed tests)
--color Override terminal detection and force color output on or off
'light' option is similar to 'yes' but with color for dark backgrounds
--regex Only run tests whose descriptions which match given regex
--help Show this help message


USAGE;
	exit( 0 );
}
703
# There is a convention that the parser should never
# refer to $wgTitle directly, but instead use the title
# passed to it.
$wgTitle = Title::newFromText( 'Parser test script do not use' );
$tester =& new ParserTest();

# Note: the command line setup changes the current working directory
# to the parent, which is why we have to put the subdir here:
$ok = $tester->runTestsFromFile( 'maintenance/parserTests.txt' );

# Exit with 0 when every test passed and 1 otherwise. A status of -1 would
# reach the shell as 255, which PHP reserves for its own use.
exit( $ok ? 0 : 1 );
715
716 ?>