3 * Interface with html tidy, used if $wgUseTidy = true from Parser::parse.
4 * If tidy isn't able to correct the markup, the original will be
5 * returned in all its glory with a warning comment appended.
10 * Either the external tidy program or the in-process tidy extension
11 * will be used depending on availability. Override the default
12 * $wgTidyInternal setting to disable the internal if it's not working.
13 * @param string $text Hideous HTML input
14 * @return string Corrected HTML output
/**
 * Run HTML tidy over a fragment and hand back whatever the backend produced.
 *
 * The fragment is wrapped in a minimal XHTML 1.0 Transitional document and
 * passed to the backend selected by $wgTidyInternal. A backend signals
 * failure by returning null; in that case the untouched input is returned
 * with a warning comment appended.
 *
 * @param string $text Hideous HTML input
 * @return string Backend output, or $text plus a warning comment on failure
 */
public static function RunOn( $text ) {
	global $wgTidyInternal;

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' .
		'<head><title>test</title></head><body>' . $text . '</body></html>';

	// Exactly one backend runs: the in-process extension when enabled,
	// otherwise the external binary.
	if ( $wgTidyInternal ) {
		$correctedtext = self::internal( $wrappedtext );
	} else {
		$correctedtext = self::external( $wrappedtext );
	}

	if ( $correctedtext === null ) {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	}

	return $correctedtext;
}
36 * Spawn an external HTML tidy process and get corrected markup back from it.
/**
 * Spawn an external HTML tidy process and get corrected markup back from it.
 *
 * The document is written to the child's stdin and its stdout is collected;
 * stderr is discarded.
 *
 * @param string $text Document to feed to tidy
 * @return string|null Tidy's stdout, or null when nothing came back for a
 *                     non-empty input
 */
private static function external( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	$fname = 'Parser::externalTidy';
	wfProfileIn( $fname );

	$cleansource = '';
	// NOTE(review): $opts is not assigned in the visible source; ' -utf8'
	// is assumed here to match the 'utf8' encoding used by internal().
	// Confirm against the original file.
	$opts = ' -utf8';

	// Discard stderr on whichever null device the platform provides;
	// a hard-coded /dev/null raises a warning on Windows.
	$nullDevice = ( strtoupper( substr( PHP_OS, 0, 3 ) ) === 'WIN' ) ? 'NUL' : '/dev/null';

	$descriptorspec = array(
		0 => array( 'pipe', 'r' ),
		1 => array( 'pipe', 'w' ),
		2 => array( 'file', $nullDevice, 'a' ),
	);
	$pipes = array();
	$process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
	if ( is_resource( $process ) ) {
		// Theoretically, this style of communication could cause a deadlock
		// here. If the stdout buffer fills up, then writes to stdin could
		// block. This doesn't appear to happen with tidy, because tidy only
		// writes to stdout after it's finished reading from stdin. Search
		// for tidyParseStdin and tidySaveStdout in console/tidy.c
		fwrite( $pipes[0], $text );
		// Closing stdin delivers EOF so tidy stops reading and starts writing.
		fclose( $pipes[0] );

		// Read stdout to EOF in one call; a read failure leaves the
		// result empty instead of looping.
		$output = stream_get_contents( $pipes[1] );
		if ( $output !== false ) {
			$cleansource = $output;
		}
		fclose( $pipes[1] );
		proc_close( $process );
	}

	wfProfileOut( $fname );

	if ( $cleansource == '' && $text != '' ) {
		// Some kind of error happened, so we couldn't get the corrected text.
		// Just give up; the caller falls back to the source text and
		// appends a warning.
		return null;
	}

	return $cleansource;
}
80 * Use the HTML tidy PECL extension to use the tidy library in-process,
81 * saving the overhead of spawning a new process. Currently written to
82 * the PHP 4.3.x version of the extension, may not work on PHP 5.
84 * 'pear install tidy' should be able to compile the extension module.
/**
 * Use the tidy extension to clean up markup in-process, saving the overhead
 * of spawning a new process.
 *
 * Uses the object API of the tidy extension (PHP 5+): the configuration file
 * named by $wgTidyConf and the 'utf8' encoding are passed straight to
 * parseString().
 *
 * @param string $text Document to clean up
 * @return string|null Repaired markup, or null when tidy reports status 2
 */
private static function internal( $text ) {
	global $wgTidyConf;
	$fname = 'Parser::internalTidy';
	wfProfileIn( $fname );

	$tidy = new tidy;
	// A string config argument is treated as the path of a configuration file.
	$tidy->parseString( $text, $wgTidyConf, 'utf8' );
	// NOTE(review): the repair step is not in the visible source (a line is
	// missing between parse and status check); it is assumed here because
	// the caller expects corrected markup. Confirm against the original file.
	$tidy->cleanRepair();

	if ( $tidy->getStatus() === 2 ) {
		// 2 is magic number for fatal error
		// http://www.php.net/manual/en/function.tidy-get-status.php
		$cleansource = null;
	} else {
		$cleansource = tidy_get_output( $tidy );
	}

	wfProfileOut( $fname );
	return $cleansource;
}