Move tidy callbacks from the Parser class to a new Tidy class. This is to keep
[lhc/web/wiklou.git] / includes / Tidy.php
1 <?php
2 /**
3 * Interface with html tidy, used if $wgUseTidy = true from Parser::parse.
4 * If tidy isn't able to correct the markup, the original will be
5 * returned in all its glory with a warning comment appended.
6 *
7 */
class Tidy {
	/**
	 * Run an HTML fragment through HTML Tidy and return the cleaned-up markup.
	 *
	 * The fragment is wrapped in a minimal XHTML 1.0 Transitional document
	 * before being handed to tidy. Either the external tidy program or the
	 * in-process tidy extension is used, depending on $wgTidyInternal; set
	 * $wgTidyInternal to false to disable the internal one if it's not working.
	 *
	 * If the chosen backend reports failure (returns null), the original text
	 * is returned with a warning comment appended.
	 *
	 * @param string $text Hideous HTML input
	 * @return string Corrected HTML output
	 */
	public static function RunOn( $text ) {
		global $wgTidyInternal;

		$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
			' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
			'<head><title>test</title></head><body>'.$text.'</body></html>';

		if( $wgTidyInternal ) {
			$correctedtext = self::internal( $wrappedtext );
		} else {
			$correctedtext = self::external( $wrappedtext );
		}

		if( is_null( $correctedtext ) ) {
			wfDebug( "Tidy error detected!\n" );
			return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
		}
		return $correctedtext;
	}

	/**
	 * Spawn an external HTML tidy process and get corrected markup back from it.
	 *
	 * The command line is built from $wgTidyBin, $wgTidyConf and $wgTidyOpts,
	 * with ' -utf8' appended. The text is written to the process's stdin and
	 * the result is read from its stdout; stderr is discarded.
	 *
	 * @param string $text Complete HTML document to clean up
	 * @return string|null Tidy's output, or null if nothing came back for
	 *   non-empty input (process could not be started, or it produced no output)
	 */
	private static function external( $text ) {
		global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
		$fname = __METHOD__;
		wfProfileIn( $fname );

		$cleansource = '';
		$opts = ' -utf8';

		// stderr is thrown away. /dev/null is not a valid file on Windows and
		// would raise a warning there, so use the NUL device instead.
		$nullDevice = ( strncasecmp( PHP_OS, 'WIN', 3 ) === 0 ) ? 'NUL' : '/dev/null';

		$descriptorspec = array(
			0 => array( 'pipe', 'r' ),
			1 => array( 'pipe', 'w' ),
			2 => array( 'file', $nullDevice, 'a' )
		);
		$pipes = array();
		$process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
		if( is_resource( $process ) ) {
			// Theoretically, this style of communication could cause a deadlock
			// here. If the stdout buffer fills up, then writes to stdin could
			// block. This doesn't appear to happen with tidy, because tidy only
			// writes to stdout after it's finished reading from stdin. Search
			// for tidyParseStdin and tidySaveStdout in console/tidy.c
			fwrite( $pipes[0], $text );
			fclose( $pipes[0] );
			while( !feof( $pipes[1] ) ) {
				$cleansource .= fgets( $pipes[1], 1024 );
			}
			fclose( $pipes[1] );
			proc_close( $process );
		}

		wfProfileOut( $fname );

		if( $cleansource === '' && $text !== '' ) {
			// Some kind of error happened, so we couldn't get the corrected text.
			// Just give up; we'll use the source text and append a warning.
			return null;
		}
		return $cleansource;
	}

	/**
	 * Use the HTML tidy PHP extension to run the tidy library in-process,
	 * saving the overhead of spawning a new process.
	 *
	 * Written against the PHP 5 version of the extension, where
	 * tidy_parse_string() takes the configuration file and encoding directly
	 * and returns a tidy object that is passed to the other tidy_* calls.
	 *
	 * @param string $text Complete HTML document to clean up
	 * @return string|null Tidy's output, or null if the document could not be
	 *   parsed or tidy reported a fatal error
	 */
	private static function internal( $text ) {
		global $wgTidyConf;
		$fname = __METHOD__;
		wfProfileIn( $fname );

		$cleansource = null;
		// A string config argument is interpreted as a configuration file name.
		$tidy = tidy_parse_string( $text, $wgTidyConf, 'utf8' );
		if( $tidy !== false ) {
			tidy_clean_repair( $tidy );
			// 2 is magic number for fatal error
			// http://www.php.net/manual/en/function.tidy-get-status.php
			if( tidy_get_status( $tidy ) !== 2 ) {
				$cleansource = tidy_get_output( $tidy );
			}
		}

		wfProfileOut( $fname );
		return $cleansource;
	}
}
106 ?>