From 8eb0cdfdface85fc76a81cf0eedc01103b0cdf9c Mon Sep 17 00:00:00 2001 From: Antoine Musso Date: Sat, 21 Apr 2007 23:25:36 +0000 Subject: [PATCH] Move tidy callbacks from the Parser class to a new Tidy class. This is to keep Parser.php at a reasonable size. --- includes/AutoLoader.php | 1 + includes/Parser.php | 110 +----------------------------------- includes/Tidy.php | 106 ++++++++++++++++++++++++++++++++++ maintenance/parserTests.inc | 2 +- 4 files changed, 109 insertions(+), 110 deletions(-) create mode 100644 includes/Tidy.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 23d0384a22..596e6f805a 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -130,6 +130,7 @@ function __autoload($className) { 'ReverseChronologicalPager' => 'includes/Pager.php', 'TablePager' => 'includes/Pager.php', 'Parser' => 'includes/Parser.php', + 'Tidy' => 'includes/Tidy.php', 'ParserOutput' => 'includes/ParserOutput.php', 'ParserOptions' => 'includes/ParserOptions.php', 'ParserCache' => 'includes/ParserCache.php', diff --git a/includes/Parser.php b/includes/Parser.php index 7f34fa1e7b..c58460467c 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -330,7 +330,7 @@ class Parser $text = Sanitizer::normalizeCharReferences( $text ); if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = Parser::tidy($text); + $text = Tidy::RunOn($text); } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) @@ -677,114 +677,6 @@ class Parser return $rnd; } - /** - * Interface with html tidy, used if $wgUseTidy = true. - * If tidy isn't able to correct the markup, the original will be - * returned in all its glory with a warning comment appended. - * - * Either the external tidy program or the in-process tidy extension - * will be used depending on availability. Override the default - * $wgTidyInternal setting to disable the internal if it's not working. - * - * @param string $text Hideous HTML input - * @return string Corrected HTML output - * @public - * @static - */ - function tidy( $text ) { - global $wgTidyInternal; - $wrappedtext = ''. -'test'.$text.''; - if( $wgTidyInternal ) { - $correctedtext = Parser::internalTidy( $wrappedtext ); - } else { - $correctedtext = Parser::externalTidy( $wrappedtext ); - } - if( is_null( $correctedtext ) ) { - wfDebug( "Tidy error detected!\n" ); - return $text . "\n\n"; - } - return $correctedtext; - } - - /** - * Spawn an external HTML tidy process and get corrected markup back from it. - * - * @private - * @static - */ - function externalTidy( $text ) { - global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - $fname = 'Parser::externalTidy'; - wfProfileIn( $fname ); - - $cleansource = ''; - $opts = ' -utf8'; - - $descriptorspec = array( - 0 => array('pipe', 'r'), - 1 => array('pipe', 'w'), - 2 => array('file', '/dev/null', 'a') // FIXME: this line in UNIX-specific, it generates a warning on Windows, because /dev/null is not a valid Windows file. - ); - $pipes = array(); - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); - if (is_resource($process)) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite($pipes[0], $text); - fclose($pipes[0]); - while (!feof($pipes[1])) { - $cleansource .= fgets($pipes[1], 1024); - } - fclose($pipes[1]); - proc_close($process); - } - - wfProfileOut( $fname ); - - if( $cleansource == '' && $text != '') { - // Some kind of error happened, so we couldn't get the corrected text. - // Just give up; we'll use the source text and append a warning. - return null; - } else { - return $cleansource; - } - } - - /** - * Use the HTML tidy PECL extension to use the tidy library in-process, - * saving the overhead of spawning a new process. Currently written to - * the PHP 4.3.x version of the extension, may not work on PHP 5. - * - * 'pear install tidy' should be able to compile the extension module. - * - * @private - * @static - */ - function internalTidy( $text ) { - global $wgTidyConf; - $fname = 'Parser::internalTidy'; - wfProfileIn( $fname ); - - tidy_load_config( $wgTidyConf ); - tidy_set_encoding( 'utf8' ); - tidy_parse_string( $text ); - tidy_clean_repair(); - if( tidy_get_status() == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; - } else { - $cleansource = tidy_get_output(); - } - wfProfileOut( $fname ); - return $cleansource; - } - /** * parse the wiki syntax used to render tables * diff --git a/includes/Tidy.php b/includes/Tidy.php new file mode 100644 index 0000000000..a198b190d5 --- /dev/null +++ b/includes/Tidy.php @@ -0,0 +1,106 @@ +'. +'test'.$text.''; + if( $wgTidyInternal ) { + $correctedtext = Tidy::internal( $wrappedtext ); + } else { + $correctedtext = Tidy::external( $wrappedtext ); + } + if( is_null( $correctedtext ) ) { + wfDebug( "Tidy error detected!\n" ); + return $text . "\n\n"; + } + return $correctedtext; + } + + /** + * Spawn an external HTML tidy process and get corrected markup back from it. + */ + private static function external( $text ) { + global $wgTidyConf, $wgTidyBin, $wgTidyOpts; + $fname = 'Parser::externalTidy'; + wfProfileIn( $fname ); + + $cleansource = ''; + $opts = ' -utf8'; + + $descriptorspec = array( + 0 => array('pipe', 'r'), + 1 => array('pipe', 'w'), + 2 => array('file', '/dev/null', 'a') // FIXME: this line in UNIX-specific, it generates a warning on Windows, because /dev/null is not a valid Windows file. + ); + $pipes = array(); + $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); + if (is_resource($process)) { + // Theoretically, this style of communication could cause a deadlock + // here. If the stdout buffer fills up, then writes to stdin could + // block. This doesn't appear to happen with tidy, because tidy only + // writes to stdout after it's finished reading from stdin. Search + // for tidyParseStdin and tidySaveStdout in console/tidy.c + fwrite($pipes[0], $text); + fclose($pipes[0]); + while (!feof($pipes[1])) { + $cleansource .= fgets($pipes[1], 1024); + } + fclose($pipes[1]); + proc_close($process); + } + + wfProfileOut( $fname ); + + if( $cleansource == '' && $text != '') { + // Some kind of error happened, so we couldn't get the corrected text. + // Just give up; we'll use the source text and append a warning. + return null; + } else { + return $cleansource; + } + } + + /** + * Use the HTML tidy PECL extension to use the tidy library in-process, + * saving the overhead of spawning a new process. Currently written to + * the PHP 4.3.x version of the extension, may not work on PHP 5. + * + * 'pear install tidy' should be able to compile the extension module. + */ + private static function internal( $text ) { + global $wgTidyConf; + $fname = 'Parser::internalTidy'; + wfProfileIn( $fname ); + + tidy_load_config( $wgTidyConf ); + tidy_set_encoding( 'utf8' ); + tidy_parse_string( $text ); + tidy_clean_repair(); + if( tidy_get_status() == 2 ) { + // 2 is magic number for fatal error + // http://www.php.net/manual/en/function.tidy-get-status.php + $cleansource = null; + } else { + $cleansource = tidy_get_output(); + } + wfProfileOut( $fname ); + return $cleansource; + } +} +?> diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc index ddf8b89acf..d0a5f6fc95 100644 --- a/maintenance/parserTests.inc +++ b/maintenance/parserTests.inc @@ -783,7 +783,7 @@ class ParserTest { private function tidy( $text ) { global $wgUseTidy; if ($wgUseTidy) { - $text = Parser::tidy($text); + $text = Tidy::RunOn($text); } return $text; } -- 2.20.1