From 3d0b03d0ebc1a5b374760ba480a350b4ea44387b Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 23 Apr 2007 18:52:51 +0000 Subject: [PATCH] Revert r21461 for now; a cleanup patch which doesn't change functionality but makes more things to clean up -- ugly function names, incorrect profiling function titles... --- includes/AutoLoader.php | 1 - includes/Parser.php | 110 +++++++++++++++++++++++++++++++++++- includes/Tidy.php | 106 ---------------------------------- maintenance/parserTests.inc | 2 +- 4 files changed, 110 insertions(+), 109 deletions(-) diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index a83216f153..72a71c71cc 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -130,7 +130,6 @@ function __autoload($className) { 'ReverseChronologicalPager' => 'includes/Pager.php', 'TablePager' => 'includes/Pager.php', 'Parser' => 'includes/Parser.php', - 'Tidy' => 'includes/Tidy.php', 'ParserOutput' => 'includes/ParserOutput.php', 'ParserOptions' => 'includes/ParserOptions.php', 'ParserCache' => 'includes/ParserCache.php', diff --git a/includes/Parser.php b/includes/Parser.php index c58460467c..7f34fa1e7b 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -330,7 +330,7 @@ class Parser $text = Sanitizer::normalizeCharReferences( $text ); if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = Tidy::RunOn($text); + $text = Parser::tidy($text); } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) @@ -677,6 +677,114 @@ class Parser return $rnd; } + /** + * Interface with html tidy, used if $wgUseTidy = true. + * If tidy isn't able to correct the markup, the original will be + * returned in all its glory with a warning comment appended. + * + * Either the external tidy program or the in-process tidy extension + * will be used depending on availability. Override the default + * $wgTidyInternal setting to disable the internal if it's not working. + * + * @param string $text Hideous HTML input + * @return string Corrected HTML output + * @public + * @static + */ + function tidy( $text ) { + global $wgTidyInternal; + $wrappedtext = ''. +'test'.$text.''; + if( $wgTidyInternal ) { + $correctedtext = Parser::internalTidy( $wrappedtext ); + } else { + $correctedtext = Parser::externalTidy( $wrappedtext ); + } + if( is_null( $correctedtext ) ) { + wfDebug( "Tidy error detected!\n" ); + return $text . "\n\n"; + } + return $correctedtext; + } + + /** + * Spawn an external HTML tidy process and get corrected markup back from it. + * + * @private + * @static + */ + function externalTidy( $text ) { + global $wgTidyConf, $wgTidyBin, $wgTidyOpts; + $fname = 'Parser::externalTidy'; + wfProfileIn( $fname ); + + $cleansource = ''; + $opts = ' -utf8'; + + $descriptorspec = array( + 0 => array('pipe', 'r'), + 1 => array('pipe', 'w'), + 2 => array('file', '/dev/null', 'a') // FIXME: this line in UNIX-specific, it generates a warning on Windows, because /dev/null is not a valid Windows file. + ); + $pipes = array(); + $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); + if (is_resource($process)) { + // Theoretically, this style of communication could cause a deadlock + // here. If the stdout buffer fills up, then writes to stdin could + // block. This doesn't appear to happen with tidy, because tidy only + // writes to stdout after it's finished reading from stdin. Search + // for tidyParseStdin and tidySaveStdout in console/tidy.c + fwrite($pipes[0], $text); + fclose($pipes[0]); + while (!feof($pipes[1])) { + $cleansource .= fgets($pipes[1], 1024); + } + fclose($pipes[1]); + proc_close($process); + } + + wfProfileOut( $fname ); + + if( $cleansource == '' && $text != '') { + // Some kind of error happened, so we couldn't get the corrected text. + // Just give up; we'll use the source text and append a warning. + return null; + } else { + return $cleansource; + } + } + + /** + * Use the HTML tidy PECL extension to use the tidy library in-process, + * saving the overhead of spawning a new process. Currently written to + * the PHP 4.3.x version of the extension, may not work on PHP 5. + * + * 'pear install tidy' should be able to compile the extension module. + * + * @private + * @static + */ + function internalTidy( $text ) { + global $wgTidyConf; + $fname = 'Parser::internalTidy'; + wfProfileIn( $fname ); + + tidy_load_config( $wgTidyConf ); + tidy_set_encoding( 'utf8' ); + tidy_parse_string( $text ); + tidy_clean_repair(); + if( tidy_get_status() == 2 ) { + // 2 is magic number for fatal error + // http://www.php.net/manual/en/function.tidy-get-status.php + $cleansource = null; + } else { + $cleansource = tidy_get_output(); + } + wfProfileOut( $fname ); + return $cleansource; + } + /** * parse the wiki syntax used to render tables * diff --git a/includes/Tidy.php b/includes/Tidy.php index a198b190d5..e69de29bb2 100644 --- a/includes/Tidy.php +++ b/includes/Tidy.php @@ -1,106 +0,0 @@ -'. -'test'.$text.''; - if( $wgTidyInternal ) { - $correctedtext = Tidy::internal( $wrappedtext ); - } else { - $correctedtext = Tidy::external( $wrappedtext ); - } - if( is_null( $correctedtext ) ) { - wfDebug( "Tidy error detected!\n" ); - return $text . "\n\n"; - } - return $correctedtext; - } - - /** - * Spawn an external HTML tidy process and get corrected markup back from it. - */ - private static function external( $text ) { - global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - $fname = 'Parser::externalTidy'; - wfProfileIn( $fname ); - - $cleansource = ''; - $opts = ' -utf8'; - - $descriptorspec = array( - 0 => array('pipe', 'r'), - 1 => array('pipe', 'w'), - 2 => array('file', '/dev/null', 'a') // FIXME: this line in UNIX-specific, it generates a warning on Windows, because /dev/null is not a valid Windows file. - ); - $pipes = array(); - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); - if (is_resource($process)) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite($pipes[0], $text); - fclose($pipes[0]); - while (!feof($pipes[1])) { - $cleansource .= fgets($pipes[1], 1024); - } - fclose($pipes[1]); - proc_close($process); - } - - wfProfileOut( $fname ); - - if( $cleansource == '' && $text != '') { - // Some kind of error happened, so we couldn't get the corrected text. - // Just give up; we'll use the source text and append a warning. - return null; - } else { - return $cleansource; - } - } - - /** - * Use the HTML tidy PECL extension to use the tidy library in-process, - * saving the overhead of spawning a new process. Currently written to - * the PHP 4.3.x version of the extension, may not work on PHP 5. - * - * 'pear install tidy' should be able to compile the extension module. - */ - private static function internal( $text ) { - global $wgTidyConf; - $fname = 'Parser::internalTidy'; - wfProfileIn( $fname ); - - tidy_load_config( $wgTidyConf ); - tidy_set_encoding( 'utf8' ); - tidy_parse_string( $text ); - tidy_clean_repair(); - if( tidy_get_status() == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; - } else { - $cleansource = tidy_get_output(); - } - wfProfileOut( $fname ); - return $cleansource; - } -} -?> diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc index d0a5f6fc95..ddf8b89acf 100644 --- a/maintenance/parserTests.inc +++ b/maintenance/parserTests.inc @@ -783,7 +783,7 @@ class ParserTest { private function tidy( $text ) { global $wgUseTidy; if ($wgUseTidy) { - $text = Tidy::RunOn($text); + $text = Parser::tidy($text); } return $text; } -- 2.20.1