From: Brion Vibber Date: Wed, 20 Apr 2005 21:58:57 +0000 (+0000) Subject: * Use in-process Tidy extension if available when $wgUseTidy is on X-Git-Tag: 1.5.0alpha1~224 X-Git-Url: https://git.cyclocoop.org/%242?a=commitdiff_plain;h=8ce19a62762b198aa2d1a5ceb013d0a8b53dd4ab;p=lhc%2Fweb%2Fwiklou.git * Use in-process Tidy extension if available when $wgUseTidy is on It seems I forgot to commit this last night. Whoops! --- diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 15156b9498..0c8bf011c5 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -979,11 +979,18 @@ $wgRawHtml = false; * $wgTidyBin should be set to the path of the binary and * $wgTidyConf to the path of the configuration file. * $wgTidyOpts can include any number of parameters. + * + * $wgTidyInternal controls the use of the PECL extension to use an in- + * process tidy library instead of spawning a separate program. + * Normally you shouldn't need to override the setting except for + * debugging. To install, use 'pear install tidy' and add a line + * 'extension=tidy.so' to php.ini. */ $wgUseTidy = false; $wgTidyBin = 'tidy'; $wgTidyConf = $IP.'/extensions/tidy/tidy.conf'; $wgTidyOpts = ''; +$wgTidyInternal = function_exists( 'tidy_load_config' ); /** See list of skins and their symbolic names in languages/Language.php */ $wgDefaultSkin = 'monobook'; diff --git a/includes/Parser.php b/includes/Parser.php index 9ad6ed8e25..7325323a02 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -478,22 +478,50 @@ class Parser } /** - * interface with html tidy, used if $wgUseTidy = true + * Interface with html tidy, used if $wgUseTidy = true. + * If tidy isn't able to correct the markup, the original will be + * returned in all its glory with a warning comment appended. * + * Either the external tidy program or the in-process tidy extension + * will be used depending on availability. 
Override the default + * $wgTidyInternal setting to disable the internal if it's not working. + * + * @param string $text Hideous HTML input + * @return string Corrected HTML output * @access public * @static */ - function tidy ( $text ) { + function tidy( $text ) { + global $wgTidyInternal; + $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. +' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. +'<head><title>test</title></head><body>'.$text.'</body></html>'; + if( $wgTidyInternal ) { + $correctedtext = Parser::internalTidy( $wrappedtext ); + } else { + $correctedtext = Parser::externalTidy( $wrappedtext ); + } + if( is_null( $correctedtext ) ) { + wfDebug( "Tidy error detected!\n" ); + return $text . "\n<!-- Tidy found serious XHTML errors -->\n"; + } + return $correctedtext; + } + + /** + * Spawn an external HTML tidy process and get corrected markup back from it. + * + * @access private + * @static + */ + function externalTidy( $text ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - $fname = 'Parser::tidy'; + $fname = 'Parser::externalTidy'; wfProfileIn( $fname ); $cleansource = ''; $opts = ' -utf8'; - $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. -' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. -'<head><title>test</title></head><body>'.$text.'</body></html>'; $descriptorspec = array( 0 => array('pipe', 'r'), 1 => array('pipe', 'w'), @@ -501,7 +529,7 @@ class Parser ); $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); if (is_resource($process)) { - fwrite($pipes[0], $wrappedtext); + fwrite($pipes[0], $text); fclose($pipes[0]); while (!feof($pipes[1])) { $cleansource .= fgets($pipes[1], 1024); @@ -513,13 +541,44 @@ class Parser wfProfileOut( $fname ); if( $cleansource == '' && $text != '') { - wfDebug( "Tidy error detected!\n" ); - return $text . "\n<!-- Tidy found serious XHTML errors -->\n"; + // Some kind of error happened, so we couldn't get the corrected text. + // Just give up; we'll use the source text and append a warning. + return null; } else { return $cleansource; } } + /** + * Use the HTML tidy PECL extension to use the tidy library in-process, + * saving the overhead of spawning a new process. Currently written to + * the PHP 4.3.x version of the extension, may not work on PHP 5. 
+ * + * 'pear install tidy' should be able to compile the extension module. + * + * @access private + * @static + */ + function internalTidy( $text ) { + global $wgTidyConf; + $fname = 'Parser::internalTidy'; + wfProfileIn( $fname ); + + tidy_load_config( $wgTidyConf ); + tidy_set_encoding( 'utf8' ); + tidy_parse_string( $text ); + tidy_clean_repair(); + if( tidy_get_status() == 2 ) { + // 2 is magic number for fatal error + // http://www.php.net/manual/en/function.tidy-get-status.php + $cleansource = null; + } else { + $cleansource = tidy_get_output(); + } + wfProfileOut( $fname ); + return $cleansource; + } + /** * parse the wiki syntax used to render tables *