* Use in-process Tidy extension if available when $wgUseTidy is on
author Brion Vibber <brion@users.mediawiki.org>
Wed, 20 Apr 2005 21:58:57 +0000 (21:58 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Wed, 20 Apr 2005 21:58:57 +0000 (21:58 +0000)
It seems I forgot to commit this last night. Whoops!

includes/DefaultSettings.php
includes/Parser.php

index 15156b9..0c8bf01 100644 (file)
@@ -979,11 +979,18 @@ $wgRawHtml = false;
  * $wgTidyBin should be set to the path of the binary and
  * $wgTidyConf to the path of the configuration file.
  * $wgTidyOpts can include any number of parameters.
+ *
+ * $wgTidyInternal controls the use of the PECL extension to use an in-
+ *   process tidy library instead of spawning a separate program.
+ *   Normally you shouldn't need to override the setting except for
+ *   debugging. To install, use 'pear install tidy' and add a line
+ *   'extension=tidy.so' to php.ini.
  */
 $wgUseTidy = false;
 $wgTidyBin = 'tidy';
 $wgTidyConf = $IP.'/extensions/tidy/tidy.conf'; 
 $wgTidyOpts = '';
+$wgTidyInternal = function_exists( 'tidy_load_config' );
 
 /** See list of skins and their symbolic names in languagel/Language.php */
 $wgDefaultSkin = 'monobook';
index 9ad6ed8..7325323 100644 (file)
@@ -478,22 +478,50 @@ class Parser
        }
 
        /**
-        * interface with html tidy, used if $wgUseTidy = true
+        * Interface with html tidy, used if $wgUseTidy = true.
+        * If tidy isn't able to correct the markup, the original will be
+        * returned in all its glory with a warning comment appended.
         *
+        * Either the external tidy program or the in-process tidy extension
+        * will be used depending on availability. Override the default
+        * $wgTidyInternal setting to disable the internal if it's not working.
+        *
+        * @param string $text Hideous HTML input
+        * @return string Corrected HTML output
         * @access public
         * @static
         */
-       function tidy ( $text ) {
+       function tidy( $text ) {
+               global $wgTidyInternal;
+               $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+'<head><title>test</title></head><body>'.$text.'</body></html>';
+               if( $wgTidyInternal ) {
+                       $correctedtext = Parser::internalTidy( $wrappedtext );
+               } else {
+                       $correctedtext = Parser::externalTidy( $wrappedtext );
+               }
+               if( is_null( $correctedtext ) ) {
+                       wfDebug( "Tidy error detected!\n" );
+                       return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
+               }
+               return $correctedtext;
+       }
+       
+       /**
+        * Spawn an external HTML tidy process and get corrected markup back from it.
+        *
+        * @access private
+        * @static
+        */
+       function externalTidy( $text ) {
                global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
-               $fname = 'Parser::tidy';
+               $fname = 'Parser::externalTidy';
                wfProfileIn( $fname );
 
                $cleansource = '';
                $opts = ' -utf8';
 
-               $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
-' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
-'<head><title>test</title></head><body>'.$text.'</body></html>';
                $descriptorspec = array(
                        0 => array('pipe', 'r'),
                        1 => array('pipe', 'w'),
@@ -501,7 +529,7 @@ class Parser
                );
                $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
                if (is_resource($process)) {
-                       fwrite($pipes[0], $wrappedtext);
+                       fwrite($pipes[0], $text);
                        fclose($pipes[0]);
                        while (!feof($pipes[1])) {
                                $cleansource .= fgets($pipes[1], 1024);
@@ -513,13 +541,44 @@ class Parser
                wfProfileOut( $fname );
 
                if( $cleansource == '' && $text != '') {
-                       wfDebug( "Tidy error detected!\n" );
-                       return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
+                       // Some kind of error happened, so we couldn't get the corrected text.
+                       // Just give up; we'll use the source text and append a warning.
+                       return null;
                } else {
                        return $cleansource;
                }
        }
 
+       /**
+        * Use the HTML tidy PECL extension to use the tidy library in-process,
+        * saving the overhead of spawning a new process. Currently written to
+        * the PHP 4.3.x version of the extension, may not work on PHP 5.
+        *
+        * 'pear install tidy' should be able to compile the extension module.
+        *
+        * @access private
+        * @static
+        */
+       function internalTidy( $text ) {
+               global $wgTidyConf;
+               $fname = 'Parser::internalTidy';
+               wfProfileIn( $fname );
+               
+               tidy_load_config( $wgTidyConf );
+               tidy_set_encoding( 'utf8' );
+               tidy_parse_string( $text );
+               tidy_clean_repair();
+               if( tidy_get_status() == 2 ) {
+                       // 2 is magic number for fatal error
+                       // http://www.php.net/manual/en/function.tidy-get-status.php
+                       $cleansource = null;
+               } else {
+                       $cleansource = tidy_get_output();
+               }
+               wfProfileOut( $fname );
+               return $cleansource;
+       }
+
        /**
         * parse the wiki syntax used to render tables
         *