From: Gabriel Wicke Date: Thu, 6 May 2004 12:50:04 +0000 (+0000) Subject: New tidy function that uses html tidy (http://tidy.sf.net) to make sure the output... X-Git-Tag: 1.3.0beta1~136 X-Git-Url: http://git.cyclocoop.org/geomaker.php?a=commitdiff_plain;h=c6fe32ac5bb25d91c0c87c2314281ff8ba97a229;p=lhc%2Fweb%2Fwiklou.git New tidy function that uses html tidy (tidy.sf.net) to make sure the output is valid xhtml/xml Disabled by default, set $wgUseTidy = true to enable Possible improvements: * Tweaking it to work on windows (don't know if that's possible) * use the php5 built-in tidy module if available, that seems to allow a fine-grained config on which tags are allowed as well (see http://www.php.net/manual/en/ref.tidy.php) --- diff --git a/includes/Parser.php b/includes/Parser.php index 159b271b4d..5c88ba3ed0 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -86,6 +86,7 @@ class Parser # function parse( $text, &$title, $options, $linestart = true, $clearState = true ) { + global $wgUseTidy; $fname = "Parser::parse"; wfProfileIn( $fname ); @@ -102,20 +103,29 @@ class Parser $text = $this->internalParse( $text, $linestart ); $text = $this->unstrip( $text, $this->mStripState ); # Clean up special characters, only run once, next-to-last before doBlockLevels - $fixtags = array( - "/
<hr *>/i" => '<hr/>', - "/<br *>/i" => '<br/>', - "/<center *>/i"=>'<div class="center">', - "/<\\/center *>/i" => '</div>
', - # Clean up spare ampersands; note that we probably ought to be - # more careful about named entities. - '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;' - ); - $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); - + if(!$wgUseTidy) { + $fixtags = array( + "/
<hr *>/i" => '<hr/>', + "/<br *>/i" => '<br/>', + "/<center *>/i"=>'<div class="center">', + "/<\\/center *>/i" => '</div>
', + # Clean up spare ampersands; note that we probably ought to be + # more careful about named entities. + '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;' + ); + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + } else { + $fixtags = array( + "/
<center *>/i"=>'<div class="center">', + "/<\\/center *>/i" => '</div>
' + ); + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + } # only once and last $text = $this->doBlockLevels( $text, $linestart ); - + if($wgUseTidy) { + $text = $this->tidy($text); + } $this->mOutput->setText( $text ); wfProfileOut( $fname ); return $this->mOutput; @@ -392,6 +402,29 @@ class Parser return trim ( $t ) ; } + /* interface with html tidy, used if $wgUseTidy = true */ + function tidy ( $text ) { + global $wgTidyConf, $wgTidyBin, $wgTidyOpts; + $cleansource = ''; + $descriptorspec = array( + 0 => array("pipe", "r"), + 1 => array("pipe", "w"), + 2 => array("file", "/dev/null", "a") + ); + $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes); + if (is_resource($process)) { + fwrite($pipes[0], $text); + fclose($pipes[0]); + while (!feof($pipes[1])) { + $cleansource .= fgets($pipes[1], 1024); + } + fclose($pipes[1]); + $return_value = proc_close($process); + } + return preg_replace("/(^.*<body[^>]*>|<\\/body[^>]*>.*$)/s", '', $cleansource); + + } + function doTableStuff ( $t ) { $t = explode ( "\n" , $t ) ;