From 315d66c7961629034db8fddb24fea51164c19f18 Mon Sep 17 00:00:00 2001 From: Gabriel Wicke Date: Thu, 6 May 2004 19:01:05 +0000 Subject: [PATCH] * simpler html strip function if tidy is used * error reporting if tidy doesn't honour force-output option *and* html is severely broken (if unknown elements are encountered), never return uncleaned version --- includes/Parser.php | 130 ++++++++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 52 deletions(-) diff --git a/includes/Parser.php b/includes/Parser.php index b39f704d96..5c4d169696 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -406,10 +406,13 @@ class Parser function tidy ( $text ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; $cleansource = ''; + $text = ''. +'test'.$text.''; $descriptorspec = array( 0 => array("pipe", "r"), 1 => array("pipe", "w"), - 2 => array("file", "/dev/null", "a") + 2 => array("pipe", "w") ); $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes); if (is_resource($process)) { @@ -420,13 +423,19 @@ class Parser } fclose($pipes[1]); $return_value = proc_close($process); + if($return_value == 2) { + $errors = ''; + while (!feof($pipes[2])) { + $errors .= fgets($pipes[2], 1024); + } + } + fclose($pipes[2]); } - if( $cleansource == '') { - return $text; + if( $cleansource == '' and !empty($errors)) { + return '
'.htmlspecialchars($errors).'
'; } else { - return preg_replace("/(^.*]*>|<\\/body[^>]*>.*$)/s", '', $cleansource); + return $cleansource; } - } function doTableStuff ( $t ) @@ -1526,6 +1535,7 @@ class Parser # Cleans up HTML, removes dangerous tags and attributes /* private */ function removeHTMLtags( $text ) { + global $wgUseTidy; $fname = "Parser::removeHTMLtags"; wfProfileIn( $fname ); $htmlpairs = array( # Tags that must be closed @@ -1556,66 +1566,82 @@ class Parser $bits = explode( "<", $text ); $text = array_shift( $bits ); - $tagstack = array(); $tablestack = array(); - - foreach ( $bits as $x ) { - $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", - $x, $regs ); - list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; - error_reporting( $prev ); - - $badtag = 0 ; - if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { - # Check our stack - if ( $slash ) { - # Closing a tag... - if ( ! in_array( $t, $htmlsingle ) && - ( $ot = array_pop( $tagstack ) ) != $t ) { - array_push( $tagstack, $ot ); - $badtag = 1; + if(!$wgUseTidy) { + $tagstack = array(); $tablestack = array(); + foreach ( $bits as $x ) { + $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); + preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", + $x, $regs ); + list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; + error_reporting( $prev ); + + $badtag = 0 ; + if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + # Check our stack + if ( $slash ) { + # Closing a tag... + if ( ! in_array( $t, $htmlsingle ) && + ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) { + if(!empty($ot)) array_push( $tagstack, $ot ); + $badtag = 1; + } else { + if ( $t == "table" ) { + $tagstack = array_pop( $tablestack ); + } + $newparams = ""; + } } else { - if ( $t == "table" ) { - $tagstack = array_pop( $tablestack ); + # Keep track for later + if ( in_array( $t, $tabletags ) && + ! in_array( "table", $tagstack ) ) { + $badtag = 1; + } else if ( in_array( $t, $tagstack ) && + ! in_array ( $t , $htmlnest ) ) { + $badtag = 1 ; + } else if ( ! in_array( $t, $htmlsingle ) ) { + if ( $t == "table" ) { + array_push( $tablestack, $tagstack ); + $tagstack = array(); + } + array_push( $tagstack, $t ); } - $newparams = ""; + # Strip non-approved attributes from the tag + $newparams = $this->fixTagAttributes($params); + } - } else { - # Keep track for later - if ( in_array( $t, $tabletags ) && - ! in_array( "table", $tagstack ) ) { - $badtag = 1; - } else if ( in_array( $t, $tagstack ) && - ! in_array ( $t , $htmlnest ) ) { - $badtag = 1 ; - } else if ( ! in_array( $t, $htmlsingle ) ) { - if ( $t == "table" ) { - array_push( $tablestack, $tagstack ); - $tagstack = array(); - } - array_push( $tagstack, $t ); + if ( ! $badtag ) { + $rest = str_replace( ">", ">", $rest ); + $text .= "<$slash$t $newparams$brace$rest"; + continue; } - # Strip non-approved attributes from the tag - $newparams = $this->fixTagAttributes($params); - } - if ( ! $badtag ) { + $text .= "<" . str_replace( ">", ">", $x); + } + # Close off any remaining tags + while ( $t = array_pop( $tagstack ) ) { + $text .= "\n"; + if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); } + } + } else { + # this might be possible using tidy itself + foreach ( $bits as $x ) { + preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", + $x, $regs ); + list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; + if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + $newparams = $this->fixTagAttributes($params); $rest = str_replace( ">", ">", $rest ); $text .= "<$slash$t $newparams$brace$rest"; - continue; + } else { + $text .= "<" . str_replace( ">", ">", $x); } - } - $text .= "<" . str_replace( ">", ">", $x); - } - # Close off any remaining tags - while ( $t = array_pop( $tagstack ) ) { - $text .= "\n"; - if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); } + } } wfProfileOut( $fname ); return $text; } + /* * * This function accomplishes several tasks: -- 2.20.1