From 3da7dcf91daa8d26b111853f144376c0b42c69c7 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 1 Jun 2006 19:38:14 +0000 Subject: [PATCH] Fix regressions in parser with incomplete tag stripping, plus some old bugs: * (bug 885) Pre-save transform no longer silently appends close tags * Pre-save transform no longer changes the case of close tags --- RELEASE-NOTES | 2 + includes/Parser.php | 116 +++++++++++++++++++++----------------------- 2 files changed, 57 insertions(+), 61 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index a4525533fb..f16adbeb38 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -405,6 +405,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN further parsing (-style). There should no longer be surprise expansion of foreign extensions inside HTML output, or differences in behavior based on the order tags are loaded. +* (bug 885) Pre-save transform no longer silently appends close tags +* Pre-save transform no longer changes the case of close tags == Compatibility == diff --git a/includes/Parser.php b/includes/Parser.php index bc08e4da89..f969f7b910 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -311,20 +311,20 @@ class Parser function getOptions() { return $this->mOptions; } /** - * Replaces all occurrences of <$tag>content in the text - * with a random marker and returns the new text. the output parameter - * $content will be an associative array filled with data on the form - * $unique_marker => content. + * Replaces all occurrences of HTML-style comments and the given tags + * in the text with a random marker and returns the new text. 
The output + * parameter $matches will be an associative array filled with data in + * the form: + * 'UNIQ-xxxxx' => array( + * 'element', + * 'tag content', + * array( 'param' => 'x' ), + * 'tag content' ) ) * - * If $content is already set, the additional entries will be appended - * If $tag is set to STRIP_COMMENTS, the function will extract - * + * @param $elements list of element names. Comments are always extracted. + * @param $text Source text string. + * @param $uniq_prefix * - * $output: array( 'UNIQ-xxxxx' => array( - * 'element', - * 'tag content', - * array( 'param' => 'x' ), - * '' ) ) * @private * @static */ @@ -334,58 +334,59 @@ class Parser $stripped = ''; $matches = array(); - if( $elements == STRIP_COMMENTS ) { - $start = '/)/'; } else { - $end = '/-->/'; + $end = "/(<\\/$element\\s*>)/i"; } - $q = preg_split( $end, $inside, 2 ); + $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE ); $content = $q[0]; - if( count( $q ) < 2 ) { + if( count( $q ) < 3 ) { # No end tag -- let it run out to the end of the text. + $tail = ''; $text = ''; } else { - $text = $q[1]; + $tail = $q[1]; + $text = $q[2]; } } $matches[$marker] = array( $element, $content, Sanitizer::decodeTagAttributes( $attributes ), - "<$element$attributes$empty>" ); + "<$element$attributes$close$content$tail" ); } return $stripped; } @@ -409,6 +410,7 @@ class Parser # Replace any instances of the placeholders $uniq_prefix = $this->mUniqPrefix; #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); + $commentState = array(); $elements = array_merge( array( 'nowiki', 'pre', 'gallery' ), @@ -422,27 +424,24 @@ class Parser } - // Strip comments in a first pass. 
- // This saves us from needlessly rendering extensions in comment text - $text = Parser::extractTagsAndParams(STRIP_COMMENTS, $text, $comment_matches, $uniq_prefix); - $commentState = array(); - foreach( $comment_matches as $marker => $data ){ - list( $element, $content, $params, $tag ) = $data; - $commentState[$marker] = ''; - } - $matches = array(); $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); foreach( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; - // Restore any comments; the extension can deal with them. - if( $content !== null) { - $content = strtr( $content, $commentState ); - } if( $render ) { $tagName = strtolower( $element ); switch( $tagName ) { + case '!--': + // Comment + if( substr( $tag, -3 ) == '-->' ) { + $output = $tag; + } else { + // Unclosed comment in input. + // Close it so later stripping can remove it + $output = "$tag-->"; + } + break; case 'html': if( $wgRawHtml ) { $output = $content; @@ -473,25 +472,20 @@ class Parser } } else { // Just stripping tags; keep the source - if( $content === null ) { - $output = $tag; - } else { - $output = "$tag$content"; - } + $output = $tag; + } + if( !$stripcomments && $element == '!--' ) { + $commentState[$marker] = $output; + } else { + $state[$element][$marker] = $output; } - $state[$element][$marker] = $output; } # Unstrip comments unless explicitly told otherwise. # (The comments are always stripped prior to this point, so as to # not invoke any extension tags / parser hooks contained within # a comment.) - if ( $stripcomments ) { - // Add remaining comments to the state array - foreach( $commentState as $marker => $content ) { - $state['comment'][$marker] = $content; - } - } else { + if ( !$stripcomments ) { // Put them all back and forget them $text = strtr( $text, $commentState ); } -- 2.20.1