From 48b0722fd977cf9c2169d3d9e03b4e6935e0099e Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 1 Jun 2006 06:16:55 +0000 Subject: [PATCH] * (bug 5384) Fix in extension * Nesting of different tag extensions and comments should now work more consistently and more safely. A cleaner, one-pass tag strip lets the 'outer' tag either take source (-style) or pass it down to further parsing (-style). There should no longer be surprise expansion of foreign extensions inside HTML output, or differences in behavior based on the order tags are loaded. --- RELEASE-NOTES | 7 + includes/Parser.php | 268 +++++++++++++++--------------------- maintenance/parserTests.txt | 1 + 3 files changed, 120 insertions(+), 156 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 0eb51eded3..a4525533fb 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -398,6 +398,13 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * parserTests.php accepts a --file parameter to run an alternate test sutie * parser tests can now test extensions using !!hooks sections * Fix oddity with open tag parameters getting stuck on +* (bug 5384) Fix in extension +* Nesting of different tag extensions and comments should now work more + consistently and more safely. A cleaner, one-pass tag strip lets the + 'outer' tag either take source (-style) or pass it down to + further parsing (-style). There should no longer be surprise + expansion of foreign extensions inside HTML output, or differences + in behavior based on the order tags are loaded. == Compatibility == diff --git a/includes/Parser.php b/includes/Parser.php index c6029d03a0..6dacd60dcc 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -9,6 +9,7 @@ /** */ require_once( 'Sanitizer.php' ); require_once( 'HttpFunctions.php' ); +require_once( 'ImageGallery.php' ); /** * Update this version number when the ParserOutput format @@ -319,63 +320,60 @@ class Parser * If $tag is set to STRIP_COMMENTS, the function will extract * * + * $output: array( 'UNIQ-xxxxx' => array( + * 'element', + * 'tag content', + * array( 'param' => 'x' ), + * '' ) ) * @private * @static */ - function extractTagsAndParams($tag, $text, &$content, &$tags, &$params, $uniq_prefix = ''){ - $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString(); - if ( !$content ) { - $content = array( ); - } + function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){ + $rand = Parser::getRandomString(); $n = 1; $stripped = ''; + $matches = array(); - if ( !$tags ) { - $tags = array( ); - } - - if ( !$params ) { - $params = array( ); - } - - if( $tag == STRIP_COMMENTS ) { - $start = '//'; + if( $elements == STRIP_COMMENTS ) { + $start = '//'; + } $q = preg_split( $end, $inside, 2 ); - $content[$marker] = $q[0]; + $content = $q[0]; if( count( $q ) < 2 ) { # No end tag -- let it run out to the end of the text. break; @@ -383,26 +381,15 @@ class Parser $text = $q[1]; } } + + $matches[$marker] = array( $element, + $content, + Sanitizer::decodeTagAttributes( $attributes ), + "<$element$attributes$empty>" ); } return $stripped; } - /** - * Wrapper function for extractTagsAndParams - * for cases where $tags and $params isn't needed - * i.e. where tags will never have params, like - * - * @private - * @static - */ - function extractTags( $tag, $text, &$content, $uniq_prefix = '' ) { - $dummy_tags = array(); - $dummy_params = array(); - - return Parser::extractTagsAndParams( $tag, $text, $content, - $dummy_tags, $dummy_params, $uniq_prefix ); - } - /** * Strips and renders nowiki, pre, math, hiero * If $render is set, performs necessary rendering operations on plugins @@ -418,124 +405,102 @@ class Parser */ function strip( $text, &$state, $stripcomments = false ) { $render = ($this->mOutputType == OT_HTML); - $html_content = array(); - $nowiki_content = array(); - $math_content = array(); - $pre_content = array(); - $comment_content = array(); - $ext_content = array(); - $ext_tags = array(); - $ext_params = array(); - $gallery_content = array(); # Replace any instances of the placeholders $uniq_prefix = $this->mUniqPrefix; #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); - - # html + + $elements = array_merge( + array( 'nowiki', 'pre', 'gallery' ), + array_keys( $this->mTagHooks ) ); global $wgRawHtml; if( $wgRawHtml ) { - $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix); - foreach( $html_content as $marker => $content ) { - if ($render ) { - # Raw and unchecked for validity. - $state['html'][$marker] = $content; - } else { - $state['html'][$marker] = ''.$content.''; - } - } + $elements[] = 'html'; } - - # nowiki - $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix); - foreach( $nowiki_content as $marker => $content ) { - if( $render ){ - $state['nowiki'][$marker] = wfEscapeHTMLTagsOnly( $content ); - } else { - $state['nowiki'][$marker] = ''.$content.''; - } - } - - # math if( $this->mOptions->getUseTeX() ) { - $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix); - foreach( $math_content as $marker => $content ){ - if( $render ) { - $state['math'][$marker] = renderMath( $content ); - } else { - $state['math'][$marker] = ''.$content.''; - } - } + $elements[] = 'math'; } + - # pre - $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix); - foreach( $pre_content as $marker => $content ){ - if( $render ){ - $state['pre'][$marker] = '
' . wfEscapeHTMLTagsOnly( $content ) . '
'; - } else { - $state['pre'][$marker] = '
'.$content.'
'; - } + // Strip comments in a first pass. + // This saves us from needlessly rendering extensions in comment text + $text = Parser::extractTagsAndParams(STRIP_COMMENTS, $text, $comment_matches, $uniq_prefix); + $commentState = array(); + foreach( $comment_matches as $marker => $data ){ + list( $element, $content, $params, $tag ) = $data; + $commentState[$marker] = ''; } - - # gallery - $text = Parser::extractTags('gallery', $text, $gallery_content, $uniq_prefix); - foreach( $gallery_content as $marker => $content ) { - require_once( 'ImageGallery.php' ); - if ( $render ) { - $state['gallery'][$marker] = $this->renderImageGallery( $content ); - } else { - $state['gallery'][$marker] = ''.$content.''; + + $matches = array(); + $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); + + foreach( $matches as $marker => $data ) { + list( $element, $content, $params, $tag ) = $data; + // Restore any comments; the extension can deal with them. + if( $content !== null) { + $content = strtr( $content, $commentState ); } - } - - # Comments - $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix); - foreach( $comment_content as $marker => $content ){ - $comment_content[$marker] = ''; - } - - # Extensions - foreach ( $this->mTagHooks as $tag => $callback ) { - $ext_content[$tag] = array(); - $text = Parser::extractTagsAndParams( $tag, $text, $ext_content[$tag], - $ext_tags[$tag], $ext_params[$tag], $uniq_prefix ); - foreach( $ext_content[$tag] as $marker => $content ) { - $full_tag = $ext_tags[$tag][$marker]; - $params = $ext_params[$tag][$marker]; - if ( $render ) - $state[$tag][$marker] = call_user_func_array( $callback, array( $content, $params, $this ) ); - else { - if ( is_null( $content ) ) { - // Empty element tag - $state[$tag][$marker] = $full_tag; + if( $render ) { + switch( $element ) { + case 'html': + if( $wgRawHtml ) { + $output = $content; + break; + } + // Shouldn't happen otherwise. :) + case 'nowiki': + $output = wfEscapeHTMLTagsOnly( $content ); + break; + case 'math': + $output = renderMath( $content ); + break; + case 'pre': + // Backwards-compatibility hack + $content = preg_replace( '!(.*?)!is', '\\1', $content ); + $output = '
' . wfEscapeHTMLTagsOnly( $content ) . '
'; + break; + case 'gallery': + $output = $this->renderImageGallery( $content ); + break; + default: + $tagName = strtolower( $element ); + if( isset( $this->mTagHooks[$tagName] ) ) { + $output = call_user_func_array( $this->mTagHooks[$tagName], + array( $content, $params, $this ) ); } else { - $state[$tag][$marker] = "$full_tag$content"; + wfDebugDieBacktrace( "Invalid call hook $element" ); } } + } else { + // Just stripping tags; keep the source + if( $content === null ) { + $output = $tag; + } else { + $output = "$tag$content"; + } } + $state[$element][$marker] = $output; } # Unstrip comments unless explicitly told otherwise. # (The comments are always stripped prior to this point, so as to # not invoke any extension tags / parser hooks contained within # a comment.) - if ( !$stripcomments ) { - $tempstate = array( 'comment' => $comment_content ); - $text = $this->unstrip( $text, $tempstate ); - $comment_content = array(); - } else { - if( !isset( $state['comment'] ) ) { - $state['comment'] = array(); + if ( $stripcomments ) { + // Add remaining comments to the state array + foreach( $commentState as $marker => $content ) { + $state['comment'][$marker] = $content; } - $state['comment'] += $comment_content; + } else { + // Put them all back and forget them + $text = strtr( $text, $commentState ); } return $text; } /** - * restores pre, math, and hiero removed by strip() + * Restores pre, math, and other extensions removed by strip() * * always call unstripNoWiki() after this one * @private @@ -545,20 +510,21 @@ class Parser return $text; } - # Must expand in reverse order, otherwise nested tags will be corrupted - foreach( array_reverse( $state, true ) as $tag => $contentDict ) { + $replacements = array(); + foreach( $state as $tag => $contentDict ) { if( $tag != 'nowiki' && $tag != 'html' ) { - foreach( array_reverse( $contentDict, true ) as $uniq => $content ) { - $text = str_replace( $uniq, $content, $text ); + foreach( $contentDict as $uniq => $content ) { + $replacements[$uniq] = $content; } } } + $text = strtr( $text, $replacements ); return $text; } /** - * always call this after unstrip() to preserve the order + * Always call this after unstrip() to preserve the order * * @private */ @@ -567,18 +533,15 @@ class Parser return $text; } - # Must expand in reverse order, otherwise nested tags will be corrupted - if( isset( $state['nowiki'] ) ) - foreach( array_reverse( $state['nowiki'], true ) as $uniq => $content ) { - $text = str_replace( $uniq, $content, $text ); - } - - global $wgRawHtml; - if ($wgRawHtml && isset( $state['html'] ) ) { - foreach( array_reverse( $state['html'], true ) as $uniq => $content ) { - $text = str_replace( $uniq, $content, $text ); + $replacements = array(); + foreach( $state as $tag => $contentDict ) { + if( $tag == 'nowiki' || $tag == 'html' ) { + foreach( $contentDict as $uniq => $content ) { + $replacements[$uniq] = $content; + } } } + $text = strtr( $text, $replacements ); return $text; } @@ -593,14 +556,7 @@ class Parser function insertStripItem( $text, &$state ) { $rnd = $this->mUniqPrefix . '-item' . Parser::getRandomString(); if ( !$state ) { - $state = array( - 'html' => array(), - 'nowiki' => array(), - 'math' => array(), - 'pre' => array(), - 'comment' => array(), - 'gallery' => array(), - ); + $state = array(); } $state['item'][$rnd] = $text; return $rnd; diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index 4ff78f0430..553c8ff3fe 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -301,6 +301,7 @@ And a link <cite> <em> + !! end ### -- 2.20.1