From: Tim Starling Date: Mon, 17 Dec 2007 15:07:25 +0000 (+0000) Subject: * Strip comments early, before template expansion. This mimics the behaviour in the... X-Git-Tag: 1.31.0-rc.0~50377 X-Git-Url: http://git.cyclocoop.org/%22%2C%20generer_url_ecrire%28?a=commitdiff_plain;h=9d4c2639965eafeff1feef3728b52a9bdbe23f1e;p=lhc%2Fweb%2Fwiklou.git * Strip comments early, before template expansion. This mimics the behaviour in the old parser. Added parser tests demonstrating the regression this fixes. The syntactic effect is fairly elegant, with comments taking effect at source level, as expected. The removeHTMLcomments() and preprocessToDom() passes could be merged at a later date. * No need for comment stripping in Expr.php anymore * Updated srvus() to roughly account for these changes * Gave comment handling its own preprocessor tag, and split off comment handling from extensionSubstitution(). This only applies for the non-HTML modes, since in HTML mode, comments are stripped early. * Strip comments from template argument names (PPFrame::newChild). --- diff --git a/includes/Parser.php b/includes/Parser.php index 567bcd9b3f..97e18f5733 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -2693,18 +2693,18 @@ class Parser $endpos = strpos( $text, '-->', $i + 4 ); if ( $endpos === false ) { // Unclosed comment in input, runs to end - $accum .= htmlspecialchars( substr( $text, $i ) ); + $inner = substr( $text, $i ); if ( $this->ot['html'] ) { // Close it so later stripping can remove it - $accum .= htmlspecialchars( '-->' ); + $inner .= '-->'; } + $accum .= '' . htmlspecialchars( $inner ) . ''; $i = strlen( $text ); - continue; + } else { + $inner = substr( $text, $i, $endpos - $i + 3 ); + $accum .= '' . htmlspecialchars( $inner ) . ''; + $i = $endpos + 3; } - $accum .= htmlspecialchars( substr( $text, $i, $endpos - $i + 3 ) ); - #$inner = substr( $text, $i + 4, $endpos - $i - 4 ); - #$accum .= '!--' . htmlspecialchars( $inner ) . 
''; - $i = $endpos + 3; continue; } $name = $matches[1]; @@ -3047,6 +3047,12 @@ class Parser throw new MWException( __METHOD__ . ' called using the old argument format' ); } + # Remove comments + # This could theoretically be merged into preprocessToDom() + if ( $this->ot['html'] || ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) ) { + $text = Sanitizer::removeHTMLcomments( $text ); + } + $dom = $this->preprocessToDom( $text ); $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; $text = $frame->expand( $dom, $flags ); @@ -3439,6 +3445,12 @@ class Parser $text = strtr( $text, array( '' => '' , '' => '' ) ); } + # Remove comments + # This could theoretically be merged into preprocessToDom() + if ( $this->ot['html'] || ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) ) { + $text = Sanitizer::removeHTMLcomments( $text ); + } + $dom = $this->preprocessToDom( $text ); $this->mTplDomCache[ $titleText ] = $dom; @@ -3626,9 +3638,6 @@ class Parser $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $n++) . $this->mMarkerSuffix; if ( $this->ot['html'] ) { - if ( $name == '!--' ) { - return ''; - } $name = strtolower( $name ); $params = Sanitizer::decodeTagAttributes( $attrText ); @@ -3659,15 +3668,11 @@ class Parser } } } else { - if ( $name == '!--' ) { - $output = ''; + if ( $content === null ) { + $output = "<$name$attrText/>"; } else { - if ( $content === null ) { - $output = "<$name$attrText/>"; - } else { - $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] ); - $output = "<$name$attrText>$content$close"; - } + $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] ); + $output = "<$name$attrText>$content$close"; } } @@ -4961,7 +4966,9 @@ class Parser * for "replace", the whole page with the section replaced. 
*/ private function extractSections( $text, $section, $mode, $newText='' ) { + global $wgTitle; $this->clearState(); + $this->mTitle = $wgTitle; // not generally used but removes an ugly failure mode $this->mOptions = new ParserOptions; $this->setOutputType( OT_WIKI ); $curIndex = 0; @@ -5179,6 +5186,7 @@ class Parser function srvus( $text ) { $text = $this->replaceVariables( $text ); $text = $this->mStripState->unstripBoth( $text ); + $text = Sanitizer::removeHTMLtags( $text ); return $text; } } @@ -5252,6 +5260,8 @@ class PPFrame { const NO_ARGS = 1; const NO_TEMPLATES = 2; + const STRIP_COMMENTS = 4; + const RECOVER_ORIG = 3; /** @@ -5287,7 +5297,7 @@ class PPFrame { $name = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent; } else { // Named parameter - $name = $this->expand( $nameNodes->item( 0 ) ); + $name = $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ); } $value = $xpath->query( 'value', $arg ); @@ -5355,6 +5365,13 @@ class PPFrame { $params = array( 'title' => $title, 'parts' => $parts, 'text' => 'FIXME' ); $s = $this->parser->argSubstitution( $params, $this ); } + } elseif ( $root->nodeName == 'comment' ) { + # HTML-style comment + if ( $flags & self::STRIP_COMMENTS ) { + $s = ''; + } else { + $s = $root->textContent; + } } elseif ( $root->nodeName == 'ext' ) { # Extension tag $xpath = new DOMXPath( $root->ownerDocument ); diff --git a/includes/Parser_OldPP.php b/includes/Parser_OldPP.php index d8c662703d..b8a1723b3d 100644 --- a/includes/Parser_OldPP.php +++ b/includes/Parser_OldPP.php @@ -4918,6 +4918,7 @@ class Parser_OldPP */ function srvus( $text ) { $text = $this->strip( $text, $this->mStripState ); + $text = Sanitizer::removeHTMLtags( $text ); $text = $this->replaceVariables( $text ); $text = preg_replace( '//', '', $text ); $text = $this->mStripState->unstripBoth( $text ); diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index d733ec222e..7940c96ef7 100644 --- 
a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -33,6 +33,18 @@ Main Page blah blah !! endarticle +!!article +Template:Foo +!!text +FOO +!!endarticle + +!! article +Template:Blank +!! text +!! endarticle + + ### ### Basic tests ### @@ -278,6 +290,26 @@ Comment semantics: unclosed comment at end !! end +!! test +Comment in template title +!! input +{{f<!---->oo}} +!! result +
<p>FOO +</p>
+!! end + +!! test +Comment on its own line post-expand +!! input +a +{{blank}}<!----> +b +!! result +
<p>a +</p><p>b +</p>
+!! end ### ### Preformatted text