From: Tim Starling Date: Sun, 11 Apr 2004 16:46:06 +0000 (+0000) Subject: template arguments, various improvements to handling of recursive inclusion X-Git-Tag: 1.3.0beta1~459 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/exercices/modifier.php?a=commitdiff_plain;h=5733d215639e574db8f8fee1ee255b87dcb71ce7;p=lhc%2Fweb%2Fwiklou.git template arguments, various improvements to handling of recursive inclusion --- diff --git a/includes/Parser.php b/includes/Parser.php index c5139143f8..7927aa5268 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -33,10 +33,8 @@ if( $GLOBALS['wgUseWikiHiero'] ){ # page would be proportional to the square of the input size. Hence, we limit the number # of inclusions of any given page, thus bringing any attack back to O(N). # -define( "MAX_INCLUDE_REPEAT", 5 ); -# Recursion depth of variable/inclusion evaluation -define( "MAX_INCLUDE_PASSES", 3 ); +define( "MAX_INCLUDE_REPEAT", 5 ); # Allowed values for $mOutputType define( "OT_HTML", 1 ); @@ -50,7 +48,7 @@ class Parser { # Cleared with clearState(): var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array(); - var $mVariables, $mIncludeCount; + var $mVariables, $mIncludeCount, $mArgStack; # Temporary: var $mOptions, $mTitle, $mOutputType; @@ -69,10 +67,11 @@ class Parser $this->mVariables = false; $this->mIncludeCount = array(); $this->mStripState = array(); + $this->mArgStack = array(); } # First pass--just handle sections, pass the rest off - # to doWikiPass2() which does all the real work. + # to internalParse() which does all the real work. # # Returns a ParserOutput # @@ -91,21 +90,8 @@ class Parser $stripState = NULL; $text = $this->strip( $text, $this->mStripState ); - $text = $this->doWikiPass2( $text, $linestart ); - # needs to be called last - $text = $this->doBlockLevels( $text, $linestart ); + $text = $this->internalParse( $text, $linestart ); $text = $this->unstrip( $text, $this->mStripState ); - # Clean up special characters - $fixtags = array( - "/
/i" => '
', - "/
/i" => '
', - "/
/i"=>'', - "/<\\/center *>/i" => '', - # Clean up spare ampersands; note that we probably ought to be - # more careful about named entities. - '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&' - ); - $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); $this->mOutput->setText( $text ); wfProfileOut( $fname ); @@ -155,23 +141,15 @@ class Parser function strip( $text, &$state ) { $render = ($this->mOutputType == OT_HTML); - if ( $state ) { - $nowiki_content = $state['nowiki']; - $hiero_content = $state['hiero']; - $math_content = $state['math']; - $pre_content = $state['pre']; - $item_content = $state['item']; - } else { - $nowiki_content = array(); - $hiero_content = array(); - $math_content = array(); - $pre_content = array(); - $item_content = array(); - } + $nowiki_content = array(); + $hiero_content = array(); + $math_content = array(); + $pre_content = array(); + $item_content = array(); # Replace any instances of the placeholders $uniq_prefix = UNIQ_PREFIX; - $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); + #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix); foreach( $nowiki_content as $marker => $content ){ @@ -213,35 +191,34 @@ class Parser } } - $state = array( - 'nowiki' => $nowiki_content, - 'hiero' => $hiero_content, - 'math' => $math_content, - 'pre' => $pre_content, - 'item' => $item_content - ); + # Merge state with the pre-existing state, if there is one + if ( $state ) { + $state['nowiki'] = $state['nowiki'] + $nowiki_content; + $state['hiero'] = $state['hiero'] + $hiero_content; + $state['math'] = $state['math'] + $math_content; + $state['pre'] = $state['pre'] + $pre_content; + } else { + $state = array( + 'nowiki' => $nowiki_content, + 'hiero' => $hiero_content, + 'math' => $math_content, + 'pre' => $pre_content, + 'item' => $item_content + ); + } return $text; } function unstrip( $text, &$state ) { # Must expand in reverse order, otherwise nested tags will be corrupted - /* - $dicts = array( 'item', 'pre', 'math', 'hiero', 'nowiki' ); - foreach ( $dicts as $dictName ) { - $content_dict = $state[$dictName]; - foreach( $content_dict as $marker => $content ){ - $text = str_replace( $marker, $content, $text ); - } - }*/ - $contentDict = end( $state ); for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) { $text = str_replace( key( $contentDict ), $content, $text ); } } - + return $text; } @@ -471,39 +448,47 @@ class Parser return $t ; } - # Well, OK, it's actually about 14 passes. But since all the - # hard lifting is done inside PHP's regex code, it probably - # wouldn't speed things up much to add a real parser. - # - function doWikiPass2( $text, $linestart ) + function internalParse( $text, $linestart, $args = array() ) { - $fname = "Parser::doWikiPass2"; + $fname = "Parser::internalParse"; wfProfileIn( $fname ); - + $text = $this->removeHTMLtags( $text ); - $text = $this->replaceVariables( $text ); + $text = $this->replaceVariables( $text, $args ); # $text = preg_replace( "/(^|\n)-----*/", "\\1
", $text ); $text = $this->doHeadings( $text ); - if($this->mOptions->getUseDynamicDates()) { global $wgDateFormatter; $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text ); } - $text = $this->replaceExternalLinks( $text ); $text = $this->doTokenizedParser ( $text ); - $text = $this->doTableStuff ( $text ) ; - $text = $this->formatHeadings( $text ); - $sk =& $this->mOptions->getSkin(); $text = $sk->transformContent( $text ); - + + $fixtags = array( + "/
/i" => '
', + "/
/i" => '
', + "/
/i"=>'', + "/<\\/center *>/i" => '' + ); + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + // another round, but without regex + $fixtags = array( + '& ' => '&', + '&<' => '&<', + ); + $text = str_replace( array_keys($fixtags), array_values($fixtags), $text ); + $text .= $this->categoryMagic () ; + # needs to be called last + $text = $this->doBlockLevels( $text, $linestart ); + wfProfileOut( $fname ); return $text; } @@ -736,8 +721,7 @@ class Parser $nextToken = $tokenizer->nextToken(); $txt .= $nextToken["text"]; } - $fakestate = $this->mStripState; - $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix ); + $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix ); # did the tag start with 3 [ ? if($threeopen) { @@ -1035,12 +1019,14 @@ class Parser # and making lists from lines starting with * # : etc. # $a = explode( "\n", $text ); - $lastPref = $text = ''; - $this->mDTopen = $inBlockElem = $pstack = false; + + $lastPref = $text = $lastLine = ''; + $this->mDTopen = $inBlockElem = false; + $npl = 0; + $pstack = false; if ( ! $linestart ) { $text .= array_shift( $a ); } foreach ( $a as $t ) { - $oLine = $t; $opl = strlen( $lastPref ); $npl = strspn( $t, "*#:;" ); @@ -1054,7 +1040,7 @@ class Parser if ( ";" == substr( $pref, -1 ) ) { $cpos = strpos( $t, ":" ); - if ( ! ( false === $cpos ) ) { + if ( false !== $cpos ) { $term = substr( $t, 0, $cpos ); $text .= $term . $this->nextItem( ":" ); $t = substr( $t, $cpos + 1 ); @@ -1151,6 +1137,7 @@ class Parser $text .= "mLastSection . ">"; $this->mLastSection = ""; } + wfProfileOut( $fname ); return $text; } @@ -1194,10 +1181,9 @@ class Parser } } - /* private */ function replaceVariables( $text ) + /* private */ function replaceVariables( $text, $args = array() ) { - global $wgLang, $wgCurParser; - global $wgScript, $wgArticlePath; + global $wgLang, $wgScript, $wgArticlePath; $fname = "Parser::replaceVariables"; wfProfileIn( $fname ); @@ -1207,54 +1193,18 @@ class Parser $this->initialiseVariables(); } $titleChars = Title::legalChars(); - $regex = "/{{([$titleChars\\|]*?)}}/s"; - - # "Recursive" variable expansion: run it through a couple of passes - for ( $i=0; $ifork(); - - $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text ); - if ( $oldText == $text ) { - $bail = true; - } - $this->merge( $wgCurParser ); - } - - return $text; - } - - # Returns a copy of this object except with various variables cleared - # This copy can be re-merged with the parent after operations on the copy - function fork() - { - $copy = $this; - $copy->mOutput = new ParserOutput; - return $copy; - } - - # Merges a copy split off with fork() - function merge( &$copy ) - { - # Output objects - $this->mOutput->merge( $copy->mOutput ); + $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s"; - # Include throttling arrays - foreach( $copy->mIncludeCount as $dbk => $count ) { - if ( array_key_exists( $dbk, $this->mIncludeCount ) ) { - $this->mIncludeCount[$dbk] += $count; - } else { - $this->mIncludeCount[$dbk] = $count; - } - } + # This function is called recursively. To keep track of arguments we need a stack: + array_push( $this->mArgStack, $args ); - # Strip states - foreach( $copy->mStripState as $dictName => $contentDict ) { - $this->mStripState[$dictName] += $contentDict; - } + # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array + $GLOBALS['wgCurParser'] =& $this; + $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text ); + + array_pop( $this->mArgStack ); + + return $text; } function braceSubstitution( $matches ) @@ -1263,12 +1213,25 @@ class Parser $fname = "Parser::braceSubstitution"; $found = false; $nowiki = false; - - $text = $matches[1]; + $title = NULL; + + # $newline is an optional newline character before the braces + # $part1 is the bit before the first |, and must contain only title characters + # $args is a list of arguments, starting from index 0, not including $part1 + + $newline = $matches[1]; + $part1 = $matches[2]; + # If the third subpattern matched anything, it will start with | + if ( $matches[3] !== "" ) { + $args = explode( "|", substr( $matches[3], 1 ) ); + } else { + $args = array(); + } + $argc = count( $args ); # SUBST $mwSubst =& MagicWord::get( MAG_SUBST ); - if ( $mwSubst->matchStartAndRemove( $text ) ) { + if ( $mwSubst->matchStartAndRemove( $part1 ) ) { if ( $this->mOutputType != OT_WIKI ) { # Invalid SUBST not replaced at PST time # Return without further processing @@ -1285,19 +1248,21 @@ class Parser if ( !$found ) { # Check for MSGNW: $mwMsgnw =& MagicWord::get( MAG_MSGNW ); - if ( $mwMsgnw->matchStartAndRemove( $text ) ) { + if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { $nowiki = true; } else { # Remove obsolete MSG: $mwMsg =& MagicWord::get( MAG_MSG ); - $mwMsg->matchStartAndRemove( $text ); + $mwMsg->matchStartAndRemove( $part1 ); } # Check if it is an internal message $mwInt =& MagicWord::get( MAG_INT ); - if ( $mwInt->matchStartAndRemove( $text ) ) { - $text = wfMsg( $text ); - $found = true; + if ( $mwInt->matchStartAndRemove( $part1 ) ) { + if ( $this->incrementIncludeCount( "int:$part1" ) ) { + $text = wfMsgReal( $part1, $args, true ); + $found = true; + } } } @@ -1305,12 +1270,12 @@ class Parser if ( !$found ) { # Check for NS: (namespace expansion) $mwNs = MagicWord::get( MAG_NS ); - if ( $mwNs->matchStartAndRemove( $text ) ) { - if ( intval( $text ) ) { - $text = $wgLang->getNsText( intval( $text ) ); + if ( $mwNs->matchStartAndRemove( $part1 ) ) { + if ( intval( $part1 ) ) { + $text = $wgLang->getNsText( intval( $part1 ) ); $found = true; } else { - $index = Namespace::getCanonicalIndex( strtolower( $text ) ); + $index = Namespace::getCanonicalIndex( strtolower( $part1 ) ); if ( !is_null( $index ) ) { $text = $wgLang->getNsText( $index ); $found = true; @@ -1324,78 +1289,54 @@ class Parser $mwLocal = MagicWord::get( MAG_LOCALURL ); $mwLocalE = MagicWord::get( MAG_LOCALURLE ); - if ( $mwLocal->matchStartAndRemove( $text ) ) { + if ( $mwLocal->matchStartAndRemove( $part1 ) ) { $func = 'getLocalURL'; - } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) { + } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) { $func = 'escapeLocalURL'; } else { $func = ''; } if ( $func !== '' ) { - $args = explode( "|", $text ); - $n = count( $args ); - if ( $n > 0 ) { - $title = Title::newFromText( $args[0] ); - if ( !is_null( $title ) ) { - if ( $n > 1 ) { - $text = $title->$func( $args[1] ); - } else { - $text = $title->$func(); - } - $found = true; + $title = Title::newFromText( $part1 ); + if ( !is_null( $title ) ) { + if ( $argc > 0 ) { + $text = $title->$func( $args[0] ); + } else { + $text = $title->$func(); } + $found = true; } - } + } } - # Check for a match against internal variables - if ( !$found && array_key_exists( $text, $this->mVariables ) ) { - $text = $this->mVariables[$text]; + # Internal variables + if ( !$found && array_key_exists( $part1, $this->mVariables ) ) { + $text = $this->mVariables[$part1]; $found = true; $this->mOutput->mContainsOldMagic = true; } + # Arguments input from the caller + $inputArgs = end( $this->mArgStack ); + if ( !$found && array_key_exists( $part1, $inputArgs ) ) { + $text = $inputArgs[$part1]; + $found = true; + } + # Load from database if ( !$found ) { - $title = Title::newFromText( $text, NS_TEMPLATE ); - if ( is_object( $title ) && !$title->isExternal() ) { + $title = Title::newFromText( $part1, NS_TEMPLATE ); + if ( !is_null( $title ) && !$title->isExternal() ) { # Check for excessive inclusion $dbk = $title->getPrefixedDBkey(); - if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { - $this->mIncludeCount[$dbk] = 0; - } - if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { + if ( $this->incrementIncludeCount( $dbk ) ) { $article = new Article( $title ); $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals(); if ( $articleContent !== false ) { $found = true; $text = $articleContent; - # Escaping and link table handling - # Not required for preSaveTransform() - if ( $this->mOutputType == OT_HTML ) { - if ( $nowiki ) { - $text = wfEscapeWikiText( $text ); - } else { - $text = $this->removeHTMLtags( $text ); - } - # Do not enter included links in link table - $wgLinkCache->suspend(); - - # Run full parser on the included text - $text = $this->strip( $text, $this->mStripState ); - $text = $this->doWikiPass2( $text, true ); - - # Add the result to the strip state for re-inclusion after - # the rest of the processing - $text = $this->insertStripItem( $text, $this->mStripState ); - - # Resume the link cache and register the inclusion as a link - $wgLinkCache->resume(); - $wgLinkCache->addLinkObj( $title ); - - } } } @@ -1406,14 +1347,72 @@ class Parser } } } + + # Recursive parsing, escaping and link table handling + # Only for HTML output + if ( $nowiki && $found && $this->mOutputType == OT_HTML ) { + $text = wfEscapeWikiText( $text ); + } elseif ( $this->mOutputType == OT_HTML && $found ) { + # Clean up argument array + $assocArgs = array(); + $index = 1; + foreach( $args as $arg ) { + $eqpos = strpos( $arg, "=" ); + if ( $eqpos === false ) { + $assocArgs[$index++] = $arg; + } else { + $name = trim( substr( $arg, 0, $eqpos ) ); + $value = trim( substr( $arg, $eqpos+1 ) ); + if ( $value === false ) { + $value = ""; + } + if ( $name !== false ) { + $assocArgs[$name] = $value; + } + } + } + + # Do not enter included links in link table + if ( !is_null( $title ) ) { + $wgLinkCache->suspend(); + } + + # Run full parser on the included text + $text = $this->strip( $text, $this->mStripState ); + $text = $this->internalParse( $text, (bool)$newline, $assocArgs ); + + # Add the result to the strip state for re-inclusion after + # the rest of the processing + $text = $this->insertStripItem( $text, $this->mStripState ); + + # Resume the link cache and register the inclusion as a link + if ( !is_null( $title ) ) { + $wgLinkCache->resume(); + $wgLinkCache->addLinkObj( $title ); + } + } if ( !$found ) { return $matches[0]; } else { - return $text; + return $newline . $text; + } + } + + # Returns true if the function is allowed to include this entity + function incrementIncludeCount( $dbk ) + { + if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { + $this->mIncludeCount[$dbk] = 0; + } + if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { + return true; + } else { + return false; } } + # Cleans up HTML, removes dangerous tags and attributes /* private */ function removeHTMLtags( $text ) { @@ -1617,7 +1616,7 @@ class Parser # The canonized header is a version of the header text safe to use for links # Avoid insertion of weird stuff like by expanding the relevant sections - $canonized_headline = Parser::unstrip( $headline, $this->mStripState ); + $canonized_headline = $this->unstrip( $headline, $this->mStripState ); # strip out HTML $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline ); @@ -1682,7 +1681,10 @@ class Parser if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { # This is the [edit] link that appears for the top block of text when # section editing is enabled - $full .= $sk->editSectionLink(0); + + # Disabled because it broke block formatting + # For example, a bullet point in the top line + # $full .= $sk->editSectionLink(0); } $full .= $block; if( $doShowToc && !$i) {