From 915c24e8bd5c5b7e651be4d289c0b540e5019948 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 19 Aug 2008 20:59:18 +0000 Subject: [PATCH] Revert Parser.php to r39295 good state. Whitespace handling for image width magic words failed, possibly other problems. NEEDS PARSER TESTS --- includes/parser/Parser.php | 881 +++++++++++++++++++++++++------------ 1 file changed, 590 insertions(+), 291 deletions(-) diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 3ac6872927..5f97184772 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -98,7 +98,7 @@ class Parser # Cleared with clearState(): var $mOutput, $mAutonumber, $mDTopen, $mStripState; var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mLinkHolders, $mLinkID; + var $mInterwikiLinkHolders, $mLinkHolders; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; // empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; @@ -179,8 +179,17 @@ class Parser $this->mStripState = new StripState; $this->mArgStack = false; $this->mInPre = false; - $this->mLinkHolders = new LinkHolderArray( $this ); - $this->mLinkID = 0; + $this->mInterwikiLinkHolders = array( + 'texts' => array(), + 'titles' => array() + ); + $this->mLinkHolders = array( + 'namespaces' => array(), + 'dbkeys' => array(), + 'queries' => array(), + 'texts' => array(), + 'titles' => array() + ); $this->mRevisionTimestamp = $this->mRevisionId = null; /** @@ -195,7 +204,7 @@ class Parser */ #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); # Changed to \x7f to allow XML double-parsing -- TS - $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); + $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString(); # Clear these on every parse, bug 4549 @@ -285,7 +294,7 @@ class Parser */ global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang; - $fname = __METHOD__.'-' . wfGetCaller(); + $fname = 'Parser::parse-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); @@ -319,6 +328,7 @@ class Parser ); $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + # only once and last $text = $this->doBlockLevels( $text, $linestart ); $this->replaceLinkHolders( $text ); @@ -338,7 +348,7 @@ class Parser $uniq_prefix = $this->mUniqPrefix; $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); - $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); + $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); foreach( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; @@ -356,7 +366,7 @@ class Parser $text = Sanitizer::normalizeCharReferences( $text ); if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = self::tidy($text); + $text = Parser::tidy($text); } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) @@ -461,8 +471,6 @@ class Parser function &getTitle() { return $this->mTitle; } function getOptions() { return $this->mOptions; } function getRevisionId() { return $this->mRevisionId; } - function getOutput() { return $this->mOutput; } - function nextLinkID() { return $this->mLinkID++; } function getFunctionLang() { global $wgLang, $wgContLang; @@ -504,7 +512,7 @@ class Parser * @public * @static */ - static function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){ + function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){ static $n = 1; $stripped = ''; $matches = array(); @@ -541,7 +549,7 @@ class Parser $text = $inside; $tail = null; } else { - if( $element === '!--' ) { + if( $element == '!--' ) { $end = '/(-->)/'; } else { $end = "/(<\\/$element\\s*>)/i"; @@ -644,15 +652,15 @@ class Parser * @public * @static */ - static function tidy( $text ) { + function tidy( $text ) { global $wgTidyInternal; $wrappedtext = ''. 'test'.$text.''; if( $wgTidyInternal ) { - $correctedtext = self::internalTidy( $wrappedtext ); + $correctedtext = Parser::internalTidy( $wrappedtext ); } else { - $correctedtext = self::externalTidy( $wrappedtext ); + $correctedtext = Parser::externalTidy( $wrappedtext ); } if( is_null( $correctedtext ) ) { wfDebug( "Tidy error detected!\n" ); @@ -667,9 +675,10 @@ class Parser * @private * @static */ - static function externalTidy( $text ) { + function externalTidy( $text ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::externalTidy'; + wfProfileIn( $fname ); $cleansource = ''; $opts = ' -utf8'; @@ -698,7 +707,7 @@ class Parser } } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); if( $cleansource == '' && $text != '') { // Some kind of error happened, so we couldn't get the corrected text. @@ -718,9 +727,10 @@ class Parser * @private * @static */ - static function internalTidy( $text ) { + function internalTidy( $text ) { global $wgTidyConf, $IP, $wgDebugTidy; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::internalTidy'; + wfProfileIn( $fname ); $tidy = new tidy; $tidy->parseString( $text, $wgTidyConf, 'utf8' ); @@ -738,7 +748,7 @@ class Parser "\n-->"; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $cleansource; } @@ -748,35 +758,34 @@ class Parser * @private */ function doTableStuff ( $text ) { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::doTableStuff'; + wfProfileIn( $fname ); - $lines = StringUtils::explode( "\n", $text ); - $out = ''; + $lines = explode ( "\n" , $text ); $td_history = array (); // Is currently a td tag open? $last_tag_history = array (); // Save history of last lag activated (td, th or caption) $tr_history = array (); // Is currently a tr tag open? $tr_attributes = array (); // history of tr attributes $has_opened_tr = array(); // Did this table open a element? $indent_level = 0; // indent level of the table - - foreach ( $lines as $outLine ) { - $line = trim( $outLine ); + foreach ( $lines as $key => $line ) + { + $line = trim ( $line ); if( $line == '' ) { // empty line, go to next line - $out .= "\n"; continue; } - $first_character = $line[0]; + $first_character = $line{0}; $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { + if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) { // First check if we are starting a new table $indent_level = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' ); - $outLine = str_repeat( '
' , $indent_level ) . ""; + $lines[$key] = str_repeat( '
' , $indent_level ) . ""; array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); array_push ( $tr_history , false ); @@ -784,9 +793,8 @@ class Parser array_push ( $has_opened_tr , false ); } else if ( count ( $td_history ) == 0 ) { // Don't do any of the following - $out .= $outLine."\n"; continue; - } else if ( substr ( $line , 0 , 2 ) === '|}' ) { + } else if ( substr ( $line , 0 , 2 ) == '|}' ) { // We are ending a table $line = '' . substr ( $line , 2 ); $last_tag = array_pop ( $last_tag_history ); @@ -803,8 +811,8 @@ class Parser $line = "{$line}"; } array_pop ( $tr_attributes ); - $outLine = $line . str_repeat( '
' , $indent_level ); - } else if ( substr ( $line , 0 , 2 ) === '|-' ) { + $lines[$key] = $line . str_repeat( '
' , $indent_level ); + } else if ( substr ( $line , 0 , 2 ) == '|-' ) { // Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -827,21 +835,21 @@ class Parser $line = "{$line}"; } - $outLine = $line; + $lines[$key] = $line; array_push ( $tr_history , false ); array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); } - else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) { + else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) { // This might be cell elements, td, th or captions - if ( substr ( $line , 0 , 2 ) === '|+' ) { + if ( substr ( $line , 0 , 2 ) == '|+' ) { $first_character = '+'; $line = substr ( $line , 1 ); } $line = substr ( $line , 1 ); - if ( $first_character === '!' ) { + if ( $first_character == '!' ) { $line = str_replace ( '!!' , '||' , $line ); } @@ -851,13 +859,13 @@ class Parser // attribute values containing literal "||". $cells = StringUtils::explodeMarkup( '||' , $line ); - $outLine = ''; + $lines[$key] = ''; // Loop through each table cell foreach ( $cells as $cell ) { $previous = ''; - if ( $first_character !== '+' ) + if ( $first_character != '+' ) { $tr_after = array_pop ( $tr_attributes ); if ( !array_pop ( $tr_history ) ) { @@ -875,11 +883,11 @@ class Parser $previous = "{$previous}"; } - if ( $first_character === '|' ) { + if ( $first_character == '|' ) { $last_tag = 'td'; - } else if ( $first_character === '!' ) { + } else if ( $first_character == '!' ) { $last_tag = 'th'; - } else if ( $first_character === '+' ) { + } else if ( $first_character == '+' ) { $last_tag = 'caption'; } else { $last_tag = ''; @@ -902,42 +910,38 @@ class Parser $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; } - $outLine .= $cell; + $lines[$key] .= $cell; array_push ( $td_history , true ); } } - $out .= $outLine . "\n"; } // Closing open td, tr && table while ( count ( $td_history ) > 0 ) { if ( array_pop ( $td_history ) ) { - $out .= "\n"; + $lines[] = '' ; } if ( array_pop ( $tr_history ) ) { - $out .= "\n"; + $lines[] = '' ; } if ( !array_pop ( $has_opened_tr ) ) { - $out .= "\n" ; + $lines[] = "" ; } - $out .= "\n"; + $lines[] = '' ; } - // Remove trailing line-ending (b/c) - if ( substr( $out, -1 ) === "\n" ) { - $out = substr( $out, 0, -1 ); - } + $output = implode ( "\n" , $lines ) ; // special case: don't return empty table - if( $out === "\n\n
" ) { - $out = ''; + if( $output == "\n\n
" ) { + $output = ''; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); - return $out; + return $output; } /** @@ -948,11 +952,12 @@ class Parser */ function internalParse( $text ) { $isMain = true; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::internalParse'; + wfProfileIn( $fname ); # Hook to suspend the parser in this state if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text ; } @@ -985,7 +990,7 @@ class Parser $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $isMain ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text; } @@ -1013,10 +1018,10 @@ class Parser } function magicLinkCallback( $m ) { - if ( substr( $m[0], 0, 1 ) === '<' ) { + if ( substr( $m[0], 0, 1 ) == '<' ) { # Skip HTML element return $m[0]; - } elseif ( substr( $m[0], 0, 4 ) === 'ISBN' ) { + } elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) { $isbn = $m[2]; $num = strtr( $isbn, array( '-' => '', @@ -1028,11 +1033,11 @@ class Parser $titleObj->escapeLocalUrl() . "\" class=\"internal\">ISBN $isbn"; } else { - if ( substr( $m[0], 0, 3 ) === 'RFC' ) { + if ( substr( $m[0], 0, 3 ) == 'RFC' ) { $keyword = 'RFC'; $urlmsg = 'rfcurl'; $id = $m[1]; - } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) { + } elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) { $keyword = 'PMID'; $urlmsg = 'pubmedurl'; $id = $m[1]; @@ -1054,14 +1059,15 @@ class Parser * * @private */ - static function doHeadings( $text ) { - wfProfileIn( __METHOD__ ); + function doHeadings( $text ) { + $fname = 'Parser::doHeadings'; + wfProfileIn( $fname ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); $text = preg_replace( "/^$h(.+)$h\\s*$/m", "\\1", $text ); } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text; } @@ -1070,22 +1076,23 @@ class Parser * @private * @return string the altered text */ - static function doAllQuotes( $text ) { - wfProfileIn( __METHOD__ ); + function doAllQuotes( $text ) { + $fname = 'Parser::doAllQuotes'; + wfProfileIn( $fname ); $outtext = ''; - $lines = StringUtils::explode( "\n", $text ); + $lines = explode( "\n", $text ); foreach ( $lines as $line ) { - $outtext .= self::doQuotes( $line ) . "\n"; + $outtext .= $this->doQuotes ( $line ) . "\n"; } $outtext = substr($outtext, 0,-1); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $outtext; } /** * Helper function for doAllQuotes() */ - public static function doQuotes( $text ) { + public function doQuotes( $text ) { $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); if ( count( $arr ) == 1 ) return $text; @@ -1140,9 +1147,9 @@ class Parser { $x1 = substr ($arr[$i-1], -1); $x2 = substr ($arr[$i-1], -2, 1); - if ($x1 === ' ') { + if ($x1 == ' ') { if ($firstspace == -1) $firstspace = $i; - } else if ($x2 === ' ') { + } else if ($x2 == ' ') { if ($firstsingleletterword == -1) $firstsingleletterword = $i; } else { if ($firstmultiletterword == -1) $firstmultiletterword = $i; @@ -1182,7 +1189,7 @@ class Parser { if (($i % 2) == 0) { - if ($state === 'both') + if ($state == 'both') $buffer .= $r; else $output .= $r; @@ -1191,41 +1198,41 @@ class Parser { if (strlen ($r) == 2) { - if ($state === 'i') + if ($state == 'i') { $output .= ''; $state = ''; } - else if ($state === 'bi') + else if ($state == 'bi') { $output .= ''; $state = 'b'; } - else if ($state === 'ib') + else if ($state == 'ib') { $output .= ''; $state = 'b'; } - else if ($state === 'both') + else if ($state == 'both') { $output .= ''.$buffer.''; $state = 'b'; } else # $state can be 'b' or '' { $output .= ''; $state .= 'i'; } } else if (strlen ($r) == 3) { - if ($state === 'b') + if ($state == 'b') { $output .= ''; $state = ''; } - else if ($state === 'bi') + else if ($state == 'bi') { $output .= ''; $state = 'i'; } - else if ($state === 'ib') + else if ($state == 'ib') { $output .= ''; $state = 'i'; } - else if ($state === 'both') + else if ($state == 'both') { $output .= ''.$buffer.''; $state = 'i'; } else # $state can be 'i' or '' { $output .= ''; $state .= 'b'; } } else if (strlen ($r) == 5) { - if ($state === 'b') + if ($state == 'b') { $output .= ''; $state = 'i'; } - else if ($state === 'i') + else if ($state == 'i') { $output .= ''; $state = 'b'; } - else if ($state === 'bi') + else if ($state == 'bi') { $output .= ''; $state = ''; } - else if ($state === 'ib') + else if ($state == 'ib') { $output .= ''; $state = ''; } - else if ($state === 'both') + else if ($state == 'both') { $output .= ''.$buffer.''; $state = ''; } else # ($state == '') { $buffer = ''; $state = 'both'; } @@ -1234,14 +1241,14 @@ class Parser $i++; } # Now close all remaining tags. Notice that the order is important. - if ($state === 'b' || $state === 'ib') + if ($state == 'b' || $state == 'ib') $output .= ''; - if ($state === 'i' || $state === 'bi' || $state === 'ib') + if ($state == 'i' || $state == 'bi' || $state == 'ib') $output .= ''; - if ($state === 'bi') + if ($state == 'bi') $output .= ''; # There might be lonely ''''', so make sure we have a buffer - if ($state === 'both' && $buffer) + if ($state == 'both' && $buffer) $output .= ''.$buffer.''; return $output; } @@ -1257,7 +1264,8 @@ class Parser */ function replaceExternalLinks( $text ) { global $wgContLang; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::replaceExternalLinks'; + wfProfileIn( $fname ); $sk = $this->mOptions->getSkin(); @@ -1291,7 +1299,7 @@ class Parser $dtrail = ''; # Set linktype for CSS - if URL==text, link is essentially free - $linktype = ($text === $url) ? 'free' : 'text'; + $linktype = ($text == $url) ? 'free' : 'text'; # No link text, e.g. [http://domain.tld/some.link] if ( $text == '' ) { @@ -1327,11 +1335,11 @@ class Parser # Register link in the output object. # Replace unnecessary URL escape codes with the referenced character # This prevents spammers from hiding links from the filters - $pasteurized = self::replaceUnusualEscapes( $url ); + $pasteurized = Parser::replaceUnusualEscapes( $url ); $this->mOutput->addExternalLink( $pasteurized ); } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $s; } @@ -1341,7 +1349,8 @@ class Parser */ function replaceFreeExternalLinks( $text ) { global $wgContLang; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::replaceFreeExternalLinks'; + wfProfileIn( $fname ); $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); $s = array_shift( $bits ); @@ -1403,7 +1412,7 @@ class Parser $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters - $pasteurized = self::replaceUnusualEscapes( $url ); + $pasteurized = Parser::replaceUnusualEscapes( $url ); $this->mOutput->addExternalLink( $pasteurized ); } $s .= $text . $trail; @@ -1411,7 +1420,7 @@ class Parser $s .= $protocol . $remainder; } } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $s; } @@ -1427,7 +1436,7 @@ class Parser */ static function replaceUnusualEscapes( $url ) { return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', - array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); + array( 'Parser', 'replaceUnusualEscapesCallback' ), $url ); } /** @@ -1471,48 +1480,35 @@ class Parser /** * Process [[ ]] wikilinks - * @return processed text * * @private */ function replaceInternalLinks( $s ) { - $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) ); - return $s; - } - - /** - * Process [[ ]] wikilinks - * @return LinkHolderArray - * - * @private - */ - function replaceInternalLinks2( &$s ) { global $wgContLang; + static $fname = 'Parser::replaceInternalLinks' ; - wfProfileIn( __METHOD__ ); + wfProfileIn( $fname ); - wfProfileIn( __METHOD__.'-setup' ); - static $tc = FALSE, $e1, $e1_img; + wfProfileIn( $fname.'-setup' ); + static $tc = FALSE; # the % is needed to support urlencoded titles as well - if ( !$tc ) { - $tc = Title::legalChars() . '#%'; - # Match a link having the form [[namespace:link|alternate]]trail - $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; - # Match cases where there is no "]]", which might still be images - $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; - } + if ( !$tc ) { $tc = Title::legalChars() . '#%'; } $sk = $this->mOptions->getSkin(); - $holders = new LinkHolderArray( $this ); #split the entire text string on occurences of [[ - $a = StringUtils::explode( '[[', ' ' . $s ); + $a = explode( '[[', ' ' . $s ); #get the first element (all text up to first [[), and remove the space we added - $s = $a->current(); - $a->next(); - $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" + $s = array_shift( $a ); $s = substr( $s, 1 ); + # Match a link having the form [[namespace:link|alternate]]trail + static $e1 = FALSE; + if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; } + # Match cases where there is no "]]", which might still be images + static $e1_img = FALSE; + if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; } + $useLinkPrefixExtension = $wgContLang->linkPrefixExtension(); $e2 = null; if ( $useLinkPrefixExtension ) { @@ -1522,8 +1518,8 @@ class Parser } if( is_null( $this->mTitle ) ) { - wfProfileOut( __METHOD__.'-setup' ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); + wfProfileOut( $fname.'-setup' ); throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1545,20 +1541,13 @@ class Parser $selflink = array($this->mTitle->getPrefixedText()); } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( $fname.'-setup' ); # Loop for each link - for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) { - # Check for excessive memory usage - if ( $holders->isBig() ) { - # Too big - # Do the existence check, replace the link holders and clear the array - $holders->replace( $s ); - $holders->clear(); - } - + for ($k = 0; isset( $a[$k] ); $k++) { + $line = $a[$k]; if ( $useLinkPrefixExtension ) { - wfProfileIn( __METHOD__.'-prefixhandling' ); + wfProfileIn( $fname.'-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; @@ -1570,12 +1559,12 @@ class Parser $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( __METHOD__.'-prefixhandling' ); + wfProfileOut( $fname.'-prefixhandling' ); } $might_be_img = false; - wfProfileIn( __METHOD__."-e1" ); + wfProfileIn( "$fname-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1609,18 +1598,18 @@ class Parser $trail = ""; } else { # Invalid form; output directly $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-e1" ); + wfProfileOut( "$fname-e1" ); continue; } - wfProfileOut( __METHOD__."-e1" ); - wfProfileIn( __METHOD__."-misc" ); + wfProfileOut( "$fname-e1" ); + wfProfileIn( "$fname-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) { $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-misc" ); + wfProfileOut( "$fname-misc" ); continue; } @@ -1631,36 +1620,33 @@ class Parser $link = $m[1]; } - $noforce = (substr($m[1], 0, 1) !== ':'); + $noforce = (substr($m[1], 0, 1) != ':'); if (!$noforce) { # Strip off leading ':' $link = substr($link, 1); } - wfProfileOut( __METHOD__."-misc" ); - wfProfileIn( __METHOD__."-title" ); + wfProfileOut( "$fname-misc" ); + wfProfileIn( "$fname-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) ); if( !$nt ) { $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( "$fname-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( "$fname-title" ); if ($might_be_img) { # if this is actually an invalid link - wfProfileIn( __METHOD__."-might_be_img" ); + wfProfileIn( "$fname-might_be_img" ); if ($ns == NS_IMAGE && $noforce) { #but might be an image $found = false; - while ( true ) { + while (isset ($a[$k+1]) ) { #look at the next 'line' to see if we can close it there - $a->next(); - $next_line = $a->current(); - if ( $next_line === false || $next_line === null ) { - break; - } + $spliced = array_splice( $a, $k + 1, 1 ); + $next_line = array_shift( $spliced ); $m = explode( ']]', $next_line, 3 ); if ( count( $m ) == 3 ) { # the first ]] closes the inner link, the second the image @@ -1680,19 +1666,19 @@ class Parser if ( !$found ) { # we couldn't find the end of this imageLink, so output it raw #but don't ignore what might be perfectly normal links in the text we've examined - $holders->merge( $this->replaceInternalLinks2( $text ) ); + $text = $this->replaceInternalLinks($text); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( "$fname-might_be_img" ); continue; } } else { #it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( "$fname-might_be_img" ); continue; } - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( "$fname-might_be_img" ); } $wasblank = ( '' == $text ); @@ -1702,36 +1688,41 @@ class Parser if( $noforce ) { # Interwikis - wfProfileIn( __METHOD__."-interwiki" ); + wfProfileIn( "$fname-interwiki" ); if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) { $this->mOutput->addLanguageLink( $nt->getFullText() ); $s = rtrim($s . $prefix); $s .= trim($trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( "$fname-interwiki" ); continue; } - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( "$fname-interwiki" ); if ( $ns == NS_IMAGE ) { - wfProfileIn( __METHOD__."-image" ); + wfProfileIn( "$fname-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { # recursively parse links inside the image caption # actually, this will parse them in any other parameters, too, # but it might be hard to fix that, and it doesn't matter ATM $text = $this->replaceExternalLinks($text); - $holders->merge( $this->replaceInternalLinks2( $text ) ); + $text = $this->replaceInternalLinks($text); # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail; + $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail; + $this->mOutput->addImage( $nt->getDBkey() ); + + wfProfileOut( "$fname-image" ); + continue; + } else { + # We still need to record the image's presence on the page + $this->mOutput->addImage( $nt->getDBkey() ); } - $this->mOutput->addImage( $nt->getDBkey() ); - wfProfileOut( __METHOD__."-image" ); - continue; + wfProfileOut( "$fname-image" ); } if ( $ns == NS_CATEGORY ) { - wfProfileIn( __METHOD__."-category" ); + wfProfileIn( "$fname-category" ); $s = rtrim($s . "\n"); # bug 87 if ( $wasblank ) { @@ -1750,7 +1741,7 @@ class Parser */ $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__."-category" ); + wfProfileOut( "$fname-category" ); continue; } } @@ -1781,7 +1772,7 @@ class Parser if( SpecialPage::exists( $nt->getDBkey() ) ) { $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); } else { - $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); } continue; } elseif( $ns == NS_IMAGE ) { @@ -1795,10 +1786,10 @@ class Parser continue; } } - $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); } - wfProfileOut( __METHOD__ ); - return $holders; + wfProfileOut( $fname ); + return $s; } /** @@ -1807,10 +1798,32 @@ class Parser * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. * - * @deprecated */ function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); + wfProfileIn( __METHOD__ ); + if ( ! is_object($nt) ) { + # Fail gracefully + $retVal = "{$prefix}{$text}{$trail}"; + } else { + # Separate the link trail from the rest of the link + list( $inside, $trail ) = Linker::splitTrail( $trail ); + + if ( $nt->isExternal() ) { + $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside ); + $this->mInterwikiLinkHolders['titles'][] = $nt; + $retVal = '{$trail}"; + } else { + $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() ); + $this->mLinkHolders['dbkeys'][] = $nt->getDBkey(); + $this->mLinkHolders['queries'][] = $query; + $this->mLinkHolders['texts'][] = $prefix.$text.$inside; + $this->mLinkHolders['titles'][] = $nt; + + $retVal = '{$trail}"; + } + } + wfProfileOut( __METHOD__ ); + return $retVal; } /** @@ -1876,7 +1889,8 @@ class Parser # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage - wfProfileIn( __METHOD__ ); + $fname = 'Parser::maybeDoSubpageLink'; + wfProfileIn( $fname ); $ret = $target; # default return value is no change # Some namespaces don't allow subpages, @@ -1892,7 +1906,7 @@ class Parser # bug 7425 $target = trim( $target ); # Look at the first character - if( $target != '' && $target{0} === '/' ) { + if( $target != '' && $target{0} == '/' ) { # / at end means we don't want the slash to be shown $m = array(); $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m ); @@ -1919,7 +1933,7 @@ class Parser if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) ); # / at the end means don't show full path - if( substr( $nodotdot, -1, 1 ) === '/' ) { + if( substr( $nodotdot, -1, 1 ) == '/' ) { $nodotdot = substr( $nodotdot, 0, -1 ); if( '' === $text ) { $text = $nodotdot . $suffix; @@ -1935,7 +1949,7 @@ class Parser } } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $ret; } @@ -1955,7 +1969,7 @@ class Parser # getCommon() returns the length of the longest common substring # of both arguments, starting at the beginning of both. # - /* private */ static function getCommon( $st1, $st2 ) { + /* private */ function getCommon( $st1, $st2 ) { $fl = strlen( $st1 ); $shorter = strlen( $st2 ); if ( $fl < $shorter ) { $shorter = $fl; } @@ -1971,10 +1985,10 @@ class Parser /* private */ function openList( $char ) { $result = $this->closeParagraph(); - if ( '*' === $char ) { $result .= ''; } - else if ( '#' === $char ) { $text = ''; } - else if ( ':' === $char ) { + if ( '*' == $char ) { $text = ''; } + else if ( '#' == $char ) { $text = ''; } + else if ( ':' == $char ) { if ( $this->mDTopen ) { $this->mDTopen = false; $text = ''; @@ -2022,53 +2036,50 @@ class Parser * @return string the lists rendered as HTML */ function doBlockLevels( $text, $linestart ) { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::doBlockLevels'; + wfProfileIn( $fname ); # Parsing through the text line by line. The main thing # happening here is handling of block-level elements p, pre, # and making lists from lines starting with * # : etc. # - $textLines = StringUtils::explode( "\n", $text ); + $textLines = explode( "\n", $text ); $lastPrefix = $output = ''; $this->mDTopen = $inBlockElem = false; $prefixLength = 0; $paragraphStack = false; + if ( !$linestart ) { + $output .= array_shift( $textLines ); + } foreach ( $textLines as $oLine ) { - # Fix up $linestart - if ( !$linestart ) { - $output .= $oLine; - $linestart = true; - continue; - } - $lastPrefixLength = strlen( $lastPrefix ); $preCloseMatch = preg_match('/<\\/pre/i', $oLine ); $preOpenMatch = preg_match('/
mInPre ) {
 				# Multiple prefixes may abut each other for nested lists.
 				$prefixLength = strspn( $oLine, '*#:;' );
-				$prefix = substr( $oLine, 0, $prefixLength );
+				$pref = substr( $oLine, 0, $prefixLength );
 
 				# eh?
-				$prefix2 = str_replace( ';', ':', $prefix );
+				$pref2 = str_replace( ';', ':', $pref );
 				$t = substr( $oLine, $prefixLength );
-				$this->mInPre = (bool)$preOpenMatch;
+				$this->mInPre = !empty($preOpenMatch);
 			} else {
 				# Don't interpret any other prefixes in preformatted text
 				$prefixLength = 0;
-				$prefix = $prefix2 = '';
+				$pref = $pref2 = '';
 				$t = $oLine;
 			}
 
 			# List generation
-			if( $prefixLength && $lastPrefix === $prefix2 ) {
+			if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
 				# Same as the last item, so no need to deal with nesting or opening stuff
-				$output .= $this->nextItem( substr( $prefix, -1 ) );
+				$output .= $this->nextItem( substr( $pref, -1 ) );
 				$paragraphStack = false;
 
-				if ( substr( $prefix, -1 ) === ';') {
+				if ( substr( $pref, -1 ) == ';') {
 					# The one nasty exception: definition lists work like this:
 					# ; title : definition text
 					# So we check for : in the remainder text to split up the
@@ -2081,21 +2092,21 @@ class Parser
 				}
 			} elseif( $prefixLength || $lastPrefixLength ) {
 				# Either open or close a level...
-				$commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
+				$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
 				$paragraphStack = false;
 
 				while( $commonPrefixLength < $lastPrefixLength ) {
-					$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
+					$output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
 					--$lastPrefixLength;
 				}
 				if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
-					$output .= $this->nextItem( $prefix[$commonPrefixLength-1] );
+					$output .= $this->nextItem( $pref{$commonPrefixLength-1} );
 				}
 				while ( $prefixLength > $commonPrefixLength ) {
-					$char = substr( $prefix, $commonPrefixLength, 1 );
+					$char = substr( $pref, $commonPrefixLength, 1 );
 					$output .= $this->openList( $char );
 
-					if ( ';' === $char ) {
+					if ( ';' == $char ) {
 						# FIXME: This is dupe of code above
 						if ($this->findColonNoLinks($t, $term, $t2) !== false) {
 							$t = $t2;
@@ -2104,10 +2115,10 @@ class Parser
 					}
 					++$commonPrefixLength;
 				}
-				$lastPrefix = $prefix2;
+				$lastPrefix = $pref2;
 			}
 			if( 0 == $prefixLength ) {
-				wfProfileIn( __METHOD__."-paragraph" );
+				wfProfileIn( "$fname-paragraph" );
 				# No prefix (not in list)--go to paragraph mode
 				// XXX: use a stack for nestable elements like span, table and div
 				$openmatch = preg_match('/(?:mInPre ) {
-					if ( ' ' == $t{0} and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
+					if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
 						// pre
-						if ($this->mLastSection !== 'pre') {
+						if ($this->mLastSection != 'pre') {
 							$paragraphStack = false;
 							$output .= $this->closeParagraph().'
';
 							$this->mLastSection = 'pre';
@@ -2143,7 +2154,7 @@ class Parser
 								$paragraphStack = false;
 								$this->mLastSection = 'p';
 							} else {
-								if ($this->mLastSection !== 'p' ) {
+								if ($this->mLastSection != 'p' ) {
 									$output .= $this->closeParagraph();
 									$this->mLastSection = '';
 									$paragraphStack = '

'; @@ -2156,14 +2167,14 @@ class Parser $output .= $paragraphStack; $paragraphStack = false; $this->mLastSection = 'p'; - } else if ($this->mLastSection !== 'p') { + } else if ($this->mLastSection != 'p') { $output .= $this->closeParagraph().'

'; $this->mLastSection = 'p'; } } } } - wfProfileOut( __METHOD__."-paragraph" ); + wfProfileOut( "$fname-paragraph" ); } // somewhere above we forget to get out of pre block (bug 785) if($preCloseMatch && $this->mInPre) { @@ -2174,7 +2185,7 @@ class Parser } } while ( $prefixLength ) { - $output .= $this->closeList( $prefix2[$prefixLength-1] ); + $output .= $this->closeList( $pref2{$prefixLength-1} ); --$prefixLength; } if ( '' != $this->mLastSection ) { @@ -2182,7 +2193,7 @@ class Parser $this->mLastSection = ''; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $output; } @@ -2194,13 +2205,14 @@ class Parser * @param string &$after set to everything after the ':' * return string the position of the ':', or false if none found */ - static function findColonNoLinks($str, &$before, &$after) { - wfProfileIn( __METHOD__ ); + function findColonNoLinks($str, &$before, &$after) { + $fname = 'Parser::findColonNoLinks'; + wfProfileIn( $fname ); $pos = strpos( $str, ':' ); if( $pos === false ) { // Nothing to find! - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return false; } @@ -2209,7 +2221,7 @@ class Parser // Easy; no tag nesting to worry about $before = substr( $str, 0, $pos ); $after = substr( $str, $pos+1 ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $pos; } @@ -2233,7 +2245,7 @@ class Parser // We found it! $before = substr( $str, 0, $i ); $after = substr( $str, $i + 1 ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $i; } // Embedded in a tag; don't break it. @@ -2243,7 +2255,7 @@ class Parser $colon = strpos( $str, ':', $i ); if( $colon === false ) { // Nothing else interesting - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return false; } $lt = strpos( $str, '<', $i ); @@ -2252,7 +2264,7 @@ class Parser // We found it! $before = substr( $str, 0, $colon ); $after = substr( $str, $colon + 1 ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $i; } } @@ -2299,18 +2311,18 @@ class Parser break; case 3: // self::COLON_STATE_CLOSETAG: // In a - if( $c === ">" ) { + if( $c == ">" ) { $stack--; if( $stack < 0 ) { - wfDebug( __METHOD__.": Invalid input; too many close tags\n" ); - wfProfileOut( __METHOD__ ); + wfDebug( "Invalid input in $fname; too many close tags\n" ); + wfProfileOut( $fname ); return false; } $state = self::COLON_STATE_TEXT; } break; case self::COLON_STATE_TAGSLASH: - if( $c === ">" ) { + if( $c == ">" ) { // Yes, a self-closed tag $state = self::COLON_STATE_TEXT; } else { @@ -2319,33 +2331,33 @@ class Parser } break; case 5: // self::COLON_STATE_COMMENT: - if( $c === "-" ) { + if( $c == "-" ) { $state = self::COLON_STATE_COMMENTDASH; } break; case self::COLON_STATE_COMMENTDASH: - if( $c === "-" ) { + if( $c == "-" ) { $state = self::COLON_STATE_COMMENTDASHDASH; } else { $state = self::COLON_STATE_COMMENT; } break; case self::COLON_STATE_COMMENTDASHDASH: - if( $c === ">" ) { + if( $c == ">" ) { $state = self::COLON_STATE_TEXT; } else { $state = self::COLON_STATE_COMMENT; } break; default: - throw new MWException( "State machine error in " . __METHOD__ ); + throw new MWException( "State machine error in $fname" ); } } if( $stack > 0 ) { - wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); + wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" ); return false; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return false; } @@ -2575,11 +2587,12 @@ class Parser * @private */ function initialiseVariables() { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::initialiseVariables'; + wfProfileIn( $fname ); $variableIDs = MagicWord::getVariableIDs(); $this->mVariables = new MagicWordArray( $variableIDs ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); } /** @@ -2648,7 +2661,8 @@ class Parser return $text; } - wfProfileIn( __METHOD__ ); + $fname = __METHOD__; + wfProfileIn( $fname ); if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); @@ -2661,7 +2675,7 @@ class Parser $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; $text = $frame->expand( $dom, $flags ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text; } @@ -2724,7 +2738,8 @@ class Parser */ function braceSubstitution( $piece, $frame ) { global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; - wfProfileIn( __METHOD__ ); + $fname = __METHOD__; + wfProfileIn( $fname ); wfProfileIn( __METHOD__.'-setup' ); # Flags @@ -2911,7 +2926,7 @@ class Parser } } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { $found = false; //access denied - wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); + wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); } else { list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { @@ -2945,7 +2960,7 @@ class Parser # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return array( 'object' => $text ); } @@ -3004,7 +3019,7 @@ class Parser $ret = array( 'text' => $text ); } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $ret; } @@ -3143,7 +3158,7 @@ class Parser return $this->fetchScaryTemplateMaybeFromCache($url); } - static function fetchScaryTemplateMaybeFromCache($url) { + function fetchScaryTemplateMaybeFromCache($url) { global $wgTranscludeCacheExpiry; $dbr = wfGetDB(DB_SLAVE); $obj = $dbr->selectRow('transcache', array('tc_time', 'tc_contents'), @@ -3291,7 +3306,7 @@ class Parser } } - if ( $name === 'html' || $name === 'nowiki' ) { + if ( $name == 'html' || $name == 'nowiki' ) { $this->mStripState->nowiki->setPair( $marker, $output ); } else { $this->mStripState->general->setPair( $marker, $output ); @@ -3547,7 +3562,12 @@ class Parser # # turns into # link text with suffix - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); + $safeHeadline = preg_replace( '//e', + "\$this->mLinkHolders['texts'][\$1]", + $safeHeadline ); + $safeHeadline = preg_replace( '//e', + "\$this->mInterwikiLinkHolders['texts'][\$1]", + $safeHeadline ); # Strip out HTML (other than plain and : bug 8393) $tocline = preg_replace( @@ -3623,7 +3643,7 @@ class Parser $i = 0; foreach( $blocks as $block ) { - if( $showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n" ) { + if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { # This is the [edit] link that appears for the top block of text when # section editing is enabled @@ -3775,7 +3795,7 @@ class Parser } else { # Failed to validate; fall back to the default $nickname = $username; - wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" ); + wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" ); } } @@ -3798,7 +3818,7 @@ class Parser * @param string $text * @return mixed An expanded string, or false if invalid. */ - static function validateSig( $text ) { + function validateSig( $text ) { return( wfIsWellFormedXmlFragment( $text ) ? $text : false ); } @@ -3850,7 +3870,7 @@ class Parser * @param string $text * @return string Signature text with /~{3,5}/ removed */ - static function cleanSigInSig( $text ) { + function cleanSigInSig( $text ) { $text = preg_replace( '/~{3,5}/', '', $text ); return $text; } @@ -3881,17 +3901,19 @@ class Parser global $wgTitle; static $executing = false; + $fname = "Parser::transformMsg"; + # Guard against infinite recursion if ( $executing ) { return $text; } $executing = true; - wfProfileIn(__METHOD__); + wfProfileIn($fname); $text = $this->preprocess( $text, $wgTitle, $options ); $executing = false; - wfProfileOut(__METHOD__); + wfProfileOut($fname); return $text; } @@ -3988,7 +4010,7 @@ class Parser # Add to function cache $mw = MagicWord::get( $id ); if( !$mw ) - throw new MWException( __METHOD__.'() expecting a magic word identifier.' ); + throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' ); $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4003,7 +4025,7 @@ class Parser $syn = '#' . $syn; } # Remove trailing colon - if ( substr( $syn, -1, 1 ) === ':' ) { + if ( substr( $syn, -1, 1 ) == ':' ) { $syn = substr( $syn, 0, -1 ); } $this->mFunctionSynonyms[$sensitive][$syn] = $id; @@ -4024,9 +4046,266 @@ class Parser * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. + * $options is a bit field, RLH_FOR_UPDATE to select for update */ function replaceLinkHolders( &$text, $options = 0 ) { - return $this->mLinkHolders->replace( $text ); + global $wgUser; + global $wgContLang; + + $fname = 'Parser::replaceLinkHolders'; + wfProfileIn( $fname ); + + $pdbks = array(); + $colours = array(); + $linkcolour_ids = array(); + $sk = $this->mOptions->getSkin(); + $linkCache = LinkCache::singleton(); + + if ( !empty( $this->mLinkHolders['namespaces'] ) ) { + wfProfileIn( $fname.'-check' ); + $dbr = wfGetDB( DB_SLAVE ); + $page = $dbr->tableName( 'page' ); + $threshold = $wgUser->getOption('stubthreshold'); + + # Sort by namespace + asort( $this->mLinkHolders['namespaces'] ); + + # Generate query + $query = false; + $current = null; + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { + # Make title object + $title = $this->mLinkHolders['titles'][$key]; + + # Skip invalid entries. + # Result will be ugly, but prevents crash. + if ( is_null( $title ) ) { + continue; + } + $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); + + # Check if it's a static known link, e.g. interwiki + if ( $title->isAlwaysKnown() ) { + $colours[$pdbk] = ''; + } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { + $colours[$pdbk] = ''; + $this->mOutput->addLink( $title, $id ); + } elseif ( $linkCache->isBadLink( $pdbk ) ) { + $colours[$pdbk] = 'new'; + } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { + $colours[$pdbk] = 'new'; + } else { + # Not in the link cache, add it to the query + if ( !isset( $current ) ) { + $current = $ns; + $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; + $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; + } elseif ( $current != $ns ) { + $current = $ns; + $query .= ")) OR (page_namespace=$ns AND page_title IN("; + } else { + $query .= ', '; + } + + $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] ); + } + } + if ( $query ) { + $query .= '))'; + if ( $options & RLH_FOR_UPDATE ) { + $query .= ' FOR UPDATE'; + } + + $res = $dbr->query( $query, $fname ); + + # Fetch data and form into an associative array + # non-existent = broken + while ( $s = $dbr->fetchObject($res) ) { + $title = Title::makeTitle( $s->page_namespace, $s->page_title ); + $pdbk = $title->getPrefixedDBkey(); + $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); + $this->mOutput->addLink( $title, $s->page_id ); + $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + //add id to the extension todolist + $linkcolour_ids[$s->page_id] = $pdbk; + } + //pass an array of page_ids to an extension + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + } + wfProfileOut( $fname.'-check' ); + + # Do a second query for different language variants of links and categories + if($wgContLang->hasVariants()){ + $linkBatch = new LinkBatch(); + $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) + $categoryMap = array(); // maps $category_variant => $category (dbkeys) + $varCategories = array(); // category replacements oldDBkey => newDBkey + + $categories = $this->mOutput->getCategoryLinks(); + + // Add variants of links to link batch + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { + $title = $this->mLinkHolders['titles'][$key]; + if ( is_null( $title ) ) + continue; + + $pdbk = $title->getPrefixedDBkey(); + $titleText = $title->getText(); + + // generate all variants of the link title text + $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); + + // if link was not found (in first query), add all variants to query + if ( !isset($colours[$pdbk]) ){ + foreach($allTextVariants as $textVariant){ + if($textVariant != $titleText){ + $variantTitle = Title::makeTitle( $ns, $textVariant ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; + } + } + } + } + + // process categories, check if a category exists in some variant + foreach( $categories as $category ){ + $variants = $wgContLang->convertLinkToAllVariants($category); + foreach($variants as $variant){ + if($variant != $category){ + $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $categoryMap[$variant] = $category; + } + } + } + + + if(!$linkBatch->isEmpty()){ + // construct query + $titleClause = $linkBatch->constructSet('page', $dbr); + + $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; + + $variantQuery .= " FROM $page WHERE $titleClause"; + if ( $options & RLH_FOR_UPDATE ) { + $variantQuery .= ' FOR UPDATE'; + } + + $varRes = $dbr->query( $variantQuery, $fname ); + + // for each found variants, figure out link holders and replace + while ( $s = $dbr->fetchObject($varRes) ) { + + $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); + $varPdbk = $variantTitle->getPrefixedDBkey(); + $vardbk = $variantTitle->getDBkey(); + + $holderKeys = array(); + if(isset($variantMap[$varPdbk])){ + $holderKeys = $variantMap[$varPdbk]; + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); + $this->mOutput->addLink( $variantTitle, $s->page_id ); + } + + // loop over link holders + foreach($holderKeys as $key){ + $title = $this->mLinkHolders['titles'][$key]; + if ( is_null( $title ) ) continue; + + $pdbk = $title->getPrefixedDBkey(); + + if(!isset($colours[$pdbk])){ + // found link in some of the variants, replace the link holder data + $this->mLinkHolders['titles'][$key] = $variantTitle; + $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); + + // set pdbk and colour + $pdbks[$key] = $varPdbk; + $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); + $linkcolour_ids[$s->page_id] = $pdbk; + } + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + } + + // check if the object is a variant of a category + if(isset($categoryMap[$vardbk])){ + $oldkey = $categoryMap[$vardbk]; + if($oldkey != $vardbk) + $varCategories[$oldkey]=$vardbk; + } + } + + // rebuild the categories in original order (if there are replacements) + if(count($varCategories)>0){ + $newCats = array(); + $originalCats = $this->mOutput->getCategories(); + foreach($originalCats as $cat => $sortkey){ + // make the replacement + if( array_key_exists($cat,$varCategories) ) + $newCats[$varCategories[$cat]] = $sortkey; + else $newCats[$cat] = $sortkey; + } + $this->mOutput->setCategoryLinks($newCats); + } + } + } + + # Construct search and replace arrays + wfProfileIn( $fname.'-construct' ); + $replacePairs = array(); + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { + $pdbk = $pdbks[$key]; + $searchkey = ""; + $title = $this->mLinkHolders['titles'][$key]; + if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { + $linkCache->addBadLinkObj( $title ); + $colours[$pdbk] = 'new'; + $this->mOutput->addLink( $title, 0 ); + $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, + $this->mLinkHolders['texts'][$key], + $this->mLinkHolders['queries'][$key] ); + } else { + $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], + $this->mLinkHolders['texts'][$key], + $this->mLinkHolders['queries'][$key] ); + } + } + $replacer = new HashtableReplacer( $replacePairs, 1 ); + wfProfileOut( $fname.'-construct' ); + + # Do the thing + wfProfileIn( $fname.'-replace' ); + $text = preg_replace_callback( + '/()/', + $replacer->cb(), + $text); + + wfProfileOut( $fname.'-replace' ); + } + + # Now process interwiki link holders + # This is quite a bit simpler than internal links + if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) { + wfProfileIn( $fname.'-interwiki' ); + # Make interwiki link HTML + $replacePairs = array(); + foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) { + $title = $this->mInterwikiLinkHolders['titles'][$key]; + $replacePairs[$key] = $sk->link( $title, $link ); + } + $replacer = new HashtableReplacer( $replacePairs, 1 ); + + $text = preg_replace_callback( + '//', + $replacer->cb(), + $text ); + wfProfileOut( $fname.'-interwiki' ); + } + + wfProfileOut( $fname ); + return $colours; } /** @@ -4036,13 +4315,42 @@ class Parser * @return string */ function replaceLinkHoldersText( $text ) { - return $this->mLinkHolders->replaceText( $text ); + $fname = 'Parser::replaceLinkHoldersText'; + wfProfileIn( $fname ); + + $text = preg_replace_callback( + '//', + array( &$this, 'replaceLinkHoldersTextCallback' ), + $text ); + + wfProfileOut( $fname ); + return $text; + } + + /** + * @param array $matches + * @return string + * @private + */ + function replaceLinkHoldersTextCallback( $matches ) { + $type = $matches[1]; + $key = $matches[2]; + if( $type == 'LINK' ) { + if( isset( $this->mLinkHolders['texts'][$key] ) ) { + return $this->mLinkHolders['texts'][$key]; + } + } elseif( $type == 'IWLINK' ) { + if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) { + return $this->mInterwikiLinkHolders['texts'][$key]; + } + } + return $matches[0]; } /** * Tag hook handler for 'pre'. */ - static function renderPreTag( $text, $attribs ) { + function renderPreTag( $text, $attribs ) { // Backwards-compatibility hack $content = StringUtils::delimiterReplace( '', '', '$1', $text, 'i' ); @@ -4090,7 +4398,7 @@ class Parser wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); - $lines = StringUtils::explode( "\n", $text ); + $lines = explode( "\n", $text ); foreach ( $lines as $line ) { # match lines like these: # Image:someimage.jpg|This is some image @@ -4103,7 +4411,7 @@ class Parser if ( strpos( $matches[0], '%' ) !== false ) $matches[1] = urldecode( $matches[1] ); - $tp = Title::newFromText( $matches[1], NS_IMAGE ); + $tp = Title::newFromText( $matches[1] ); $nt =& $tp; if( is_null( $nt ) ) { # Bogus title. Ignore these so we don't bomb out later. @@ -4169,11 +4477,8 @@ class Parser /** * Parse image options text and use it to make an image - * @param Title $title - * @param string $options - * @param LinkHolderArray $holders */ - function makeImage( $title, $options, $holders = false ) { + function makeImage( $title, $options ) { # Check if the options text is of the form "options|alt text" # Options are: # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang @@ -4196,7 +4501,7 @@ class Parser # * bottom # * text-bottom - $parts = StringUtils::explode( "|", $options ); + $parts = array_map( 'trim', explode( '|', $options) ); $sk = $this->mOptions->getSkin(); # Give extensions a chance to select the file revision for us @@ -4224,7 +4529,7 @@ class Parser list( $type, $paramName ) = $paramMap[$magicName]; // Special case; width and height come in one variable together - if( $type === 'handler' && $paramName === 'width' ) { + if( $type == 'handler' && $paramName == 'width' ) { $m = array(); # (bug 13500) In both cases (width/height and width only), # permit trailing "px" for backward compatibility. @@ -4247,7 +4552,7 @@ class Parser } } // else no validation -- bug 13436 } else { - if ( $type === 'handler' ) { + if ( $type == 'handler' ) { # Validate handler parameter $validated = $handler->validateParam( $paramName, $value ); } else { @@ -4283,13 +4588,7 @@ class Parser } # Strip bad stuff out of the alt text - # We can't just use replaceLinkHoldersText() here, because if this function - # is called from replaceInternalLinks2(), mLinkHolders won't be up to date. - if ( $holders ) { - $alt = $holders->replaceText( $caption ); - } else { - $alt = $this->replaceLinkHoldersText( $caption ); - } + $alt = $this->replaceLinkHoldersText( $caption ); # make sure there are no placeholders in thumbnail attributes # that are later expanded to html- so expand them now and @@ -4392,7 +4691,7 @@ class Parser $sectionParts = explode( '-', $section ); $sectionIndex = array_pop( $sectionParts ); foreach ( $sectionParts as $part ) { - if ( $part === 'T' ) { + if ( $part == 'T' ) { $flags |= self::PTD_FOR_INCLUSION; } } @@ -4409,14 +4708,14 @@ class Parser $targetLevel = 1000; } else { while ( $node ) { - if ( $node->getName() === 'h' ) { + if ( $node->getName() == 'h' ) { $bits = $node->splitHeading(); if ( $bits['i'] == $sectionIndex ) { $targetLevel = $bits['level']; break; } } - if ( $mode === 'replace' ) { + if ( $mode == 'replace' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); @@ -4425,7 +4724,7 @@ class Parser if ( !$node ) { // Not found - if ( $mode === 'get' ) { + if ( $mode == 'get' ) { return $newText; } else { return $text; @@ -4434,21 +4733,21 @@ class Parser // Find the end of the section, including nested sections do { - if ( $node->getName() === 'h' ) { + if ( $node->getName() == 'h' ) { $bits = $node->splitHeading(); $curLevel = $bits['level']; if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { break; } } - if ( $mode === 'get' ) { + if ( $mode == 'get' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); } while ( $node ); // Write out the remainder (in replace mode only) - if ( $mode === 'replace' ) { + if ( $mode == 'replace' ) { // Output the replacement text // Add two newlines on -- trailing whitespace in $newText is conventionally // stripped by the editor, so we need both newlines to restore the paragraph gap @@ -4678,7 +4977,7 @@ class StripState { do { $oldText = $text; $text = $this->general->replace( $text ); - } while ( $text !== $oldText ); + } while ( $text != $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4688,7 +4987,7 @@ class StripState { do { $oldText = $text; $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); + } while ( $text != $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4699,7 +4998,7 @@ class StripState { $oldText = $text; $text = $this->general->replace( $text ); $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); + } while ( $text != $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4713,7 +5012,7 @@ class OnlyIncludeReplacer { var $output = ''; function replace( $matches ) { - if ( substr( $matches[1], -1 ) === "\n" ) { + if ( substr( $matches[1], -1 ) == "\n" ) { $this->output .= substr( $matches[1], 0, -1 ); } else { $this->output .= $matches[1]; -- 2.20.1