From c83882e96ef5e07a9c52b43fe5221cd25665e734 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 25 Aug 2008 22:19:50 +0000 Subject: [PATCH] Revert r39949 "* Revert revert r39662 of my parser changes." Causes weird regressions on http://meta.wikimedia.org/wiki/Talk:Spam_blacklist Couldn't isolate to a parser test in a few minutes; some kind of template interaction perhaps. Sample bad HTML like: The associated page is used by the Mediawiki http://www.mediawiki.org/wiki/Extension:SpamBlacklist" class="extiw" title="mw:Extension:SpamBlacklist">Spam Blacklist extension, and lists strings of text that may not be used in URLs in any page in Wikimedia Foundation projects (as well as many external wikis). Any meta administrator can edit the spam blacklist. There is also a more aggressive way to block spamming through direct use of $wgSpamRegex. Only developers can make changes to $wgSpamRegex, and its use is to be avoided whenever possible. --- RELEASE-NOTES | 1 - includes/Title.php | 6 - includes/parser/LinkHolderArray.php | 9 - includes/parser/Parser.php | 875 +++++++++++++------- includes/parser/Parser_DiffTest.php | 10 +- languages/Language.php | 9 - languages/LanguageConverter.php | 12 +- maintenance/parserTests.inc | 167 +--- maintenance/parserTests.php | 22 +- maintenance/parserTestsStaticParserHook.php | 9 +- 10 files changed, 613 insertions(+), 507 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index d9508efd42..95c698c0c2 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -94,7 +94,6 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * HTML entities like   now work (are not escaped) in edit summaries. * (bug 13815) In the comment for page moves, use the colon-separator message instead of a hardcoded colon. -* Allow to accept image names without an Image: prefix === Bug fixes in 1.14 === diff --git a/includes/Title.php b/includes/Title.php index 071334444e..2234eb19b1 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -410,12 +410,6 @@ class Title { global $wgInterwikiCache, $wgContLang; $fname = 'Title::getInterwikiLink'; - if ( count( Title::$interwikiCache ) >= self::CACHE_MAX ) { - // Don't use infinite memory - reset( Title::$interwikiCache ); - unset( Title::$interwikiCache[ key( Title::$interwikiCache ) ] ); - } - $key = $wgContLang->lc( $key ); $k = wfMemcKey( 'interwiki', $key ); diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index ab63ac5b03..c9b33a2699 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -11,15 +11,6 @@ class LinkHolderArray { $this->parent = $parent; } - /** - * Reduce memory usage to reduce the impact of circular references - */ - function __destruct() { - foreach ( $this as $name => $value ) { - unset( $this->$name ); - } - } - /** * Merge another LinkHolderArray into this one */ diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index a6d9f936d6..6e3b932a68 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -98,7 +98,7 @@ class Parser # Cleared with clearState(): var $mOutput, $mAutonumber, $mDTopen, $mStripState; var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mLinkHolders, $mLinkID; + var $mInterwikiLinkHolders, $mLinkHolders; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; // empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; @@ -142,18 +142,6 @@ class Parser $this->mFirstCall = true; } - /** - * Reduce memory usage to reduce the impact of circular references - */ - function __destruct() { - if ( isset( $this->mLinkHolders ) ) { - $this->mLinkHolders->__destruct(); - } - foreach ( $this as $name => $value ) { - unset( $this->$name ); - } - } - /** * Do various kinds of initialisation on the first call of the parser */ @@ -191,8 +179,17 @@ class Parser $this->mStripState = new StripState; $this->mArgStack = false; $this->mInPre = false; - $this->mLinkHolders = new LinkHolderArray( $this ); - $this->mLinkID = 0; + $this->mInterwikiLinkHolders = array( + 'texts' => array(), + 'titles' => array() + ); + $this->mLinkHolders = array( + 'namespaces' => array(), + 'dbkeys' => array(), + 'queries' => array(), + 'texts' => array(), + 'titles' => array() + ); $this->mRevisionTimestamp = $this->mRevisionId = null; /** @@ -207,7 +204,7 @@ class Parser */ #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); # Changed to \x7f to allow XML double-parsing -- TS - $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); + $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString(); # Clear these on every parse, bug 4549 @@ -297,7 +294,7 @@ class Parser */ global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang; - $fname = __METHOD__.'-' . wfGetCaller(); + $fname = 'Parser::parse-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); @@ -331,6 +328,7 @@ class Parser ); $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + # only once and last $text = $this->doBlockLevels( $text, $linestart ); $this->replaceLinkHolders( $text ); @@ -350,7 +348,7 @@ class Parser $uniq_prefix = $this->mUniqPrefix; $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); - $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); + $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); foreach( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; @@ -368,7 +366,7 @@ class Parser $text = Sanitizer::normalizeCharReferences( $text ); if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = self::tidy($text); + $text = Parser::tidy($text); } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) @@ -473,8 +471,6 @@ class Parser function &getTitle() { return $this->mTitle; } function getOptions() { return $this->mOptions; } function getRevisionId() { return $this->mRevisionId; } - function getOutput() { return $this->mOutput; } - function nextLinkID() { return $this->mLinkID++; } function getFunctionLang() { global $wgLang, $wgContLang; @@ -553,7 +549,7 @@ class Parser $text = $inside; $tail = null; } else { - if( $element === '!--' ) { + if( $element == '!--' ) { $end = '/(-->)/'; } else { $end = "/(<\\/$element\\s*>)/i"; @@ -662,9 +658,9 @@ class Parser ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'. 'test'.$text.''; if( $wgTidyInternal ) { - $correctedtext = self::internalTidy( $wrappedtext ); + $correctedtext = Parser::internalTidy( $wrappedtext ); } else { - $correctedtext = self::externalTidy( $wrappedtext ); + $correctedtext = Parser::externalTidy( $wrappedtext ); } if( is_null( $correctedtext ) ) { wfDebug( "Tidy error detected!\n" ); @@ -681,7 +677,8 @@ class Parser */ function externalTidy( $text ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::externalTidy'; + wfProfileIn( $fname ); $cleansource = ''; $opts = ' -utf8'; @@ -710,7 +707,7 @@ class Parser } } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); if( $cleansource == '' && $text != '') { // Some kind of error happened, so we couldn't get the corrected text. @@ -732,7 +729,8 @@ class Parser */ function internalTidy( $text ) { global $wgTidyConf, $IP, $wgDebugTidy; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::internalTidy'; + wfProfileIn( $fname ); $tidy = new tidy; $tidy->parseString( $text, $wgTidyConf, 'utf8' ); @@ -750,7 +748,7 @@ class Parser "\n-->"; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $cleansource; } @@ -760,35 +758,34 @@ class Parser * @private */ function doTableStuff ( $text ) { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::doTableStuff'; + wfProfileIn( $fname ); - $lines = StringUtils::explode( "\n", $text ); - $out = ''; + $lines = explode ( "\n" , $text ); $td_history = array (); // Is currently a td tag open? $last_tag_history = array (); // Save history of last lag activated (td, th or caption) $tr_history = array (); // Is currently a tr tag open? $tr_attributes = array (); // history of tr attributes $has_opened_tr = array(); // Did this table open a element? $indent_level = 0; // indent level of the table - - foreach ( $lines as $outLine ) { - $line = trim( $outLine ); + foreach ( $lines as $key => $line ) + { + $line = trim ( $line ); if( $line == '' ) { // empty line, go to next line - $out .= $outLine."\n"; continue; } - $first_character = $line[0]; + $first_character = $line{0}; $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { + if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) { // First check if we are starting a new table $indent_level = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' ); - $outLine = str_repeat( '
' , $indent_level ) . ""; + $lines[$key] = str_repeat( '
' , $indent_level ) . ""; array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); array_push ( $tr_history , false ); @@ -796,9 +793,8 @@ class Parser array_push ( $has_opened_tr , false ); } else if ( count ( $td_history ) == 0 ) { // Don't do any of the following - $out .= $outLine."\n"; continue; - } else if ( substr ( $line , 0 , 2 ) === '|}' ) { + } else if ( substr ( $line , 0 , 2 ) == '|}' ) { // We are ending a table $line = '' . substr ( $line , 2 ); $last_tag = array_pop ( $last_tag_history ); @@ -815,8 +811,8 @@ class Parser $line = "{$line}"; } array_pop ( $tr_attributes ); - $outLine = $line . str_repeat( '
' , $indent_level ); - } else if ( substr ( $line , 0 , 2 ) === '|-' ) { + $lines[$key] = $line . str_repeat( '
' , $indent_level ); + } else if ( substr ( $line , 0 , 2 ) == '|-' ) { // Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -839,21 +835,21 @@ class Parser $line = "{$line}"; } - $outLine = $line; + $lines[$key] = $line; array_push ( $tr_history , false ); array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); } - else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) { + else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) { // This might be cell elements, td, th or captions - if ( substr ( $line , 0 , 2 ) === '|+' ) { + if ( substr ( $line , 0 , 2 ) == '|+' ) { $first_character = '+'; $line = substr ( $line , 1 ); } $line = substr ( $line , 1 ); - if ( $first_character === '!' ) { + if ( $first_character == '!' ) { $line = str_replace ( '!!' , '||' , $line ); } @@ -863,13 +859,13 @@ class Parser // attribute values containing literal "||". $cells = StringUtils::explodeMarkup( '||' , $line ); - $outLine = ''; + $lines[$key] = ''; // Loop through each table cell foreach ( $cells as $cell ) { $previous = ''; - if ( $first_character !== '+' ) + if ( $first_character != '+' ) { $tr_after = array_pop ( $tr_attributes ); if ( !array_pop ( $tr_history ) ) { @@ -887,11 +883,11 @@ class Parser $previous = "{$previous}"; } - if ( $first_character === '|' ) { + if ( $first_character == '|' ) { $last_tag = 'td'; - } else if ( $first_character === '!' ) { + } else if ( $first_character == '!' ) { $last_tag = 'th'; - } else if ( $first_character === '+' ) { + } else if ( $first_character == '+' ) { $last_tag = 'caption'; } else { $last_tag = ''; @@ -914,42 +910,38 @@ class Parser $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; } - $outLine .= $cell; + $lines[$key] .= $cell; array_push ( $td_history , true ); } } - $out .= $outLine . "\n"; } // Closing open td, tr && table while ( count ( $td_history ) > 0 ) { if ( array_pop ( $td_history ) ) { - $out .= "\n"; + $lines[] = '' ; } if ( array_pop ( $tr_history ) ) { - $out .= "\n"; + $lines[] = '' ; } if ( !array_pop ( $has_opened_tr ) ) { - $out .= "\n" ; + $lines[] = "" ; } - $out .= "\n"; + $lines[] = '' ; } - // Remove trailing line-ending (b/c) - if ( substr( $out, -1 ) === "\n" ) { - $out = substr( $out, 0, -1 ); - } + $output = implode ( "\n" , $lines ) ; // special case: don't return empty table - if( $out === "\n\n
" ) { - $out = ''; + if( $output == "\n\n
" ) { + $output = ''; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); - return $out; + return $output; } /** @@ -960,11 +952,12 @@ class Parser */ function internalParse( $text ) { $isMain = true; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::internalParse'; + wfProfileIn( $fname ); # Hook to suspend the parser in this state if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text ; } @@ -997,15 +990,14 @@ class Parser $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $isMain ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text; } /** * Replace special strings like "ISBN xxx" and "RFC xxx" with * magic external links. - * - * DML + * * @private */ function doMagicLinks( $text ) { @@ -1026,10 +1018,10 @@ class Parser } function magicLinkCallback( $m ) { - if ( substr( $m[0], 0, 1 ) === '<' ) { + if ( substr( $m[0], 0, 1 ) == '<' ) { # Skip HTML element return $m[0]; - } elseif ( substr( $m[0], 0, 4 ) === 'ISBN' ) { + } elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) { $isbn = $m[2]; $num = strtr( $isbn, array( '-' => '', @@ -1041,11 +1033,11 @@ class Parser $titleObj->escapeLocalUrl() . "\" class=\"internal\">ISBN $isbn"; } else { - if ( substr( $m[0], 0, 3 ) === 'RFC' ) { + if ( substr( $m[0], 0, 3 ) == 'RFC' ) { $keyword = 'RFC'; $urlmsg = 'rfcurl'; $id = $m[1]; - } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) { + } elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) { $keyword = 'PMID'; $urlmsg = 'pubmedurl'; $id = $m[1]; @@ -1068,13 +1060,14 @@ class Parser * @private */ function doHeadings( $text ) { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::doHeadings'; + wfProfileIn( $fname ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); $text = preg_replace( "/^$h(.+)$h\\s*$/m", "\\1", $text ); } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text; } @@ -1084,14 +1077,15 @@ class Parser * @return string the altered text */ function doAllQuotes( $text ) { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::doAllQuotes'; + wfProfileIn( $fname ); $outtext = ''; - $lines = StringUtils::explode( "\n", $text ); + $lines = explode( "\n", $text ); foreach ( $lines as $line ) { - $outtext .= $this->doQuotes( $line ) . "\n"; + $outtext .= $this->doQuotes ( $line ) . "\n"; } $outtext = substr($outtext, 0,-1); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $outtext; } @@ -1153,9 +1147,9 @@ class Parser { $x1 = substr ($arr[$i-1], -1); $x2 = substr ($arr[$i-1], -2, 1); - if ($x1 === ' ') { + if ($x1 == ' ') { if ($firstspace == -1) $firstspace = $i; - } else if ($x2 === ' ') { + } else if ($x2 == ' ') { if ($firstsingleletterword == -1) $firstsingleletterword = $i; } else { if ($firstmultiletterword == -1) $firstmultiletterword = $i; @@ -1195,7 +1189,7 @@ class Parser { if (($i % 2) == 0) { - if ($state === 'both') + if ($state == 'both') $buffer .= $r; else $output .= $r; @@ -1204,41 +1198,41 @@ class Parser { if (strlen ($r) == 2) { - if ($state === 'i') + if ($state == 'i') { $output .= ''; $state = ''; } - else if ($state === 'bi') + else if ($state == 'bi') { $output .= ''; $state = 'b'; } - else if ($state === 'ib') + else if ($state == 'ib') { $output .= ''; $state = 'b'; } - else if ($state === 'both') + else if ($state == 'both') { $output .= ''.$buffer.''; $state = 'b'; } else # $state can be 'b' or '' { $output .= ''; $state .= 'i'; } } else if (strlen ($r) == 3) { - if ($state === 'b') + if ($state == 'b') { $output .= ''; $state = ''; } - else if ($state === 'bi') + else if ($state == 'bi') { $output .= ''; $state = 'i'; } - else if ($state === 'ib') + else if ($state == 'ib') { $output .= ''; $state = 'i'; } - else if ($state === 'both') + else if ($state == 'both') { $output .= ''.$buffer.''; $state = 'i'; } else # $state can be 'i' or '' { $output .= ''; $state .= 'b'; } } else if (strlen ($r) == 5) { - if ($state === 'b') + if ($state == 'b') { $output .= ''; $state = 'i'; } - else if ($state === 'i') + else if ($state == 'i') { $output .= ''; $state = 'b'; } - else if ($state === 'bi') + else if ($state == 'bi') { $output .= ''; $state = ''; } - else if ($state === 'ib') + else if ($state == 'ib') { $output .= ''; $state = ''; } - else if ($state === 'both') + else if ($state == 'both') { $output .= ''.$buffer.''; $state = ''; } else # ($state == '') { $buffer = ''; $state = 'both'; } @@ -1247,21 +1241,21 @@ class Parser $i++; } # Now close all remaining tags. Notice that the order is important. - if ($state === 'b' || $state === 'ib') + if ($state == 'b' || $state == 'ib') $output .= ''; - if ($state === 'i' || $state === 'bi' || $state === 'ib') + if ($state == 'i' || $state == 'bi' || $state == 'ib') $output .= ''; - if ($state === 'bi') + if ($state == 'bi') $output .= ''; # There might be lonely ''''', so make sure we have a buffer - if ($state === 'both' && $buffer) + if ($state == 'both' && $buffer) $output .= ''.$buffer.''; return $output; } } /** - * Replace external links (REL) + * Replace external links * * Note: this is all very hackish and the order of execution matters a lot. * Make sure to run maintenance/parserTests.php if you change this code. @@ -1270,7 +1264,8 @@ class Parser */ function replaceExternalLinks( $text ) { global $wgContLang; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::replaceExternalLinks'; + wfProfileIn( $fname ); $sk = $this->mOptions->getSkin(); @@ -1304,7 +1299,7 @@ class Parser $dtrail = ''; # Set linktype for CSS - if URL==text, link is essentially free - $linktype = ($text === $url) ? 'free' : 'text'; + $linktype = ($text == $url) ? 'free' : 'text'; # No link text, e.g. [http://domain.tld/some.link] if ( $text == '' ) { @@ -1340,11 +1335,11 @@ class Parser # Register link in the output object. # Replace unnecessary URL escape codes with the referenced character # This prevents spammers from hiding links from the filters - $pasteurized = self::replaceUnusualEscapes( $url ); + $pasteurized = Parser::replaceUnusualEscapes( $url ); $this->mOutput->addExternalLink( $pasteurized ); } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $s; } @@ -1354,7 +1349,8 @@ class Parser */ function replaceFreeExternalLinks( $text ) { global $wgContLang; - wfProfileIn( __METHOD__ ); + $fname = 'Parser::replaceFreeExternalLinks'; + wfProfileIn( $fname ); $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); $s = array_shift( $bits ); @@ -1416,7 +1412,7 @@ class Parser $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters - $pasteurized = self::replaceUnusualEscapes( $url ); + $pasteurized = Parser::replaceUnusualEscapes( $url ); $this->mOutput->addExternalLink( $pasteurized ); } $s .= $text . $trail; @@ -1424,7 +1420,7 @@ class Parser $s .= $protocol . $remainder; } } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $s; } @@ -1440,7 +1436,7 @@ class Parser */ static function replaceUnusualEscapes( $url ) { return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', - array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); + array( 'Parser', 'replaceUnusualEscapesCallback' ), $url ); } /** @@ -1484,48 +1480,35 @@ class Parser /** * Process [[ ]] wikilinks - * @return processed text * * @private */ function replaceInternalLinks( $s ) { - $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) ); - return $s; - } - - /** - * Process [[ ]] wikilinks (RIL) - * @return LinkHolderArray - * - * @private - */ - function replaceInternalLinks2( &$s ) { global $wgContLang; + static $fname = 'Parser::replaceInternalLinks' ; - wfProfileIn( __METHOD__ ); + wfProfileIn( $fname ); - wfProfileIn( __METHOD__.'-setup' ); - static $tc = FALSE, $e1, $e1_img; + wfProfileIn( $fname.'-setup' ); + static $tc = FALSE; # the % is needed to support urlencoded titles as well - if ( !$tc ) { - $tc = Title::legalChars() . '#%'; - # Match a link having the form [[namespace:link|alternate]]trail - $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; - # Match cases where there is no "]]", which might still be images - $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; - } + if ( !$tc ) { $tc = Title::legalChars() . '#%'; } $sk = $this->mOptions->getSkin(); - $holders = new LinkHolderArray( $this ); #split the entire text string on occurences of [[ - $a = StringUtils::explode( '[[', ' ' . $s ); + $a = explode( '[[', ' ' . $s ); #get the first element (all text up to first [[), and remove the space we added - $s = $a->current(); - $a->next(); - $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" + $s = array_shift( $a ); $s = substr( $s, 1 ); + # Match a link having the form [[namespace:link|alternate]]trail + static $e1 = FALSE; + if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; } + # Match cases where there is no "]]", which might still be images + static $e1_img = FALSE; + if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; } + $useLinkPrefixExtension = $wgContLang->linkPrefixExtension(); $e2 = null; if ( $useLinkPrefixExtension ) { @@ -1535,8 +1518,8 @@ class Parser } if( is_null( $this->mTitle ) ) { - wfProfileOut( __METHOD__.'-setup' ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); + wfProfileOut( $fname.'-setup' ); throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1558,20 +1541,13 @@ class Parser $selflink = array($this->mTitle->getPrefixedText()); } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( $fname.'-setup' ); # Loop for each link - for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) { - # Check for excessive memory usage - if ( $holders->isBig() ) { - # Too big - # Do the existence check, replace the link holders and clear the array - $holders->replace( $s ); - $holders->clear(); - } - + for ($k = 0; isset( $a[$k] ); $k++) { + $line = $a[$k]; if ( $useLinkPrefixExtension ) { - wfProfileIn( __METHOD__.'-prefixhandling' ); + wfProfileIn( $fname.'-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; @@ -1583,12 +1559,12 @@ class Parser $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( __METHOD__.'-prefixhandling' ); + wfProfileOut( $fname.'-prefixhandling' ); } $might_be_img = false; - wfProfileIn( __METHOD__."-e1" ); + wfProfileIn( "$fname-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1622,18 +1598,18 @@ class Parser $trail = ""; } else { # Invalid form; output directly $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-e1" ); + wfProfileOut( "$fname-e1" ); continue; } - wfProfileOut( __METHOD__."-e1" ); - wfProfileIn( __METHOD__."-misc" ); + wfProfileOut( "$fname-e1" ); + wfProfileIn( "$fname-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) { $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-misc" ); + wfProfileOut( "$fname-misc" ); continue; } @@ -1644,36 +1620,33 @@ class Parser $link = $m[1]; } - $noforce = (substr($m[1], 0, 1) !== ':'); + $noforce = (substr($m[1], 0, 1) != ':'); if (!$noforce) { # Strip off leading ':' $link = substr($link, 1); } - wfProfileOut( __METHOD__."-misc" ); - wfProfileIn( __METHOD__."-title" ); + wfProfileOut( "$fname-misc" ); + wfProfileIn( "$fname-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) ); if( !$nt ) { $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( "$fname-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( "$fname-title" ); if ($might_be_img) { # if this is actually an invalid link - wfProfileIn( __METHOD__."-might_be_img" ); + wfProfileIn( "$fname-might_be_img" ); if ($ns == NS_IMAGE && $noforce) { #but might be an image $found = false; - while ( true ) { + while (isset ($a[$k+1]) ) { #look at the next 'line' to see if we can close it there - $a->next(); - $next_line = $a->current(); - if ( $next_line === false || $next_line === null ) { - break; - } + $spliced = array_splice( $a, $k + 1, 1 ); + $next_line = array_shift( $spliced ); $m = explode( ']]', $next_line, 3 ); if ( count( $m ) == 3 ) { # the first ]] closes the inner link, the second the image @@ -1693,19 +1666,19 @@ class Parser if ( !$found ) { # we couldn't find the end of this imageLink, so output it raw #but don't ignore what might be perfectly normal links in the text we've examined - $holders->merge( $this->replaceInternalLinks2( $text ) ); + $text = $this->replaceInternalLinks($text); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( "$fname-might_be_img" ); continue; } } else { #it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( "$fname-might_be_img" ); continue; } - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( "$fname-might_be_img" ); } $wasblank = ( '' == $text ); @@ -1715,36 +1688,41 @@ class Parser if( $noforce ) { # Interwikis - wfProfileIn( __METHOD__."-interwiki" ); + wfProfileIn( "$fname-interwiki" ); if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) { $this->mOutput->addLanguageLink( $nt->getFullText() ); $s = rtrim($s . $prefix); $s .= trim($trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( "$fname-interwiki" ); continue; } - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( "$fname-interwiki" ); if ( $ns == NS_IMAGE ) { - wfProfileIn( __METHOD__."-image" ); + wfProfileIn( "$fname-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { # recursively parse links inside the image caption # actually, this will parse them in any other parameters, too, # but it might be hard to fix that, and it doesn't matter ATM $text = $this->replaceExternalLinks($text); - $holders->merge( $this->replaceInternalLinks2( $text ) ); + $text = $this->replaceInternalLinks($text); # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail; + $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail; + $this->mOutput->addImage( $nt->getDBkey() ); + + wfProfileOut( "$fname-image" ); + continue; + } else { + # We still need to record the image's presence on the page + $this->mOutput->addImage( $nt->getDBkey() ); } - $this->mOutput->addImage( $nt->getDBkey() ); - wfProfileOut( __METHOD__."-image" ); - continue; + wfProfileOut( "$fname-image" ); } if ( $ns == NS_CATEGORY ) { - wfProfileIn( __METHOD__."-category" ); + wfProfileIn( "$fname-category" ); $s = rtrim($s . "\n"); # bug 87 if ( $wasblank ) { @@ -1763,7 +1741,7 @@ class Parser */ $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__."-category" ); + wfProfileOut( "$fname-category" ); continue; } } @@ -1794,7 +1772,7 @@ class Parser if( SpecialPage::exists( $nt->getDBkey() ) ) { $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); } else { - $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); } continue; } elseif( $ns == NS_IMAGE ) { @@ -1808,10 +1786,10 @@ class Parser continue; } } - $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); } - wfProfileOut( __METHOD__ ); - return $holders; + wfProfileOut( $fname ); + return $s; } /** @@ -1820,10 +1798,32 @@ class Parser * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. * - * @deprecated */ function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); + wfProfileIn( __METHOD__ ); + if ( ! is_object($nt) ) { + # Fail gracefully + $retVal = "{$prefix}{$text}{$trail}"; + } else { + # Separate the link trail from the rest of the link + list( $inside, $trail ) = Linker::splitTrail( $trail ); + + if ( $nt->isExternal() ) { + $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside ); + $this->mInterwikiLinkHolders['titles'][] = $nt; + $retVal = '{$trail}"; + } else { + $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() ); + $this->mLinkHolders['dbkeys'][] = $nt->getDBkey(); + $this->mLinkHolders['queries'][] = $query; + $this->mLinkHolders['texts'][] = $prefix.$text.$inside; + $this->mLinkHolders['titles'][] = $nt; + + $retVal = '{$trail}"; + } + } + wfProfileOut( __METHOD__ ); + return $retVal; } /** @@ -1889,7 +1889,8 @@ class Parser # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage - wfProfileIn( __METHOD__ ); + $fname = 'Parser::maybeDoSubpageLink'; + wfProfileIn( $fname ); $ret = $target; # default return value is no change # Some namespaces don't allow subpages, @@ -1905,7 +1906,7 @@ class Parser # bug 7425 $target = trim( $target ); # Look at the first character - if( $target != '' && $target{0} === '/' ) { + if( $target != '' && $target{0} == '/' ) { # / at end means we don't want the slash to be shown $m = array(); $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m ); @@ -1932,7 +1933,7 @@ class Parser if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) ); # / at the end means don't show full path - if( substr( $nodotdot, -1, 1 ) === '/' ) { + if( substr( $nodotdot, -1, 1 ) == '/' ) { $nodotdot = substr( $nodotdot, 0, -1 ); if( '' === $text ) { $text = $nodotdot . $suffix; @@ -1948,7 +1949,7 @@ class Parser } } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $ret; } @@ -1984,10 +1985,10 @@ class Parser /* private */ function openList( $char ) { $result = $this->closeParagraph(); - if ( '*' === $char ) { $result .= ''; } - else if ( '#' === $char ) { $text = ''; } - else if ( ':' === $char ) { + if ( '*' == $char ) { $text = ''; } + else if ( '#' == $char ) { $text = ''; } + else if ( ':' == $char ) { if ( $this->mDTopen ) { $this->mDTopen = false; $text = ''; @@ -2029,59 +2030,56 @@ class Parser /**#@-*/ /** - * Make lists from lines starting with ':', '*', '#', etc. (DBL) + * Make lists from lines starting with ':', '*', '#', etc. * * @private * @return string the lists rendered as HTML */ function doBlockLevels( $text, $linestart ) { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::doBlockLevels'; + wfProfileIn( $fname ); # Parsing through the text line by line. The main thing # happening here is handling of block-level elements p, pre, # and making lists from lines starting with * # : etc. # - $textLines = StringUtils::explode( "\n", $text ); + $textLines = explode( "\n", $text ); $lastPrefix = $output = ''; $this->mDTopen = $inBlockElem = false; $prefixLength = 0; $paragraphStack = false; + if ( !$linestart ) { + $output .= array_shift( $textLines ); + } foreach ( $textLines as $oLine ) { - # Fix up $linestart - if ( !$linestart ) { - $output .= $oLine; - $linestart = true; - continue; - } - $lastPrefixLength = strlen( $lastPrefix ); $preCloseMatch = preg_match('/<\\/pre/i', $oLine ); $preOpenMatch = preg_match('/
mInPre ) {
 				# Multiple prefixes may abut each other for nested lists.
 				$prefixLength = strspn( $oLine, '*#:;' );
-				$prefix = substr( $oLine, 0, $prefixLength );
+				$pref = substr( $oLine, 0, $prefixLength );
 
 				# eh?
-				$prefix2 = str_replace( ';', ':', $prefix );
+				$pref2 = str_replace( ';', ':', $pref );
 				$t = substr( $oLine, $prefixLength );
-				$this->mInPre = (bool)$preOpenMatch;
+				$this->mInPre = !empty($preOpenMatch);
 			} else {
 				# Don't interpret any other prefixes in preformatted text
 				$prefixLength = 0;
-				$prefix = $prefix2 = '';
+				$pref = $pref2 = '';
 				$t = $oLine;
 			}
 
 			# List generation
-			if( $prefixLength && $lastPrefix === $prefix2 ) {
+			if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
 				# Same as the last item, so no need to deal with nesting or opening stuff
-				$output .= $this->nextItem( substr( $prefix, -1 ) );
+				$output .= $this->nextItem( substr( $pref, -1 ) );
 				$paragraphStack = false;
 
-				if ( substr( $prefix, -1 ) === ';') {
+				if ( substr( $pref, -1 ) == ';') {
 					# The one nasty exception: definition lists work like this:
 					# ; title : definition text
 					# So we check for : in the remainder text to split up the
@@ -2094,21 +2092,21 @@ class Parser
 				}
 			} elseif( $prefixLength || $lastPrefixLength ) {
 				# Either open or close a level...
-				$commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
+				$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
 				$paragraphStack = false;
 
 				while( $commonPrefixLength < $lastPrefixLength ) {
-					$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
+					$output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
 					--$lastPrefixLength;
 				}
 				if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
-					$output .= $this->nextItem( $prefix[$commonPrefixLength-1] );
+					$output .= $this->nextItem( $pref{$commonPrefixLength-1} );
 				}
 				while ( $prefixLength > $commonPrefixLength ) {
-					$char = substr( $prefix, $commonPrefixLength, 1 );
+					$char = substr( $pref, $commonPrefixLength, 1 );
 					$output .= $this->openList( $char );
 
-					if ( ';' === $char ) {
+					if ( ';' == $char ) {
 						# FIXME: This is dupe of code above
 						if ($this->findColonNoLinks($t, $term, $t2) !== false) {
 							$t = $t2;
@@ -2117,10 +2115,10 @@ class Parser
 					}
 					++$commonPrefixLength;
 				}
-				$lastPrefix = $prefix2;
+				$lastPrefix = $pref2;
 			}
 			if( 0 == $prefixLength ) {
-				wfProfileIn( __METHOD__."-paragraph" );
+				wfProfileIn( "$fname-paragraph" );
 				# No prefix (not in list)--go to paragraph mode
 				// XXX: use a stack for nestable elements like span, table and div
 				$openmatch = preg_match('/(?:mInPre ) {
-					if ( ' ' == $t{0} and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
+					if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
 						// pre
-						if ($this->mLastSection !== 'pre') {
+						if ($this->mLastSection != 'pre') {
 							$paragraphStack = false;
 							$output .= $this->closeParagraph().'
';
 							$this->mLastSection = 'pre';
@@ -2156,7 +2154,7 @@ class Parser
 								$paragraphStack = false;
 								$this->mLastSection = 'p';
 							} else {
-								if ($this->mLastSection !== 'p' ) {
+								if ($this->mLastSection != 'p' ) {
 									$output .= $this->closeParagraph();
 									$this->mLastSection = '';
 									$paragraphStack = '

'; @@ -2169,14 +2167,14 @@ class Parser $output .= $paragraphStack; $paragraphStack = false; $this->mLastSection = 'p'; - } else if ($this->mLastSection !== 'p') { + } else if ($this->mLastSection != 'p') { $output .= $this->closeParagraph().'

'; $this->mLastSection = 'p'; } } } } - wfProfileOut( __METHOD__."-paragraph" ); + wfProfileOut( "$fname-paragraph" ); } // somewhere above we forget to get out of pre block (bug 785) if($preCloseMatch && $this->mInPre) { @@ -2187,7 +2185,7 @@ class Parser } } while ( $prefixLength ) { - $output .= $this->closeList( $prefix2[$prefixLength-1] ); + $output .= $this->closeList( $pref2{$prefixLength-1} ); --$prefixLength; } if ( '' != $this->mLastSection ) { @@ -2195,7 +2193,7 @@ class Parser $this->mLastSection = ''; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $output; } @@ -2208,12 +2206,13 @@ class Parser * return string the position of the ':', or false if none found */ function findColonNoLinks($str, &$before, &$after) { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::findColonNoLinks'; + wfProfileIn( $fname ); $pos = strpos( $str, ':' ); if( $pos === false ) { // Nothing to find! - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return false; } @@ -2222,7 +2221,7 @@ class Parser // Easy; no tag nesting to worry about $before = substr( $str, 0, $pos ); $after = substr( $str, $pos+1 ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $pos; } @@ -2246,7 +2245,7 @@ class Parser // We found it! $before = substr( $str, 0, $i ); $after = substr( $str, $i + 1 ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $i; } // Embedded in a tag; don't break it. @@ -2256,7 +2255,7 @@ class Parser $colon = strpos( $str, ':', $i ); if( $colon === false ) { // Nothing else interesting - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return false; } $lt = strpos( $str, '<', $i ); @@ -2265,7 +2264,7 @@ class Parser // We found it! $before = substr( $str, 0, $colon ); $after = substr( $str, $colon + 1 ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $i; } } @@ -2312,18 +2311,18 @@ class Parser break; case 3: // self::COLON_STATE_CLOSETAG: // In a - if( $c === ">" ) { + if( $c == ">" ) { $stack--; if( $stack < 0 ) { - wfDebug( __METHOD__.": Invalid input; too many close tags\n" ); - wfProfileOut( __METHOD__ ); + wfDebug( "Invalid input in $fname; too many close tags\n" ); + wfProfileOut( $fname ); return false; } $state = self::COLON_STATE_TEXT; } break; case self::COLON_STATE_TAGSLASH: - if( $c === ">" ) { + if( $c == ">" ) { // Yes, a self-closed tag $state = self::COLON_STATE_TEXT; } else { @@ -2332,33 +2331,33 @@ class Parser } break; case 5: // self::COLON_STATE_COMMENT: - if( $c === "-" ) { + if( $c == "-" ) { $state = self::COLON_STATE_COMMENTDASH; } break; case self::COLON_STATE_COMMENTDASH: - if( $c === "-" ) { + if( $c == "-" ) { $state = self::COLON_STATE_COMMENTDASHDASH; } else { $state = self::COLON_STATE_COMMENT; } break; case self::COLON_STATE_COMMENTDASHDASH: - if( $c === ">" ) { + if( $c == ">" ) { $state = self::COLON_STATE_TEXT; } else { $state = self::COLON_STATE_COMMENT; } break; default: - throw new MWException( "State machine error in " . __METHOD__ ); + throw new MWException( "State machine error in $fname" ); } } if( $stack > 0 ) { - wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); + wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" ); return false; } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return false; } @@ -2588,11 +2587,12 @@ class Parser * @private */ function initialiseVariables() { - wfProfileIn( __METHOD__ ); + $fname = 'Parser::initialiseVariables'; + wfProfileIn( $fname ); $variableIDs = MagicWord::getVariableIDs(); $this->mVariables = new MagicWordArray( $variableIDs ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); } /** @@ -2661,7 +2661,8 @@ class Parser return $text; } - wfProfileIn( __METHOD__ ); + $fname = __METHOD__; + wfProfileIn( $fname ); if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); @@ -2674,7 +2675,7 @@ class Parser $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; $text = $frame->expand( $dom, $flags ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $text; } @@ -2737,7 +2738,8 @@ class Parser */ function braceSubstitution( $piece, $frame ) { global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; - wfProfileIn( __METHOD__ ); + $fname = __METHOD__; + wfProfileIn( $fname ); wfProfileIn( __METHOD__.'-setup' ); # Flags @@ -2924,7 +2926,7 @@ class Parser } } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { $found = false; //access denied - wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); + wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); } else { list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { @@ -2958,7 +2960,7 @@ class Parser # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return array( 'object' => $text ); } @@ -3017,7 +3019,7 @@ class Parser $ret = array( 'text' => $text ); } - wfProfileOut( __METHOD__ ); + wfProfileOut( $fname ); return $ret; } @@ -3304,7 +3306,7 @@ class Parser } } - if ( $name === 'html' || $name === 'nowiki' ) { + if ( $name == 'html' || $name == 'nowiki' ) { $this->mStripState->nowiki->setPair( $marker, $output ); } else { $this->mStripState->general->setPair( $marker, $output ); @@ -3560,7 +3562,12 @@ class Parser # # turns into # link text with suffix - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); + $safeHeadline = preg_replace( '//e', + "\$this->mLinkHolders['texts'][\$1]", + $safeHeadline ); + $safeHeadline = preg_replace( '//e', + "\$this->mInterwikiLinkHolders['texts'][\$1]", + $safeHeadline ); # Strip out HTML (other than plain and : bug 8393) $tocline = preg_replace( @@ -3636,7 +3643,7 @@ class Parser $i = 0; foreach( $blocks as $block ) { - if( $showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n" ) { + if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { # This is the [edit] link that appears for the top block of text when # section editing is enabled @@ -3788,7 +3795,7 @@ class Parser } else { # Failed to validate; fall back to the default $nickname = $username; - wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" ); + wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" ); } } @@ -3894,17 +3901,19 @@ class Parser global $wgTitle; static $executing = false; + $fname = "Parser::transformMsg"; + # Guard against infinite recursion if ( $executing ) { return $text; } $executing = true; - wfProfileIn(__METHOD__); + wfProfileIn($fname); $text = $this->preprocess( $text, $wgTitle, $options ); $executing = false; - wfProfileOut(__METHOD__); + wfProfileOut($fname); return $text; } @@ -4001,7 +4010,7 @@ class Parser # Add to function cache $mw = MagicWord::get( $id ); if( !$mw ) - throw new MWException( __METHOD__.'() expecting a magic word identifier.' ); + throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' ); $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4016,7 +4025,7 @@ class Parser $syn = '#' . $syn; } # Remove trailing colon - if ( substr( $syn, -1, 1 ) === ':' ) { + if ( substr( $syn, -1, 1 ) == ':' ) { $syn = substr( $syn, 0, -1 ); } $this->mFunctionSynonyms[$sensitive][$syn] = $id; @@ -4037,9 +4046,266 @@ class Parser * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. + * $options is a bit field, RLH_FOR_UPDATE to select for update */ function replaceLinkHolders( &$text, $options = 0 ) { - return $this->mLinkHolders->replace( $text ); + global $wgUser; + global $wgContLang; + + $fname = 'Parser::replaceLinkHolders'; + wfProfileIn( $fname ); + + $pdbks = array(); + $colours = array(); + $linkcolour_ids = array(); + $sk = $this->mOptions->getSkin(); + $linkCache = LinkCache::singleton(); + + if ( !empty( $this->mLinkHolders['namespaces'] ) ) { + wfProfileIn( $fname.'-check' ); + $dbr = wfGetDB( DB_SLAVE ); + $page = $dbr->tableName( 'page' ); + $threshold = $wgUser->getOption('stubthreshold'); + + # Sort by namespace + asort( $this->mLinkHolders['namespaces'] ); + + # Generate query + $query = false; + $current = null; + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { + # Make title object + $title = $this->mLinkHolders['titles'][$key]; + + # Skip invalid entries. + # Result will be ugly, but prevents crash. + if ( is_null( $title ) ) { + continue; + } + $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); + + # Check if it's a static known link, e.g. interwiki + if ( $title->isAlwaysKnown() ) { + $colours[$pdbk] = ''; + } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { + $colours[$pdbk] = ''; + $this->mOutput->addLink( $title, $id ); + } elseif ( $linkCache->isBadLink( $pdbk ) ) { + $colours[$pdbk] = 'new'; + } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { + $colours[$pdbk] = 'new'; + } else { + # Not in the link cache, add it to the query + if ( !isset( $current ) ) { + $current = $ns; + $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; + $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; + } elseif ( $current != $ns ) { + $current = $ns; + $query .= ")) OR (page_namespace=$ns AND page_title IN("; + } else { + $query .= ', '; + } + + $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] ); + } + } + if ( $query ) { + $query .= '))'; + if ( $options & RLH_FOR_UPDATE ) { + $query .= ' FOR UPDATE'; + } + + $res = $dbr->query( $query, $fname ); + + # Fetch data and form into an associative array + # non-existent = broken + while ( $s = $dbr->fetchObject($res) ) { + $title = Title::makeTitle( $s->page_namespace, $s->page_title ); + $pdbk = $title->getPrefixedDBkey(); + $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); + $this->mOutput->addLink( $title, $s->page_id ); + $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + //add id to the extension todolist + $linkcolour_ids[$s->page_id] = $pdbk; + } + //pass an array of page_ids to an extension + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + } + wfProfileOut( $fname.'-check' ); + + # Do a second query for different language variants of links and categories + if($wgContLang->hasVariants()){ + $linkBatch = new LinkBatch(); + $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) + $categoryMap = array(); // maps $category_variant => $category (dbkeys) + $varCategories = array(); // category replacements oldDBkey => newDBkey + + $categories = $this->mOutput->getCategoryLinks(); + + // Add variants of links to link batch + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { + $title = $this->mLinkHolders['titles'][$key]; + if ( is_null( $title ) ) + continue; + + $pdbk = $title->getPrefixedDBkey(); + $titleText = $title->getText(); + + // generate all variants of the link title text + $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); + + // if link was not found (in first query), add all variants to query + if ( !isset($colours[$pdbk]) ){ + foreach($allTextVariants as $textVariant){ + if($textVariant != $titleText){ + $variantTitle = Title::makeTitle( $ns, $textVariant ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; + } + } + } + } + + // process categories, check if a category exists in some variant + foreach( $categories as $category ){ + $variants = $wgContLang->convertLinkToAllVariants($category); + foreach($variants as $variant){ + if($variant != $category){ + $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $categoryMap[$variant] = $category; + } + } + } + + + if(!$linkBatch->isEmpty()){ + // construct query + $titleClause = $linkBatch->constructSet('page', $dbr); + + $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; + + $variantQuery .= " FROM $page WHERE $titleClause"; + if ( $options & RLH_FOR_UPDATE ) { + $variantQuery .= ' FOR UPDATE'; + } + + $varRes = $dbr->query( $variantQuery, $fname ); + + // for each found variants, figure out link holders and replace + while ( $s = $dbr->fetchObject($varRes) ) { + + $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); + $varPdbk = $variantTitle->getPrefixedDBkey(); + $vardbk = $variantTitle->getDBkey(); + + $holderKeys = array(); + if(isset($variantMap[$varPdbk])){ + $holderKeys = $variantMap[$varPdbk]; + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); + $this->mOutput->addLink( $variantTitle, $s->page_id ); + } + + // loop over link holders + foreach($holderKeys as $key){ + $title = $this->mLinkHolders['titles'][$key]; + if ( is_null( $title ) ) continue; + + $pdbk = $title->getPrefixedDBkey(); + + if(!isset($colours[$pdbk])){ + // found link in some of the variants, replace the link holder data + $this->mLinkHolders['titles'][$key] = $variantTitle; + $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); + + // set pdbk and colour + $pdbks[$key] = $varPdbk; + $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); + $linkcolour_ids[$s->page_id] = $pdbk; + } + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + } + + // check if the object is a variant of a category + if(isset($categoryMap[$vardbk])){ + $oldkey = $categoryMap[$vardbk]; + if($oldkey != $vardbk) + $varCategories[$oldkey]=$vardbk; + } + } + + // rebuild the categories in original order (if there are replacements) + if(count($varCategories)>0){ + $newCats = array(); + $originalCats = $this->mOutput->getCategories(); + foreach($originalCats as $cat => $sortkey){ + // make the replacement + if( array_key_exists($cat,$varCategories) ) + $newCats[$varCategories[$cat]] = $sortkey; + else $newCats[$cat] = $sortkey; + } + $this->mOutput->setCategoryLinks($newCats); + } + } + } + + # Construct search and replace arrays + wfProfileIn( $fname.'-construct' ); + $replacePairs = array(); + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { + $pdbk = $pdbks[$key]; + $searchkey = ""; + $title = $this->mLinkHolders['titles'][$key]; + if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { + $linkCache->addBadLinkObj( $title ); + $colours[$pdbk] = 'new'; + $this->mOutput->addLink( $title, 0 ); + $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, + $this->mLinkHolders['texts'][$key], + $this->mLinkHolders['queries'][$key] ); + } else { + $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], + $this->mLinkHolders['texts'][$key], + $this->mLinkHolders['queries'][$key] ); + } + } + $replacer = new HashtableReplacer( $replacePairs, 1 ); + wfProfileOut( $fname.'-construct' ); + + # Do the thing + wfProfileIn( $fname.'-replace' ); + $text = preg_replace_callback( + '/()/', + $replacer->cb(), + $text); + + wfProfileOut( $fname.'-replace' ); + } + + # Now process interwiki link holders + # This is quite a bit simpler than internal links + if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) { + wfProfileIn( $fname.'-interwiki' ); + # Make interwiki link HTML + $replacePairs = array(); + foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) { + $title = $this->mInterwikiLinkHolders['titles'][$key]; + $replacePairs[$key] = $sk->link( $title, $link ); + } + $replacer = new HashtableReplacer( $replacePairs, 1 ); + + $text = preg_replace_callback( + '//', + $replacer->cb(), + $text ); + wfProfileOut( $fname.'-interwiki' ); + } + + wfProfileOut( $fname ); + return $colours; } /** @@ -4049,7 +4315,36 @@ class Parser * @return string */ function replaceLinkHoldersText( $text ) { - return $this->mLinkHolders->replaceText( $text ); + $fname = 'Parser::replaceLinkHoldersText'; + wfProfileIn( $fname ); + + $text = preg_replace_callback( + '//', + array( &$this, 'replaceLinkHoldersTextCallback' ), + $text ); + + wfProfileOut( $fname ); + return $text; + } + + /** + * @param array $matches + * @return string + * @private + */ + function replaceLinkHoldersTextCallback( $matches ) { + $type = $matches[1]; + $key = $matches[2]; + if( $type == 'LINK' ) { + if( isset( $this->mLinkHolders['texts'][$key] ) ) { + return $this->mLinkHolders['texts'][$key]; + } + } elseif( $type == 'IWLINK' ) { + if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) { + return $this->mInterwikiLinkHolders['texts'][$key]; + } + } + return $matches[0]; } /** @@ -4103,7 +4398,7 @@ class Parser wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); - $lines = StringUtils::explode( "\n", $text ); + $lines = explode( "\n", $text ); foreach ( $lines as $line ) { # match lines like these: # Image:someimage.jpg|This is some image @@ -4116,7 +4411,7 @@ class Parser if ( strpos( $matches[0], '%' ) !== false ) $matches[1] = urldecode( $matches[1] ); - $tp = Title::newFromText( $matches[1], NS_IMAGE ); + $tp = Title::newFromText( $matches[1] ); $nt =& $tp; if( is_null( $nt ) ) { # Bogus title. Ignore these so we don't bomb out later. @@ -4182,11 +4477,8 @@ class Parser /** * Parse image options text and use it to make an image - * @param Title $title - * @param string $options - * @param LinkHolderArray $holders */ - function makeImage( $title, $options, $holders = false ) { + function makeImage( $title, $options ) { # Check if the options text is of the form "options|alt text" # Options are: # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang @@ -4209,7 +4501,7 @@ class Parser # * bottom # * text-bottom - $parts = StringUtils::explode( "|", $options ); + $parts = array_map( 'trim', explode( '|', $options) ); $sk = $this->mOptions->getSkin(); # Give extensions a chance to select the file revision for us @@ -4231,14 +4523,13 @@ class Parser $params = array( 'frame' => array(), 'handler' => array(), 'horizAlign' => array(), 'vertAlign' => array() ); foreach( $parts as $part ) { - $part = trim( $part ); list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); $validated = false; if( isset( $paramMap[$magicName] ) ) { list( $type, $paramName ) = $paramMap[$magicName]; // Special case; width and height come in one variable together - if( $type === 'handler' && $paramName === 'width' ) { + if( $type == 'handler' && $paramName == 'width' ) { $m = array(); # (bug 13500) In both cases (width/height and width only), # permit trailing "px" for backward compatibility. @@ -4261,7 +4552,7 @@ class Parser } } // else no validation -- bug 13436 } else { - if ( $type === 'handler' ) { + if ( $type == 'handler' ) { # Validate handler parameter $validated = $handler->validateParam( $paramName, $value ); } else { @@ -4297,13 +4588,7 @@ class Parser } # Strip bad stuff out of the alt text - # We can't just use replaceLinkHoldersText() here, because if this function - # is called from replaceInternalLinks2(), mLinkHolders won't be up to date. - if ( $holders ) { - $alt = $holders->replaceText( $caption ); - } else { - $alt = $this->replaceLinkHoldersText( $caption ); - } + $alt = $this->replaceLinkHoldersText( $caption ); # make sure there are no placeholders in thumbnail attributes # that are later expanded to html- so expand them now and @@ -4406,7 +4691,7 @@ class Parser $sectionParts = explode( '-', $section ); $sectionIndex = array_pop( $sectionParts ); foreach ( $sectionParts as $part ) { - if ( $part === 'T' ) { + if ( $part == 'T' ) { $flags |= self::PTD_FOR_INCLUSION; } } @@ -4423,14 +4708,14 @@ class Parser $targetLevel = 1000; } else { while ( $node ) { - if ( $node->getName() === 'h' ) { + if ( $node->getName() == 'h' ) { $bits = $node->splitHeading(); if ( $bits['i'] == $sectionIndex ) { $targetLevel = $bits['level']; break; } } - if ( $mode === 'replace' ) { + if ( $mode == 'replace' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); @@ -4439,7 +4724,7 @@ class Parser if ( !$node ) { // Not found - if ( $mode === 'get' ) { + if ( $mode == 'get' ) { return $newText; } else { return $text; @@ -4448,21 +4733,21 @@ class Parser // Find the end of the section, including nested sections do { - if ( $node->getName() === 'h' ) { + if ( $node->getName() == 'h' ) { $bits = $node->splitHeading(); $curLevel = $bits['level']; if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { break; } } - if ( $mode === 'get' ) { + if ( $mode == 'get' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); } while ( $node ); // Write out the remainder (in replace mode only) - if ( $mode === 'replace' ) { + if ( $mode == 'replace' ) { // Output the replacement text // Add two newlines on -- trailing whitespace in $newText is conventionally // stripped by the editor, so we need both newlines to restore the paragraph gap @@ -4692,7 +4977,7 @@ class StripState { do { $oldText = $text; $text = $this->general->replace( $text ); - } while ( $text !== $oldText ); + } while ( $text != $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4702,7 +4987,7 @@ class StripState { do { $oldText = $text; $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); + } while ( $text != $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4713,7 +4998,7 @@ class StripState { $oldText = $text; $text = $this->general->replace( $text ); $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); + } while ( $text != $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4727,7 +5012,7 @@ class OnlyIncludeReplacer { var $output = ''; function replace( $matches ) { - if ( substr( $matches[1], -1 ) === "\n" ) { + if ( substr( $matches[1], -1 ) == "\n" ) { $this->output .= substr( $matches[1], 0, -1 ); } else { $this->output .= $matches[1]; diff --git a/includes/parser/Parser_DiffTest.php b/includes/parser/Parser_DiffTest.php index bc7369ef42..2a677ea964 100644 --- a/includes/parser/Parser_DiffTest.php +++ b/includes/parser/Parser_DiffTest.php @@ -69,17 +69,9 @@ class Parser_DiffTest $lastResult = $currentResult; } if ( $mismatch ) { - if ( count( $results ) == 2 ) { - $results2 = array_values( $results ); - $diff = wfDiff( var_export( $results2[0], true ), var_export( $results2[1], true ) ); - } else { - $diff = '[too many parsers]'; - } throw new MWException( "Parser_DiffTest: results mismatch on call to $name\n" . 'Arguments: ' . $this->formatArray( $args ) . "\n" . - 'Results: ' . $this->formatArray( $results ) . "\n" . - "Diff: $diff\n" - ); + 'Results: ' . $this->formatArray( $results ) . "\n" ); } return $lastResult; } diff --git a/languages/Language.php b/languages/Language.php index 44ac0628d7..18f523ab34 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -176,15 +176,6 @@ class Language { } } - /** - * Reduce memory usage - */ - function __destruct() { - foreach ( $this as $name => $value ) { - unset( $this->$name ); - } - } - /** * Hook which will be called if this is the content language. * Descendants can use this to register hook functions or modify globals diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 1c3926c711..c38d586e76 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -435,9 +435,8 @@ class LanguageConverter { if ($isTitle) return $this->convertTitle($text); $plang = $this->getPreferredVariant(); - $tarray = StringUtils::explode($this->mMarkup['end'], $text); + $tarray = explode($this->mMarkup['end'], $text); $text = ''; - $lastDelim = false; foreach($tarray as $txt) { $marked = explode($this->mMarkup['begin'], $txt, 2); @@ -453,17 +452,8 @@ class LanguageConverter { $text .= $crule->getDisplay(); $this->applyManualConv($crule); - $lastDelim = false; - } else { - // Reinsert the }- which wasn't part of anything - $text .= $this->mMarkup['end']; - $lastDelim = true; } } - if ( $lastDelim ) { - // Remove the last delimiter (wasn't real) - $text = substr( $text, 0, -strlen( $this->mMarkup['end'] ) ); - } return $text; } diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc index 93eaa8e6ea..01409c174b 100644 --- a/maintenance/parserTests.inc +++ b/maintenance/parserTests.inc @@ -26,7 +26,7 @@ /** */ $options = array( 'quick', 'color', 'quiet', 'help', 'show-output', 'record' ); -$optionsWithArgs = array( 'regex', 'seed' ); +$optionsWithArgs = array( 'regex' ); require_once( 'commandLine.inc' ); require_once( "$IP/maintenance/parserTestsParserHook.php" ); @@ -62,11 +62,6 @@ class ParserTest { */ private $oldTablePrefix; - private $maxFuzzHairLength = 20; - private $maxFuzzTestLength = 1000; - private $fuzzSeed = 0; - private $memoryLimit = 50; - /** * Sets terminal colorization and diff/quick modes depending on OS and * command-line options (--color and --quick). @@ -122,10 +117,6 @@ class ParserTest { } $this->keepUploads = isset( $options['keep-uploads'] ); - if ( isset( $options['seed'] ) ) { - $this->fuzzSeed = intval( $options['seed'] ) - 1; - } - $this->hooks = array(); $this->functionHooks = array(); } @@ -142,116 +133,6 @@ class ParserTest { } } - /** - * Run a fuzz test series - * Draw input from a set of test files - */ - function fuzzTest( $filenames ) { - $dict = $this->getFuzzInput( $filenames ); - $this->setupDatabase(); - ini_set( 'memory_limit', $this->memoryLimit * 1048576 ); - - $numTotal = 0; - $numSuccess = 0; - $user = new User; - $opts = ParserOptions::newFromUser( $user ); - $title = Title::makeTitle( NS_MAIN, 'Parser_test' ); - - while ( true ) { - // Generate test input - mt_srand( ++$this->fuzzSeed ); - $totalLength = mt_rand( 1, $this->maxFuzzTestLength ); - $input = ''; - while ( strlen( $input ) < $totalLength ) { - $hairLength = mt_rand( 1, $this->maxFuzzHairLength ); - $offset = mt_rand( 0, strlen( $dict ) - $hairLength ); - $input .= substr( $dict, $offset, $hairLength ); - } - - $this->setupGlobals(); - $parser = $this->getParser(); - // Run the test - try { - $parser->parse( $input, $title, $opts ); - $fail = false; - } catch ( Exception $exception ) { - $fail = true; - } - - if ( $fail ) { - echo "Test failed with seed {$this->fuzzSeed}\n"; - echo "Input:\n"; - var_dump( $input ); - echo "\n\n"; - echo "$exception\n"; - } else { - $numSuccess++; - } - $numTotal++; - $this->teardownGlobals(); - $parser->__destruct(); - - if ( $numTotal % 100 == 0 ) { - $usage = intval( memory_get_usage( true ) / $this->memoryLimit / 1048576 * 100 ); - echo "{$this->fuzzSeed}: $numSuccess/$numTotal (mem: $usage%)\n"; - if ( $usage > 90 ) { - echo "Out of memory:\n"; - $memStats = $this->getMemoryBreakdown(); - foreach ( $memStats as $name => $usage ) { - echo "$name: $usage\n"; - } - $this->abort(); - } - } - } - } - - /** - * Get an input dictionary from a set of parser test files - */ - function getFuzzInput( $filenames ) { - $dict = ''; - foreach( $filenames as $filename ) { - $contents = file_get_contents( $filename ); - preg_match_all( '/!!\s*input\n(.*?)\n!!\s*result/s', $contents, $matches ); - foreach ( $matches[1] as $match ) { - $dict .= $match . "\n"; - } - } - return $dict; - } - - /** - * Get a memory usage breakdown - */ - function getMemoryBreakdown() { - $memStats = array(); - foreach ( $GLOBALS as $name => $value ) { - $memStats['$'.$name] = strlen( serialize( $value ) ); - } - $classes = get_declared_classes(); - foreach ( $classes as $class ) { - $rc = new ReflectionClass( $class ); - $props = $rc->getStaticProperties(); - $memStats[$class] = strlen( serialize( $props ) ); - $methods = $rc->getMethods(); - foreach ( $methods as $method ) { - $memStats[$class] += strlen( serialize( $method->getStaticVariables() ) ); - } - } - $functions = get_defined_functions(); - foreach ( $functions['user'] as $function ) { - $rf = new ReflectionFunction( $function ); - $memStats["$function()"] = strlen( serialize( $rf->getStaticVariables() ) ); - } - asort( $memStats ); - return $memStats; - } - - function abort() { - $this->abort(); - } - /** * Run a series of tests listed in the given text files. * Each test consists of a brief description, wikitext input, @@ -385,24 +266,6 @@ class ParserTest { return $ok; } - /** - * Get a Parser object - */ - function getParser() { - global $wgParserConf; - $class = $wgParserConf['class']; - $parser = new $class( $wgParserConf ); - foreach( $this->hooks as $tag => $callback ) { - $parser->setHook( $tag, $callback ); - } - foreach( $this->functionHooks as $tag => $bits ) { - list( $callback, $flags ) = $bits; - $parser->setFunctionHook( $tag, $callback, $flags ); - } - wfRunHooks( 'ParserTestParser', array( &$parser ) ); - return $parser; - } - /** * Run a given wikitext input through a freshly-constructed wiki parser, * and compare the output against the expected results. @@ -413,6 +276,7 @@ class ParserTest { * @return bool */ private function runTest( $desc, $input, $result, $opts ) { + global $wgParserConf; if( $this->showProgress ) { $this->showTesting( $desc ); } @@ -436,7 +300,18 @@ class ParserTest { } $noxml = (bool)preg_match( '~\\b noxml \\b~x', $opts ); - $parser = $this->getParser(); + + $class = $wgParserConf['class']; + $parser = new $class( $wgParserConf ); + foreach( $this->hooks as $tag => $callback ) { + $parser->setHook( $tag, $callback ); + } + foreach( $this->functionHooks as $tag => $bits ) { + list( $callback, $flags ) = $bits; + $parser->setFunctionHook( $tag, $callback, $flags ); + } + wfRunHooks( 'ParserTestParser', array( &$parser ) ); + $title =& Title::makeTitle( NS_MAIN, $titleText ); $matches = array(); @@ -566,7 +441,6 @@ class ParserTest { $langObj = Language::factory( $lang ); $GLOBALS['wgLang'] = $langObj; $GLOBALS['wgContLang'] = $langObj; - $GLOBALS['wgMemc'] = new FakeMemCachedClient; //$GLOBALS['wgMessageCache'] = new MessageCache( new BagOStuff(), false, 0, $GLOBALS['wgDBname'] ); @@ -677,10 +551,10 @@ class ParserTest { # Hack: insert a few Wikipedia in-project interwiki prefixes, # for testing inter-language links $db->insert( 'interwiki', array( - array( 'iw_prefix' => 'wikipedia', + array( 'iw_prefix' => 'Wikipedia', 'iw_url' => 'http://en.wikipedia.org/wiki/$1', 'iw_local' => 0 ), - array( 'iw_prefix' => 'meatball', + array( 'iw_prefix' => 'MeatBall', 'iw_url' => 'http://www.usemod.com/cgi-bin/mb.pl?$1', 'iw_local' => 0 ), array( 'iw_prefix' => 'zh', @@ -747,12 +621,11 @@ class ParserTest { return; } - /* $tables = $this->listTables(); $db = wfGetDB( DB_MASTER ); foreach ( $tables as $table ) { $db->query( "DROP TABLE `parsertest_$table`" ); - }*/ + } } /** @@ -772,10 +645,6 @@ class ParserTest { } wfDebug( "Creating upload directory $dir\n" ); - if ( file_exists( $dir ) ) { - wfDebug( "Already exists!\n" ); - return $dir; - } mkdir( $dir ); mkdir( $dir . '/3' ); mkdir( $dir . '/3/3a' ); @@ -789,8 +658,6 @@ class ParserTest { */ private function teardownGlobals() { RepoGroup::destroySingleton(); - LinkCache::singleton()->clear(); - $GLOBALS['wgLang']->__destruct(); foreach( $this->savedGlobals as $var => $val ) { $GLOBALS[$var] = $val; } diff --git a/maintenance/parserTests.php b/maintenance/parserTests.php index 0d50feb1ac..192eeaa82f 100644 --- a/maintenance/parserTests.php +++ b/maintenance/parserTests.php @@ -28,21 +28,22 @@ require('parserTests.inc'); if( isset( $options['help'] ) ) { echo <<] [--file=] + [--record] [--compare] + [--help] Options: --quick Suppress diff output of failed tests --quiet Suppress notification of passed tests (shows only failed tests) --show-output Show expected and actual output - --color[=yes|no] Override terminal detection and force color output on or off + --color Override terminal detection and force color output on or off use wgCommandLineDarkBg = true; if your term is dark --regex Only run tests whose descriptions which match given regex - --file= Run test cases from a custom file instead of parserTests.txt + --file Run test cases from a custom file instead of parserTests.txt --record Record tests in database --compare Compare with recorded results, without updating the database. --keep-uploads Re-use the same upload directory for each test, don't delete it - --fuzz Do a fuzz test instead of a normal test - --seed Start the fuzz test from the specified seed --help Show this help message @@ -66,10 +67,7 @@ if( isset( $options['file'] ) ) { # Print out software version to assist with locating regressions $version = SpecialVersion::getVersion(); echo( "This is MediaWiki version {$version}.\n\n" ); +$ok = $tester->runTestsFromFiles( $files ); + +exit ($ok ? 0 : -1); -if ( isset( $options['fuzz'] ) ) { - $tester->fuzzTest( $files ); -} else { - $ok = $tester->runTestsFromFiles( $files ); - exit ($ok ? 0 : -1); -} diff --git a/maintenance/parserTestsStaticParserHook.php b/maintenance/parserTestsStaticParserHook.php index 17a11a36a8..5a98a89d2d 100644 --- a/maintenance/parserTestsStaticParserHook.php +++ b/maintenance/parserTestsStaticParserHook.php @@ -27,19 +27,18 @@ function wfParserTestStaticParserHookHook( $in, $argv ) { if ( ! count( $argv ) ) { $buf = $in; return ''; - } else if ( count( $argv ) === 1 && isset( $argv['action'] ) - && $argv['action'] === 'flush' && $in === null ) - { + } else if ( count( $argv ) === 1 && $argv['action'] === 'flush' && $in === null ) { // Clear the buffer, we probably don't need to $tmp = $buf; $buf = null; return $tmp; } else // wtf? - return + die( "\nCall this extension as string or as" . " , not in any other way.\n" . "text: " . var_export( $in, true ) . "\n" . - "argv: " . var_export( $argv, true ) . "\n"; + "argv: " . var_export( $argv, true ) . "\n" + ); } -- 2.20.1