X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=1f780fb2daf80b00e4595975b7461fcb70048906;hb=41231616b63c41fcacdc1631330b22c593f27425;hp=cbe72047b3ef2809548c1f72ec2387c314825a7f;hpb=58d09d4d53976141ae32bd15d3b6a2a3b088cee7;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index cbe72047b3..1f780fb2da 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -22,10 +22,12 @@ * produces altered wiki markup. * preprocess() * removes HTML comments and expands templates - * cleanSig() + * cleanSig() / cleanSigInSig() * Cleans a signature before saving it to preferences - * extractSections() - * Extracts sections from an article for section editing + * getSection() + * Return the content of a section from an article for section editing + * replaceSection() + * Replaces a section by number inside an article * getPreloadText() * Removes sections, and tags. * @@ -35,7 +37,7 @@ * NOT $wgArticle, $wgUser or $wgTitle. Keep them away! * * settings: - * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*, + * $wgUseDynamicDates*, $wgInterwikiMagic*, * $wgNamespacesWithSubpages, $wgAllowExternalImages*, * $wgLocaltimezone, $wgAllowSpecialInclusion*, * $wgMaxArticleSize* @@ -51,7 +53,13 @@ class Parser { * changes in an incompatible way, so the parser cache * can automatically discard old data. */ - const VERSION = '1.6.4'; + const VERSION = '1.6.5'; + + /** + * Update this version number when the output of serialiseHalfParsedText() + * changes in an incompatible way + */ + const HALF_PARSED_VERSION = 2; # Flags for Parser::setFunctionHook # Also available as global constants from Defines.php @@ -60,7 +68,7 @@ class Parser { # Constants needed for external link processing # Everything except bracket, space, or control characters - const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]'; + const EXT_LINK_URL_CLASS = '(?:[^\]\[<>"\\x00-\\x20\\x7F]|(?:\[\]))'; const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; @@ -89,25 +97,56 @@ class Parser { const MARKER_SUFFIX = "-QINU\x7f"; # Persistent: - var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables; - var $mSubstWords, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex; - var $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList; - var $mVarCache, $mConf, $mFunctionTagHooks; - + var $mTagHooks = array(); + var $mTransparentTagHooks = array(); + var $mFunctionHooks = array(); + var $mFunctionSynonyms = array( 0 => array(), 1 => array() ); + var $mFunctionTagHooks = array(); + var $mStripList = array(); + var $mDefaultStripList = array(); + var $mVarCache = array(); + var $mImageParams = array(); + var $mImageParamsMagicArray = array(); + var $mMarkerIndex = 0; + var $mFirstCall = true; + var $mVariables, $mSubstWords; # Initialised by initialiseVariables() + var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor # Cleared with clearState(): - var $mOutput, $mAutonumber, $mDTopen, $mStripState; + /** + * @var ParserOutput + */ + var $mOutput; + var $mAutonumber, $mDTopen; + + /** + * @var StripState + */ + var $mStripState; + var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; var $mLinkHolders, $mLinkID; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; # empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; var $mExpensiveFunctionCount; # number of expensive parser function calls + + /** + * @var User + */ var $mUser; # User object; only used when doing pre-save transform # Temporary # These are variables reset at least once per parse regardless of $clearState - var $mOptions; # ParserOptions object + + /** + * @var ParserOptions + */ + var $mOptions; + + /** + * @var Title + */ var $mTitle; # Title context, used for self-link rendering and similar things var $mOutputType; # Output type, one of the OT_xxx constants var $ot; # Shortcut alias, see setOutputType() @@ -119,21 +158,12 @@ class Parser { /** * Constructor - * - * @public */ - function __construct( $conf = array() ) { + public function __construct( $conf = array() ) { $this->mConf = $conf; - $this->mTagHooks = array(); - $this->mTransparentTagHooks = array(); - $this->mFunctionHooks = array(); - $this->mFunctionTagHooks = array(); - $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); - $this->mDefaultStripList = $this->mStripList = array(); $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. - '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; - $this->mVarCache = array(); + '(?:[^\]\[<>"\x00-\x20\x7F]|\[\])+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } elseif ( extension_loaded( 'domxml' ) ) { @@ -145,8 +175,6 @@ class Parser { } else { $this->mPreprocessorClass = 'Preprocessor_Hash'; } - $this->mMarkerIndex = 0; - $this->mFirstCall = true; } /** @@ -154,7 +182,7 @@ class Parser { */ function __destruct() { if ( isset( $this->mLinkHolders ) ) { - $this->mLinkHolders->__destruct(); + unset( $this->mLinkHolders ); } foreach ( $this as $name => $value ) { unset( $this->$name ); @@ -191,11 +219,11 @@ class Parser { $this->firstCallInit(); } $this->mOutput = new ParserOutput; + $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) ); $this->mAutonumber = 0; $this->mLastSection = ''; $this->mDTopen = false; $this->mIncludeCount = array(); - $this->mStripState = new StripState; $this->mArgStack = false; $this->mInPre = false; $this->mLinkHolders = new LinkHolderArray( $this ); @@ -218,6 +246,7 @@ class Parser { # $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); # Changed to \x7f to allow XML double-parsing -- TS $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); + $this->mStripState = new StripState( $this->mUniqPrefix ); # Clear these on every parse, bug 4549 @@ -249,7 +278,7 @@ class Parser { * Do not call this function recursively. * * @param $text String: text we want to parse - * @param $title A title object + * @param $title Title object * @param $options ParserOptions * @param $linestart boolean * @param $clearState boolean @@ -267,13 +296,7 @@ class Parser { wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); - if ( $clearState ) { - $this->clearState(); - } - - $options->resetUsage(); - $this->mOptions = $options; - $this->setTitle( $title ); # Page title has to be set for the pre-processor + $this->startParse( $title, $options, self::OT_HTML, $clearState ); $oldRevisionId = $this->mRevisionId; $oldRevisionObject = $this->mRevisionObject; @@ -285,7 +308,7 @@ class Parser { $this->mRevisionTimestamp = null; $this->mRevisionUser = null; } - $this->setOutputType( self::OT_HTML ); + wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); # No more strip! wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); @@ -326,24 +349,17 @@ class Parser { } /** - * A page get its title converted except: - * a) Language conversion is globally disabled - * b) Title convert is globally disabled - * c) The page is a redirect page - * d) User request with a "linkconvert" set to "no" - * e) A "nocontentconvert" magic word has been set - * f) A "notitleconvert" magic word has been set - * g) User sets "noconvertlink" in his/her preference - * - * Note that if a user tries to set a title in a conversion - * rule but content conversion was not done, then the parser - * won't pick it up. This is probably expected behavior. + * A converted title will be provided in the output object if title and + * content conversion are enabled, the article text does not contain + * a conversion-suppressing double-underscore tag, and no + * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over + * automatic link conversion. */ if ( !( $wgDisableLangConversion || $wgDisableTitleConversion || isset( $this->mDoubleUnderscores['nocontentconvert'] ) || isset( $this->mDoubleUnderscores['notitleconvert'] ) - || $this->mOutput->getDisplayTitle() !== false ) ) + || $this->mOutput->getDisplayTitle() !== false ) ) { $convruletitle = $wgContLang->getConvRuleTitle(); if ( $convruletitle ) { @@ -358,23 +374,7 @@ class Parser { wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) ); -//!JF Move to its own function - - $uniq_prefix = $this->mUniqPrefix; - $matches = array(); - $elements = array_keys( $this->mTransparentTagHooks ); - $text = $this->extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); - - foreach ( $matches as $marker => $data ) { - list( $element, $content, $params, $tag ) = $data; - $tagName = strtolower( $element ); - if ( isset( $this->mTransparentTagHooks[$tagName] ) ) { - $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) ); - } else { - $output = $tag; - } - $this->mStripState->general->setPair( $marker, $output ); - } + $text = $this->replaceTransparentTags( $text ); $text = $this->mStripState->unstripGeneral( $text ); $text = Sanitizer::normalizeCharReferences( $text ); @@ -462,13 +462,9 @@ class Parser { * Expand templates and variables in the text, producing valid, static wikitext. * Also removes comments. */ - function preprocess( $text, $title, $options, $revid = null ) { + function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) { wfProfileIn( __METHOD__ ); - $this->clearState(); - $this->setOutputType( self::OT_PREPROCESS ); - $options->resetUsage(); - $this->mOptions = $options; - $this->setTitle( $title ); + $this->startParse( $title, $options, self::OT_PREPROCESS, true ); if ( $revid !== null ) { $this->mRevisionId = $revid; } @@ -486,26 +482,21 @@ class Parser { * , etc. are parsed as for template transclusion, * comments, templates, arguments, tags hooks and parser functions are untouched. */ - public function getPreloadText( $text, $title, $options ) { + public function getPreloadText( $text, Title $title, ParserOptions $options ) { # Parser (re)initialisation - $this->clearState(); - $this->setOutputType( self::OT_PLAIN ); - $options->resetUsage(); - $this->mOptions = $options; - $this->setTitle( $title ); + $this->startParse( $title, $options, self::OT_PLAIN, true ); $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES; $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); - return $this->getPreprocessor()->newFrame()->expand( $dom, $flags ); + $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags ); + $text = $this->mStripState->unstripBoth( $text ); + return $text; } /** * Get a random string - * - * @private - * @static */ - static private function getRandomString() { + static public function getRandomString() { return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); } @@ -541,9 +532,9 @@ class Parser { * Set the context title */ function setTitle( $t ) { - if ( !$t || $t instanceof FakeTitle ) { - $t = Title::newFromText( 'NO TITLE' ); - } + if ( !$t || $t instanceof FakeTitle ) { + $t = Title::newFromText( 'NO TITLE' ); + } if ( strval( $t->getFragment() ) !== '' ) { # Strip the fragment to avoid various odd effects @@ -631,6 +622,13 @@ class Parser { return $this->mLinkID++; } + function setLinkID( $id ) { + $this->mLinkID = $id; + } + + /** + * @return Language + */ function getFunctionLang() { global $wgLang, $wgContLang; @@ -684,10 +682,8 @@ class Parser { * @param $matches Out parameter, Array: extracted tags * @param $uniq_prefix * @return String: stripped text - * - * @static */ - public function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { + public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { static $n = 1; $stripped = ''; $matches = array(); @@ -756,41 +752,6 @@ class Parser { return $this->mStripList; } - /** - * @deprecated use replaceVariables - */ - function strip( $text, $state, $stripcomments = false , $dontstrip = array() ) { - return $text; - } - - /** - * Restores pre, math, and other extensions removed by strip() - * - * always call unstripNoWiki() after this one - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstrip( $text, $state ) { - return $state->unstripGeneral( $text ); - } - - /** - * Always call this after unstrip() to preserve the order - * - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstripNoWiki( $text, $state ) { - return $state->unstripNoWiki( $text ); - } - - /** - * @deprecated use $this->mStripState->unstripBoth() - */ - function unstripForHTML( $text ) { - return $this->mStripState->unstripBoth( $text ); - } - /** * Add an item to the strip state * Returns the unique tag which must be inserted into the stripped text @@ -801,19 +762,10 @@ class Parser { function insertStripItem( $text ) { $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; $this->mMarkerIndex++; - $this->mStripState->general->setPair( $rnd, $text ); + $this->mStripState->addGeneral( $rnd, $text ); return $rnd; } - /** - * Interface with html tidy - * @deprecated Use MWTidy::tidy() - */ - public static function tidy( $text ) { - wfDeprecated( __METHOD__ ); - return MWTidy::tidy( $text ); - } - /** * parse the wiki syntax used to render tables * @@ -821,192 +773,304 @@ class Parser { */ function doTableStuff( $text ) { wfProfileIn( __METHOD__ ); - + $lines = StringUtils::explode( "\n", $text ); $out = ''; - $td_history = array(); # Is currently a td tag open? - $last_tag_history = array(); # Save history of last lag activated (td, th or caption) - $tr_history = array(); # Is currently a tr tag open? - $tr_attributes = array(); # history of tr attributes - $has_opened_tr = array(); # Did this table open a element? - $indent_level = 0; # indent level of the table + $output =& $out; foreach ( $lines as $outLine ) { $line = trim( $outLine ); - if ( $line === '' ) { # empty line, go to next line - $out .= $outLine."\n"; + # empty line, go to next line, + # but only append \n if outside of table + if ( $line === '') { + $output .= $outLine . "\n"; continue; } - - $first_character = $line[0]; + $firstChars = $line[0]; + if ( strlen( $line ) > 1 ) { + $firstChars .= in_array( $line[1], array( '}', '+', '-' ) ) ? $line[1] : ''; + } $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { - # First check if we are starting a new table - $indent_level = strlen( $matches[1] ); + if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line , $matches ) ) { + $tables[] = array(); + $table =& $this->last( $tables ); + $table[0] = array(); // first row + $currentRow =& $table[0]; + $table['indent'] = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' ); - $outLine = str_repeat( '
' , $indent_level ) . ""; - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - array_push( $tr_history , false ); - array_push( $tr_attributes , '' ); - array_push( $has_opened_tr , false ); - } elseif ( count( $td_history ) == 0 ) { - # Don't do any of the following - $out .= $outLine."\n"; - continue; - } elseif ( substr( $line , 0 , 2 ) === '|}' ) { - # We are ending a table - $line = '' . substr( $line , 2 ); - $last_tag = array_pop( $last_tag_history ); + if ( $attributes !== '' ) { + $table['attributes'] = $attributes; + } + } else if ( !isset( $tables[0] ) ) { + // we're outside the table + + $out .= $outLine . "\n"; + } else if ( $firstChars === '|}' ) { + // trim the |} code from the line + $line = substr ( $line , 2 ); + + // Shorthand for last row + $lastRow =& $this->last( $table ); + + // a thead at the end becomes a tfoot, unless there is only one row + // Do this before deleting empty last lines to allow headers at the bottom of tables + if ( isset( $lastRow['type'] ) && $lastRow['type'] == 'thead' && isset( $table[1] ) ) { + $lastRow['type'] = 'tfoot'; + for ( $i = 0; isset( $lastRow[$i] ); $i++ ) { + $lastRow[$i]['type'] = 'th'; + } + } - if ( !array_pop( $has_opened_tr ) ) { - $line = "{$line}"; + // Delete empty last lines + if ( empty( $lastRow ) ) { + $lastRow = NULL; } + $o = ''; + $curtable = array_pop( $tables ); - if ( array_pop( $tr_history ) ) { - $line = "{$line}"; + #Add a line-ending before the table, but only if there isn't one already + if ( substr( $out, -1 ) !== "\n" ) { + $o .= "\n"; } + $o .= $this->generateTableHTML( $curtable ) . $line . "\n"; - if ( array_pop( $td_history ) ) { - $line = "{$line}"; + if ( count( $tables ) > 0 ) { + $table =& $this->last( $tables ); + $currentRow =& $this->last( $table ); + $currentElement =& $this->last( $currentRow ); + + $output =& $currentElement['content']; + } else { + $output =& $out; } - array_pop( $tr_attributes ); - $outLine = $line . str_repeat( '
' , $indent_level ); - } elseif ( substr( $line , 0 , 2 ) === '|-' ) { - # Now we have a table row - $line = preg_replace( '#^\|-+#', '', $line ); - # Whats after the tag is now only attributes + $output .= $o; + + } else if ( $firstChars === '|-' ) { + // start a new row element + // but only when we haven't started one already + if ( count( $currentRow ) != 0 ) { + $table[] = array(); + $currentRow =& $this->last( $table ); + } + // Get the attributes, there's nothing else useful in $line now + $line = substr ( $line , 2 ); $attributes = $this->mStripState->unstripBoth( $line ); $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' ); - array_pop( $tr_attributes ); - array_push( $tr_attributes, $attributes ); - - $line = ''; - $last_tag = array_pop( $last_tag_history ); - array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); - - if ( array_pop( $tr_history ) ) { - $line = ''; + if ( $attributes !== '' ) { + $currentRow['attributes'] = $attributes; } - if ( array_pop( $td_history ) ) { - $line = "{$line}"; + } else if ( $firstChars === '|+' ) { + // a table caption, but only proceed if there isn't one already + if ( !isset ( $table['caption'] ) ) { + $line = substr ( $line , 2 ); + + $c = $this->getCellAttr( $line , 'caption' ); + $table['caption'] = array(); + $table['caption']['content'] = $c[0]; + if ( isset( $c[1] ) ) $table['caption']['attributes'] = $c[1]; + unset( $c ); + $output =& $table['caption']['content']; + } + } else if ( $firstChars === '|' || $firstChars === '!' || $firstChars === '!+' ) { + // Which kind of cells are we dealing with + $currentTag = 'td'; + $line = substr ( $line , 1 ); + + if ( $firstChars === '!' || $firstChars === '!+' ) { + $line = str_replace ( '!!' , '||' , $line ); + $currentTag = 'th'; } - $outLine = $line; - array_push( $tr_history , false ); - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) { - # This might be cell elements, td, th or captions - if ( substr( $line , 0 , 2 ) === '|+' ) { - $first_character = '+'; - $line = substr( $line , 1 ); + // Split up multiple cells on the same line. + $cells = StringUtils::explodeMarkup( '||' , $line ); + $line = ''; // save memory + + // decide whether thead to tbody + if ( !array_key_exists( 'type', $currentRow ) ) { + $currentRow['type'] = ( $firstChars === '!' ) ? 'thead' : 'tbody' ; + } else if ( $firstChars === '|' ) { + $currentRow['type'] = 'tbody'; } - $line = substr( $line , 1 ); + // Loop through each table cell + foreach ( $cells as $cell ) { + // a new cell + $currentRow[] = array(); + $currentElement =& $this->last( $currentRow ); + + $currentElement['type'] = $currentTag; - if ( $first_character === '!' ) { - $line = str_replace( '!!' , '||' , $line ); + $c = $this->getCellAttr( $cell , $currentTag ); + $currentElement['content'] = $c[0]; + if ( isset( $c[1] ) ) $currentElement['attributes'] = $c[1]; + unset( $c ); } + $output =& $currentElement['content']; - # Split up multiple cells on the same line. - # FIXME : This can result in improper nesting of tags processed - # by earlier parser steps, but should avoid splitting up eg - # attribute values containing literal "||". - $cells = StringUtils::explodeMarkup( '||' , $line ); + } else { + $output .= "\n$outLine"; + } + } - $outLine = ''; + # Remove trailing line-ending (b/c) + if ( substr( $out, -1 ) === "\n" ) { + $out = substr( $out, 0, -1 ); + } - # Loop through each table cell - foreach ( $cells as $cell ) { - $previous = ''; - if ( $first_character !== '+' ) { - $tr_after = array_pop( $tr_attributes ); - if ( !array_pop( $tr_history ) ) { - $previous = "\n"; - } - array_push( $tr_history , true ); - array_push( $tr_attributes , '' ); - array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); - } + # Close any unclosed tables + if ( isset( $tables ) && count( $tables ) > 0 ) { + for ( $i = 0; $i < count( $tables ); $i++ ) { + $curtable = array_pop( $tables ); + $curtable = $this->generateTableHTML( $curtable ); + #Add a line-ending before the table, but only if there isn't one already + if ( substr( $out, -1 ) !== "\n" && $curtable !== "" ) { + $out .= "\n"; + } + $out .= $curtable; + } + } - $last_tag = array_pop( $last_tag_history ); + wfProfileOut( __METHOD__ ); - if ( array_pop( $td_history ) ) { - $previous = "\n{$previous}"; - } + return $out; + } - if ( $first_character === '|' ) { - $last_tag = 'td'; - } elseif ( $first_character === '!' ) { - $last_tag = 'th'; - } elseif ( $first_character === '+' ) { - $last_tag = 'caption'; - } else { - $last_tag = ''; - } + /** + * Helper function for doTableStuff() separating the contents of cells from + * attributes. Particularly useful as there's a possible bug and this action + * is repeated twice. + * + * @private + */ + function getCellAttr ( $cell, $tagName ) { + $content = null; + $attributes = null; - array_push( $last_tag_history , $last_tag ); + $cell = trim ( $cell ); - # A cell could contain both parameters and data - $cell_data = explode( '|' , $cell , 2 ); + // A cell could contain both parameters and data + $cellData = explode ( '|' , $cell , 2 ); - # Bug 553: Note that a '|' inside an invalid link should not - # be mistaken as delimiting cell parameters - if ( strpos( $cell_data[0], '[[' ) !== false ) { - $cell = "{$previous}<{$last_tag}>{$cell}"; - } elseif ( count( $cell_data ) == 1 ) { - $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; - } else { - $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag ); - $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; - } + // Bug 553: Note that a '|' inside an invalid link should not + // be mistaken as delimiting cell parameters + if ( strpos( $cellData[0], '[[' ) !== false ) { + $content = trim ( $cell ); + } + else if ( count ( $cellData ) == 1 ) { + $content = trim ( $cellData[0] ); + } + else { + $attributes = $this->mStripState->unstripBoth( $cellData[0] ); + $attributes = Sanitizer::fixTagAttributes( $attributes , $tagName ); - $outLine .= $cell; - array_push( $td_history , true ); - } - } - $out .= $outLine . "\n"; + $content = trim ( $cellData[1] ); } + return array( $content, $attributes ); + } + - # Closing open td, tr && table - while ( count( $td_history ) > 0 ) { - if ( array_pop( $td_history ) ) { - $out .= "\n"; + /** + * Helper function for doTableStuff(). This converts the structured array into html. + * + * @private + */ + function generateTableHTML ( &$table ) { + $return = ""; + $return .= str_repeat( '
' , $table['indent'] ); + $return .= ''; } - $out .= "\n"; - } + $return .= "\n'; + unset( $table[$i][$j] ); + } + $return .= "\n"; - # Remove trailing line-ending (b/c) - if ( substr( $out, -1 ) === "\n" ) { - $out = substr( $out, 0, -1 ); + if ( ( !isset( $table[$i + 1] ) && !$simple ) || ( isset( $table[$i + 1] ) && isset( $table[$i + 1]['type'] ) && $table[$i]['type'] != $table[$i + 1]['type'] ) ) { + $return .= ''; + } + $lastSection = $table[$i]['type']; + unset( $table[$i] ); } - - # special case: don't return empty table - if ( $out === "\n\n
" ) { - $out = ''; + if ( $empty ) { + if ( isset( $table['caption'] ) ) { + $return .= "\n"; + } else { + return ''; + } } + $return .= "\n"; + $return .= str_repeat( '
' , $table['indent'] ); - wfProfileOut( __METHOD__ ); + return $return; + } - return $out; + /** + * like end() but only works on the numeric array index and php's internal pointers + * returns a reference to the last element of an array much like "\$arr[-1]" in perl + * ignores associative elements and will create a 0 key will a NULL value if there were + * no numric elements and an array itself if not previously defined. + * + * @private + */ + function &last ( &$arr ) { + for ( $i = count( $arr ); ( !isset( $arr[$i] ) && $i > 0 ); $i-- ) { } + return $arr[$i]; } /** @@ -1093,10 +1157,10 @@ class Parser { (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . ' (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number ISBN\s+(\b # m[5]: ISBN, capture number - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix - (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters - [0-9Xx] # check digit - \b) + (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix + (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters + [0-9Xx] # check digit + \b) )!x', array( &$this, 'magicLinkCallback' ), $text ); wfProfileOut( __METHOD__ ); return $text; @@ -1128,10 +1192,8 @@ class Parser { throw new MWException( __METHOD__.': unrecognised match type "' . substr( $m[0], 0, 20 ) . '"' ); } - $url = wfMsgForContent( $urlmsg, $id); - $sk = $this->mOptions->getSkin( $this->mTitle ); - $la = $sk->getExternalLinkAttributes( "external $CssClass" ); - return "{$keyword} {$id}"; + $url = wfMsgForContent( $urlmsg, $id ); + return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass ); } elseif ( isset( $m[5] ) && $m[5] !== '' ) { # ISBN $isbn = $m[5]; @@ -1158,7 +1220,6 @@ class Parser { global $wgContLang; wfProfileIn( __METHOD__ ); - $sk = $this->mOptions->getSkin( $this->mTitle ); $trail = ''; # The characters '<' and '>' (which were escaped by @@ -1189,7 +1250,7 @@ class Parser { $text = $this->maybeMakeExternalImage( $url ); if ( $text === false ) { # Not an image, make a link - $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', + $text = Linker::makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters @@ -1396,7 +1457,7 @@ class Parser { /** * Replace external links (REL) * - * Note: this is all very hackish and the order of execution matters a lot. + * Note: this is all very hackish and the order of execution matters a lot. * Make sure to run maintenance/parserTests.php if you change this code. * * @private @@ -1405,8 +1466,6 @@ class Parser { global $wgContLang; wfProfileIn( __METHOD__ ); - $sk = $this->mOptions->getSkin( $this->mTitle ); - $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); $s = array_shift( $bits ); @@ -1464,7 +1523,7 @@ class Parser { # This means that users can paste URLs directly into the text # Funny characters like ö aren't valid in URLs anyway # This was changed in August 2004 - $s .= $sk->makeExternalLink( $url, $text, false, $linktype, + $s .= Linker::makeExternalLink( $url, $text, false, $linktype, $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail; # Register link in the output object. @@ -1514,7 +1573,6 @@ class Parser { return $attribs; } - /** * Replace unusual URL escape codes with their equivalent characters * @@ -1554,7 +1612,6 @@ class Parser { * @private */ function maybeMakeExternalImage( $url ) { - $sk = $this->mOptions->getSkin( $this->mTitle ); $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); $imagesexception = !empty( $imagesfrom ); $text = false; @@ -1573,10 +1630,10 @@ class Parser { $imagematch = false; } if ( $this->mOptions->getAllowExternalImages() - || ( $imagesexception && $imagematch ) ) { + || ( $imagesexception && $imagematch ) ) { if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) { # Image found - $text = $sk->makeExternalImage( $url ); + $text = Linker::makeExternalImage( $url ); } } if ( !$text && $this->mOptions->getEnableImageWhitelist() @@ -1589,7 +1646,7 @@ class Parser { } if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) { # Image matches a whitelist entry - $text = $sk->makeExternalImage( $url ); + $text = Linker::makeExternalImage( $url ); break; } } @@ -1630,10 +1687,9 @@ class Parser { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; } - $sk = $this->mOptions->getSkin( $this->mTitle ); $holders = new LinkHolderArray( $this ); - # split the entire text string on occurences of [[ + # split the entire text string on occurences of [[ $a = StringUtils::explode( '[[', ' ' . $s ); # get the first element (all text up to first [[), and remove the space we added $s = $a->current(); @@ -1725,14 +1781,14 @@ class Parser { # fix up urlencoded title texts if ( strpos( $m[1], '%' ) !== false ) { # Should anchors '#' also be rejected? - $m[1] = str_replace( array('<', '>'), array('<', '>'), urldecode( $m[1] ) ); + $m[1] = str_replace( array('<', '>'), array('<', '>'), rawurldecode( $m[1] ) ); } $trail = $m[3]; } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption $might_be_img = true; $text = $m[2]; if ( strpos( $m[1], '%' ) !== false ) { - $m[1] = urldecode( $m[1] ); + $m[1] = rawurldecode( $m[1] ); } $trail = ""; } else { # Invalid form; output directly @@ -1828,9 +1884,10 @@ class Parser { $text = $link; } else { # Bug 4598 madness. Handle the quotes only if they come from the alternate part - # [[Lista d''e paise d''o munno]] -> Lista d''e paise d''o munno - # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] -> Criticism of Harry Potter - $text = $this->doQuotes($text); + # [[Lista d''e paise d''o munno]] -> Lista d''e paise d''o munno + # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] + # -> Criticism of Harry Potter + $text = $this->doQuotes( $text ); } # Link not escaped by : , create the various objects @@ -1864,14 +1921,13 @@ class Parser { $holders->merge( $this->replaceInternalLinks2( $text ) ); } # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail; + $s .= $prefix . $this->armorLinks( + $this->makeImage( $nt, $text, $holders ) ) . $trail; } else { $s .= $prefix . $trail; } - $this->mOutput->addImage( $nt->getDBkey() ); wfProfileOut( __METHOD__."-image" ); continue; - } if ( $ns == NS_CATEGORY ) { @@ -1902,7 +1958,7 @@ class Parser { # Self-link checking if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { if ( in_array( $nt->getPrefixedText(), $selflink, true ) ) { - $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail ); + $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); continue; } } @@ -1912,16 +1968,14 @@ class Parser { if ( $ns == NS_MEDIA ) { wfProfileIn( __METHOD__."-media" ); # Give extensions a chance to select the file revision for us - $skip = $time = false; - wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); - if ( $skip ) { - $link = $sk->link( $nt ); - } else { - $link = $sk->makeMediaLinkObj( $nt, $text, $time ); - } + $time = $sha1 = $descQuery = false; + wfRunHooks( 'BeforeParserFetchFileAndTitle', + array( $this, $nt, &$time, &$sha1, &$descQuery ) ); + # Fetch and register the file (file title may be different via hooks) + list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $time, $sha1 ); # Cloak with NOPARSE to avoid replacement in replaceExternalLinks - $s .= $prefix . $this->armorLinks( $link ) . $trail; - $this->mOutput->addImage( $nt->getDBkey() ); + $s .= $prefix . $this->armorLinks( + Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; wfProfileOut( __METHOD__."-media" ); continue; } @@ -1934,10 +1988,10 @@ class Parser { # batch file existence checks for NS_FILE and NS_MEDIA if ( $iw == '' && $nt->isAlwaysKnown() ) { $this->mOutput->addLink( $nt ); - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); + $s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix ); } else { # Links will be added to the output link list after checking - $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); } wfProfileOut( __METHOD__."-always_known" ); } @@ -1945,18 +1999,6 @@ class Parser { return $holders; } - /** - * Make a link placeholder. The text returned can be later resolved to a real link with - * replaceLinkHolders(). This is done for two reasons: firstly to avoid further - * parsing of interwiki links, and secondly to allow all existence checks and - * article length checks (for stub links) to be bundled into a single query. - * - * @deprecated - */ - function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); - } - /** * Render a forced-blue link inline; protect against double expansion of * URLs if we're in a mode that prepends full URL prefixes to internal links. @@ -1966,16 +2008,23 @@ class Parser { * * @param $nt Title * @param $text String - * @param $query String + * @param $query Array or String * @param $trail String * @param $prefix String * @return String: HTML-wikitext mix oh yuck */ - function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) { + function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { list( $inside, $trail ) = Linker::splitTrail( $trail ); - $sk = $this->mOptions->getSkin( $this->mTitle ); - # FIXME: use link() instead of deprecated makeKnownLinkObj() - $link = $sk->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix ); + + if ( is_string( $query ) ) { + $query = wfCgiToArray( $query ); + } + if ( $text == '' ) { + $text = htmlspecialchars( $nt->getPrefixedText() ); + } + + $link = Linker::linkKnown( $nt, "$prefix$text$inside", array(), $query ); + return $this->armorLinks( $link ) . $trail; } @@ -2018,6 +2067,8 @@ class Parser { /**#@+ * Used by doBlockLevels() * @private + * + * @return string */ function closeParagraph() { $result = ''; @@ -2042,7 +2093,7 @@ class Parser { } for ( $i = 0; $i < $shorter; ++$i ) { - if ( $st1{$i} != $st2{$i} ) { + if ( $st1[$i] != $st2[$i] ) { break; } } @@ -2053,6 +2104,8 @@ class Parser { * These next three functions open, continue, and close the list * element appropriate to the prefix character passed into them. * @private + * + * @return string */ function openList( $char ) { $result = $this->closeParagraph(); @@ -2077,6 +2130,8 @@ class Parser { * TODO: document * @param $char String * @private + * + * @return string */ function nextItem( $char ) { if ( '*' === $char || '#' === $char ) { @@ -2101,6 +2156,8 @@ class Parser { * TODO: document * @param $char String * @private + * + * @return string */ function closeList( $char ) { if ( '*' === $char ) { @@ -2241,7 +2298,7 @@ class Parser { 'mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { $paragraphStack = false; - # TODO bug 5718: paragraph closed + # TODO bug 5718: paragraph closed $output .= $this->closeParagraph(); if ( $preOpenMatch and !$preCloseMatch ) { $this->mInPre = true; @@ -2340,7 +2397,7 @@ class Parser { $stack = 0; $len = strlen( $str ); for( $i = 0; $i < $len; $i++ ) { - $c = $str{$i}; + $c = $str[$i]; switch( $state ) { # (Using the number is a performance hack for common cases) @@ -2465,6 +2522,7 @@ class Parser { } if ( $stack > 0 ) { wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); + wfProfileOut( __METHOD__ ); return false; } wfProfileOut( __METHOD__ ); @@ -2475,6 +2533,9 @@ class Parser { * Return value of a magic variable (like PAGENAME) * * @private + * + * @param $index integer + * @param $frame PPFrame */ function getVariableValue( $index, $frame=false ) { global $wgContLang, $wgSitename, $wgServer; @@ -2561,25 +2622,25 @@ class Parser { $value = wfEscapeWikiText( $this->mTitle->getText() ); break; case 'pagenamee': - $value = $this->mTitle->getPartialURL(); + $value = wfEscapeWikiText( $this->mTitle->getPartialURL() ); break; case 'fullpagename': $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() ); break; case 'fullpagenamee': - $value = $this->mTitle->getPrefixedURL(); + $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() ); break; case 'subpagename': $value = wfEscapeWikiText( $this->mTitle->getSubpageText() ); break; case 'subpagenamee': - $value = $this->mTitle->getSubpageUrlForm(); + $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() ); break; case 'basepagename': $value = wfEscapeWikiText( $this->mTitle->getBaseText() ); break; case 'basepagenamee': - $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ); + $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) ); break; case 'talkpagename': if ( $this->mTitle->canTalk() ) { @@ -2592,7 +2653,7 @@ class Parser { case 'talkpagenamee': if ( $this->mTitle->canTalk() ) { $talkPage = $this->mTitle->getTalkPage(); - $value = $talkPage->getPrefixedUrl(); + $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() ); } else { $value = ''; } @@ -2603,7 +2664,7 @@ class Parser { break; case 'subjectpagenamee': $subjPage = $this->mTitle->getSubjectPage(); - $value = $subjPage->getPrefixedUrl(); + $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() ); break; case 'revisionid': # Let the edit saving system know we should parse the page @@ -2823,6 +2884,8 @@ class Parser { * dependency requirements. * * @private + * + * @return PPNode */ function preprocessToDom( $text, $flags = 0 ) { $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags ); @@ -2831,6 +2894,8 @@ class Parser { /** * Return a three-element array: leading whitespace, string contents, trailing whitespace + * + * @return array */ public static function splitWhitespace( $s ) { $ltrimmed = ltrim( $s ); @@ -2861,6 +2926,8 @@ class Parser { * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. * @param $argsOnly Boolean: only do argument (triple-brace) expansion, not double-brace expansion * @private + * + * @return string */ function replaceVariables( $text, $frame = false, $argsOnly = false ) { # Is there any text? Also, Prevent too big inclusions! @@ -2884,7 +2951,11 @@ class Parser { return $text; } - # Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. + /** + * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. + * + * @return array + */ static function createAssocArgs( $args ) { $assocArgs = array(); $index = 1; @@ -2970,6 +3041,7 @@ class Parser { $originalTitle = $part1; # $args is a list of argument nodes, starting from index 0, not including $part1 + # *** FIXME if piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; wfProfileOut( __METHOD__.'-setup' ); @@ -3140,7 +3212,7 @@ class Parser { && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { - $text = SpecialPage::capturePath( $title ); + $text = SpecialPageFactory::capturePath( $title ); if ( is_string( $text ) ) { $found = true; $isHTML = true; @@ -3265,6 +3337,8 @@ class Parser { /** * Get the semi-parsed DOM representation of a template with a given title, * and its redirect destination title. Cached. + * + * @return array */ function getTemplateDom( $title ) { $cacheTitle = $title; @@ -3300,6 +3374,8 @@ class Parser { /** * Fetch the unparsed text of a template and register a reference to it. + * @param Title $title + * @return Array ( string or false, Title ) */ function fetchTemplateAndTitle( $title ) { $templateCb = $this->mOptions->getTemplateCallback(); # Defaults to Parser::statelessFetchTemplate() @@ -3314,6 +3390,11 @@ class Parser { return array( $text, $finalTitle ); } + /** + * Fetch the unparsed text of a template and register a reference to it. + * @param Title $title + * @return mixed string or false + */ function fetchTemplate( $title ) { $rv = $this->fetchTemplateAndTitle( $title ); return $rv[0]; @@ -3322,8 +3403,10 @@ class Parser { /** * Static function to get a template * Can be overridden via ParserOptions::setTemplateCallback(). + * + * @return array */ - static function statelessFetchTemplate( $title, $parser=false ) { + static function statelessFetchTemplate( $title, $parser = false ) { $text = $skip = false; $finalTitle = $title; $deps = array(); @@ -3332,17 +3415,22 @@ class Parser { for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { # Give extensions a chance to select the revision instead $id = false; # Assume current - wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) ); + wfRunHooks( 'BeforeParserFetchTemplateAndtitle', + array( $parser, $title, &$skip, &$id ) ); if ( $skip ) { $text = false; $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => null ); + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => null + ); break; } - $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); + # Get the revision + $rev = $id + ? Revision::newFromId( $id ) + : Revision::newFromTitle( $title ); $rev_id = $rev ? $rev->getId() : 0; # If there is no current revision, there is no page if ( $id === false && !$rev ) { @@ -3351,20 +3439,27 @@ class Parser { } $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => $rev_id ); + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => $rev_id ); + if ( $rev && !$title->equals( $rev->getTitle() ) ) { + # We fetched a rev from a different title; register it too... + $deps[] = array( + 'title' => $rev->getTitle(), + 'page_id' => $rev->getPage(), + 'rev_id' => $rev_id ); + } if ( $rev ) { $text = $rev->getText(); } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) { global $wgContLang; - $message = $wgContLang->lcfirst( $title->getText() ); - $text = wfMsgForContentNoTrans( $message ); - if ( wfEmptyMsg( $message, $text ) ) { + $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage(); + if ( !$message->exists() ) { $text = false; break; } + $text = $message->plain(); } else { break; } @@ -3381,8 +3476,48 @@ class Parser { 'deps' => $deps ); } + /** + * Fetch a file and its title and register a reference to it. + * @param Title $title + * @param string $time MW timestamp + * @param string $sha1 base 36 SHA-1 + * @return mixed File or false + */ + function fetchFile( $title, $time = false, $sha1 = false ) { + $res = $this->fetchFileAndTitle( $title, $time, $sha1 ); + return $res[0]; + } + + /** + * Fetch a file and its title and register a reference to it. + * @param Title $title + * @param string $time MW timestamp + * @param string $sha1 base 36 SHA-1 + * @return Array ( File or false, Title of file ) + */ + function fetchFileAndTitle( $title, $time = false, $sha1 = false ) { + if ( $time === '0' ) { + $file = false; // broken thumbnail forced by hook + } elseif ( $sha1 ) { // get by (sha1,timestamp) + $file = RepoGroup::singleton()->findFileFromKey( $sha1, array( 'time' => $time ) ); + } else { // get by (name,timestamp) + $file = wfFindFile( $title, array( 'time' => $time ) ); + } + $time = $file ? $file->getTimestamp() : false; + $sha1 = $file ? $file->getSha1() : false; + # Register the file as a dependency... + $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); + if ( $file && !$title->equals( $file->getTitle() ) ) { + # Update fetched file title + $title = $file->getTitle(); + } + return array( $file, $title ); + } + /** * Transclude an interwiki link. + * + * @return string */ function interwikiTransclude( $title, $action ) { global $wgEnableScaryTranscluding; @@ -3399,6 +3534,10 @@ class Parser { return $this->fetchScaryTemplateMaybeFromCache( $url ); } + /** + * @param $url string + * @return Mixed|String + */ function fetchScaryTemplateMaybeFromCache( $url ) { global $wgTranscludeCacheExpiry; $dbr = wfGetDB( DB_SLAVE ); @@ -3423,10 +3562,14 @@ class Parser { return $text; } - /** * Triple brace replacement -- used for template arguments * @private + * + * @param $peice array + * @param $frame PPFrame + * + * @return array */ function argSubstitution( $piece, $frame ) { wfProfileIn( __METHOD__ ); @@ -3439,9 +3582,9 @@ class Parser { $text = $frame->getArgument( $argName ); if ( $text === false && $parts->getLength() > 0 && ( - $this->ot['html'] - || $this->ot['pre'] - || ( $this->ot['wiki'] && $frame->isTemplate() ) + $this->ot['html'] + || $this->ot['pre'] + || ( $this->ot['wiki'] && $frame->isTemplate() ) ) ) { # No match in frame, use the supplied default @@ -3480,6 +3623,8 @@ class Parser { * inner Contents of extension element * noClose Original text did not have a close tag * @param $frame PPFrame + * + * @return string */ function extensionSubstitution( $params, $frame ) { $name = $frame->expand( $params['name'] ); @@ -3548,9 +3693,9 @@ class Parser { if ( $markerType === 'none' ) { return $output; } elseif ( $markerType === 'nowiki' ) { - $this->mStripState->nowiki->setPair( $marker, $output ); + $this->mStripState->addNoWiki( $marker, $output ); } elseif ( $markerType === 'general' ) { - $this->mStripState->general->setPair( $marker, $output ); + $this->mStripState->addGeneral( $marker, $output ); } else { throw new MWException( __METHOD__.': invalid marker type' ); } @@ -3565,7 +3710,7 @@ class Parser { * @return Boolean: false if this inclusion would take it over the maximum, true otherwise */ function incrementIncludeSize( $type, $size ) { - if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize( $type ) ) { + if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) { return false; } else { $this->mIncludeSizes[$type] += $size; @@ -3632,7 +3777,7 @@ class Parser { $this->mOutput->setIndexPolicy( 'index' ); $this->addTrackingCategory( 'index-category' ); } - + # Cache all double underscores in the database foreach ( $this->mDoubleUnderscores as $key => $val ) { $this->mOutput->setProperty( $key, '' ); @@ -3685,14 +3830,15 @@ class Parser { function formatHeadings( $text, $origText, $isMain=true ) { global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds; - $doNumberHeadings = $this->mOptions->getNumberHeadings(); - # Inhibit editsection links if requested in the page if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { $showEditLink = 0; } else { $showEditLink = $this->mOptions->getEditSection(); } + if ( $showEditLink ) { + $this->mOutput->setEditSectionTokens( true ); + } # Get all headlines for numbering them and adding funky stuff like [edit] # links - this is for later, but we need the number of headlines right now @@ -3723,9 +3869,6 @@ class Parser { $enoughToc = true; } - # We need this to perform operations on the HTML - $sk = $this->mOptions->getSkin( $this->mTitle ); - # headline counter $headlineCount = 0; $numVisible = 0; @@ -3776,7 +3919,7 @@ class Parser { $sublevelCount[$toclevel] = 0; if ( $toclevel<$wgMaxTocLevel ) { $prevtoclevel = $toclevel; - $toc .= $sk->tocIndent(); + $toc .= Linker::tocIndent(); $numVisible++; } } elseif ( $level < $prevlevel && $toclevel > 1 ) { @@ -3799,16 +3942,16 @@ class Parser { if ( $toclevel<$wgMaxTocLevel ) { if ( $prevtoclevel < $wgMaxTocLevel ) { # Unindent only if the previous toc level was shown :p - $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel ); + $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); $prevtoclevel = $toclevel; } else { - $toc .= $sk->tocLineEnd(); + $toc .= Linker::tocLineEnd(); } } } else { # No change in level, end TOC line if ( $toclevel<$wgMaxTocLevel ) { - $toc .= $sk->tocLineEnd(); + $toc .= Linker::tocLineEnd(); } } @@ -3877,8 +4020,8 @@ class Parser { 'noninitial' ); } - # HTML names must be case-insensitively unique (bug 10721). - # This does not apply to Unicode characters per + # HTML names must be case-insensitively unique (bug 10721). + # This does not apply to Unicode characters per # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison # FIXME: We may be changing them depending on the current locale. $arrayKey = strtolower( $safeHeadline ); @@ -3901,7 +4044,7 @@ class Parser { } # Don't number the heading if it is the only one (looks silly) - if ( $doNumberHeadings && count( $matches[3] ) > 1) { + if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) { # the two are different if the line contains a link $headline = $numbering . ' ' . $headline; } @@ -3916,7 +4059,7 @@ class Parser { $legacyAnchor .= '_' . $refers[$legacyArrayKey]; } if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) { - $toc .= $sk->tocLine( $anchor, $tocline, + $toc .= Linker::tocLine( $anchor, $tocline, $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) ); } @@ -3946,17 +4089,32 @@ class Parser { # give headline the correct tag if ( $showEditLink && $sectionIndex !== false ) { + // Output edit section links as markers with styles that can be customized by skins if ( $isTemplate ) { # Put a T flag in the section identifier, to indicate to extractSections() # that sections inside should be counted. - $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex", null, $this->mOptions->getUserLang() ); + $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ ); + } else { + $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint ); + } + // We use a bit of pesudo-xml for editsection markers. The language converter is run later on + // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff + // And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped + // so we don't have to worry about a user trying to input one of these markers directly. + // We use a page and section attribute to stop the language converter from converting these important bits + // of data, but put the headline hint inside a content block because the language converter is supposed to + // be able to convert that piece of data. + $editlink = ''; } else { - $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint, $this->mOptions->getUserLang() ); + $editlink .= '/>'; } } else { $editlink = ''; } - $head[$headlineCount] = $sk->makeHeadline( $level, + $head[$headlineCount] = Linker::makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink, $legacyAnchor ); @@ -3972,9 +4130,9 @@ class Parser { if ( $enoughToc ) { if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { - $toc .= $sk->tocUnindent( $prevtoclevel - 1 ); + $toc .= Linker::tocUnindent( $prevtoclevel - 1 ); } - $toc = $sk->tocList( $toc ); + $toc = Linker::tocList( $toc, $this->mOptions->getUserLang() ); $this->mOutput->setTOCHTML( $toc ); } @@ -4025,22 +4183,17 @@ class Parser { * @param $clearState Boolean: whether to clear the parser state first * @return String: the altered wiki markup */ - public function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) { - $options->resetUsage(); - $this->mOptions = $options; - $this->setTitle( $title ); + public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) { + $this->startParse( $title, $options, self::OT_WIKI, $clearState ); $this->setUser( $user ); - $this->setOutputType( self::OT_WIKI ); - - if ( $clearState ) { - $this->clearState(); - } $pairs = array( "\r\n" => "\n", ); $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); - $text = $this->pstPass2( $text, $user ); + if( $options->getPreSaveTransform() ) { + $text = $this->pstPass2( $text, $user ); + } $text = $this->mStripState->unstripBoth( $text ); $this->setUser( null ); #Reset @@ -4077,9 +4230,9 @@ class Parser { # whatever crap the system uses, localised or not, so we cannot # ship premade translations. $key = 'timezone-' . strtolower( trim( $tzMsg ) ); - $value = wfMsgForContent( $key ); - if ( !wfEmptyMsg( $key, $value ) ) { - $tzMsg = $value; + $msg = wfMessage( $key )->inContentLanguage(); + if ( $msg->exists() ) { + $tzMsg = $msg->text(); } date_default_timezone_set( $oldtz ); @@ -4090,6 +4243,9 @@ class Parser { # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags $text = $this->replaceVariables( $text ); + # This works almost by chance, as the replaceVariables are done before the getUserSig(), + # which may corrupt this parser instance via its wfMsgExt( parsemag ) call- + # Signatures $sigText = $this->getUserSig( $user ); $text = strtr( $text, array( @@ -4135,6 +4291,8 @@ class Parser { * validated, ready-to-insert wikitext. * If you have pre-fetched the nickname or the fancySig option, you can * specify them here to save a database query. + * Do not reuse this parser instance after calling getUserSig(), + * as it may have changed if it's the $wgParser. * * @param $user User * @param $nickname String: nickname to use or false to use user's default nickname @@ -4208,9 +4366,9 @@ class Parser { function cleanSig( $text, $parsing = false ) { if ( !$parsing ) { global $wgTitle; + $this->mOptions = new ParserOptions; $this->clearState(); $this->setTitle( $wgTitle ); - $this->mOptions = new ParserOptions; $this->setOutputType = self::OT_PREPROCESS; } @@ -4253,9 +4411,12 @@ class Parser { * Set up some variables which are usually set up in parse() * so that an external function can call some class members with confidence */ - public function startExternalParse( &$title, $options, $outputType, $clearState = true ) { + public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) { + $this->startParse( $title, $options, $outputType, $clearState ); + } + + private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) { $this->setTitle( $title ); - $options->resetUsage(); $this->mOptions = $options; $this->setOutputType( $outputType ); if ( $clearState ) { @@ -4268,10 +4429,10 @@ class Parser { * * @param $text String: the text to preprocess * @param $options ParserOptions: options + * @param $title Title object or null to use $wgTitle * @return String */ - public function transformMsg( $text, $options ) { - global $wgTitle; + public function transformMsg( $text, $options, $title = null ) { static $executing = false; # Guard against infinite recursion @@ -4281,7 +4442,16 @@ class Parser { $executing = true; wfProfileIn( __METHOD__ ); - $text = $this->preprocess( $text, $wgTitle, $options ); + if ( !$title ) { + global $wgTitle; + $title = $wgTitle; + } + if ( !$title ) { + # It's not uncommon having a null $wgTitle in scripts. See r80898 + # Create a ghost title in such case + $title = Title::newFromText( 'Dwimmerlaik' ); + } + $text = $this->preprocess( $text, $title, $options ); $executing = false; wfProfileOut( __METHOD__ ); @@ -4291,17 +4461,29 @@ class Parser { /** * Create an HTML-style tag, e.g. special text * The callback should have the following form: - * function myParserHook( $text, $params, $parser ) { ... } + * function myParserHook( $text, $params, $parser, $frame ) { ... } * * Transform and return $text. Use $parser for any required context, e.g. use * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions * + * Hooks may return extended information by returning an array, of which the + * first numbered element (index 0) must be the return string, and all other + * entries are extracted into local variables within an internal function + * in the Parser class. + * + * This interface (introduced r61913) appears to be undocumented, but + * 'markerName' is used by some core tag hooks to override which strip + * array their results are placed in. **Use great caution if attempting + * this interface, as it is not documented and injudicious use could smash + * private variables.** + * * @param $tag Mixed: the tag to use, e.g. 'hook' for * @param $callback Mixed: the callback function (and object) to use for the tag * @return The old value of the mTagHooks array associated with the hook */ public function setHook( $tag, $callback ) { $tag = strtolower( $tag ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" ); $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null; $this->mTagHooks[$tag] = $callback; if ( !in_array( $tag, $this->mStripList ) ) { @@ -4311,8 +4493,25 @@ class Parser { return $oldVal; } + /** + * As setHook(), but letting the contents be parsed. + * + * Transparent tag hooks are like regular XML-style tag hooks, except they + * operate late in the transformation sequence, on HTML instead of wikitext. + * + * This is probably obsoleted by things dealing with parser frames? + * The only extension currently using it is geoserver. + * + * @since 1.10 + * @todo better document or deprecate this + * + * @param $tag Mixed: the tag to use, e.g. 'hook' for + * @param $callback Mixed: the callback function (and object) to use for the tag + * @return The old value of the mTagHooks array associated with the hook + */ function setTransparentTagHook( $tag, $callback ) { $tag = strtolower( $tag ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" ); $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null; $this->mTransparentTagHooks[$tag] = $callback; @@ -4417,6 +4616,7 @@ class Parser { */ function setFunctionTagHook( $tag, $callback, $flags ) { $tag = strtolower( $tag ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" ); $old = isset( $this->mFunctionTagHooks[$tag] ) ? $this->mFunctionTagHooks[$tag] : null; $this->mFunctionTagHooks[$tag] = array( $callback, $flags ); @@ -4457,6 +4657,10 @@ class Parser { * given as text will return the HTML of a gallery with two images, * labeled 'The number "1"' and * 'A tree'. + * + * @param string $text + * @param array $param + * @return string HTML */ function renderImageGallery( $text, $params ) { $ig = new ImageGallery(); @@ -4466,8 +4670,6 @@ class Parser { $ig->setParser( $this ); $ig->setHideBadImages(); $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) ); - $ig->useSkin( $this->mOptions->getSkin( $this->mTitle ) ); - $ig->mRevisionId = $this->mRevisionId; if ( isset( $params['showfilename'] ) ) { $ig->setShowFilename( true ); @@ -4504,28 +4706,40 @@ class Parser { } if ( strpos( $matches[0], '%' ) !== false ) { - $matches[1] = urldecode( $matches[1] ); + $matches[1] = rawurldecode( $matches[1] ); } - $tp = Title::newFromText( $matches[1] ); - $nt =& $tp; - if ( is_null( $nt ) ) { + $title = Title::newFromText( $matches[1], NS_FILE ); + if ( is_null( $title ) ) { # Bogus title. Ignore these so we don't bomb out later. continue; } + + $label = ''; + $alt = ''; if ( isset( $matches[3] ) ) { - $label = $matches[3]; - } else { - $label = ''; + // look for an |alt= definition while trying not to break existing + // captions with multiple pipes (|) in it, until a more sensible grammar + // is defined for images in galleries + + $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); + $altmatches = StringUtils::explode('|', $matches[3]); + $magicWordAlt = MagicWord::get( 'img_alt' ); + + foreach ( $altmatches as $altmatch ) { + $match = $magicWordAlt->matchVariableStartToEnd( $altmatch ); + if ( $match ) { + $alt = $this->stripAltText( $match, false ); + } + else { + // concatenate all other pipes + $label .= '|' . $altmatch; + } + } + // remove the first pipe + $label = substr( $label, 1 ); } - $html = $this->recursiveTagParse( trim( $label ) ); - - $ig->add( $nt, $html ); - - # Only add real images (bug #5586) - if ( $nt->getNamespace() == NS_FILE ) { - $this->mOutput->addImage( $nt->getDBkey() ); - } + $ig->add( $title, $label, $alt ); } return $ig->toHTML(); } @@ -4576,6 +4790,7 @@ class Parser { * @param $title Title * @param $options String * @param $holders LinkHolderArray + * @return string HTML */ function makeImage( $title, $options, $holders = false ) { # Check if the options text is of the form "options|alt text" @@ -4604,23 +4819,23 @@ class Parser { # * text-bottom $parts = StringUtils::explode( "|", $options ); - $sk = $this->mOptions->getSkin( $this->mTitle ); # Give extensions a chance to select the file revision for us - $skip = $time = $descQuery = false; - wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time, &$descQuery ) ); - - if ( $skip ) { - return $sk->link( $title ); - } + $time = $sha1 = $descQuery = false; + wfRunHooks( 'BeforeParserFetchFileAndTitle', + array( $this, $title, &$time, &$sha1, &$descQuery ) ); + # Fetch and register the file (file title may be different via hooks) + list( $file, $title ) = $this->fetchFileAndTitle( $title, $time, $sha1 ); - # Get the file - $file = wfFindFile( $title, array( 'time' => $time ) ); # Get parameter map $handler = $file ? $file->getHandler() : false; list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); + if ( !$file ) { + $this->addTrackingCategory( 'broken-file-category' ); + } + # Process the input parameters $caption = ''; $params = array( 'frame' => array(), 'handler' => array(), @@ -4724,9 +4939,9 @@ class Parser { # Will the image be presented in a frame, with the caption below? $imageIsFramed = isset( $params['frame']['frame'] ) || - isset( $params['frame']['framed'] ) || - isset( $params['frame']['thumbnail'] ) || - isset( $params['frame']['manualthumb'] ); + isset( $params['frame']['framed'] ) || + isset( $params['frame']['thumbnail'] ) || + isset( $params['frame']['manualthumb'] ); # In the old days, [[Image:Foo|text...]] would set alt text. Later it # came to also set the caption, ordinary text after the image -- which @@ -4770,7 +4985,8 @@ class Parser { wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) ); # Linker does the rest - $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery, $this->mOptions->getThumbSize() ); + $ret = Linker::makeImageLink2( $title, $file, $params['frame'], $params['handler'], + $time, $descQuery, $this->mOptions->getThumbSize() ); # Give the handler a chance to modify the parser object if ( $handler ) { @@ -4816,7 +5032,6 @@ class Parser { * @param $text String * @param $frame PPFrame * @return String - * @private */ function attributeStripCallback( &$text, $frame = false ) { $text = $this->replaceVariables( $text, $frame ); @@ -4826,11 +5041,37 @@ class Parser { /** * Accessor + * + * @return array */ function getTags() { return array_merge( array_keys( $this->mTransparentTagHooks ), array_keys( $this->mTagHooks ) ); } + /** + * Replace transparent tags in $text with the values given by the callbacks. + * + * Transparent tag hooks are like regular XML-style tag hooks, except they + * operate late in the transformation sequence, on HTML instead of wikitext. + */ + function replaceTransparentTags( $text ) { + $matches = array(); + $elements = array_keys( $this->mTransparentTagHooks ); + $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + + foreach ( $matches as $marker => $data ) { + list( $element, $content, $params, $tag ) = $data; + $tagName = strtolower( $element ); + if ( isset( $this->mTransparentTagHooks[$tagName] ) ) { + $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) ); + } else { + $output = $tag; + } + $this->mStripState->addGeneral( $marker, $output ); + } + return $text; + } + /** * Break wikitext input into sections, and either pull or replace * some particular section's text. @@ -4857,11 +5098,8 @@ class Parser { * for "replace", the whole page with the section replaced. */ private function extractSections( $text, $section, $mode, $newText='' ) { - global $wgTitle; - $this->clearState(); - $this->setTitle( $wgTitle ); # not generally used but removes an ugly failure mode - $this->mOptions = new ParserOptions; - $this->setOutputType( self::OT_PLAIN ); + global $wgTitle; # not generally used but removes an ugly failure mode + $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true ); $outText = ''; $frame = $this->getPreprocessor()->newFrame(); @@ -4885,6 +5123,10 @@ class Parser { if ( $sectionIndex == 0 ) { # Section zero doesn't nest, level=big $targetLevel = 1000; + if ( !$node ) { + # The page definitely exists - we checked that earlier - so it must be blank: see bug #14005 + return $text; + } } else { while ( $node ) { if ( $node->getName() === 'h' ) { @@ -4965,6 +5207,15 @@ class Parser { return $this->extractSections( $text, $section, "get", $deftext ); } + /** + * This function returns $oldtext after the content of the section + * specified by $section has been replaced with $text. + * + * @param $oldtext String: former text of the article + * @param $section Numeric: section identifier + * @param $text String: replacing text + * @return String: modified text + */ public function replaceSection( $oldtext, $section, $text ) { return $this->extractSections( $oldtext, $section, "replace", $text ); } @@ -4981,7 +5232,7 @@ class Parser { /** * Get the revision object for $this->mRevisionId * - * @return either a Revision object or null + * @return Revision|null either a Revision object or null */ protected function getRevisionObject() { if ( !is_null( $this->mRevisionObject ) ) { @@ -5055,7 +5306,11 @@ class Parser { /** * Accessor for $mDefaultSort - * Will use the title/prefixed title if none is set + * Will use the empty string if none is set. + * + * This value is treated as a prefix, so the + * empty string is equivalent to sorting by + * page name. * * @return string */ @@ -5063,7 +5318,7 @@ class Parser { if ( $this->mDefaultSort !== false ) { return $this->mDefaultSort; } else { - return $this->mTitle->getCategorySortkey(); + return ''; } } @@ -5138,16 +5393,15 @@ class Parser { /** * strip/replaceVariables/unstrip for preprocessor regression testing + * + * @return string */ - function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) { - $this->clearState(); + function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) { if ( !$title instanceof Title ) { $title = Title::newFromText( $title ); } - $this->mTitle = $title; - $options->resetUsage(); - $this->mOptions = $options; - $this->setOutputType( $outputType ); + $this->startParse( $title, $options, $outputType, true ); + $text = $this->replaceVariables( $text ); $text = $this->mStripState->unstripBoth( $text ); $text = Sanitizer::removeHTMLtags( $text ); @@ -5169,6 +5423,19 @@ class Parser { return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); } + /** + * Call a callback function on all regions of the given text that are not + * inside strip markers, and replace those regions with the return value + * of the callback. For example, with input: + * + * aaabbb + * + * This will call the callback function twice, with 'aaa' and 'bbb'. Those + * two strings will be replaced with the value returned by the callback in + * each case. + * + * @return string + */ function markerSkipCallback( $s, $callback ) { $i = 0; $out = ''; @@ -5193,168 +5460,72 @@ class Parser { return $out; } - function serialiseHalfParsedText( $text ) { - $data = array(); - $data['text'] = $text; - - # First, find all strip markers, and store their - # data in an array. - $stripState = new StripState; - $pos = 0; - while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) ) - && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) ) - { - $end_pos += strlen( self::MARKER_SUFFIX ); - $marker = substr( $text, $start_pos, $end_pos-$start_pos ); - - if ( !empty( $this->mStripState->general->data[$marker] ) ) { - $replaceArray = $stripState->general; - $stripText = $this->mStripState->general->data[$marker]; - } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) { - $replaceArray = $stripState->nowiki; - $stripText = $this->mStripState->nowiki->data[$marker]; - } else { - throw new MWException( "Hanging strip marker: '$marker'." ); - } - - $replaceArray->setPair( $marker, $stripText ); - $pos = $end_pos; - } - $data['stripstate'] = $stripState; - - # Now, find all of our links, and store THEIR - # data in an array! :) - $links = array( 'internal' => array(), 'interwiki' => array() ); - $pos = 0; - - # Internal links - while ( ( $start_pos = strpos( $text, '' ) ) ); - $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key]; - $pos = $start_pos + strlen( "" ); - } - - $pos = 0; - - # Interwiki links - while ( ( $start_pos = strpos( $text, '' ) ) ); - $links['interwiki'][] = $this->mLinkHolders->interwiki[$key]; - $pos = $start_pos + strlen( "" ); - } - - $data['linkholder'] = $links; - + /** + * Save the parser state required to convert the given half-parsed text to + * HTML. "Half-parsed" in this context means the output of + * recursiveTagParse() or internalParse(). This output has strip markers + * from replaceVariables (extensionSubstitution() etc.), and link + * placeholders from replaceLinkHolders(). + * + * Returns an array which can be serialized and stored persistently. This + * array can later be loaded into another parser instance with + * unserializeHalfParsedText(). The text can then be safely incorporated into + * the return value of a parser hook. + * + * @return array + */ + function serializeHalfParsedText( $text ) { + wfProfileIn( __METHOD__ ); + $data = array( + 'text' => $text, + 'version' => self::HALF_PARSED_VERSION, + 'stripState' => $this->mStripState->getSubState( $text ), + 'linkHolders' => $this->mLinkHolders->getSubArray( $text ) + ); + wfProfileOut( __METHOD__ ); return $data; } /** - * TODO: document - * @param $data Array - * @param $intPrefix String unique identifying prefix + * Load the parser state given in the $data array, which is assumed to + * have been generated by serializeHalfParsedText(). The text contents is + * extracted from the array, and its markers are transformed into markers + * appropriate for the current Parser instance. This transformed text is + * returned, and can be safely included in the return value of a parser + * hook. + * + * If the $data array has been stored persistently, the caller should first + * check whether it is still valid, by calling isValidHalfParsedText(). + * + * @param $data Serialized data * @return String */ - function unserialiseHalfParsedText( $data, $intPrefix = null ) { - if ( !$intPrefix ) { - $intPrefix = self::getRandomString(); + function unserializeHalfParsedText( $data ) { + if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { + throw new MWException( __METHOD__.': invalid version' ); } # First, extract the strip state. - $stripState = $data['stripstate']; - $this->mStripState->general->merge( $stripState->general ); - $this->mStripState->nowiki->merge( $stripState->nowiki ); - - # Now, extract the text, and renumber links - $text = $data['text']; - $links = $data['linkholder']; - - # Internal... - foreach ( $links['internal'] as $ns => $nsLinks ) { - foreach ( $nsLinks as $key => $entry ) { - $newKey = $intPrefix . '-' . $key; - $this->mLinkHolders->internals[$ns][$newKey] = $entry; - - $text = str_replace( "", "", $text ); - } - } + $texts = array( $data['text'] ); + $texts = $this->mStripState->merge( $data['stripState'], $texts ); - # Interwiki... - foreach ( $links['interwiki'] as $key => $entry ) { - $newKey = "$intPrefix-$key"; - $this->mLinkHolders->interwikis[$newKey] = $entry; - - $text = str_replace( "", "", $text ); - } + # Now renumber links + $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts ); # Should be good to go. - return $text; - } -} - -/** - * @todo document, briefly. - * @ingroup Parser - */ -class StripState { - var $general, $nowiki; - - function __construct() { - $this->general = new ReplacementArray; - $this->nowiki = new ReplacementArray; - } - - function unstripGeneral( $text ) { - wfProfileIn( __METHOD__ ); - do { - $oldText = $text; - $text = $this->general->replace( $text ); - } while ( $text !== $oldText ); - wfProfileOut( __METHOD__ ); - return $text; - } - - function unstripNoWiki( $text ) { - wfProfileIn( __METHOD__ ); - do { - $oldText = $text; - $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); - wfProfileOut( __METHOD__ ); - return $text; - } - - function unstripBoth( $text ) { - wfProfileIn( __METHOD__ ); - do { - $oldText = $text; - $text = $this->general->replace( $text ); - $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); - wfProfileOut( __METHOD__ ); - return $text; + return $texts[0]; } -} -/** - * @todo document, briefly. - * @ingroup Parser - */ -class OnlyIncludeReplacer { - var $output = ''; - - function replace( $matches ) { - if ( substr( $matches[1], -1 ) === "\n" ) { - $this->output .= substr( $matches[1], 0, -1 ); - } else { - $this->output .= $matches[1]; - } + /** + * Returns true if the given array, presumed to be generated by + * serializeHalfParsedText(), is compatible with the current version of the + * parser. + * + * @param $data Array + * + * @return bool + */ + function isValidHalfParsedText( $data ) { + return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION; } }