X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=76974c935c21149f38f88028e27fe0b3812db731;hb=1c6dcbb71d421aec8a01c76ef81a3c12e7f6f4e0;hp=7045690b15985d9bdb3ff15d53525fa676bd902e;hpb=109f7bcb9de9b9f82baa5a9a7454dc7448a05aec;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 7045690b15..76974c935c 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -34,7 +34,7 @@ * Globals used: * objects: $wgLang, $wgContLang * - * NOT $wgArticle, $wgUser or $wgTitle. Keep them away! + * NOT $wgUser or $wgTitle. Keep them away! * * settings: * $wgUseDynamicDates*, $wgInterwikiMagic*, @@ -68,7 +68,7 @@ class Parser { # Constants needed for external link processing # Everything except bracket, space, or control characters - const EXT_LINK_URL_CLASS = '(?:[^\]\[<>"\\x00-\\x20\\x7F]|(?:\[\]))'; + const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]'; const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; @@ -109,12 +109,23 @@ class Parser { var $mImageParamsMagicArray = array(); var $mMarkerIndex = 0; var $mFirstCall = true; - var $mVariables, $mSubstWords; # Initialised by initialiseVariables() + + # Initialised by initialiseVariables() + + /** + * @var MagicWordArray + */ + var $mVariables; + + /** + * @var MagicWordArray + */ + var $mSubstWords; var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor # Cleared with clearState(): /** - * @var OutputPage + * @var ParserOutput */ var $mOutput; var $mAutonumber, $mDTopen; @@ -125,7 +136,12 @@ class Parser { var $mStripState; var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mLinkHolders, $mLinkID; + /** + * @var LinkHolderArray + */ + var $mLinkHolders; + + var $mLinkID; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; # empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; @@ -153,9 +169,14 @@ class Parser { var $mRevisionObject; # The revision object of the specified revision ID var $mRevisionId; # ID to display in {{REVISIONID}} tags var $mRevisionTimestamp; # The timestamp of the specified revision ID - var $mRevisionUser; # Userto display in {{REVISIONUSER}} tag + var $mRevisionUser; # User to display in {{REVISIONUSER}} tag var $mRevIdForTs; # The revision ID which was used to fetch the timestamp + /** + * @var string + */ + var $mUniqPrefix; + /** * Constructor */ @@ -163,9 +184,12 @@ class Parser { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. - '(?:[^\]\[<>"\x00-\x20\x7F]|\[\])+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; + '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; + } elseif ( defined( 'MW_COMPILED' ) ) { + # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode + $this->mPreprocessorClass = 'Preprocessor_Hash'; } elseif ( extension_loaded( 'domxml' ) ) { # PECL extension that conflicts with the core DOM extension (bug 13770) wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); @@ -175,6 +199,7 @@ class Parser { } else { $this->mPreprocessorClass = 'Preprocessor_Hash'; } + wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" ); } /** @@ -182,7 +207,7 @@ class Parser { */ function __destruct() { if ( isset( $this->mLinkHolders ) ) { - $this->mLinkHolders->__destruct(); + unset( $this->mLinkHolders ); } foreach ( $this as $name => $value ) { unset( $this->$name ); @@ -278,7 +303,7 @@ class Parser { * Do not call this function recursively. * * @param $text String: text we want to parse - * @param $title A title object + * @param $title Title object * @param $options ParserOptions * @param $linestart boolean * @param $clearState boolean @@ -448,6 +473,8 @@ class Parser { * * @param $text String: text extension wants to have parsed * @param $frame PPFrame: The frame to use for expanding any template variables + * + * @return string */ function recursiveTagParse( $text, $frame=false ) { wfProfileIn( __METHOD__ ); @@ -496,7 +523,7 @@ class Parser { /** * Get a random string * - * @static + * @return string */ static public function getRandomString() { return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); @@ -519,7 +546,7 @@ class Parser { */ public function uniqPrefix() { if ( !isset( $this->mUniqPrefix ) ) { - # @todo Fixme: this is probably *horribly wrong* + # @todo FIXME: This is probably *horribly wrong* # LanguageConverter seems to want $wgParser's uniqPrefix, however # if this is called for a parser cache hit, the parser may not # have ever been initialized in the first place. @@ -532,6 +559,8 @@ class Parser { /** * Set the context title + * + * @param $t Title */ function setTitle( $t ) { if ( !$t || $t instanceof FakeTitle ) { @@ -620,10 +649,16 @@ class Parser { return wfSetVar( $this->mOptions, $x ); } + /** + * @return int + */ function nextLinkID() { return $this->mLinkID++; } + /** + * @param $id int + */ function setLinkID( $id ) { $this->mLinkID = $id; } @@ -638,7 +673,7 @@ class Parser { if ( $target !== null ) { return $target; } else { - return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang; + return $this->mOptions->getInterfaceMessage() ? $wgLang : $this->mTitle->getPageLanguage(); } } @@ -679,15 +714,13 @@ class Parser { * array( 'param' => 'x' ), * 'tag content' ) ) * - * @param $elements list of element names. Comments are always extracted. - * @param $text Source text string. - * @param $matches Out parameter, Array: extracted tags - * @param $uniq_prefix + * @param $elements array list of element names. Comments are always extracted. + * @param $text string Source text string. + * @param $matches array Out parameter, Array: extracted tags + * @param $uniq_prefix string * @return String: stripped text - * - * @static */ - public function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { + public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { static $n = 1; $stripped = ''; $matches = array(); @@ -751,52 +784,17 @@ class Parser { /** * Get a list of strippable XML-like elements + * + * @return array */ function getStripList() { return $this->mStripList; } - /** - * @deprecated use replaceVariables - */ - function strip( $text, $state, $stripcomments = false , $dontstrip = array() ) { - return $text; - } - - /** - * Restores pre, math, and other extensions removed by strip() - * - * always call unstripNoWiki() after this one - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstrip( $text, $state ) { - return $state->unstripGeneral( $text ); - } - - /** - * Always call this after unstrip() to preserve the order - * - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstripNoWiki( $text, $state ) { - return $state->unstripNoWiki( $text ); - } - - /** - * @deprecated use $this->mStripState->unstripBoth() - */ - function unstripForHTML( $text ) { - return $this->mStripState->unstripBoth( $text ); - } - /** * Add an item to the strip state * Returns the unique tag which must be inserted into the stripped text * The tag will be replaced with the original text in unstrip() - * - * @private */ function insertStripItem( $text ) { $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; @@ -805,15 +803,6 @@ class Parser { return $rnd; } - /** - * Interface with html tidy - * @deprecated Use MWTidy::tidy() - */ - public static function tidy( $text ) { - wfDeprecated( __METHOD__ ); - return MWTidy::tidy( $text ); - } - /** * parse the wiki syntax used to render tables * @@ -829,22 +818,23 @@ class Parser { foreach ( $lines as $outLine ) { $line = trim( $outLine ); - if ( $line == '') { //empty line, go to next line - $out .= $outLine."\n"; + # empty line, go to next line, + # but only append \n if outside of table + if ( $line === '') { + $output .= $outLine . "\n"; continue; } - $first_chars = $line[0]; - if ( strlen($line) > 1) { - $first_chars .= in_array($line[1], array('}', '+', '-')) ? $line[1] : ''; + $firstChars = $line[0]; + if ( strlen( $line ) > 1 ) { + $firstChars .= in_array( $line[1], array( '}', '+', '-' ) ) ? $line[1] : ''; } $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { + if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line , $matches ) ) { $tables[] = array(); - $table =& $this->last($tables); - $table[0] = array(); //first row - $current_row =& $table[0]; - + $table =& $this->last( $tables ); + $table[0] = array(); // first row + $currentRow =& $table[0]; $table['indent'] = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); @@ -853,78 +843,86 @@ class Parser { if ( $attributes !== '' ) { $table['attributes'] = $attributes; } - } else if ( !isset($tables[0]) ) { + } elseif ( !isset( $tables[0] ) ) { // we're outside the table - $out .= $outLine."\n"; - } else if ( $first_chars === '|}' ) { + $out .= $outLine . "\n"; + } elseif ( $firstChars === '|}' ) { // trim the |} code from the line $line = substr ( $line , 2 ); // Shorthand for last row - $last_row =& $this->last($table); + $lastRow =& $this->last( $table ); // a thead at the end becomes a tfoot, unless there is only one row // Do this before deleting empty last lines to allow headers at the bottom of tables - if ( isset($last_row['type'] ) && $last_row['type'] == 'thead' && isset($table[1])) { - $last_row['type'] = 'tfoot'; - for($i = 0; isset($last_row[$i]); $i++ ) { - $last_row[$i]['type'] = 'td'; + if ( isset( $lastRow['type'] ) && $lastRow['type'] == 'thead' && isset( $table[1] ) ) { + $lastRow['type'] = 'tfoot'; + for ( $i = 0; isset( $lastRow[$i] ); $i++ ) { + $lastRow[$i]['type'] = 'th'; } } // Delete empty last lines - if ( empty($last_row) ) { - $last_row = NULL; + if ( empty( $lastRow ) ) { + $lastRow = NULL; } - $o = $this->printTableHtml( array_pop($tables) ) . $line; + $o = ''; + $curtable = array_pop( $tables ); - if ( count($tables) > 0 ) { - $table =& $this->last($tables); - $current_row =& $this->last($table); - $current_element =& $this->last($current_row); + #Add a line-ending before the table, but only if there isn't one already + if ( substr( $out, -1 ) !== "\n" ) { + $o .= "\n"; + } + $o .= $this->generateTableHTML( $curtable ) . $line . "\n"; - $output =& $current_element['content']; + if ( count( $tables ) > 0 ) { + $table =& $this->last( $tables ); + $currentRow =& $this->last( $table ); + $currentElement =& $this->last( $currentRow ); + + $output =& $currentElement['content']; } else { $output =& $out; } $output .= $o; - } else if ( $first_chars === '|-' ) { + } elseif ( $firstChars === '|-' ) { // start a new row element // but only when we haven't started one already - if( count($current_row) != 0 ) { + if ( count( $currentRow ) != 0 ) { $table[] = array(); - $current_row =& $this->last($table); + $currentRow =& $this->last( $table ); } // Get the attributes, there's nothing else useful in $line now $line = substr ( $line , 2 ); $attributes = $this->mStripState->unstripBoth( $line ); $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' ); - if( $attributes !== '') { - $current_row['attributes'] = $attributes; + if ( $attributes !== '' ) { + $currentRow['attributes'] = $attributes; } - - } else if ( $first_chars === '|+' ) { - // a table caption - $line = substr ( $line , 2 ); - - $c = $this->getCellAttr($line , 'caption'); - $table['caption'] = array(); - $table['caption']['content'] = $c[0]; - if(isset($c[1])) $table['caption']['attributes'] = $c[1]; - unset($c); - - $output =& $table['caption']; - } else if ( $first_chars === '|' || $first_chars === '!' || $first_chars === '!+' ) { + + } elseif ( $firstChars === '|+' ) { + // a table caption, but only proceed if there isn't one already + if ( !isset ( $table['caption'] ) ) { + $line = substr ( $line , 2 ); + + $c = $this->getCellAttr( $line , 'caption' ); + $table['caption'] = array(); + $table['caption']['content'] = $c[0]; + if ( isset( $c[1] ) ) $table['caption']['attributes'] = $c[1]; + unset( $c ); + $output =& $table['caption']['content']; + } + } elseif ( $firstChars === '|' || $firstChars === '!' || $firstChars === '!+' ) { // Which kind of cells are we dealing with - $this_tag = 'td'; + $currentTag = 'td'; $line = substr ( $line , 1 ); - if ( $first_chars === '!' || $first_chars === '!+' ) { + if ( $firstChars === '!' || $firstChars === '!+' ) { $line = str_replace ( '!!' , '||' , $line ); - $this_tag = 'th'; + $currentTag = 'th'; } // Split up multiple cells on the same line. @@ -932,81 +930,87 @@ class Parser { $line = ''; // save memory // decide whether thead to tbody - if ( !array_key_exists('type', $current_row) ) { - $current_row['type'] = ( $first_chars === '!' ) ? 'thead' : 'tbody' ; - } else if( $first_chars === '|' ) { - $current_row['type'] = 'tbody'; + if ( !array_key_exists( 'type', $currentRow ) ) { + $currentRow['type'] = ( $firstChars === '!' ) ? 'thead' : 'tbody' ; + } elseif ( $firstChars === '|' ) { + $currentRow['type'] = 'tbody'; } // Loop through each table cell foreach ( $cells as $cell ) { // a new cell - $current_row[] = array(); - $current_element =& $this->last($current_row); + $currentRow[] = array(); + $currentElement =& $this->last( $currentRow ); - $current_element['type'] = $this_tag; + $currentElement['type'] = $currentTag; - $c = $this->getCellAttr($cell , $this_tag); - $current_element['content'] = $c[0]; - if(isset($c[1])) $current_element['attributes'] = $c[1]; - unset($c); + $c = $this->getCellAttr( $cell , $currentTag ); + $currentElement['content'] = $c[0]; + if ( isset( $c[1] ) ) $currentElement['attributes'] = $c[1]; + unset( $c ); } - $output =& $current_element['content']; - + $output =& $currentElement['content']; + } else { - $output .= $outLine."\n"; + $output .= "\n$outLine"; } } - + # Remove trailing line-ending (b/c) if ( substr( $out, -1 ) === "\n" ) { $out = substr( $out, 0, -1 ); } - - #Close any unclosed tables - if (isset($tables) && count($tables) > 0 ) { - for ($i = 0; $i < count($tables); $i++) { - $out .= $this->printTableHtml( array_pop($tables) ); + + # Close any unclosed tables + if ( isset( $tables ) && count( $tables ) > 0 ) { + for ( $i = 0; $i < count( $tables ); $i++ ) { + $curtable = array_pop( $tables ); + $curtable = $this->generateTableHTML( $curtable ); + #Add a line-ending before the table, but only if there isn't one already + if ( substr( $out, -1 ) !== "\n" && $curtable !== "" ) { + $out .= "\n"; + } + $out .= $curtable; } } - + wfProfileOut( __METHOD__ ); return $out; } - /** * Helper function for doTableStuff() separating the contents of cells from - * attributes. Particularly useful as there's a possible bug and this action + * attributes. Particularly useful as there's a possible bug and this action * is repeated twice. * * @private + * @param $cell + * @param $tagName + * @return array */ - function getCellAttr ($cell , $tag_name) { - $content = null; + function getCellAttr ( $cell, $tagName ) { $attributes = null; $cell = trim ( $cell ); // A cell could contain both parameters and data - $cell_data = explode ( '|' , $cell , 2 ); + $cellData = explode ( '|' , $cell , 2 ); // Bug 553: Note that a '|' inside an invalid link should not // be mistaken as delimiting cell parameters - if ( strpos( $cell_data[0], '[[' ) !== false ) { + if ( strpos( $cellData[0], '[[' ) !== false ) { $content = trim ( $cell ); } - else if ( count ( $cell_data ) == 1 ) { - $content = trim ( $cell_data[0] ); - } - else { - $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , $tag_name ); + elseif ( count ( $cellData ) == 1 ) { + $content = trim ( $cellData[0] ); + } else { + $attributes = $this->mStripState->unstripBoth( $cellData[0] ); + $attributes = Sanitizer::fixTagAttributes( $attributes , $tagName ); - $content = trim ( $cell_data[1] ); + $content = trim ( $cellData[1] ); } - return array($content, $attributes); + return array( $content, $attributes ); } @@ -1015,79 +1019,81 @@ class Parser { * * @private */ - function printTableHtml (&$t) { - $r = "\n"; - $r .= str_repeat( '
' , $t['indent'] ); - $r .= '
' , $table['indent'] ); + $return .= ''; + if ( !$lastSection ) { + $lastSection = $table[$i]['type']; + } elseif ( $lastSection != $table[$i]['type'] ) { + $simple = false; } + } + $lastSection = ''; + for ( $i = 0; isset( $table[$i] ); $i++ ) { + if ( !count( $table[$i] ) ) continue; + $empty = false; // check for empty tables - $r .= "\n'; + } - $r .= ''; - unset($t[$i][$j]); + $return .= "\n'; + unset( $table[$i][$j] ); } - $r .= "\n"; + $return .= "\n"; - if( ( !isset($t[$i+1]) && !$simple )|| ( isset($t[$i+1]) && ($t[$i]['type'] != $t[$i+1]['type'])) ) { - $r .= ''; + if ( ( !isset( $table[$i + 1] ) && !$simple ) || ( isset( $table[$i + 1] ) && isset( $table[$i + 1]['type'] ) && $table[$i]['type'] != $table[$i + 1]['type'] ) ) { + $return .= ''; } - $last_section = $t[$i]['type']; - unset($t[$i]); + $lastSection = $table[$i]['type']; + unset( $table[$i] ); } if ( $empty ) { - if ( isset($t['caption']) ) { - $r .= "\n"; + if ( isset( $table['caption'] ) ) { + $return .= "\n"; } else { return ''; } } - $r .= "\n"; - $r .= str_repeat( '
' , $t['indent'] ); + $return .= "\n"; + $return .= str_repeat( '' , $table['indent'] ); - return $r; + return $return; } /** @@ -1098,8 +1104,8 @@ class Parser { * * @private */ - function &last (&$arr) { - for($i = count($arr); (!isset($arr[$i]) && $i > 0); $i--) { } + function &last ( &$arr ) { + for ( $i = count( $arr ); ( !isset( $arr[$i] ) && $i > 0 ); $i-- ) { } return $arr[$i]; } @@ -1196,6 +1202,11 @@ class Parser { return $text; } + /** + * @throws MWException + * @param $m array + * @return HTML|string + */ function magicLinkCallback( $m ) { if ( isset( $m[1] ) && $m[1] !== '' ) { # Skip anchor @@ -1603,7 +1614,6 @@ class Parser { return $attribs; } - /** * Replace unusual URL escape codes with their equivalent characters * @@ -1995,7 +2005,7 @@ class Parser { } # NS_MEDIA is a pseudo-namespace for linking directly to a file - # FIXME: Should do batch file existence checks, see comment below + # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { wfProfileIn( __METHOD__."-media" ); # Give extensions a chance to select the file revision for us @@ -2015,7 +2025,7 @@ class Parser { # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # - # FIXME: isAlwaysKnown() can be expensive for file links; we should really do + # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do # batch file existence checks for NS_FILE and NS_MEDIA if ( $iw == '' && $nt->isAlwaysKnown() ) { $this->mOutput->addLink( $nt ); @@ -2030,18 +2040,6 @@ class Parser { return $holders; } - /** - * Make a link placeholder. The text returned can be later resolved to a real link with - * replaceLinkHolders(). This is done for two reasons: firstly to avoid further - * parsing of interwiki links, and secondly to allow all existence checks and - * article length checks (for stub links) to be bundled into a single query. - * - * @deprecated - */ - function makeLinkHolder( &$nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { - return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); - } - /** * Render a forced-blue link inline; protect against double expansion of * URLs if we're in a mode that prepends full URL prefixes to internal links. @@ -2110,6 +2108,8 @@ class Parser { /**#@+ * Used by doBlockLevels() * @private + * + * @return string */ function closeParagraph() { $result = ''; @@ -2134,7 +2134,7 @@ class Parser { } for ( $i = 0; $i < $shorter; ++$i ) { - if ( $st1{$i} != $st2{$i} ) { + if ( $st1[$i] != $st2[$i] ) { break; } } @@ -2145,6 +2145,8 @@ class Parser { * These next three functions open, continue, and close the list * element appropriate to the prefix character passed into them. * @private + * + * @return string */ function openList( $char ) { $result = $this->closeParagraph(); @@ -2169,6 +2171,8 @@ class Parser { * TODO: document * @param $char String * @private + * + * @return string */ function nextItem( $char ) { if ( '*' === $char || '#' === $char ) { @@ -2193,6 +2197,8 @@ class Parser { * TODO: document * @param $char String * @private + * + * @return string */ function closeList( $char ) { if ( '*' === $char ) { @@ -2311,7 +2317,7 @@ class Parser { $output .= $this->openList( $char ); if ( ';' === $char ) { - # FIXME: This is dupe of code above + # @todo FIXME: This is dupe of code above if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { $t = $t2; $output .= $term . $this->nextItem( ':' ); @@ -2432,7 +2438,7 @@ class Parser { $stack = 0; $len = strlen( $str ); for( $i = 0; $i < $len; $i++ ) { - $c = $str{$i}; + $c = $str[$i]; switch( $state ) { # (Using the number is a performance hack for common cases) @@ -2568,6 +2574,9 @@ class Parser { * Return value of a magic variable (like PAGENAME) * * @private + * + * @param $index integer + * @param $frame PPFrame */ function getVariableValue( $index, $frame=false ) { global $wgContLang, $wgSitename, $wgServer; @@ -2916,6 +2925,8 @@ class Parser { * dependency requirements. * * @private + * + * @return PPNode */ function preprocessToDom( $text, $flags = 0 ) { $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags ); @@ -2924,6 +2935,8 @@ class Parser { /** * Return a three-element array: leading whitespace, string contents, trailing whitespace + * + * @return array */ public static function splitWhitespace( $s ) { $ltrimmed = ltrim( $s ); @@ -2954,6 +2967,8 @@ class Parser { * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. * @param $argsOnly Boolean: only do argument (triple-brace) expansion, not double-brace expansion * @private + * + * @return string */ function replaceVariables( $text, $frame = false, $argsOnly = false ) { # Is there any text? Also, Prevent too big inclusions! @@ -2977,7 +2992,11 @@ class Parser { return $text; } - # Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. + /** + * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. + * + * @return array + */ static function createAssocArgs( $args ) { $assocArgs = array(); $index = 1; @@ -3063,9 +3082,10 @@ class Parser { $originalTitle = $part1; # $args is a list of argument nodes, starting from index 0, not including $part1 - # *** FIXME if piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object + # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; wfProfileOut( __METHOD__.'-setup' ); + wfProfileIn( __METHOD__."-title-$originalTitle" ); # SUBST wfProfileIn( __METHOD__.'-modifiers' ); @@ -3234,7 +3254,7 @@ class Parser { && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { - $text = SpecialPage::capturePath( $title ); + $text = SpecialPageFactory::capturePath( $title ); if ( is_string( $text ) ) { $found = true; $isHTML = true; @@ -3284,6 +3304,7 @@ class Parser { # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return array( 'object' => $text ); } @@ -3352,6 +3373,7 @@ class Parser { $ret = array( 'text' => $text ); } + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return $ret; } @@ -3359,6 +3381,8 @@ class Parser { /** * Get the semi-parsed DOM representation of a template with a given title, * and its redirect destination title. Cached. + * + * @return array */ function getTemplateDom( $title ) { $cacheTitle = $title; @@ -3423,8 +3447,10 @@ class Parser { /** * Static function to get a template * Can be overridden via ParserOptions::setTemplateCallback(). + * + * @return array */ - static function statelessFetchTemplate( $title, $parser=false ) { + static function statelessFetchTemplate( $title, $parser = false ) { $text = $skip = false; $finalTitle = $title; $deps = array(); @@ -3526,16 +3552,19 @@ class Parser { # Register the file as a dependency... $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); if ( $file && !$title->equals( $file->getTitle() ) ) { - # We fetched a rev from a different title; register it too... - $this->mOutput->addImage( $file->getTitle()->getDBkey(), $time, $sha1 ); - # Update fetched file title + # Update fetched file title $title = $file->getTitle(); } - return array( $file, $title ); + return array( $file, $title ); } /** * Transclude an interwiki link. + * + * @param $title Title + * @param $action + * + * @return string */ function interwikiTransclude( $title, $action ) { global $wgEnableScaryTranscluding; @@ -3552,6 +3581,10 @@ class Parser { return $this->fetchScaryTemplateMaybeFromCache( $url ); } + /** + * @param $url string + * @return Mixed|String + */ function fetchScaryTemplateMaybeFromCache( $url ) { global $wgTranscludeCacheExpiry; $dbr = wfGetDB( DB_SLAVE ); @@ -3576,10 +3609,14 @@ class Parser { return $text; } - /** * Triple brace replacement -- used for template arguments * @private + * + * @param $peice array + * @param $frame PPFrame + * + * @return array */ function argSubstitution( $piece, $frame ) { wfProfileIn( __METHOD__ ); @@ -3633,6 +3670,8 @@ class Parser { * inner Contents of extension element * noClose Original text did not have a close tag * @param $frame PPFrame + * + * @return string */ function extensionSubstitution( $params, $frame ) { $name = $frame->expand( $params['name'] ); @@ -3775,7 +3814,7 @@ class Parser { } # (bug 8068) Allow control over whether robots index a page. # - # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This + # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This # is not desirable, the last one on the page should win. if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) { $this->mOutput->setIndexPolicy( 'noindex' ); @@ -3973,7 +4012,10 @@ class Parser { if ( $dot ) { $numbering .= '.'; } - $numbering .= $wgContLang->formatNum( $sublevelCount[$i] ); + global $wgBetterDirectionality; + $pagelang = $this->mTitle->getPageLanguage(); + $toclang = ( $wgBetterDirectionality ? $pagelang : $wgContLang ); + $numbering .= $toclang->formatNum( $sublevelCount[$i] ); $dot = 1; } } @@ -4031,7 +4073,7 @@ class Parser { # HTML names must be case-insensitively unique (bug 10721). # This does not apply to Unicode characters per # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison - # FIXME: We may be changing them depending on the current locale. + # @todo FIXME: We may be changing them depending on the current locale. $arrayKey = strtolower( $safeHeadline ); if ( $legacyHeadline === false ) { $legacyArrayKey = false; @@ -4344,11 +4386,9 @@ class Parser { # If we're still here, make it a link to the user page $userText = wfEscapeWikiText( $username ); $nickText = wfEscapeWikiText( $nickname ); - if ( $user->isAnon() ) { - return wfMsgExt( 'signature-anon', array( 'content', 'parsemag' ), $userText, $nickText ); - } else { - return wfMsgExt( 'signature', array( 'content', 'parsemag' ), $userText, $nickText ); - } + $msgName = $user->isAnon() ? 'signature-anon' : 'signature'; + + return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()->title( $this->getTitle() )->text(); } /** @@ -4385,7 +4425,7 @@ class Parser { return $text; } - # FIXME: regex doesn't respect extension tags or nowiki + # @todo FIXME: Regex doesn't respect extension tags or nowiki # => Move this logic to braceSubstitution() $substWord = MagicWord::get( 'subst' ); $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase(); @@ -4469,11 +4509,22 @@ class Parser { /** * Create an HTML-style tag, e.g. special text * The callback should have the following form: - * function myParserHook( $text, $params, $parser ) { ... } + * function myParserHook( $text, $params, $parser, $frame ) { ... } * * Transform and return $text. Use $parser for any required context, e.g. use * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions * + * Hooks may return extended information by returning an array, of which the + * first numbered element (index 0) must be the return string, and all other + * entries are extracted into local variables within an internal function + * in the Parser class. + * + * This interface (introduced r61913) appears to be undocumented, but + * 'markerName' is used by some core tag hooks to override which strip + * array their results are placed in. **Use great caution if attempting + * this interface, as it is not documented and injudicious use could smash + * private variables.** + * * @param $tag Mixed: the tag to use, e.g. 'hook' for * @param $callback Mixed: the callback function (and object) to use for the tag * @return The old value of the mTagHooks array associated with the hook @@ -4490,6 +4541,22 @@ class Parser { return $oldVal; } + /** + * As setHook(), but letting the contents be parsed. + * + * Transparent tag hooks are like regular XML-style tag hooks, except they + * operate late in the transformation sequence, on HTML instead of wikitext. + * + * This is probably obsoleted by things dealing with parser frames? + * The only extension currently using it is geoserver. + * + * @since 1.10 + * @todo better document or deprecate this + * + * @param $tag Mixed: the tag to use, e.g. 'hook' for + * @param $callback Mixed: the callback function (and object) to use for the tag + * @return The old value of the mTagHooks array associated with the hook + */ function setTransparentTagHook( $tag, $callback ) { $tag = strtolower( $tag ); if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" ); @@ -4507,6 +4574,19 @@ class Parser { $this->mStripList = $this->mDefaultStripList; } + /** + * Remove a specific tag hook. Should not be called on $wgParser. + * Does not change the strip list. + * + * @param string $tag + * @return void + */ + function clearTagHook( $tag ) { + if ( isset( $this->mTagHooks[$tag] ) ) { + unset( $this->mTagHooks[$tag] ); + } + } + /** * Create a function, e.g. {{sum:1|2|3}} * The callback function should have the form: @@ -4610,7 +4690,7 @@ class Parser { } /** - * FIXME: update documentation. makeLinkObj() is deprecated. + * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. @@ -4638,6 +4718,10 @@ class Parser { * given as text will return the HTML of a gallery with two images, * labeled 'The number "1"' and * 'A tree'. + * + * @param string $text + * @param array $param + * @return string HTML */ function renderImageGallery( $text, $params ) { $ig = new ImageGallery(); @@ -4685,21 +4769,38 @@ class Parser { if ( strpos( $matches[0], '%' ) !== false ) { $matches[1] = rawurldecode( $matches[1] ); } - $tp = Title::newFromText( $matches[1], NS_FILE ); - $nt =& $tp; - if ( is_null( $nt ) ) { + $title = Title::newFromText( $matches[1], NS_FILE ); + if ( is_null( $title ) ) { # Bogus title. Ignore these so we don't bomb out later. continue; } + + $label = ''; + $alt = ''; if ( isset( $matches[3] ) ) { - $label = $matches[3]; - } else { - $label = ''; + // look for an |alt= definition while trying not to break existing + // captions with multiple pipes (|) in it, until a more sensible grammar + // is defined for images in galleries + + $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); + $altmatches = StringUtils::explode('|', $matches[3]); + $magicWordAlt = MagicWord::get( 'img_alt' ); + + foreach ( $altmatches as $altmatch ) { + $match = $magicWordAlt->matchVariableStartToEnd( $altmatch ); + if ( $match ) { + $alt = $this->stripAltText( $match, false ); + } + else { + // concatenate all other pipes + $label .= '|' . $altmatch; + } + } + // remove the first pipe + $label = substr( $label, 1 ); } - $html = $this->recursiveTagParse( trim( $label ) ); - - $ig->add( $nt, $html ); + $ig->add( $title, $label, $alt ); } return $ig->toHTML(); } @@ -4792,6 +4893,10 @@ class Parser { list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); + if ( !$file ) { + $this->addTrackingCategory( 'broken-file-category' ); + } + # Process the input parameters $caption = ''; $params = array( 'frame' => array(), 'handler' => array(), @@ -4835,7 +4940,7 @@ class Parser { switch( $paramName ) { case 'manualthumb': case 'alt': - # @todo Fixme: possibly check validity here for + # @todo FIXME: Possibly check validity here for # manualthumb? downstream behavior seems odd with # missing manual thumbs. $validated = true; @@ -4952,6 +5057,11 @@ class Parser { return $ret; } + /** + * @param $caption + * @param $holders LinkHolderArray + * @return mixed|String + */ protected function stripAltText( $caption, $holders ) { # Strip bad stuff out of the title (tooltip). We can't just use # replaceLinkHoldersText() here, because if this function is called @@ -4988,7 +5098,6 @@ class Parser { * @param $text String * @param $frame PPFrame * @return String - * @private */ function attributeStripCallback( &$text, $frame = false ) { $text = $this->replaceVariables( $text, $frame ); @@ -4998,6 +5107,8 @@ class Parser { /** * Accessor + * + * @return array */ function getTags() { return array_merge( array_keys( $this->mTransparentTagHooks ), array_keys( $this->mTagHooks ) ); @@ -5012,7 +5123,8 @@ class Parser { function replaceTransparentTags( $text ) { $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); - $text = $this->extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + $replacements = array(); foreach ( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; @@ -5022,9 +5134,9 @@ class Parser { } else { $output = $tag; } - $this->mStripState->addGeneral( $marker, $output ); + $replacements[$marker] = $output; } - return $text; + return strtr( $text, $replacements ); } /** @@ -5051,6 +5163,8 @@ class Parser { * @param $newText String: replacement text for section data. * @return String: for "get", the extracted section text. * for "replace", the whole page with the section replaced. + * If the page is empty and section 0 is requested, $text (as '') + * is returned */ private function extractSections( $text, $section, $mode, $newText='' ) { global $wgTitle; # not generally used but removes an ugly failure mode @@ -5166,10 +5280,10 @@ class Parser { * This function returns $oldtext after the content of the section * specified by $section has been replaced with $text. * - * @param $text String: former text of the article + * @param $oldtext String: former text of the article * @param $section Numeric: section identifier * @param $text String: replacing text - * #return String: modified text + * @return String: modified text */ public function replaceSection( $oldtext, $section, $text ) { return $this->extractSections( $oldtext, $section, "replace", $text ); @@ -5187,7 +5301,7 @@ class Parser { /** * Get the revision object for $this->mRevisionId * - * @return either a Revision object or null + * @return Revision|null either a Revision object or null */ protected function getRevisionObject() { if ( !is_null( $this->mRevisionObject ) ) { @@ -5333,7 +5447,8 @@ class Parser { $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text ); $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text ); - # Strip external link markup (FIXME: Not Tolerant to blank link text + # Strip external link markup + # @todo FIXME: Not tolerant to blank link text # I.E. [http://www.mediawiki.org] will render as [1] or something depending # on how many empty links there are on the page - need to figure that out. $text = preg_replace( '/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/', '$2', $text ); @@ -5348,11 +5463,10 @@ class Parser { /** * strip/replaceVariables/unstrip for preprocessor regression testing + * + * @return string */ - function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) { - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } + function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) { $this->startParse( $title, $options, $outputType, true ); $text = $this->replaceVariables( $text ); @@ -5361,18 +5475,11 @@ class Parser { return $text; } - function testPst( $text, $title, $options ) { - global $wgUser; - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } - return $this->preSaveTransform( $text, $title, $wgUser, $options ); + function testPst( $text, Title $title, ParserOptions $options ) { + return $this->preSaveTransform( $text, $title, $options->getUser(), $options ); } - function testPreprocess( $text, $title, $options ) { - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } + function testPreprocess( $text, Title $title, ParserOptions $options ) { return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); } @@ -5386,6 +5493,8 @@ class Parser { * This will call the callback function twice, with 'aaa' and 'bbb'. Those * two strings will be replaced with the value returned by the callback in * each case. + * + * @return string */ function markerSkipCallback( $s, $callback ) { $i = 0; @@ -5422,6 +5531,8 @@ class Parser { * array can later be loaded into another parser instance with * unserializeHalfParsedText(). The text can then be safely incorporated into * the return value of a parser hook. + * + * @return array */ function serializeHalfParsedText( $text ) { wfProfileIn( __METHOD__ ); @@ -5470,7 +5581,9 @@ class Parser { * serializeHalfParsedText(), is compatible with the current version of the * parser. * - * @param $data Array. + * @param $data Array + * + * @return bool */ function isValidHalfParsedText( $data ) { return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;