X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=76974c935c21149f38f88028e27fe0b3812db731;hb=1c6dcbb71d421aec8a01c76ef81a3c12e7f6f4e0;hp=a63614553474e610dbc9c05cfa53dd41572b6cf9;hpb=018a22d827f0f2d1ee28e979794b3d69f48dd344;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index a636145534..76974c935c 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -34,7 +34,7 @@ * Globals used: * objects: $wgLang, $wgContLang * - * NOT $wgArticle, $wgUser or $wgTitle. Keep them away! + * NOT $wgUser or $wgTitle. Keep them away! * * settings: * $wgUseDynamicDates*, $wgInterwikiMagic*, @@ -68,7 +68,7 @@ class Parser { # Constants needed for external link processing # Everything except bracket, space, or control characters - const EXT_LINK_URL_CLASS = '(?:[^\]\[<>"\\x00-\\x20\\x7F]|(?:\[\]))'; + const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]'; const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; @@ -109,7 +109,18 @@ class Parser { var $mImageParamsMagicArray = array(); var $mMarkerIndex = 0; var $mFirstCall = true; - var $mVariables, $mSubstWords; # Initialised by initialiseVariables() + + # Initialised by initialiseVariables() + + /** + * @var MagicWordArray + */ + var $mVariables; + + /** + * @var MagicWordArray + */ + var $mSubstWords; var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor # Cleared with clearState(): @@ -125,7 +136,12 @@ class Parser { var $mStripState; var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mLinkHolders, $mLinkID; + /** + * @var LinkHolderArray + */ + var $mLinkHolders; + + var $mLinkID; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; # empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; @@ -153,9 +169,14 @@ class Parser { var $mRevisionObject; # The revision object of the specified revision ID var $mRevisionId; # ID to display in {{REVISIONID}} tags var $mRevisionTimestamp; # The timestamp of the specified revision ID - var $mRevisionUser; # Userto display in {{REVISIONUSER}} tag + var $mRevisionUser; # User to display in {{REVISIONUSER}} tag var $mRevIdForTs; # The revision ID which was used to fetch the timestamp + /** + * @var string + */ + var $mUniqPrefix; + /** * Constructor */ @@ -163,9 +184,12 @@ class Parser { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. - '(?:[^\]\[<>"\x00-\x20\x7F]|\[\])+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; + '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; + } elseif ( defined( 'MW_COMPILED' ) ) { + # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode + $this->mPreprocessorClass = 'Preprocessor_Hash'; } elseif ( extension_loaded( 'domxml' ) ) { # PECL extension that conflicts with the core DOM extension (bug 13770) wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); @@ -175,6 +199,7 @@ class Parser { } else { $this->mPreprocessorClass = 'Preprocessor_Hash'; } + wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" ); } /** @@ -448,6 +473,8 @@ class Parser { * * @param $text String: text extension wants to have parsed * @param $frame PPFrame: The frame to use for expanding any template variables + * + * @return string */ function recursiveTagParse( $text, $frame=false ) { wfProfileIn( __METHOD__ ); @@ -495,6 +522,8 @@ class Parser { /** * Get a random string + * + * @return string */ static public function getRandomString() { return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); @@ -517,7 +546,7 @@ class Parser { */ public function uniqPrefix() { if ( !isset( $this->mUniqPrefix ) ) { - # @todo Fixme: this is probably *horribly wrong* + # @todo FIXME: This is probably *horribly wrong* # LanguageConverter seems to want $wgParser's uniqPrefix, however # if this is called for a parser cache hit, the parser may not # have ever been initialized in the first place. @@ -530,6 +559,8 @@ class Parser { /** * Set the context title + * + * @param $t Title */ function setTitle( $t ) { if ( !$t || $t instanceof FakeTitle ) { @@ -618,10 +649,16 @@ class Parser { return wfSetVar( $this->mOptions, $x ); } + /** + * @return int + */ function nextLinkID() { return $this->mLinkID++; } + /** + * @param $id int + */ function setLinkID( $id ) { $this->mLinkID = $id; } @@ -636,7 +673,7 @@ class Parser { if ( $target !== null ) { return $target; } else { - return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang; + return $this->mOptions->getInterfaceMessage() ? $wgLang : $this->mTitle->getPageLanguage(); } } @@ -677,15 +714,13 @@ class Parser { * array( 'param' => 'x' ), * 'tag content' ) ) * - * @param $elements list of element names. Comments are always extracted. - * @param $text Source text string. - * @param $matches Out parameter, Array: extracted tags - * @param $uniq_prefix + * @param $elements array list of element names. Comments are always extracted. + * @param $text string Source text string. + * @param $matches array Out parameter, Array: extracted tags + * @param $uniq_prefix string * @return String: stripped text - * - * @static */ - public function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { + public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { static $n = 1; $stripped = ''; $matches = array(); @@ -749,52 +784,17 @@ class Parser { /** * Get a list of strippable XML-like elements + * + * @return array */ function getStripList() { return $this->mStripList; } - /** - * @deprecated use replaceVariables - */ - function strip( $text, $state, $stripcomments = false , $dontstrip = array() ) { - return $text; - } - - /** - * Restores pre, math, and other extensions removed by strip() - * - * always call unstripNoWiki() after this one - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstrip( $text, $state ) { - return $state->unstripGeneral( $text ); - } - - /** - * Always call this after unstrip() to preserve the order - * - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstripNoWiki( $text, $state ) { - return $state->unstripNoWiki( $text ); - } - - /** - * @deprecated use $this->mStripState->unstripBoth() - */ - function unstripForHTML( $text ) { - return $this->mStripState->unstripBoth( $text ); - } - /** * Add an item to the strip state * Returns the unique tag which must be inserted into the stripped text * The tag will be replaced with the original text in unstrip() - * - * @private */ function insertStripItem( $text ) { $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; @@ -803,15 +803,6 @@ class Parser { return $rnd; } - /** - * Interface with html tidy - * @deprecated Use MWTidy::tidy() - */ - public static function tidy( $text ) { - wfDeprecated( __METHOD__ ); - return MWTidy::tidy( $text ); - } - /** * parse the wiki syntax used to render tables * @@ -829,7 +820,7 @@ class Parser { # empty line, go to next line, # but only append \n if outside of table - if ( $line === '') { + if ( $line === '') { $output .= $outLine . "\n"; continue; } @@ -852,11 +843,11 @@ class Parser { if ( $attributes !== '' ) { $table['attributes'] = $attributes; } - } else if ( !isset( $tables[0] ) ) { + } elseif ( !isset( $tables[0] ) ) { // we're outside the table $out .= $outLine . "\n"; - } else if ( $firstChars === '|}' ) { + } elseif ( $firstChars === '|}' ) { // trim the |} code from the line $line = substr ( $line , 2 ); @@ -897,7 +888,7 @@ class Parser { $output .= $o; - } else if ( $firstChars === '|-' ) { + } elseif ( $firstChars === '|-' ) { // start a new row element // but only when we haven't started one already if ( count( $currentRow ) != 0 ) { @@ -912,11 +903,11 @@ class Parser { $currentRow['attributes'] = $attributes; } - } else if ( $firstChars === '|+' ) { + } elseif ( $firstChars === '|+' ) { // a table caption, but only proceed if there isn't one already if ( !isset ( $table['caption'] ) ) { $line = substr ( $line , 2 ); - + $c = $this->getCellAttr( $line , 'caption' ); $table['caption'] = array(); $table['caption']['content'] = $c[0]; @@ -924,7 +915,7 @@ class Parser { unset( $c ); $output =& $table['caption']['content']; } - } else if ( $firstChars === '|' || $firstChars === '!' || $firstChars === '!+' ) { + } elseif ( $firstChars === '|' || $firstChars === '!' || $firstChars === '!+' ) { // Which kind of cells are we dealing with $currentTag = 'td'; $line = substr ( $line , 1 ); @@ -941,7 +932,7 @@ class Parser { // decide whether thead to tbody if ( !array_key_exists( 'type', $currentRow ) ) { $currentRow['type'] = ( $firstChars === '!' ) ? 'thead' : 'tbody' ; - } else if ( $firstChars === '|' ) { + } elseif ( $firstChars === '|' ) { $currentRow['type'] = 'tbody'; } @@ -990,13 +981,15 @@ class Parser { /** * Helper function for doTableStuff() separating the contents of cells from - * attributes. Particularly useful as there's a possible bug and this action + * attributes. Particularly useful as there's a possible bug and this action * is repeated twice. * * @private + * @param $cell + * @param $tagName + * @return array */ function getCellAttr ( $cell, $tagName ) { - $content = null; $attributes = null; $cell = trim ( $cell ); @@ -1009,10 +1002,9 @@ class Parser { if ( strpos( $cellData[0], '[[' ) !== false ) { $content = trim ( $cell ); } - else if ( count ( $cellData ) == 1 ) { + elseif ( count ( $cellData ) == 1 ) { $content = trim ( $cellData[0] ); - } - else { + } else { $attributes = $this->mStripState->unstripBoth( $cellData[0] ); $attributes = Sanitizer::fixTagAttributes( $attributes , $tagName ); @@ -1027,7 +1019,7 @@ class Parser { * * @private */ - function generateTableHTML ( &$table ) { + function generateTableHTML( &$table ) { $return = ""; $return .= str_repeat( '
' , $table['indent'] ); $return .= ''; } @@ -1087,7 +1077,7 @@ class Parser { } $return .= "\n"; - if ( ( !isset( $table[$i + 1] ) && !$simple ) || ( isset( $table[$i + 1] ) && ( $table[$i]['type'] != $table[$i + 1]['type'] ) ) ) { + if ( ( !isset( $table[$i + 1] ) && !$simple ) || ( isset( $table[$i + 1] ) && isset( $table[$i + 1]['type'] ) && $table[$i]['type'] != $table[$i + 1]['type'] ) ) { $return .= ''; } $lastSection = $table[$i]['type']; @@ -1212,6 +1202,11 @@ class Parser { return $text; } + /** + * @throws MWException + * @param $m array + * @return HTML|string + */ function magicLinkCallback( $m ) { if ( isset( $m[1] ) && $m[1] !== '' ) { # Skip anchor @@ -2010,7 +2005,7 @@ class Parser { } # NS_MEDIA is a pseudo-namespace for linking directly to a file - # FIXME: Should do batch file existence checks, see comment below + # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { wfProfileIn( __METHOD__."-media" ); # Give extensions a chance to select the file revision for us @@ -2030,7 +2025,7 @@ class Parser { # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # - # FIXME: isAlwaysKnown() can be expensive for file links; we should really do + # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do # batch file existence checks for NS_FILE and NS_MEDIA if ( $iw == '' && $nt->isAlwaysKnown() ) { $this->mOutput->addLink( $nt ); @@ -2045,18 +2040,6 @@ class Parser { return $holders; } - /** - * Make a link placeholder. The text returned can be later resolved to a real link with - * replaceLinkHolders(). This is done for two reasons: firstly to avoid further - * parsing of interwiki links, and secondly to allow all existence checks and - * article length checks (for stub links) to be bundled into a single query. - * - * @deprecated - */ - function makeLinkHolder( &$nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { - return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); - } - /** * Render a forced-blue link inline; protect against double expansion of * URLs if we're in a mode that prepends full URL prefixes to internal links. @@ -2334,7 +2317,7 @@ class Parser { $output .= $this->openList( $char ); if ( ';' === $char ) { - # FIXME: This is dupe of code above + # @todo FIXME: This is dupe of code above if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { $t = $t2; $output .= $term . $this->nextItem( ':' ); @@ -3099,9 +3082,10 @@ class Parser { $originalTitle = $part1; # $args is a list of argument nodes, starting from index 0, not including $part1 - # *** FIXME if piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object + # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; wfProfileOut( __METHOD__.'-setup' ); + wfProfileIn( __METHOD__."-title-$originalTitle" ); # SUBST wfProfileIn( __METHOD__.'-modifiers' ); @@ -3320,6 +3304,7 @@ class Parser { # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return array( 'object' => $text ); } @@ -3388,6 +3373,7 @@ class Parser { $ret = array( 'text' => $text ); } + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return $ret; } @@ -3566,9 +3552,7 @@ class Parser { # Register the file as a dependency... $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); if ( $file && !$title->equals( $file->getTitle() ) ) { - # We fetched a rev from a different title; register it too... - $this->mOutput->addImage( $file->getTitle()->getDBkey(), $time, $sha1 ); - # Update fetched file title + # Update fetched file title $title = $file->getTitle(); } return array( $file, $title ); @@ -3577,6 +3561,9 @@ class Parser { /** * Transclude an interwiki link. * + * @param $title Title + * @param $action + * * @return string */ function interwikiTransclude( $title, $action ) { @@ -3827,7 +3814,7 @@ class Parser { } # (bug 8068) Allow control over whether robots index a page. # - # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This + # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This # is not desirable, the last one on the page should win. if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) { $this->mOutput->setIndexPolicy( 'noindex' ); @@ -4025,7 +4012,10 @@ class Parser { if ( $dot ) { $numbering .= '.'; } - $numbering .= $wgContLang->formatNum( $sublevelCount[$i] ); + global $wgBetterDirectionality; + $pagelang = $this->mTitle->getPageLanguage(); + $toclang = ( $wgBetterDirectionality ? $pagelang : $wgContLang ); + $numbering .= $toclang->formatNum( $sublevelCount[$i] ); $dot = 1; } } @@ -4083,7 +4073,7 @@ class Parser { # HTML names must be case-insensitively unique (bug 10721). # This does not apply to Unicode characters per # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison - # FIXME: We may be changing them depending on the current locale. + # @todo FIXME: We may be changing them depending on the current locale. $arrayKey = strtolower( $safeHeadline ); if ( $legacyHeadline === false ) { $legacyArrayKey = false; @@ -4396,11 +4386,9 @@ class Parser { # If we're still here, make it a link to the user page $userText = wfEscapeWikiText( $username ); $nickText = wfEscapeWikiText( $nickname ); - if ( $user->isAnon() ) { - return wfMsgExt( 'signature-anon', array( 'content', 'parsemag' ), $userText, $nickText ); - } else { - return wfMsgExt( 'signature', array( 'content', 'parsemag' ), $userText, $nickText ); - } + $msgName = $user->isAnon() ? 'signature-anon' : 'signature'; + + return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()->title( $this->getTitle() )->text(); } /** @@ -4437,7 +4425,7 @@ class Parser { return $text; } - # FIXME: regex doesn't respect extension tags or nowiki + # @todo FIXME: Regex doesn't respect extension tags or nowiki # => Move this logic to braceSubstitution() $substWord = MagicWord::get( 'subst' ); $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase(); @@ -4586,6 +4574,19 @@ class Parser { $this->mStripList = $this->mDefaultStripList; } + /** + * Remove a specific tag hook. Should not be called on $wgParser. + * Does not change the strip list. + * + * @param string $tag + * @return void + */ + function clearTagHook( $tag ) { + if ( isset( $this->mTagHooks[$tag] ) ) { + unset( $this->mTagHooks[$tag] ); + } + } + /** * Create a function, e.g. {{sum:1|2|3}} * The callback function should have the form: @@ -4689,7 +4690,7 @@ class Parser { } /** - * FIXME: update documentation. makeLinkObj() is deprecated. + * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. @@ -4773,14 +4774,14 @@ class Parser { # Bogus title. Ignore these so we don't bomb out later. continue; } - + $label = ''; $alt = ''; if ( isset( $matches[3] ) ) { // look for an |alt= definition while trying not to break existing // captions with multiple pipes (|) in it, until a more sensible grammar // is defined for images in galleries - + $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); $altmatches = StringUtils::explode('|', $matches[3]); $magicWordAlt = MagicWord::get( 'img_alt' ); @@ -4939,7 +4940,7 @@ class Parser { switch( $paramName ) { case 'manualthumb': case 'alt': - # @todo Fixme: possibly check validity here for + # @todo FIXME: Possibly check validity here for # manualthumb? downstream behavior seems odd with # missing manual thumbs. $validated = true; @@ -5056,6 +5057,11 @@ class Parser { return $ret; } + /** + * @param $caption + * @param $holders LinkHolderArray + * @return mixed|String + */ protected function stripAltText( $caption, $holders ) { # Strip bad stuff out of the title (tooltip). We can't just use # replaceLinkHoldersText() here, because if this function is called @@ -5117,7 +5123,8 @@ class Parser { function replaceTransparentTags( $text ) { $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); - $text = $this->extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + $replacements = array(); foreach ( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; @@ -5127,9 +5134,9 @@ class Parser { } else { $output = $tag; } - $this->mStripState->addGeneral( $marker, $output ); + $replacements[$marker] = $output; } - return $text; + return strtr( $text, $replacements ); } /** @@ -5156,6 +5163,8 @@ class Parser { * @param $newText String: replacement text for section data. * @return String: for "get", the extracted section text. * for "replace", the whole page with the section replaced. + * If the page is empty and section 0 is requested, $text (as '') + * is returned */ private function extractSections( $text, $section, $mode, $newText='' ) { global $wgTitle; # not generally used but removes an ugly failure mode @@ -5438,7 +5447,8 @@ class Parser { $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text ); $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text ); - # Strip external link markup (FIXME: Not Tolerant to blank link text + # Strip external link markup + # @todo FIXME: Not tolerant to blank link text # I.E. [http://www.mediawiki.org] will render as [1] or something depending # on how many empty links there are on the page - need to figure that out. $text = preg_replace( '/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/', '$2', $text ); @@ -5456,10 +5466,7 @@ class Parser { * * @return string */ - function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) { - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } + function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) { $this->startParse( $title, $options, $outputType, true ); $text = $this->replaceVariables( $text ); @@ -5468,18 +5475,11 @@ class Parser { return $text; } - function testPst( $text, $title, $options ) { - global $wgUser; - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } - return $this->preSaveTransform( $text, $title, $wgUser, $options ); + function testPst( $text, Title $title, ParserOptions $options ) { + return $this->preSaveTransform( $text, $title, $options->getUser(), $options ); } - function testPreprocess( $text, $title, $options ) { - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } + function testPreprocess( $text, Title $title, ParserOptions $options ) { return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); }