Tell regexp parser to use extra analysis on external link regexp;
[lhc/web/wiklou.git] / includes / Parser.php
index 67faaa7..433487a 100644 (file)
@@ -7,6 +7,13 @@
  * @version $Id$
  */
 
+/**
+ * ParserOutput format version. Update it whenever that format changes
+ * in an incompatible way: ParserOutput::expired() reports cached objects
+ * stamped with an older (or no) version, so the parser cache can drop them.
+ */
+define( 'MW_PARSER_VERSION', '1.4.0' );
+
 /**
  * Variable substitution O(N^2) attack
  *
@@ -1033,7 +1040,10 @@ class Parser
         * @access private
         */
        function replaceFreeExternalLinks( $text ) {
-               $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+               $fname = 'Parser::replaceFreeExternalLinks';
+               wfProfileIn( $fname );
+               
+               $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
                $s = array_shift( $bits );
                $i = 0;
 
@@ -1085,6 +1095,7 @@ class Parser
                                $s .= $protocol . $remainder;
                        }
                }
+               wfProfileOut();
                return $s;
        }
 
@@ -1161,14 +1172,15 @@ class Parser
                        $prefix = '';
                }
 
+               $selflink = $this->mTitle->getPrefixedText();
                wfProfileOut( $fname.'-setup' );
 
                $checkVariantLink = sizeof($wgContLang->getVariants())>1;
                # Loop for each link
                for ($k = 0; isset( $a[$k] ); $k++) {
                        $line = $a[$k];
-                       wfProfileIn( $fname.'-prefixhandling' );
                        if ( $useLinkPrefixExtension ) {
+                               wfProfileIn( $fname.'-prefixhandling' );
                                if ( preg_match( $e2, $s, $m ) ) {
                                        $prefix = $m[2];
                                        $s = $m[1];
@@ -1180,8 +1192,8 @@ class Parser
                                        $prefix = $first_prefix;
                                        $first_prefix = false;
                                }
+                               wfProfileOut( $fname.'-prefixhandling' );
                        }
-                       wfProfileOut( $fname.'-prefixhandling' );
 
                        $might_be_img = false;
                        
@@ -1217,7 +1229,7 @@ class Parser
                                $link = substr($link, 1);
                        }
                        
-                       $nt = Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) );
+                       $nt =& Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) );
                        if( !$nt ) {
                                $s .= $prefix . '[[' . $line;
                                continue;
@@ -1225,8 +1237,8 @@ class Parser
 
                        #check other language variants of the link
                        #if the article does not exist
-                       if( $nt->getArticleID() == 0
-                               && $checkVariantLink ) {
+                       if( $checkVariantLink
+                           && $nt->getArticleID() == 0 ) {
                                $wgContLang->findVariantLink($link, $nt);
                        }
 
@@ -1285,6 +1297,8 @@ class Parser
                                }
                                
                                if ( $ns == NS_IMAGE ) {
+                                       wfProfileIn( "$fname-image" );
+                                       
                                        # recursively parse links inside the image caption
                                        # actually, this will parse them in any other parameters, too,
                                        # but it might be hard to fix that, and it doesn't matter ATM
@@ -1294,11 +1308,14 @@ class Parser
                                        # replace the image with a link-holder so that replaceExternalLinks() can't mess with it
                                        $s .= $prefix . $this->insertStripItem( $sk->makeImageLinkObj( $nt, $text ), $this->mStripState ) . $trail;
                                        $wgLinkCache->addImageLinkObj( $nt );
+                                       
+                                       wfProfileOut( "$fname-image" );
                                        continue;
                                }
                                
                                if ( $ns == NS_CATEGORY ) {
-                                       $t = $nt->getText() ;
+                                       wfProfileIn( "$fname-category" );
+                                       $t = $nt->getText();
 
                                        $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                                        $pPLC=$sk->postParseLinkColour();
@@ -1317,13 +1334,15 @@ class Parser
                                                $sortkey = $text;
                                        }
                                        $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
-                                       $this->mOutput->mCategoryLinks[] = $t ;
+                                       $this->mOutput->addCategoryLink( $t );
                                        $s .= $prefix . $trail ;
+                                       
+                                       wfProfileOut( "$fname-category" );
                                        continue;
                                }
                        }
 
-                       if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
+                       if( ( $nt->getPrefixedText() === $selflink ) &&
                            ( $nt->getFragment() === '' ) ) {
                                # Self-links are handled specially; generally de-link and change to bold.
                                $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
@@ -1557,13 +1576,14 @@ class Parser
                                $lastPrefix = $pref2;
                        }
                        if( 0 == $prefixLength ) {
+                               wfProfileIn( "$fname-paragraph" );
                                # No prefix (not in list)--go to paragraph mode
                                $uniq_prefix = UNIQ_PREFIX;
                                // XXX: use a stack for nestable elements like span, table and div
-                               $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
+                               $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
                                $closematch = preg_match(
                                        '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
-                                       '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
+                                       '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/iS', $t );
                                if ( $openmatch or $closematch ) {
                                        $paragraphStack = false;
                                        $output .= $this->closeParagraph();
@@ -1612,6 +1632,7 @@ class Parser
                                                }
                                        }
                                }
+                               wfProfileOut( "$fname-paragraph" );
                        }
                        if ($paragraphStack === false) {
                                $output .= $t."\n";
@@ -1677,16 +1698,23 @@ class Parser
         */
        function getVariableValue( $index ) {
                global $wgContLang, $wgSitename, $wgServer;
-
+               
+               /**
+                * Some of these require message or data lookups and can be
+                * expensive to check many times.
+                */
+               static $varCache = array();
+               if( isset( $varCache[$index] ) ) return $varCache[$index];
+               
                switch ( $index ) {
                        case MAG_CURRENTMONTH:
-                               return $wgContLang->formatNum( date( 'm' ) );
+                               return $varCache[$index] = $wgContLang->formatNum( date( 'm' ) );
                        case MAG_CURRENTMONTHNAME:
-                               return $wgContLang->getMonthName( date('n') );
+                               return $varCache[$index] = $wgContLang->getMonthName( date('n') );
                        case MAG_CURRENTMONTHNAMEGEN:
-                               return $wgContLang->getMonthNameGen( date('n') );
+                               return $varCache[$index] = $wgContLang->getMonthNameGen( date('n') );
                        case MAG_CURRENTDAY:
-                               return $wgContLang->formatNum( date('j') );
+                               return $varCache[$index] = $wgContLang->formatNum( date('j') );
                        case MAG_PAGENAME:
                                return $this->mTitle->getText();
                        case MAG_PAGENAMEE:
@@ -1695,13 +1723,13 @@ class Parser
                                # return Namespace::getCanonicalName($this->mTitle->getNamespace());
                                return $wgContLang->getNsText($this->mTitle->getNamespace()); # Patch by Dori
                        case MAG_CURRENTDAYNAME:
-                               return $wgContLang->getWeekdayName( date('w')+1 );
+                               return $varCache[$index] = $wgContLang->getWeekdayName( date('w')+1 );
                        case MAG_CURRENTYEAR:
-                               return $wgContLang->formatNum( date( 'Y' ) );
+                               return $varCache[$index] = $wgContLang->formatNum( date( 'Y' ) );
                        case MAG_CURRENTTIME:
-                               return $wgContLang->time( wfTimestampNow(), false );
+                               return $varCache[$index] = $wgContLang->time( wfTimestampNow(), false );
                        case MAG_NUMBEROFARTICLES:
-                               return $wgContLang->formatNum( wfNumberOfArticles() );
+                               return $varCache[$index] = $wgContLang->formatNum( wfNumberOfArticles() );
                        case MAG_SITENAME:
                                return $wgSitename;
                        case MAG_SERVER:
@@ -1746,8 +1774,9 @@ class Parser
                global $wgLang, $wgScript, $wgArticlePath;
 
                # Prevent too big inclusions
-               if(strlen($text)> MAX_INCLUDE_SIZE)
-               return $text;
+               if( strlen( $text ) > MAX_INCLUDE_SIZE ) {
+                       return $text;
+               }
 
                $fname = 'Parser::replaceVariables';
                wfProfileIn( $fname );
@@ -2084,6 +2113,7 @@ class Parser
 
                # Empties the template path
                $this->mTemplatePath = array();
+               
                if ( !$found ) {
                        return $matches[0];
                } else {
@@ -3046,6 +3076,7 @@ class ParserOutput
 {
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache
+       var $mVersion;   # Compatibility check
 
        function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
@@ -3055,11 +3086,12 @@ class ParserOutput
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = '';
+               $this->mVersion = MW_PARSER_VERSION;
        }
 
        function getText()                   { return $this->mText; }
        function getLanguageLinks()          { return $this->mLanguageLinks; }
-       function getCategoryLinks()          { return $this->mCategoryLinks; }
+       function getCategoryLinks()          { return array_keys( $this->mCategoryLinks ); }
        function getCacheTime()              { return $this->mCacheTime; }
        function containsOldMagic()          { return $this->mContainsOldMagic; }
        function setText( $text )            { return wfSetVar( $this->mText, $text ); }
@@ -3067,6 +3099,7 @@ class ParserOutput
        function setCategoryLinks( $cl )     { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t )          { return wfSetVar( $this->mCacheTime, $t ); }
+       function addCategoryLink( $c )       { $this->mCategoryLinks[$c] = 1; }
 
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
@@ -3074,6 +3107,22 @@ class ParserOutput
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }
 
+       /**
+        * Tell whether this cached output has to be thrown away: it is no
+        * newer than the article's touched time or the global cache epoch,
+        * or it was built by an older (or unversioned) parser.
+        *
+        * @param string $touched the affected article's last touched timestamp
+        * @return bool
+        * @access public
+        */
+       function expired( $touched ) {
+               global $wgCacheEpoch;
+               $cachedAt = $this->getCacheTime();
+               if( $cachedAt <= $touched || $cachedAt <= $wgCacheEpoch ) return true;
+               if( !isset( $this->mVersion ) ) return true;
+               return version_compare( $this->mVersion, MW_PARSER_VERSION, "lt" );
+       }
 }
 
 /**
@@ -3128,7 +3177,6 @@ class ParserOptions
        # Get user options
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
-               
                $fname = 'ParserOptions::initialiseFromUser';
                wfProfileIn( $fname );
                if ( !$userInput ) {