A patch to cache articles after conversion to HTML but before insertion
author Mr. E23 <e23@users.mediawiki.org>
Wed, 7 Jan 2004 02:51:47 +0000 (02:51 +0000)
committer Mr. E23 <e23@users.mediawiki.org>
Wed, 7 Jan 2004 02:51:47 +0000 (02:51 +0000)
into a full page. Significant speed improvements on cache hits, especially
on pages with many links.

includes/Article.php
includes/DefaultSettings.php
includes/MagicWord.php
includes/OutputPage.php
includes/User.php
maintenance/archives/patch-parsercache.sql [new file with mode: 0644]

index eab68b6..6da535f 100644 (file)
@@ -1254,24 +1254,33 @@ class Article {
        # This is a good place to put code to clear caches, for instance. 
 
        /* static */ function onArticleCreate($title_obj){
-               global $wgEnablePersistentLC;
+               global $wgEnablePersistentLC, $wgEnableParserCache;
                if ( $wgEnablePersistentLC ) {
                        LinkCache::linksccClearBrokenLinksTo( $title_obj->getPrefixedDBkey() );
                }
+               if ( $wgEnableParserCache ) {
+                       OutputPage::parsercacheClearBrokenLinksTo( $title_obj->getPrefixedDBkey() );
+               }
        }
 
        /* static */ function onArticleDelete($title_obj){
-               global $wgEnablePersistentLC;
+               global $wgEnablePersistentLC, $wgEnableParserCache;
                if ( $wgEnablePersistentLC ) {
                        LinkCache::linksccClearLinksTo( $title_obj->getArticleID() );
                }
+               if ( $wgEnableParserCache ) {
+                       OutputPage::parsercacheClearLinksTo( $title_obj->getArticleID() );
+               }
        }
 
        /* static */ function onArticleEdit($title_obj){
-               global $wgEnablePersistentLC;
+               global $wgEnablePersistentLC, $wgEnableParserCache;
                if ( $wgEnablePersistentLC ) {
                        LinkCache::linksccClearPage( $title_obj->getArticleID() );
                }
+               if ( $wgEnableParserCache ) {
+                       OutputPage::parsercacheClearPage( $title_obj->getArticleID() );
+               }
        }
 }
 
index 6996d3d..cfc67fb 100644 (file)
@@ -89,6 +89,8 @@ $wgUseCategoryMagic           = false;
 $wgEnablePersistentLC  = false;        # Persistent link cache in linkscc table; FAILS on MySQL 3.x
 $wgCompressedPersistentLC = true; # use gzcompressed blobs
 
+$wgEnableParserCache = false; # requires that php was compiled --with-zlib
+
 # User rights 
 $wgWhitelistEdit = false;
 $wgWhitelistRead = false;
index f4410b2..4cc3c90 100644 (file)
@@ -16,7 +16,8 @@
 class MagicWord {
        /*private*/ var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
        /*private*/ var $mRegexStart, $mBaseRegex, $mVariableRegex;
-       
+       /*private*/ var $mModified;     
+
        function MagicWord($id = 0, $syn = "", $cs = false) 
        {
                $this->mId = $id;
@@ -25,6 +26,7 @@ class MagicWord {
                $this->mRegex = "";
                $this->mRegexStart = "";
                $this->mVariableRegex = "";
+               $this->mModified = false;
        }
 
        # Factory: creates an object representing an ID
@@ -43,8 +45,7 @@ class MagicWord {
        # Initialises this object with an ID
        function load( $id )
        {
-               global $wgLang;
-               
+               global $wgLang;         
                $this->mId = $id;
                $wgLang->getMagic( $this );
        }
@@ -112,7 +113,9 @@ class MagicWord {
        # Replaces the word with something else
        function replace( $replacement, $subject )
        {
-               return preg_replace( $this->getRegex(), $replacement, $subject );
+               $res = preg_replace( $this->getRegex(), $replacement, $subject );
+               $this->mModified = !($res === $subject);
+               return $res;
        }
 
        # Variable handling: {{SUBST:xxx}} style words
@@ -120,7 +123,9 @@ class MagicWord {
        # Input word must contain $1
        function substituteCallback( $text, $callback ) {
                $regex = $this->getVariableRegex();
-               return preg_replace_callback( $this->getVariableRegex(), $callback, $text );
+               $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
+               $this->mModified = !($res === $text);
+               return $res;
        }
 
        # Matches the word, where $1 is a wildcard
@@ -136,6 +141,12 @@ class MagicWord {
        function getSynonym( $i ) {
                return $this->mSynonyms[$i];
        }
+
+       # Reports whether the most recent call to replace() or
+       # substituteCallback() on this object returned text that differs
+       # from its input. The flag is initialised to false by the
+       # constructor and overwritten by every such call.
+       function getWasModified(){
+               return $this->mModified;
+       }
 }
 
 # Used in matchAndRemove()
index 47d3610..c1a91be 100644 (file)
@@ -14,6 +14,7 @@ class OutputPage {
        var $mLanguageLinks, $mSupressQuickbar;
        var $mOnloadHandler;
        var $mDoNothing;
+       var $mContainsOldMagic, $mContainsNewMagic; 
 
        function OutputPage()
        {
@@ -29,6 +30,7 @@ class OutputPage {
                 $this->mCategoryLinks = array() ;
                $this->mAutonumber = 0;
                $this->mDoNothing = false;
+               $this->mContainsOldMagic = $this->mContainsNewMagic = 0;
        }
 
        function addHeader( $name, $val ) { array_push( $this->mHeaders, "$name: $val" ) ; }
@@ -121,10 +123,9 @@ class OutputPage {
        # First pass--just handle <nowiki> sections, pass the rest off
        # to doWikiPass2() which does all the real work.
        #
-
        function addWikiText( $text, $linestart = true )
        {
-               global $wgUseTeX;
+               global $wgUseTeX, $wgArticle, $wgUser, $action;
                $fname = "OutputPage::addWikiText";
                wfProfileIn( $fname );
                $unique  = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
@@ -140,6 +141,19 @@ class OutputPage {
                $stripped2 = "";
                $stripped3 = "";
 
+               global $wgEnableParserCache;
+               $use_parser_cache = 
+                       $wgEnableParserCache && $action == "view" &&
+                       intval($wgUser->getOption( "stubthreshold" )) == 0 && 
+                       isset($wgArticle) && $wgArticle->getID() > 0;
+
+               if( $use_parser_cache ){
+                       if( $this->fillFromParserCache() ){
+                               wfProfileOut( $fname );
+                               return;
+                       }
+               }
+
                while ( "" != $text ) {
                        $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
                        $stripped .= $p[0];
@@ -202,6 +216,10 @@ class OutputPage {
                                $escapedChars, $nwlist[$i] ), $text );
                }
                $this->addHTML( $text );
+
+               if($use_parser_cache ){
+                       $this->saveParserCache( $text );
+               }
                wfProfileOut( $fname );
        }
 
@@ -1218,44 +1236,54 @@ $t[] = "</table>" ;
                $v = date( "m" );
                $mw =& MagicWord::get( MAG_CURRENTMONTH );
                $text = $mw->replace( $v, $text );
-               
+               if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
+
                $v = $wgLang->getMonthName( date( "n" ) );
                $mw =& MagicWord::get( MAG_CURRENTMONTHNAME );
                $text = $mw->replace( $v, $text );
+               if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
                
                $v = $wgLang->getMonthNameGen( date( "n" ) );
                $mw =& MagicWord::get( MAG_CURRENTMONTHNAMEGEN );
                $text = $mw->replace( $v, $text );
+               if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
                
                $v = date( "j" );
                $mw = MagicWord::get( MAG_CURRENTDAY );
                $text = $mw->replace( $v, $text );
+               if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
                
                $v = $wgLang->getWeekdayName( date( "w" )+1 );
                $mw =& MagicWord::get( MAG_CURRENTDAYNAME );
                $text = $mw->replace( $v, $text );
+               if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
                
                $v = date( "Y" );
                $mw =& MagicWord::get( MAG_CURRENTYEAR );
                $text = $mw->replace( $v, $text );
+               if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
        
                $v = $wgLang->time( wfTimestampNow(), false );
                $mw =& MagicWord::get( MAG_CURRENTTIME );
                $text = $mw->replace( $v, $text );
+               if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
 
                $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
                if ( $mw->match( $text ) ) {
                        $v = wfNumberOfArticles();
                        $text = $mw->replace( $v, $text );
+                       if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
                }
 
                # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
                # The callbacks are at the bottom of this file
                $mw =& MagicWord::get( MAG_MSG );
                $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
+               if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 
                $mw =& MagicWord::get( MAG_MSGNW );
                $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
+               if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 
                wfProfileOut( $fname );
                return $text;
@@ -1356,7 +1384,6 @@ $t[] = "</table>" ;
                return $text;
        }
 
-
 /* 
  * 
  * This function accomplishes several tasks:
@@ -1600,6 +1627,76 @@ $t[] = "</table>" ;
                $ret .= "</head>\n";
                return $ret;
        }
+
+       # Tries to satisfy the current page view from the parsercache table.
+       #
+       # Looks up an unexpired row for the current article ID and the
+       # user's page rendering hash. On a hit the cached HTML is appended
+       # to the output and the cached language and category links are
+       # restored.
+       #
+       # Returns true on a cache hit; false on a miss or when the stored
+       # blob cannot be decoded. Profiling is left to the caller, which
+       # already closes its own profile section on a hit.
+       /* private */ function fillFromParserCache(){
+               global $wgUser, $wgArticle;
+               # Escape the hash too; do not rely on it being SQL-safe.
+               $hash = wfStrencode( $wgUser->getPageRenderingHash() );
+               $pageid = intval( $wgArticle->getID() );
+               # NOTE(review): the SELECT is issued with DB_WRITE, presumably so
+               # that it sees rows written moments ago -- confirm.
+               $res = wfQuery("SELECT pc_data FROM parsercache WHERE pc_pageid = {$pageid} ".
+                       " AND pc_prefhash = '{$hash}' AND pc_expire > NOW()", DB_WRITE);
+               $row = wfFetchObject ( $res );
+               if( !$row ){
+                       return false;
+               }
+
+               # A truncated or corrupt blob makes gzuncompress() or
+               # unserialize() return false; treat that as a miss so the
+               # page is rendered normally instead of emitting garbage.
+               $raw = gzuncompress( $row->pc_data );
+               $data = ( $raw === false ) ? false : unserialize( $raw );
+               if( !is_array( $data ) || !isset( $data['html'] ) ){
+                       return false;
+               }
+
+               $this->addHTML( $data['html'] );
+               $this->mLanguageLinks = $data['mLanguageLinks'];
+               $this->mCategoryLinks = $data['mCategoryLinks'];
+               return true;
+       }
+
+       # Stores freshly rendered article HTML in the parsercache table.
+       #
+       # $text is the HTML to cache. It is saved together with the current
+       # language and category links as a gzcompressed, serialized array,
+       # keyed by the article ID and the user's page rendering hash.
+       #
+       # The lifetime depends on which variables were expanded while
+       # rendering: 1 hour if $mContainsOldMagic is set, 1 day if only
+       # $mContainsNewMagic is set, and 7 days otherwise.
+       #
+       # About one call in 50 also purges expired rows.
+       /* private */ function saveParserCache( $text ){
+               global $wgUser, $wgArticle;
+               # Escape the hash too; do not rely on it being SQL-safe.
+               $hash = wfStrencode( $wgUser->getPageRenderingHash() );
+               $pageid = intval( $wgArticle->getID() );
+               $title = wfStrencode( $wgArticle->mTitle->getPrefixedDBKey() );
+               $data = array(
+                       'html' => $text,
+                       'mLanguageLinks' => $this->mLanguageLinks,
+                       'mCategoryLinks' => $this->mCategoryLinks
+               );
+               $ser = addslashes( gzcompress( serialize( $data ) ) );
+               if( $this->mContainsOldMagic ){
+                       $expire = "1 HOUR";
+               } else if( $this->mContainsNewMagic ){
+                       $expire = "1 DAY";
+               } else {
+                       $expire = "7 DAY";
+               }
+
+               wfQuery("REPLACE INTO parsercache (pc_prefhash,pc_pageid,pc_title,pc_data, pc_expire) ".
+                       "VALUES('{$hash}', {$pageid}, '{$title}', '{$ser}', ".
+                               "DATE_ADD(NOW(), INTERVAL {$expire}))", DB_WRITE);
+
+               if( rand() % 50 == 0 ){ // more efficient to just do it sometimes
+                       $this->purgeParserCache();
+               }
+       }
+       
+       # Removes up to 250 expired rows from the parsercache table.
+       /* static private */ function purgeParserCache(){
+               $sql = "DELETE FROM parsercache WHERE pc_expire < NOW() LIMIT 250";
+               wfQuery( $sql, DB_WRITE );
+       }
+
+       # Drops cached renderings affected by a change to the page with ID
+       # $pid: first the rows whose pc_title matches links.l_from for links
+       # with l_to = $pid, then the rows of that page itself.
+       /* static */ function parsercacheClearLinksTo( $pid ){
+               $pageId = intval( $pid );
+
+               $linkingPages = "DELETE parsercache FROM parsercache,links ".
+                       "WHERE pc_title=links.l_from AND l_to={$pageId}";
+               wfQuery( $linkingPages, DB_WRITE );
+
+               $ownRows = "DELETE FROM parsercache WHERE pc_pageid='{$pageId}'";
+               wfQuery( $ownRows, DB_WRITE );
+       }
+
+       # Drops the cached renderings of pages that the brokenlinks table
+       # records (bl_from) as pointing at $title (bl_to).
+       # $title is a prefixed db title, as returned by Title->getPrefixedDBkey().
+       /* static */ function parsercacheClearBrokenLinksTo( $title ){
+               $escapedTitle = wfStrencode( $title );
+               $sql = "DELETE parsercache FROM parsercache,brokenlinks ".
+                       "WHERE pc_pageid=bl_from AND bl_to='{$escapedTitle}'";
+               wfQuery( $sql, DB_WRITE );
+       }
+
+       # Drops every cached rendering of the page with ID $pid,
+       # whatever preference hash it was stored under.
+       /* static */ function parsercacheClearPage( $pid ){
+               $pageId = intval( $pid );
+               $sql = "DELETE FROM parsercache WHERE pc_pageid='{$pageId}'";
+               wfQuery( $sql, DB_WRITE );
+       }
 }
 
 # Regex callbacks, used in OutputPage::replaceVariables
index 04cbc2d..3d1ca10 100644 (file)
@@ -609,6 +609,33 @@ class User {
        
        }
 
+       # Returns a 32-character hex string identifying the combination of
+       # user preferences that affect how a page is rendered. The parser
+       # cache stores it in pc_prefhash as part of its lookup key.
+       #
+       # NOTE(review): the result is memoised in a function-static
+       # variable, so it is computed once per request, shared by all
+       # instances, and not refreshed if options change afterwards.
+       function getPageRenderingHash(){
+               static $hash = false;
+               if( $hash ){
+                       return $hash;
+               }
+
+               // stubthreshold is only included below for completeness,
+               // it will always be 0 when this function is called by parsercache.
+               $names = array(
+                       "quickbar", "underline", "hover", "skin", "math",
+                       "highlightbroken", "stubthreshold", "editsection",
+                       "editsectiononrightclick", "showtoc", "date"
+               );
+               $values = array();
+               foreach( $names as $name ){
+                       $values[] = $this->getOption( $name );
+               }
+               $confstr = implode( "!", $values );
+
+               // Always hash. The raw option string is user-controlled and
+               // ends up inside a quoted SQL literal in the parser cache
+               // queries; md5() yields only hex digits and always fits the
+               // CHAR(32) pc_prefhash column.
+               $hash = md5( $confstr );
+               return $hash;
+       }
 
        function isAllowedToCreateAccount() 
        {
diff --git a/maintenance/archives/patch-parsercache.sql b/maintenance/archives/patch-parsercache.sql
new file mode 100644 (file)
index 0000000..533a206
--- /dev/null
@@ -0,0 +1,15 @@
+--
+-- parsercache table, for caching complete parsed articles
+-- before they are embedded in the skin.
+--
+
+CREATE TABLE parsercache (
+  pc_pageid INT(11) NOT NULL,  -- ID of the cached article
+  pc_title VARCHAR(255) NOT NULL,  -- prefixed DB key of the article's title
+  pc_prefhash CHAR(32) NOT NULL,  -- User::getPageRenderingHash() value (at most 32 characters)
+  pc_expire DATETIME NOT NULL,  -- lookups ignore the row after this time; purge deletes it
+  pc_data MEDIUMBLOB NOT NULL,  -- gzcompress()ed serialize()d array: html, language links, category links
+  PRIMARY KEY (pc_pageid, pc_prefhash),  -- one row per page and preference hash; written with REPLACE
+  KEY(pc_title),  -- for the pc_title = links.l_from join when clearing linking pages
+  KEY(pc_expire)  -- for the pc_expire < NOW() purge
+) TYPE=InnoDB;