From 16b6b929caf1d132852dd8d7785aa633c66c7229 Mon Sep 17 00:00:00 2001 From: "Mr. E23" Date: Wed, 7 Jan 2004 02:51:47 +0000 Subject: [PATCH] A patch to cache articles after conversion to HTML but before insertion into a full page. Significant speed improvements on cache hits, especially on pages with many links. --- includes/Article.php | 15 ++- includes/DefaultSettings.php | 2 + includes/MagicWord.php | 21 ++++- includes/OutputPage.php | 105 ++++++++++++++++++++- includes/User.php | 27 ++++++ maintenance/archives/patch-parsercache.sql | 15 +++ 6 files changed, 173 insertions(+), 12 deletions(-) create mode 100644 maintenance/archives/patch-parsercache.sql diff --git a/includes/Article.php b/includes/Article.php index eab68b6988..6da535f4c2 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -1254,24 +1254,33 @@ class Article { # This is a good place to put code to clear caches, for instance. /* static */ function onArticleCreate($title_obj){ - global $wgEnablePersistentLC; + global $wgEnablePersistentLC, $wgEnableParserCache; if ( $wgEnablePersistentLC ) { LinkCache::linksccClearBrokenLinksTo( $title_obj->getPrefixedDBkey() ); } + if ( $wgEnableParserCache ) { + OutputPage::parsercacheClearBrokenLinksTo( $title_obj->getPrefixedDBkey() ); + } } /* static */ function onArticleDelete($title_obj){ - global $wgEnablePersistentLC; + global $wgEnablePersistentLC, $wgEnableParserCache; if ( $wgEnablePersistentLC ) { LinkCache::linksccClearLinksTo( $title_obj->getArticleID() ); } + if ( $wgEnableParserCache ) { + OutputPage::parsercacheClearLinksTo( $title_obj->getArticleID() ); + } } /* static */ function onArticleEdit($title_obj){ - global $wgEnablePersistentLC; + global $wgEnablePersistentLC, $wgEnableParserCache; if ( $wgEnablePersistentLC ) { LinkCache::linksccClearPage( $title_obj->getArticleID() ); } + if ( $wgEnableParserCache ) { + OutputPage::parsercacheClearPage( $title_obj->getArticleID() ); + } } } diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 6996d3dea5..cfc67fb11c 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -89,6 +89,8 @@ $wgUseCategoryMagic = false; $wgEnablePersistentLC = false; # Persistent link cache in linkscc table; FAILS on MySQL 3.x $wgCompressedPersistentLC = true; # use gzcompressed blobs +$wgEnableParserCache = false; # requires that php was compiled --with-zlib + # User rights $wgWhitelistEdit = false; $wgWhitelistRead = false; diff --git a/includes/MagicWord.php b/includes/MagicWord.php index f4410b2f2e..4cc3c90283 100644 --- a/includes/MagicWord.php +++ b/includes/MagicWord.php @@ -16,7 +16,8 @@ class MagicWord { /*private*/ var $mId, $mSynonyms, $mCaseSensitive, $mRegex; /*private*/ var $mRegexStart, $mBaseRegex, $mVariableRegex; - + /*private*/ var $mModified; + function MagicWord($id = 0, $syn = "", $cs = false) { $this->mId = $id; @@ -25,6 +26,7 @@ class MagicWord { $this->mRegex = ""; $this->mRegexStart = ""; $this->mVariableRegex = ""; + $this->mModified = false; } # Factory: creates an object representing an ID @@ -43,8 +45,7 @@ class MagicWord { # Initialises this object with an ID function load( $id ) { - global $wgLang; - + global $wgLang; $this->mId = $id; $wgLang->getMagic( $this ); } @@ -112,7 +113,9 @@ class MagicWord { # Replaces the word with something else function replace( $replacement, $subject ) { - return preg_replace( $this->getRegex(), $replacement, $subject ); + $res = preg_replace( $this->getRegex(), $replacement, $subject ); + $this->mModified = !($res === $subject); + return $res; } # Variable handling: {{SUBST:xxx}} style words @@ -120,7 +123,9 @@ class MagicWord { # Input word must contain $1 function substituteCallback( $text, $callback ) { $regex = $this->getVariableRegex(); - return preg_replace_callback( $this->getVariableRegex(), $callback, $text ); + $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text ); + $this->mModified = !($res === $text); + return $res; } # Matches the word, where $1 is a wildcard @@ -136,6 +141,12 @@ class MagicWord { function getSynonym( $i ) { return $this->mSynonyms[$i]; } + + # Returns true if the last call to replace() or substituteCallback() + # returned a modified text, otherwise false. + function getWasModified(){ + return $this->mModified; + } } # Used in matchAndRemove() diff --git a/includes/OutputPage.php b/includes/OutputPage.php index 47d36101cd..c1a91be594 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -14,6 +14,7 @@ class OutputPage { var $mLanguageLinks, $mSupressQuickbar; var $mOnloadHandler; var $mDoNothing; + var $mContainsOldMagic, $mContainsNewMagic; function OutputPage() { @@ -29,6 +30,7 @@ class OutputPage { $this->mCategoryLinks = array() ; $this->mAutonumber = 0; $this->mDoNothing = false; + $this->mContainsOldMagic = $this->mContainsNewMagic = 0; } function addHeader( $name, $val ) { array_push( $this->mHeaders, "$name: $val" ) ; } @@ -121,10 +123,9 @@ class OutputPage { # First pass--just handle sections, pass the rest off # to doWikiPass2() which does all the real work. # - function addWikiText( $text, $linestart = true ) { - global $wgUseTeX; + global $wgUseTeX, $wgArticle, $wgUser, $action; $fname = "OutputPage::addWikiText"; wfProfileIn( $fname ); $unique = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4"; @@ -140,6 +141,19 @@ class OutputPage { $stripped2 = ""; $stripped3 = ""; + global $wgEnableParserCache; + $use_parser_cache = + $wgEnableParserCache && $action == "view" && + intval($wgUser->getOption( "stubthreshold" )) == 0 && + isset($wgArticle) && $wgArticle->getID() > 0; + + if( $use_parser_cache ){ + if( $this->fillFromParserCache() ){ + wfProfileOut( $fname ); + return; + } + } + while ( "" != $text ) { $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 ); $stripped .= $p[0]; @@ -202,6 +216,10 @@ class OutputPage { $escapedChars, $nwlist[$i] ), $text ); } $this->addHTML( $text ); + + if($use_parser_cache ){ + $this->saveParserCache( $text ); + } wfProfileOut( $fname ); } @@ -1218,44 +1236,54 @@ $t[] = "" ; $v = date( "m" ); $mw =& MagicWord::get( MAG_CURRENTMONTH ); $text = $mw->replace( $v, $text ); - + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } + $v = $wgLang->getMonthName( date( "n" ) ); $mw =& MagicWord::get( MAG_CURRENTMONTHNAME ); $text = $mw->replace( $v, $text ); + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } $v = $wgLang->getMonthNameGen( date( "n" ) ); $mw =& MagicWord::get( MAG_CURRENTMONTHNAMEGEN ); $text = $mw->replace( $v, $text ); + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } $v = date( "j" ); $mw = MagicWord::get( MAG_CURRENTDAY ); $text = $mw->replace( $v, $text ); + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } $v = $wgLang->getWeekdayName( date( "w" )+1 ); $mw =& MagicWord::get( MAG_CURRENTDAYNAME ); $text = $mw->replace( $v, $text ); + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } $v = date( "Y" ); $mw =& MagicWord::get( MAG_CURRENTYEAR ); $text = $mw->replace( $v, $text ); + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } $v = $wgLang->time( wfTimestampNow(), false ); $mw =& MagicWord::get( MAG_CURRENTTIME ); $text = $mw->replace( $v, $text ); + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } $mw =& MagicWord::get( MAG_NUMBEROFARTICLES ); if ( $mw->match( $text ) ) { $v = wfNumberOfArticles(); $text = $mw->replace( $v, $text ); + if( $mw->getWasModified() ) { $this->mContainsOldMagic++; } } # "Variables" with an additional parameter e.g. {{MSG:wikipedia}} # The callbacks are at the bottom of this file $mw =& MagicWord::get( MAG_MSG ); $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" ); + if( $mw->getWasModified() ) { $this->mContainsNewMagic++; } $mw =& MagicWord::get( MAG_MSGNW ); $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" ); + if( $mw->getWasModified() ) { $this->mContainsNewMagic++; } wfProfileOut( $fname ); return $text; @@ -1356,7 +1384,6 @@ $t[] = "" ; return $text; } - /* * * This function accomplishes several tasks: @@ -1600,6 +1627,76 @@ $t[] = "" ; $ret .= "\n"; return $ret; } + + /* private */ function fillFromParserCache(){ + global $wgUser, $wgArticle; + $hash = $wgUser->getPageRenderingHash(); + $pageid = intval( $wgArticle->getID() ); + $res = wfQuery("SELECT pc_data FROM parsercache WHERE pc_pageid = {$pageid} ". + " AND pc_prefhash = '{$hash}' AND pc_expire > NOW()", DB_WRITE); + $row = wfFetchObject ( $res ); + if( $row ){ + $data = unserialize( gzuncompress($row->pc_data) ); + $this->addHTML( $data['html'] ); + $this->mLanguageLinks = $data['mLanguageLinks']; + $this->mCategoryLinks = $data['mCategoryLinks']; + wfProfileOut( $fname ); + return true; + } else { + return false; + } + } + + /* private */ function saveParserCache( $text ){ + global $wgUser, $wgArticle; + $hash = $wgUser->getPageRenderingHash(); + $pageid = intval( $wgArticle->getID() ); + $title = wfStrencode( $wgArticle->mTitle->getPrefixedDBKey() ); + $data = array(); + $data['html'] = $text; + $data['mLanguageLinks'] = $this->mLanguageLinks; + $data['mCategoryLinks'] = $this->mCategoryLinks; + $ser = addslashes( gzcompress( serialize( $data ) ) ); + if( $this->mContainsOldMagic ){ + $expire = "1 HOUR"; + } else if( $this->mContainsNewMagic ){ + $expire = "1 DAY"; + } else { + $expire = "7 DAY"; + } + + wfQuery("REPLACE INTO parsercache (pc_prefhash,pc_pageid,pc_title,pc_data, pc_expire) ". + "VALUES('{$hash}', {$pageid}, '{$title}', '{$ser}', ". + "DATE_ADD(NOW(), INTERVAL {$expire}))", DB_WRITE); + + if( rand() % 50 == 0 ){ // more efficient to just do it sometimes + $this->purgeParserCache(); + } + } + + /* static private */ function purgeParserCache(){ + wfQuery("DELETE FROM parsercache WHERE pc_expire < NOW() LIMIT 250", DB_WRITE); + } + + /* static */ function parsercacheClearLinksTo( $pid ){ + $pid = intval( $pid ); + wfQuery("DELETE parsercache FROM parsercache,links ". + "WHERE pc_title=links.l_from AND l_to={$pid}", DB_WRITE); + wfQuery("DELETE FROM parsercache WHERE pc_pageid='{$pid}'", DB_WRITE); + } + + # $title is a prefixed db title, for example like Title->getPrefixedDBkey() returns. + /* static */ function parsercacheClearBrokenLinksTo( $title ){ + $title = wfStrencode( $title ); + wfQuery("DELETE parsercache FROM parsercache,brokenlinks ". + "WHERE pc_pageid=bl_from AND bl_to='{$title}'", DB_WRITE); + } + + # $pid is a page id + /* static */ function parsercacheClearPage( $pid ){ + $pid = intval( $pid ); + wfQuery("DELETE FROM parsercache WHERE pc_pageid='{$pid}'", DB_WRITE); + } } # Regex callbacks, used in OutputPage::replaceVariables diff --git a/includes/User.php b/includes/User.php index 04cbc2d203..3d1ca1090d 100644 --- a/includes/User.php +++ b/includes/User.php @@ -609,6 +609,33 @@ class User { } + function getPageRenderingHash(){ + static $hash = false; + if( $hash ){ + return $hash; + } + + // stubthreshold is only included below for completeness, + // it will always be 0 when this function is called by parsercache. + + $confstr = $this->getOption( "quickbar" ); + $confstr .= "!" . $this->getOption( "underline" ); + $confstr .= "!" . $this->getOption( "hover" ); + $confstr .= "!" . $this->getOption( "skin" ); + $confstr .= "!" . $this->getOption( "math" ); + $confstr .= "!" . $this->getOption( "highlightbroken" ); + $confstr .= "!" . $this->getOption( "stubthreshold" ); + $confstr .= "!" . $this->getOption( "editsection" ); + $confstr .= "!" . $this->getOption( "editsectiononrightclick" ); + $confstr .= "!" . $this->getOption( "showtoc" ); + $confstr .= "!" . $this->getOption( "date" ); + + if(strlen($confstr) > 32) + $hash = md5($confstr); + else + $hash = $confstr; + return $hash; + } function isAllowedToCreateAccount() { diff --git a/maintenance/archives/patch-parsercache.sql b/maintenance/archives/patch-parsercache.sql new file mode 100644 index 0000000000..533a206e3c --- /dev/null +++ b/maintenance/archives/patch-parsercache.sql @@ -0,0 +1,15 @@ +-- +-- parsercache table, for cacheing complete parsed articles +-- before they are imbedded in the skin. +-- + +CREATE TABLE parsercache ( + pc_pageid INT(11) NOT NULL, + pc_title VARCHAR(255) NOT NULL, + pc_prefhash CHAR(32) NOT NULL, + pc_expire DATETIME NOT NULL, + pc_data MEDIUMBLOB NOT NULL, + PRIMARY KEY (pc_pageid, pc_prefhash), + KEY(pc_title), + KEY(pc_expire) +) TYPE=InnoDB; -- 2.20.1