From 5698c6bcc77f6527d7eac5af5658b92e871f0ffd Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sat, 14 Aug 2004 13:34:57 +0000 Subject: [PATCH] Allowed post-parse link cache to be used for update as well as page view. This involves changes to Article::showArticle() and maintenance/refreshLinks.inc. Fixed various bugs that I noticed along the way. Added an option to recover the old 1.3.x behaviour, for profiling and debugging. --- includes/Article.php | 11 +-- includes/DefaultSettings.php | 1 + includes/OutputPage.php | 130 ++++++++++++++++++++++------------- includes/Setup.php | 16 ++++- includes/Skin.php | 2 + maintenance/refreshLinks.inc | 12 ++-- 6 files changed, 111 insertions(+), 61 deletions(-) diff --git a/includes/Article.php b/includes/Article.php index 7fedeee6ad..677b163cd9 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -726,6 +726,8 @@ class Article { ) ); } + # Put link titles into the link cache + $wgOut->replaceLinkHolders(); # Add link titles as META keywords $wgOut->addMetaTags() ; @@ -1031,14 +1033,13 @@ class Article { $wgLinkCache->preFill( $this->mTitle ); $wgLinkCache->clear(); - # Switch on use of link cache in the skin - $sk =& $wgUser->getSkin(); - $sk->postParseLinkColour( false ); - - # Now update the link cache by parsing the text + # Parse the text and replace links with placeholders $wgOut = new OutputPage(); $wgOut->addWikiText( $text ); + # Look up the links in the DB and add them to the link cache + $wgOut->replaceLinkHolders( RLH_FOR_UPDATE ); + if( $wgMwRedir->matchStart( $text ) ) $r = 'redirect=no'; else diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 18151781a6..044c904635 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -177,6 +177,7 @@ $wgUseCategoryBrowser = false; $wgEnablePersistentLC = false; # Obsolete, do not use $wgCompressedPersistentLC = true; # use gzcompressed blobs +$wgUseOldExistenceCheck = false; # use old prefill link method, for 
debugging only $wgEnableParserCache = false; # requires that php was compiled --with-zlib diff --git a/includes/OutputPage.php b/includes/OutputPage.php index f0a4a283c7..bbda6ef01c 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -7,13 +7,15 @@ if( defined( "MEDIAWIKI" ) ) { if($wgUseTeX) require_once( "Math.php" ); +define( 'RLH_FOR_UPDATE', 1 ); + class OutputPage { var $mHeaders, $mCookies, $mMetatags, $mKeywords; var $mLinktags, $mPagetitle, $mBodytext, $mDebugtext; var $mHTMLtitle, $mRobotpolicy, $mIsarticle, $mPrintable; var $mSubtitle, $mRedirect; var $mLastModified, $mCategoryLinks; - var $mScripts; + var $mScripts, $mLinkColours; var $mSuppressQuickbar; var $mOnloadHandler; @@ -350,7 +352,7 @@ class OutputPage { $this->sendCacheControl(); # Perform link colouring - $this->mBodytext = $this->parseLinkHolders(); + $this->replaceLinkHolders(); # Disable temporary placeholders, so that the skin produces HTML $sk->postParseLinkColour( false ); @@ -764,14 +766,27 @@ class OutputPage { return $ret; } - # Parse link placeholders to avoid using linkcache + # Replace link placeholders with actual links, in the buffer # Placeholders created in Skin::makeLinkObj() - function parseLinkHolders() + # Returns an array of links found, indexed by PDBK: + # 0 - broken + # 1 - normal link + # 2 - stub + # $options is a bit field, RLH_FOR_UPDATE to select for update + function replaceLinkHolders( $options = 0 ) { - global $wgUser; + global $wgUser, $wgLinkCache, $wgUseOldExistenceCheck; - $fname = 'OutputPage::parseLinkHolders'; + if ( $wgUseOldExistenceCheck ) { + return array(); + } + + $fname = 'OutputPage::replaceLinkHolders'; wfProfileIn( $fname ); + + $titles = array(); + $pdbks = array(); + $colours = array(); # Get placeholders from body preg_match_all( "//", $this->mBodytext, $tmpLinks ); @@ -791,71 +806,88 @@ class OutputPage { asort( $namespaces ); # Generate query + $query = false; foreach ( $namespaces as $key => $val ) { - if ( !isset( 
$current ) ) { - $current = $val; - $query = "SELECT cur_namespace, cur_title"; - if ( $threshold > 0 ) { - $query .= ", LENGTH(cur_text) AS cur_len, cur_is_redirect"; - } - $query .= " FROM $cur WHERE (cur_namespace=$val AND cur_title IN("; - } elseif ( $current != $val ) { - $current = $val; - $query .= ")) OR (cur_namespace=$val AND cur_title IN("; + # Make title object + $dbk = $dbkeys[$key]; + $title = $titles[$key] = Title::makeTitle( $val, $dbk ); + $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); + + # Check if it's in the link cache already + if ( $wgLinkCache->getGoodLinkID( $pdbk ) ) { + $colours[$pdbk] = 1; + } elseif ( $wgLinkCache->isBadLink( $pdbk ) ) { + $colours[$pdbk] = 0; } else { - $query .= ", "; - } + # Not in the link cache, add it to the query + if ( !isset( $current ) ) { + $current = $val; + $query = "SELECT cur_id, cur_namespace, cur_title"; + if ( $threshold > 0 ) { + $query .= ", LENGTH(cur_text) AS cur_len, cur_is_redirect"; + } + $query .= " FROM $cur WHERE (cur_namespace=$val AND cur_title IN("; + } elseif ( $current != $val ) { + $current = $val; + $query .= ")) OR (cur_namespace=$val AND cur_title IN("; + } else { + $query .= ", "; + } - $query .= $dbr->addQuotes( $dbkeys[$key] ); + $query .= $dbr->addQuotes( $dbkeys[$key] ); + } } - - $query .= "))"; - - $res = $dbr->query( $query, $fname ); + if ( $query ) { + $query .= "))"; + if ( $options & RLH_FOR_UPDATE ) { + $query .= " FOR UPDATE"; + } - # Fetch data and form into an associative array - # non-existent = broken - # 1 = known - # 2 = stub - $colours = array(); - while ( $s = $dbr->fetchObject($res) ) { - $key = $s->cur_namespace . ' ' . 
$s->cur_title; - if ( $threshold > 0 ) { - $size = $s->cur_len; - if ( $s->cur_is_redirect || $s->cur_namespace != 0 || $length < $threshold ) { - $colours[$key] = 1; + $res = $dbr->query( $query, $fname ); + + # Fetch data and form into an associative array + # non-existent = broken + # 1 = known + # 2 = stub (main-namespace non-redirect shorter than $threshold) + while ( $s = $dbr->fetchObject($res) ) { + $title = Title::makeTitle( $s->cur_namespace, $s->cur_title ); + $pdbk = $title->getPrefixedDBkey(); + $wgLinkCache->addGoodLink( $s->cur_id, $pdbk ); + + if ( $threshold > 0 ) { + $size = $s->cur_len; + if ( $s->cur_is_redirect || $s->cur_namespace != 0 || $size >= $threshold ) { + $colours[$pdbk] = 1; + } else { + $colours[$pdbk] = 2; + } } else { - $colours[$key] = 2; + $colours[$pdbk] = 1; } - $colours[$key] = array( $s->cur_len, $s->cur_is_redirect ); - } else { - $colours[$key] = 1; } } # Construct search and replace arrays $search = $replace = array(); foreach ( $namespaces as $key => $ns ) { - $cKey = $ns . ' ' . $dbkeys[$key]; + $pdbk = $pdbks[$key]; $search[] = $tmpLinks[0][$key]; - $title = Title::makeTitle( $ns, $dbkeys[$key] ); - if ( empty( $colours[$cKey] ) ) { + $title = $titles[$key]; + if ( empty( $colours[$pdbk] ) ) { + $wgLinkCache->addBadLink( $pdbk ); + $colours[$pdbk] = 0; $replace[] = $sk->makeBrokenLinkObj( $title, $texts[$key], $queries[$key] ); - } elseif ( $colours[$cKey] == 1 ) { + } elseif ( $colours[$pdbk] == 1 ) { $replace[] = $sk->makeKnownLinkObj( $title, $texts[$key], $queries[$key] ); - } elseif ( $colours[$cKey] == 2 ) { + } elseif ( $colours[$pdbk] == 2 ) { $replace[] = $sk->makeStubLinkObj( $title, $texts[$key], $queries[$key] ); } } - # Do the thing - $out = str_replace( $search, $replace, $this->mBodytext ); - } else { - $out = $this->mBodytext; + $this->mBodytext = str_replace( $search, $replace, $this->mBodytext ); } wfProfileOut( $fname ); - return ( $out ); + return $colours; } } diff --git a/includes/Setup.php b/includes/Setup.php index 6a5063e981..280cbbd525 100644 
--- a/includes/Setup.php +++ b/includes/Setup.php @@ -69,7 +69,7 @@ $wgRequest = new WebRequest(); wfProfileOut( $fname.'-includes' ); -wfProfileIn( $fname.'-memcached' ); +wfProfileIn( $fname.'-misc1' ); global $wgUser, $wgLang, $wgOut, $wgTitle; global $wgArticle, $wgDeferredUpdateList, $wgLinkCache; global $wgMemc, $wgMagicWords, $wgMwRedir, $wgDebugLogFile; @@ -78,6 +78,8 @@ global $wgMsgCacheExpiry, $wgCommandLineMode; global $wgBlockCache, $wgParserCache, $wgParser, $wgDBConnections; global $wgLoadBalancer, $wgDBservers, $wgDebugDumpSql; global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname, $wgDBtype; +global $wgUseOldExistenceCheck, $wgEnablePersistentLC; + global $wgFullyInitialised; # Useful debug output @@ -95,6 +97,14 @@ if ( $wgCommandLineMode ) { wfDebug( $_SERVER['REQUEST_METHOD'] . ' ' . $_SERVER['REQUEST_URI'] . "\n" ); } +# Disable linkscc except if the old existence check method is enabled +if (!$wgUseOldExistenceCheck) { + $wgEnablePersistentLC = false; +} + +wfProfileOut( $fname.'-misc1' ); +wfProfileIn( $fname.'-memcached' ); + # Set up Memcached # class MemCachedClientforWiki extends memcached { @@ -230,7 +240,7 @@ if( $wgCommandLineMode ) { } wfProfileOut( $fname.'-User' ); -wfProfileIn( $fname.'-misc' ); +wfProfileIn( $fname.'-misc2' ); $wgDeferredUpdateList = array(); $wgLinkCache = new LinkCache(); @@ -246,7 +256,7 @@ wfSeedRandom(); $wgTitle = Title::newFromText( wfMsg( 'badtitle' ) ); $wgArticle = new Article($wgTitle); -wfProfileOut( $fname.'-misc' ); +wfProfileOut( $fname.'-misc2' ); wfProfileIn( $fname.'-extensions' ); # Extension setup functions diff --git a/includes/Skin.php b/includes/Skin.php index f84e340b1a..febacbcc19 100644 --- a/includes/Skin.php +++ b/includes/Skin.php @@ -54,6 +54,8 @@ class Skin { function Skin() { + global $wgUseOldExistenceCheck; + $this->postParseLinkColour = !$wgUseOldExistenceCheck; $this->linktrail = wfMsg('linktrail'); } diff --git a/maintenance/refreshLinks.inc b/maintenance/refreshLinks.inc index 
2261803f8e..41dfce3656 100644 --- a/maintenance/refreshLinks.inc +++ b/maintenance/refreshLinks.inc @@ -15,9 +15,6 @@ function refreshLinks( $start ) { # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway) $wgUser->setOption("math", 3); - # Turn on link cache in skin - $sk =& $wgUser->getSkin(); - $sk->postParseLinkColour( false ); for ($id = $start; $id <= $end; $id++) { if ( !($id % REPORTING_INTERVAL) ) { @@ -32,12 +29,18 @@ function refreshLinks( $start ) { if ( is_null( $wgTitle ) ) { continue; } - + $dbw->query("BEGIN"); + $wgArticle = new Article( $wgTitle ); $text = $wgArticle->getContent( true ); $wgLinkCache = new LinkCache; $wgLinkCache->forUpdate( true ); + + # Parse the text and replace links with placeholders $wgOut->addWikiText( $text ); + + # Look up the links in the DB and add them to the link cache + $wgOut->replaceLinkHolders( RLH_FOR_UPDATE ); if ( $wgEnablePersistentLC ) { $wgLinkCache->saveToLinkscc( $id, $dbw->strencode( $wgTitle->getPrefixedDBkey() ) ); @@ -46,6 +49,7 @@ function refreshLinks( $start ) { $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() ); $linksUpdate->doDumbUpdate(); $linksUpdate->fixBrokenLinks(); + $dbw->query("COMMIT"); } } ?> -- 2.20.1