Allowed post-parse link cache to be used for update as well as page view. This involv...
author Tim Starling <tstarling@users.mediawiki.org>
Sat, 14 Aug 2004 13:34:57 +0000 (13:34 +0000)
committer Tim Starling <tstarling@users.mediawiki.org>
Sat, 14 Aug 2004 13:34:57 +0000 (13:34 +0000)
includes/Article.php
includes/DefaultSettings.php
includes/OutputPage.php
includes/Setup.php
includes/Skin.php
maintenance/refreshLinks.inc

index 7fedeee..677b163 100644 (file)
@@ -726,6 +726,8 @@ class Article {
                         ) );
                }
 
+               # Put link titles into the link cache
+               $wgOut->replaceLinkHolders();
                # Add link titles as META keywords
                $wgOut->addMetaTags() ;
 
@@ -1031,14 +1033,13 @@ class Article {
                $wgLinkCache->preFill( $this->mTitle );
                $wgLinkCache->clear();
 
-               # Switch on use of link cache in the skin
-               $sk =& $wgUser->getSkin();
-               $sk->postParseLinkColour( false );
-
-               # Now update the link cache by parsing the text
+               # Parse the text and replace links with placeholders
                $wgOut = new OutputPage();
                $wgOut->addWikiText( $text );
 
+               # Look up the links in the DB and add them to the link cache
+               $wgOut->replaceLinkHolders( RLH_FOR_UPDATE );
+
                if( $wgMwRedir->matchStart( $text ) )
                        $r = 'redirect=no';
                else
index 1815178..044c904 100644 (file)
@@ -177,6 +177,7 @@ $wgUseCategoryBrowser   = false;
 
 $wgEnablePersistentLC  = false;        # Obsolete, do not use
 $wgCompressedPersistentLC = true; # use gzcompressed blobs
+$wgUseOldExistenceCheck = false;  # use old prefill link method, for debugging only
 
 $wgEnableParserCache = false; # requires that php was compiled --with-zlib
 
index f0a4a28..bbda6ef 100644 (file)
@@ -7,13 +7,15 @@ if( defined( "MEDIAWIKI" ) ) {
 
 if($wgUseTeX) require_once( "Math.php" );
 
+define( 'RLH_FOR_UPDATE', 1 );
+
 class OutputPage {
        var $mHeaders, $mCookies, $mMetatags, $mKeywords;
        var $mLinktags, $mPagetitle, $mBodytext, $mDebugtext;
        var $mHTMLtitle, $mRobotpolicy, $mIsarticle, $mPrintable;
        var $mSubtitle, $mRedirect;
        var $mLastModified, $mCategoryLinks;
-       var $mScripts;
+       var $mScripts, $mLinkColours;
        
        var $mSuppressQuickbar;
        var $mOnloadHandler;
@@ -350,7 +352,7 @@ class OutputPage {
 
                $this->sendCacheControl();
                # Perform link colouring
-               $this->mBodytext = $this->parseLinkHolders();
+               $this->replaceLinkHolders();
                
                # Disable temporary placeholders, so that the skin produces HTML
                $sk->postParseLinkColour( false );
@@ -764,14 +766,27 @@ class OutputPage {
                return $ret;
        }
        
-       # Parse <!--LINK--> link placeholders to avoid using linkcache
+       # Replace <!--LINK--> link placeholders with actual links, in the buffer
        # Placeholders created in Skin::makeLinkObj()
-       function parseLinkHolders()
+       # Returns an array of links found, indexed by PDBK:
+       #   0 - broken
+       #   1 - normal link
+       #   2 - stub
+       # $options is a bit field, RLH_FOR_UPDATE to select for update
+       function replaceLinkHolders( $options = 0 )
        {
-               global $wgUser;
+               global $wgUser, $wgLinkCache, $wgUseOldExistenceCheck;
                
-               $fname = 'OutputPage::parseLinkHolders';
+               if ( $wgUseOldExistenceCheck ) {
+                       return array();
+               }
+
+               $fname = 'OutputPage::replaceLinkHolders';
                wfProfileIn( $fname );
+
+               $titles = array();
+               $pdbks = array();
+               $colours = array();
                
                # Get placeholders from body
                preg_match_all( "/<!--LINK (.*?) (.*?) (.*?) (.*?)-->/", $this->mBodytext, $tmpLinks );
@@ -791,71 +806,88 @@ class OutputPage {
                        asort( $namespaces );
        
                        # Generate query
+                       $query = false;
                        foreach ( $namespaces as $key => $val ) {
-                               if ( !isset( $current ) ) {
-                                       $current = $val;
-                                       $query =  "SELECT cur_namespace, cur_title";
-                                       if ( $threshold > 0 ) {
-                                               $query .= ", LENGTH(cur_text) AS cur_len, cur_is_redirect";
-                                       } 
-                                       $query .= " FROM $cur WHERE (cur_namespace=$val AND cur_title IN(";
-                               } elseif ( $current != $val ) {
-                                       $current = $val;
-                                       $query .= ")) OR (cur_namespace=$val AND cur_title IN(";
+                               # Make title object
+                               $dbk = $dbkeys[$key];
+                               $title = $titles[$key] = Title::makeTitle( $val, $dbk );
+                               $pdbk = $pdbks[$key] = $title->getPrefixedDBkey();
+
+                               # Check if it's in the link cache already
+                               if ( $wgLinkCache->getGoodLinkID( $pdbk ) ) {
+                                       $colours[$pdbk] = 1;
+                               } elseif ( $wgLinkCache->isBadLink( $pdbk ) ) {
+                                       $colours[$pdbk] = 0;
                                } else {
-                                       $query .= ", ";
-                               }
+                                       # Not in the link cache, add it to the query
+                                       if ( !isset( $current ) ) {
+                                               $current = $val;
+                                               $query =  "SELECT cur_id, cur_namespace, cur_title";
+                                               if ( $threshold > 0 ) {
+                                                       $query .= ", LENGTH(cur_text) AS cur_len, cur_is_redirect";
+                                               } 
+                                               $query .= " FROM $cur WHERE (cur_namespace=$val AND cur_title IN(";
+                                       } elseif ( $current != $val ) {
+                                               $current = $val;
+                                               $query .= ")) OR (cur_namespace=$val AND cur_title IN(";
+                                       } else {
+                                               $query .= ", ";
+                                       }
                                
-                               $query .= $dbr->addQuotes( $dbkeys[$key] );
+                                       $query .= $dbr->addQuotes( $dbkeys[$key] );
+                               }
                        }
-
-                       $query .= "))";
-                       
-                       $res = $dbr->query( $query, $fname );
+                       if ( $query ) {
+                               $query .= "))";
+                               if ( $options & RLH_FOR_UPDATE ) {
+                                       $query .= " FOR UPDATE";
+                               }
                        
-                       # Fetch data and form into an associative array
-                       # non-existent = broken
-                       # 1 = known
-                       # 2 = stub
-                       $colours = array();
-                       while ( $s = $dbr->fetchObject($res) ) {
-                               $key = $s->cur_namespace . ' ' . $s->cur_title;
-                               if ( $threshold >  0 ) {
-                                       $size = $s->cur_len;
-                                       if ( $s->cur_is_redirect || $s->cur_namespace != 0 || $length < $threshold ) {
-                                               $colours[$key] = 1;
+                               $res = $dbr->query( $query, $fname );
+                               
+                               # Fetch data and form into an associative array
+                               # non-existent = broken
+                               # 1 = known
+                               # 2 = stub
+                               while ( $s = $dbr->fetchObject($res) ) {
+                                       $title = Title::makeTitle( $s->cur_namespace, $s->cur_title );
+                                       $pdbk = $title->getPrefixedDBkey();
+                                       $wgLinkCache->addGoodLink( $s->cur_id, $pdbk );
+                                       
+                                       if ( $threshold >  0 ) {
+                                               $size = $s->cur_len;
+                                               if ( $s->cur_is_redirect || $s->cur_namespace != 0 || $length < $threshold ) {
+                                                       $colours[$pdbk] = 1;
+                                               } else {
+                                                       $colours[$pdbk] = 2;
+                                               }
                                        } else {
-                                               $colours[$key] = 2;
+                                               $colours[$pdbk] = 1;
                                        }
-                                       $colours[$key] = array( $s->cur_len, $s->cur_is_redirect );
-                               } else {
-                                       $colours[$key] = 1;
                                }
                        }
                        
                        # Construct search and replace arrays
                        $search = $replace = array();
                        foreach ( $namespaces as $key => $ns ) {
-                               $cKey = $ns . ' ' . $dbkeys[$key];
+                               $pdbk = $pdbks[$key];
                                $search[] = $tmpLinks[0][$key];
-                               $title = Title::makeTitle( $ns, $dbkeys[$key] );
-                               if ( empty( $colours[$cKey] ) ) {
+                               $title = $titles[$key];
+                               if ( empty( $colours[$pdbk] ) ) {
+                                       $wgLinkCache->addBadLink( $pdbk );
+                                       $colours[$pdbk] = 0;
                                        $replace[] = $sk->makeBrokenLinkObj( $title, $texts[$key], $queries[$key] );
-                               } elseif ( $colours[$cKey] == 1 ) {
+                               } elseif ( $colours[$pdbk] == 1 ) {
                                        $replace[] = $sk->makeKnownLinkObj( $title, $texts[$key], $queries[$key] );
-                               } elseif ( $colours[$cKey] == 2 ) {
+                               } elseif ( $colours[$pdbk] == 2 ) {
                                        $replace[] = $sk->makeStubLinkObj( $title, $texts[$key], $queries[$key] );
                                }
                        }
-
                        # Do the thing
-                       $out = str_replace( $search, $replace, $this->mBodytext );
-               } else {
-                       $out = $this->mBodytext;
+                       $this->mBodytext = str_replace( $search, $replace, $this->mBodytext );
                }
-               
                wfProfileOut( $fname );
-               return ( $out );
+               return $colours;
        }
 }
 
index 6a5063e..280cbbd 100644 (file)
@@ -69,7 +69,7 @@ $wgRequest = new WebRequest();
 
 
 wfProfileOut( $fname.'-includes' );
-wfProfileIn( $fname.'-memcached' );
+wfProfileIn( $fname.'-misc1' );
 global $wgUser, $wgLang, $wgOut, $wgTitle;
 global $wgArticle, $wgDeferredUpdateList, $wgLinkCache;
 global $wgMemc, $wgMagicWords, $wgMwRedir, $wgDebugLogFile;
@@ -78,6 +78,8 @@ global $wgMsgCacheExpiry, $wgCommandLineMode;
 global $wgBlockCache, $wgParserCache, $wgParser, $wgDBConnections;
 global $wgLoadBalancer, $wgDBservers, $wgDebugDumpSql;
 global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname, $wgDBtype;
+global $wgUseOldExistenceCheck, $wgEnablePersistentLC;
+
 global $wgFullyInitialised;
 
 # Useful debug output
@@ -95,6 +97,14 @@ if ( $wgCommandLineMode ) {
        wfDebug( $_SERVER['REQUEST_METHOD'] . ' ' . $_SERVER['REQUEST_URI'] . "\n" );
 }
 
+# Disable linkscc except if the old existence check method is enabled
+if (!$wgUseOldExistenceCheck) {
+       $wgEnablePersistentLC = false;
+}
+
+wfProfileOut( $fname.'-misc1' );
+wfProfileIn( $fname.'-memcached' );
+
 # Set up Memcached
 #
 class MemCachedClientforWiki extends memcached {
@@ -230,7 +240,7 @@ if( $wgCommandLineMode ) {
 }
 
 wfProfileOut( $fname.'-User' );
-wfProfileIn( $fname.'-misc' );
+wfProfileIn( $fname.'-misc2' );
 
 $wgDeferredUpdateList = array();
 $wgLinkCache = new LinkCache();
@@ -246,7 +256,7 @@ wfSeedRandom();
 $wgTitle = Title::newFromText( wfMsg( 'badtitle' ) );
 $wgArticle = new Article($wgTitle);
 
-wfProfileOut( $fname.'-misc' );
+wfProfileOut( $fname.'-misc2' );
 wfProfileIn( $fname.'-extensions' );
 
 # Extension setup functions
index f84e340..febacbc 100644 (file)
@@ -54,6 +54,8 @@ class Skin {
 
        function Skin()
        {
+               global $wgUseOldExistenceCheck;
+               $postParseLinkColour = !$wgUseOldExistenceCheck;
                $this->linktrail = wfMsg('linktrail');
        }
 
index 2261803..41dfce3 100644 (file)
@@ -15,9 +15,6 @@ function refreshLinks( $start ) {
        # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
        $wgUser->setOption("math", 3);
        
-       # Turn on link cache in skin
-       $sk =& $wgUser->getSkin();
-       $sk->postParseLinkColour( false );
 
        for ($id = $start; $id <= $end; $id++) {
                if ( !($id % REPORTING_INTERVAL) ) {
@@ -32,12 +29,18 @@ function refreshLinks( $start ) {
                if ( is_null( $wgTitle ) ) {
                        continue;
                }
-               
+               $dbw->query("BEGIN");
+
                $wgArticle = new Article( $wgTitle );
                $text = $wgArticle->getContent( true );
                $wgLinkCache = new LinkCache;
                $wgLinkCache->forUpdate( true );
+
+               # Parse the text and replace links with placeholders
                $wgOut->addWikiText( $text );
+               
+               # Look up the links in the DB and add them to the link cache
+               $wgOut->replaceLinkHolders( RLH_FOR_UPDATE );
 
                if ( $wgEnablePersistentLC ) {
                        $wgLinkCache->saveToLinkscc( $id, $dbw->strencode( $wgTitle->getPrefixedDBkey() ) );
@@ -46,6 +49,7 @@ function refreshLinks( $start ) {
                $linksUpdate = new LinksUpdate( $id, $wgTitle->getPrefixedDBkey() );
                $linksUpdate->doDumbUpdate();
                $linksUpdate->fixBrokenLinks();
+               $dbw->query("COMMIT");
        }
 }
 ?>