Added a new method to update the cache of all pages linking to a given page without...
authorTim Starling <tstarling@users.mediawiki.org>
Sun, 18 Jun 2006 12:42:16 +0000 (12:42 +0000)
committerTim Starling <tstarling@users.mediawiki.org>
Sun, 18 Jun 2006 12:42:16 +0000 (12:42 +0000)
includes/Article.php
includes/AutoLoader.php
includes/DefaultSettings.php
includes/HTMLCacheUpdate.php [new file with mode: 0644]
includes/Image.php
includes/ImagePage.php
includes/JobQueue.php
includes/LinksUpdate.php
includes/SquidUpdate.php
includes/Title.php

index d3c9ca2..13dc5a0 100644 (file)
@@ -1181,7 +1181,6 @@ class Article {
                # Update the page record with revision data
                $this->updateRevisionOn( $dbw, $revision, 0 );
 
-               Article::onArticleCreate( $this->mTitle );
                if(!$suppressRC) {
                        $rcid = RecentChange::notifyNew( $now, $this->mTitle, $isminor, $wgUser, $summary, 'default',
                          '', strlen( $text ), $revisionId );
@@ -1207,11 +1206,14 @@ class Article {
                               'page_title' => $ttl ),
                        $fname );
 
-               # standard deferred updates
+               # Update links, etc.
                $this->editUpdates( $text, $summary, $isminor, $now, $revisionId );
 
-               $oldid = 0; # new article
-               $this->showArticle( $text, wfMsg( 'newarticle' ), false, $isminor, $now, $summary, $oldid );
+               # Clear caches
+               Article::onArticleCreate( $this->mTitle );
+
+               # Output a redirect back to the article
+               $this->doRedirect( $this->isRedirect( $text ) );
 
                wfRunHooks( 'ArticleInsertComplete', array( &$this, &$wgUser, $text,
                        $summary, $isminor,
@@ -1318,7 +1320,6 @@ class Article {
                                'text'       => $text
                                ) );
 
-                       $dbw->immediateCommit();
                        $dbw->begin();
                        $revisionId = $revision->insertOn( $dbw );
 
@@ -1330,7 +1331,7 @@ class Article {
                                $good = false;
                                $dbw->rollback();
                        } else {
-                               # Update recentchanges and purge cache and whatnot
+                               # Update recentchanges
                                $bot = (int)($wgUser->isBot() || $forceBot);
                                $rcid = RecentChange::notifyEdit( $now, $this->mTitle, $isminor, $wgUser, $summary,
                                        $lastRevision, $this->getTimestamp(), $bot, '', $oldsize, $newsize,
@@ -1342,9 +1343,6 @@ class Article {
                                }
                                        
                                $dbw->commit();
-
-                               // Update caches outside the main transaction
-                               Article::onArticleEdit( $this->mTitle );
                        }
                } else {
                        // Keep the same revision ID, but do some updates on it
@@ -1356,52 +1354,28 @@ class Article {
                }
 
                if ( $good ) {
+                       # Invalidate cache of this article and all pages using this article 
+                       # as a template. Partly deferred.
+                       Article::onArticleEdit( $this->mTitle );
+                       
                        if ($watchthis) {
                                if (!$this->mTitle->userIsWatching()) {
-                                       $dbw->immediateCommit();
                                        $dbw->begin();
                                        $this->doWatch();
                                        $dbw->commit();
                                }
                        } else {
                                if ( $this->mTitle->userIsWatching() ) {
-                                       $dbw->immediateCommit();
                                        $dbw->begin();
                                        $this->doUnwatch();
                                        $dbw->commit();
                                }
                        }
-                       # standard deferred updates
+                       # Update links tables, site stats, etc.
                        $this->editUpdates( $text, $summary, $minor, $now, $revisionId );
 
-
-                       $urls = array();
-                       # Invalidate caches of all articles using this article as a template
-
-                       # Template namespace
-                       # Purge all articles linking here
-                       $titles = $this->mTitle->getTemplateLinksTo();
-                       Title::touchArray( $titles );
-                       if ( $wgUseSquid ) {
-                                       foreach ( $titles as $title ) {
-                                               $urls[] = $title->getInternalURL();
-                                       }
-                       }
-
-                       # Squid updates
-                       if ( $wgUseSquid ) {
-                               $urls = array_merge( $urls, $this->mTitle->getSquidURLs() );
-                               $u = new SquidUpdate( $urls );
-                               array_push( $wgPostCommitUpdateList, $u );
-                       }
-
-                       # File cache
-                       if ( $wgUseFileCache ) {
-                               $cm = new CacheManager($this->mTitle);
-                               @unlink($cm->fileCacheName());
-                       }
-
-                       $this->showArticle( $text, wfMsg( 'updated' ), $sectionanchor, $isminor, $now, $summary, $lastRevision );
+                       # Output a redirect back to the article
+                       $this->doRedirect( $this->isRedirect( $text ), $sectionanchor );
                }
                wfRunHooks( 'ArticleSaveComplete',
                        array( &$this, &$wgUser, $text,
@@ -1412,26 +1386,29 @@ class Article {
        }
 
        /**
-        * After we've either updated or inserted the article, update
-        * the link tables and redirect to the new page.
-        * @todo FIXME some function arguments never used
+        * @deprecated wrapper for doRedirect
         */
        function showArticle( $text, $subtitle , $sectionanchor = '', $me2, $now, $summary, $oldid ) {
-               global $wgOut;
-
-               $fname = 'Article::showArticle';
-               wfProfileIn( $fname );
-
-               # Output the redirect
-               if( $this->isRedirect( $text ) )
-                       $r = 'redirect=no';
-               else
-                       $r = '';
-               $wgOut->redirect( $this->mTitle->getFullURL( $r ).$sectionanchor );
-
-               wfProfileOut( $fname );
+               $this->doRedirect( $this->isRedirect( $text ), $sectionanchor );
        }
 
+       /**
+        * Redirect the client back to this article.
+        * Normally called once an edit has been saved.
+        *
+        * @param boolean $noRedir When true, redirect=no is added to the URL query
+        * @param string $sectionAnchor Section to land on, including the leading "#"
+        */
+       function doRedirect( $noRedir = false, $sectionAnchor = '' ) {
+               global $wgOut;
+               $query = $noRedir ? 'redirect=no' : '';
+               $target = $this->mTitle->getFullURL( $query ) . $sectionAnchor;
+               $wgOut->redirect( $target );
+       }
+               
        /**
         * Mark this particular edit as patrolled
         */
@@ -1927,24 +1904,6 @@ class Article {
                $u = new SiteStatsUpdate( 0, 1, -(int)$this->isCountable( $this->getContent() ), -1 );
                array_push( $wgDeferredUpdateList, $u );
 
-               $linksTo = $this->mTitle->getLinksTo();
-
-               # Squid purging
-               if ( $wgUseSquid ) {
-                       $urls = array(
-                               $this->mTitle->getInternalURL(),
-                               $this->mTitle->getInternalURL( 'history' )
-                       );
-
-                       $u = SquidUpdate::newFromTitles( $linksTo, $urls );
-                       array_push( $wgPostCommitUpdateList, $u );
-
-               }
-
-               # Client and file cache invalidation
-               Title::touchArray( $linksTo );
-
-
                // For now, shunt the revision data into the archive table.
                // Text is *not* removed from the text table; bulk storage
                // is left intact to avoid breaking block-compression or
@@ -1985,6 +1944,7 @@ class Article {
                # Finally, clean up the link tables
                $t = $this->mTitle->getPrefixedDBkey();
 
+               # Clear caches
                Article::onArticleDelete( $this->mTitle );
 
                # Delete outgoing links
@@ -2042,12 +2002,10 @@ class Article {
                $tt = $this->mTitle->getDBKey();
                $n = $this->mTitle->getNamespace();
 
-               # Get the last editor, lock table exclusively
-               $dbw->begin();
+               # Get the last editor
                $current = Revision::newFromTitle( $this->mTitle );
                if( is_null( $current ) ) {
                        # Something wrong... no page?
-                       $dbw->rollback();
                        $wgOut->addHTML( wfMsg( 'notanarticle' ) );
                        return;
                }
@@ -2082,7 +2040,6 @@ class Article {
                        );
                if( $s === false ) {
                        # Something wrong
-                       $dbw->rollback();
                        $wgOut->setPageTitle(wfMsg('rollbackfailed'));
                        $wgOut->addHTML( wfMsg( 'cantrollback' ) );
                        return;
@@ -2119,9 +2076,7 @@ class Article {
                $wgOut->addHTML( '<h2>' . htmlspecialchars( $newComment ) . "</h2>\n<hr />\n" );
 
                $this->updateArticle( $target->getText(), $newComment, 1, $this->mTitle->userIsWatching(), $bot );
-               Article::onArticleEdit( $this->mTitle );
 
-               $dbw->commit();
                $wgOut->returnToMain( false );
        }
 
@@ -2149,7 +2104,9 @@ class Article {
 
        /**
         * Do standard deferred updates after page edit.
+        * Update links tables, site stats, search index and message cache.
         * Every 1000th edit, prune the recent changes table.
+        * 
         * @private
         * @param string $text
         */
@@ -2447,27 +2404,22 @@ class Article {
         * @param $title_obj a title object
         */
 
-       function onArticleCreate($title_obj) {
-               global $wgUseSquid, $wgPostCommitUpdateList;
-
-               $title_obj->touchLinks();
-               $titles = $title_obj->getLinksTo();
-
-               # Purge squid
-               if ( $wgUseSquid ) {
-                       $urls = $title_obj->getSquidURLs();
-                       foreach ( $titles as $linkTitle ) {
-                               $urls[] = $linkTitle->getInternalURL();
-                       }
-                       $u = new SquidUpdate( $urls );
-                       array_push( $wgPostCommitUpdateList, $u );
-               }
+       static function onArticleCreate($title) {
+               $title->touchLinks();
+               $title->purgeSquid();
        }
 
-       function onArticleDelete( $title ) {
-               global $wgMessageCache;
+       static function onArticleDelete( $title ) {
+               global $wgUseFileCache, $wgMessageCache;
 
                $title->touchLinks();
+               $title->purgeSquid();
+               
+               # File cache
+               if ( $wgUseFileCache ) {
+                       $cm = new CacheManager( $title );
+                       @unlink( $cm->fileCacheName() );
+               }
 
                if( $title->getNamespace() == NS_MEDIAWIKI) {
                        $wgMessageCache->replace( $title->getDBkey(), false );
@@ -2477,31 +2429,19 @@ class Article {
        /**
         * Purge caches on page update etc
         */
-       function onArticleEdit( $title ) {
-               global $wgUseSquid, $wgPostCommitUpdateList, $wgUseFileCache;
+       static function onArticleEdit( $title ) {
+               global $wgDeferredUpdateList, $wgUseFileCache;
 
                $urls = array();
 
-               // Template namespace? Purge all articles linking here.
-               // FIXME: When a templatelinks table arrives, use it for all includes.
-               if ( $title->getNamespace() == NS_TEMPLATE) {
-                       $titles = $title->getLinksTo();
-                       Title::touchArray( $titles );
-                       if ( $wgUseSquid ) {
-                               foreach ( $titles as $link ) {
-                                       $urls[] = $link->getInternalURL();
-                               }
-                       }
-               }
+               // Invalidate caches of articles which include this page
+               $update = new HTMLCacheUpdate( $title, 'templatelinks' );
+               $wgDeferredUpdateList[] = $update;
 
-               # Squid updates
-               if ( $wgUseSquid ) {
-                       $urls = array_merge( $urls, $title->getSquidURLs() );
-                       $u = new SquidUpdate( $urls );
-                       array_push( $wgPostCommitUpdateList, $u );
-               }
+               # Purge squid for this page only
+               $title->purgeSquid();
 
-               # File cache
+               # Clear file cache
                if ( $wgUseFileCache ) {
                        $cm = new CacheManager( $title );
                        @unlink( $cm->fileCacheName() );
index d60339a..5c0122a 100644 (file)
@@ -80,6 +80,8 @@ function __autoload($class_name) {
                'ConcatenatedGzipHistoryBlob' => 'HistoryBlob.php',
                'HistoryBlobStub' => 'HistoryBlob.php',
                'HistoryBlobCurStub' => 'HistoryBlob.php',
+               'HTMLCacheUpdate' => 'HTMLCacheUpdate.php',
+               'HTMLCacheUpdateJob' => 'HTMLCacheUpdate.php',
                'Image' => 'Image.php',
                'ThumbnailImage' => 'Image.php',
                'ImageGallery' => 'ImageGallery.php',
@@ -224,4 +226,4 @@ function __autoload($class_name) {
        }
 }
 
-?>
\ No newline at end of file
+?>
index d1169a8..321a767 100644 (file)
@@ -62,6 +62,7 @@ $wgProto = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on') ? 'https' : '
 $wgServer = $wgProto.'://' . $wgServerName;
 # If the port is a non-standard one, add it to the URL
 if(    isset( $_SERVER['SERVER_PORT'] )
+       && !strpos( $wgServerName, ':' )
     && (    ( $wgProto == 'http' && $_SERVER['SERVER_PORT'] != 80 )
         || ( $wgProto == 'https' && $_SERVER['SERVER_PORT'] != 443 ) ) ) {
 
@@ -2060,6 +2061,16 @@ $wgJobRunRate = 1;
  */
 $wgJobLogFile = false;
 
+/**
+ * Number of rows to update per job
+ */
+$wgUpdateRowsPerJob = 500;
+
+/**
+ * Number of rows to update per query
+ */
+$wgUpdateRowsPerQuery = 10;
+
 /**
  * Enable use of AJAX features, currently auto suggestion for the search bar
  */
diff --git a/includes/HTMLCacheUpdate.php b/includes/HTMLCacheUpdate.php
new file mode 100644 (file)
index 0000000..dd7c53b
--- /dev/null
@@ -0,0 +1,230 @@
+<?php
+
+/**
+ * Class to invalidate the HTML cache of all the pages linking to a given title.
+ * Small numbers of links will be done immediately, large numbers are pushed onto
+ * the job queue.
+ *
+ * This class is designed to work efficiently with small numbers of links, and 
+ * to work reasonably well with up to ~10^5 links. Above ~10^6 links, the memory
+ * and time requirements of loading all backlinked IDs in doUpdate() might become
+ * prohibitive. The requirements measured at Wikimedia are approximately:
+ * 
+ *   memory: 48 bytes per row
+ *   time: 16us per row for the query plus processing
+ *
+ * The reason this query is done is to support partitioning of the job
+ * by backlinked ID. The memory issue could be alleviated by doing this query in 
+ * batches, but of course LIMIT with an offset is inefficient on the DB side.
+ *
+ * The class is nevertheless a vast improvement on the previous method of using 
+ * Image::getLinksTo() and Title::touchArray(), which uses about 2KB of memory per
+ * link.
+ */
+class HTMLCacheUpdate
+{
+       public $mTitle, $mTable, $mPrefix;
+       public $mRowsPerJob, $mRowsPerQuery;
+
+       /**
+        * @param Title $titleTo The title being linked to
+        * @param string $table Name of the link table to read backlinks from
+        */
+       function __construct( $titleTo, $table ) {
+               global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;
+
+               # Batch sizes are taken from the site configuration
+               $this->mRowsPerQuery = $wgUpdateRowsPerQuery;
+               $this->mRowsPerJob = $wgUpdateRowsPerJob;
+
+               # What is linked to, and which table records the links
+               $this->mTable = $table;
+               $this->mTitle = $titleTo;
+       }
+
+       /**
+        * Select the "from" IDs of all rows in the link table that point at the
+        * title. If there are more than $mRowsPerJob of them the work is handed to
+        * insertJobs(), otherwise the pages are invalidated right away.
+        */
+       function doUpdate() {
+               $conds = $this->getToCondition();
+               $dbr =& wfGetDB( DB_SLAVE );
+               $fromField = $this->getFromField();
+               $res = $dbr->select( $this->mTable, $fromField, $conds, __METHOD__ );
+               $wrapper = new ResultWrapper( $dbr, $res );
+
+               $count = $dbr->numRows( $res );
+               if ( $count != 0 ) {
+                       if ( $count <= $this->mRowsPerJob ) {
+                               # Few enough rows to handle in this request
+                               $this->invalidateIDs( $wrapper );
+                       } else {
+                               $this->insertJobs( $wrapper );
+                       }
+               }
+               $dbr->freeResult( $res );
+       }
+
+       /**
+        * Split the result set into ranges of IDs and queue one HTMLCacheUpdateJob
+        * per range.
+        *
+        * Each pass reads up to ($realBatchSize - 1) rows. The last ID read becomes
+        * the start of the next range and the current range ends one below it, so
+        * that the boundary ID is not covered twice. The first range has no lower
+        * bound (start is false) and the last one has no upper bound (end is false).
+        *
+        * NOTE(review): the ranges only partition the rows correctly if the result
+        * is ordered by the "from" ID; the query built in doUpdate() has no
+        * ORDER BY -- confirm that index order is being relied upon.
+        *
+        * @param ResultWrapper $res Result whose first column is the "from" ID
+        */
+       function insertJobs( ResultWrapper $res ) {
+               $numRows = $res->numRows();
+               if ( $numRows == 0 ) {
+                       # Nothing to queue; also avoids dividing by a batch count of zero
+                       return;
+               }
+
+               $numBatches = ceil( $numRows / $this->mRowsPerJob );
+               $realBatchSize = $numRows / $numBatches;
+               $start = false;
+               $jobs = array();
+               do {
+                       # Initialise the boundary on every pass. When the batch size is 1 the
+                       # loop below reads no rows at all; without this the boundary would be
+                       # undefined and a job with an end of -1 would be queued, matching nothing.
+                       # With it, a single open-ended job covering every row is queued instead.
+                       $id = false;
+                       for ( $i = 0; $i < $realBatchSize - 1; $i++ ) {
+                               $row = $res->fetchRow();
+                               if ( $row ) {
+                                       $id = $row[0];
+                               } else {
+                                       # Ran out of rows: this is the final, open-ended range
+                                       $id = false;
+                                       break;
+                               }
+                       }
+                       if ( $id !== false ) {
+                               // One less on the end to avoid duplicating the boundary
+                               $job = new HTMLCacheUpdateJob( $this->mTitle, $this->mTable, $start, $id - 1 );
+                       } else {
+                               $job = new HTMLCacheUpdateJob( $this->mTitle, $this->mTable, $start, false );
+                       }
+                       $jobs[] = $job;
+
+                       $start = $id;
+               } while ( $start );
+
+               Job::batchInsert( $jobs );
+       }
+
+       /**
+        * Get the column-name prefix used by the link table, e.g. "tl" for
+        * templatelinks. The value is computed once and kept in $mPrefix.
+        *
+        * @return string
+        * @throws MWException if the table is not one of the supported link tables
+        */
+       function getPrefix() {
+               static $prefixes = array(
+                       'pagelinks' => 'pl',
+                       'imagelinks' => 'il',
+                       'categorylinks' => 'cl',
+                       'templatelinks' => 'tl',
+                       
+                       # Not needed
+                       # 'externallinks' => 'el',
+                       # 'langlinks' => 'll'
+               );
+
+               if ( is_null( $this->mPrefix ) ) {
+                       # Check with isset() first so that an unsupported table goes straight
+                       # to the exception instead of raising an undefined-index notice
+                       if ( !isset( $prefixes[$this->mTable] ) ) {
+                               throw new MWException( "Invalid table type \"{$this->mTable}\" in " . __CLASS__ );
+                       }
+                       $this->mPrefix = $prefixes[$this->mTable];
+               }
+               return $this->mPrefix;
+       }
+       
+       /**
+        * Get the name of the "from" column of the link table, which is the
+        * table's prefix followed by "_from".
+        *
+        * @return string
+        */
+       function getFromField() {
+               $prefix = $this->getPrefix();
+               return $prefix . '_from';
+       }
+
+       /**
+        * Build the condition array selecting the rows of the link table that
+        * point at the title.
+        *
+        * @return array
+        * @throws MWException if the table is not one of the supported link tables
+        */
+       function getToCondition() {
+               $table = $this->mTable;
+
+               # Loose comparison, tested in the same order as a switch would
+               if ( $table == 'pagelinks' ) {
+                       $conds = array(
+                               'pl_namespace' => $this->mTitle->getNamespace(),
+                               'pl_title' => $this->mTitle->getDBkey()
+                       );
+               } elseif ( $table == 'templatelinks' ) {
+                       $conds = array(
+                               'tl_namespace' => $this->mTitle->getNamespace(),
+                               'tl_title' => $this->mTitle->getDBkey()
+                       );
+               } elseif ( $table == 'imagelinks' ) {
+                       $conds = array( 'il_to' => $this->mTitle->getDBkey() );
+               } elseif ( $table == 'categorylinks' ) {
+                       $conds = array( 'cl_to' => $this->mTitle->getDBkey() );
+               } else {
+                       throw new MWException( 'Invalid table type in ' . __CLASS__ );
+               }
+               return $conds;
+       }
+
+       /**
+        * Invalidate every page whose ID appears in the result set, right now.
+        *
+        * The IDs are handled in groups of at most $mRowsPerQuery. For each group,
+        * page_touched is set to a timestamp taken from the master connection, and
+        * the pages are purged from squid and the file cache where those are enabled.
+        *
+        * @param ResultWrapper $res Result whose first column is a page ID
+        */
+       function invalidateIDs( ResultWrapper $res ) {
+               global $wgUseFileCache, $wgUseSquid;
+
+               if ( $res->numRows() == 0 ) {
+                       return;
+               }
+
+               $dbw =& wfGetDB( DB_MASTER );
+               $timestamp = $dbw->timestamp();
+
+               do {
+                       # Collect the next group of IDs, noting whether the result ran out
+                       $ids = array();
+                       $exhausted = false;
+                       while ( count( $ids ) < $this->mRowsPerQuery ) {
+                               $row = $res->fetchRow();
+                               if ( !$row ) {
+                                       $exhausted = true;
+                                       break;
+                               }
+                               $ids[] = $row[0];
+                       }
+
+                       if ( count( $ids ) == 0 ) {
+                               break;
+                       }
+
+                       # Update page_touched
+                       $idList = $dbw->makeList( $ids );
+                       $dbw->update( 'page',
+                               array( 'page_touched' => $timestamp ),
+                               array( 'page_id IN (' . $idList . ')' ),
+                               __METHOD__
+                       );
+
+                       # Title objects are only needed when some cache has to be purged
+                       if ( $wgUseSquid || $wgUseFileCache ) {
+                               $titles = Title::newFromIDs( $ids );
+
+                               # Update squid
+                               if ( $wgUseSquid ) {
+                                       $squidUpdate = SquidUpdate::newFromTitles( $titles );
+                                       $squidUpdate->doUpdate();
+                               }
+
+                               # Update file cache
+                               if ( $wgUseFileCache ) {
+                                       foreach ( $titles as $title ) {
+                                               $cache = new CacheManager( $title );
+                                               @unlink( $cache->fileCacheName() );
+                                       }
+                               }
+                       }
+               } while ( !$exhausted );
+       }
+}
+
+/**
+ * Job which invalidates the pages linking to a title whose "from" IDs fall
+ * within a given range, using HTMLCacheUpdate::invalidateIDs().
+ */
+class HTMLCacheUpdateJob extends Job {
+       var $table, $start, $end;
+
+       /**
+        * Construct a job
+        * @param Title $title The title linked to
+        * @param string $table The name of the link table.
+        * @param integer $start Beginning page_id or false for open interval
+        * @param integer $end End page_id or false for open interval
+        * @param integer $id job_id
+        */
+       function __construct( $title, $table, $start, $end, $id = 0 ) {
+               # The bounds are passed to the parent as given
+               $params = array( 'table' => $table, 'start' => $start, 'end' => $end );
+               parent::__construct( 'html_cache_update', $title, $params, $id );
+
+               # Locally the bounds are kept as integers, so false is stored as 0
+               $this->table = $table;
+               $this->start = (int)$start;
+               $this->end = (int)$end;
+       }
+
+       /**
+        * Select the "from" IDs inside this job's range and invalidate them.
+        *
+        * @return boolean Always true
+        */
+       function run() {
+               $updater = new HTMLCacheUpdate( $this->title, $this->table );
+
+               $idField = $updater->getFromField();
+               $where = $updater->getToCondition();
+
+               # A bound of 0 leaves that side of the range open
+               if ( $this->start ) {
+                       $where[] = $idField . ' >= ' . $this->start;
+               }
+               if ( $this->end ) {
+                       $where[] = $idField . ' <= ' . $this->end;
+               }
+
+               $dbr =& wfGetDB( DB_SLAVE );
+               $res = $dbr->select( $this->table, $idField, $where, __METHOD__ );
+               $wrapped = new ResultWrapper( $dbr, $res );
+               $updater->invalidateIDs( $wrapped );
+               $dbr->freeResult( $res );
+
+               return true;
+       }
+}
+?>
index dd40c0f..3573d36 100644 (file)
@@ -1316,16 +1316,8 @@ class Image
                $this->purgeDescription();
                
                // Purge cache of all pages using this image
-               $linksTo = $this->getLinksTo();
-               global $wgUseSquid, $wgPostCommitUpdateList;
-               if ( $wgUseSquid ) {
-                       $u = SquidUpdate::newFromTitles( $linksTo, $urlArr );
-                       array_push( $wgPostCommitUpdateList, $u );
-               }
-
-               // Invalidate parser cache and client cache for pages using this image
-               // This is left until relatively late to reduce lock time
-               Title::touchArray( $linksTo );
+               $update = new HTMLCacheUpdate( $this->getTitle(), 'imagelinks' );
+               $update->doUpdate();
        }
 
        function checkDBSchema(&$db) {
@@ -1461,7 +1453,7 @@ class Image
         * Record an image upload in the upload log and the image table
         */
        function recordUpload( $oldver, $desc, $license = '', $copyStatus = '', $source = '', $watch = false ) {
-               global $wgUser, $wgUseCopyrightUpload, $wgUseSquid, $wgPostCommitUpdateList;
+               global $wgUser, $wgUseCopyrightUpload;
 
                $fname = 'Image::recordUpload';
                $dbw =& wfGetDB( DB_MASTER );
@@ -1528,8 +1520,6 @@ class Image
                        $fname,
                        'IGNORE'
                );
-               $descTitle = $this->getTitle();
-               $purgeURLs = array();
 
                if( $dbw->affectedRows() == 0 ) {
                        # Collision, this is an update of an image
@@ -1575,6 +1565,7 @@ class Image
                        $dbw->query( "UPDATE $site_stats SET ss_images=ss_images+1", $fname );
                }
 
+               $descTitle = $this->getTitle();
                $article = new Article( $descTitle );
                $minor = false;
                $watch = $watch || $wgUser->isWatched( $descTitle );
@@ -1588,7 +1579,7 @@ class Image
 
                        # Invalidate the cache for the description page
                        $descTitle->invalidateCache();
-                       $purgeURLs[] = $descTitle->getInternalURL();
+                       $descTitle->purgeSquid();
                } else {
                        // New image; create the description page.
                        $article->insertNewArticle( $textdesc, $desc, $minor, $watch, $suppressRC );
@@ -1603,13 +1594,8 @@ class Image
                $dbw->immediateCommit();
 
                # Invalidate cache for all pages using this image
-               $linksTo = $this->getLinksTo();
-
-               if ( $wgUseSquid ) {
-                       $u = SquidUpdate::newFromTitles( $linksTo, $purgeURLs );
-                       array_push( $wgPostCommitUpdateList, $u );
-               }
-               Title::touchArray( $linksTo );
+               $update = new HTMLCacheUpdate( $this->getTitle(), 'imagelinks' );
+               $update->doUpdate();
 
                return true;
        }
@@ -1619,6 +1605,8 @@ class Image
         * Also adds their IDs to the link cache
         *
         * This is mostly copied from Title::getLinksTo()
+        *
+        * @deprecated Use HTMLCacheUpdate, this function uses too much memory
         */
        function getLinksTo( $options = '' ) {
                $fname = 'Image::getLinksTo';
index 8a5e166..fbd8297 100644 (file)
@@ -631,8 +631,8 @@ END
                $this->img = new Image( $this->mTitle );
                if( $this->img->exists() ) {
                        wfDebug( "ImagePage::doPurge purging " . $this->img->getName() . "\n" );
-                       $linksTo = $this->img->getLinksTo();
-                       Title::touchArray( $linksTo );
+                       $update = new HTMLCacheUpdate( $this->mTitle, 'imagelinks' );
+                       $update->doUpdate();
                        $this->img->purgeCache();
                } else {
                        wfDebug( "ImagePage::doPurge no image\n" );
index 831d37e..f82b16f 100644 (file)
@@ -4,7 +4,7 @@ if ( !defined( 'MEDIAWIKI' ) ) {
        die( "This file is part of MediaWiki, it is not a valid entry point\n" );
 }
 
-class Job {
+abstract class Job {
        var $command,
                $title,
                $params,
@@ -15,50 +15,37 @@ class Job {
        /*-------------------------------------------------------------------------
         * Static functions
         *------------------------------------------------------------------------*/
+
+       /** 
+        * @deprecated use LinksUpdate::queueRecursiveJobs()
+        */
        /**
-        * Add an array of refreshLinks jobs to the queue
-        * @param array $titles Array of title objects.
-        * @static
+        * static function queueLinksJobs( $titles ) {}
         */
-       function queueLinksJobs( $titles ) {
-               $fname = 'Job::queueLinksJobs';
-               wfProfileIn( $fname );
-               $batchSize = 100;
-               for( $i = 0; $i < count( $titles ); $i += $batchSize ) {
-                       $batch = array_slice( $titles, $i, $batchSize, true );
-                       $jobs = array();
-                       foreach( $batch as $title ) {
-                               $jobs[] = new Job( 'refreshLinks', $title );
-                       }
-                       Job::batchInsert( $jobs );
-               }
-               wfProfileOut( $fname );
-       }
 
        /**
         * Pop a job off the front of the queue
         * @static
         * @return Job or false if there are no jobs
         */
-       function pop() {
-               $fname = 'Job::pop';
-               wfProfileIn( $fname );
+       static function pop() {
+               wfProfileIn( __METHOD__ );
 
                $dbr =& wfGetDB( DB_SLAVE );
 
                // Get a job from the slave
-               $row = $dbr->selectRow( 'job', '*', '', $fname,
+               $row = $dbr->selectRow( 'job', '*', '', __METHOD__,
                        array( 'ORDER BY' => 'job_id', 'LIMIT' => 1 )
                );
 
                if ( $row === false ) {
-                       wfProfileOut( $fname );
+                       wfProfileOut( __METHOD__ );
                        return false;
                }
 
                // Try to delete it from the master
                $dbw =& wfGetDB( DB_MASTER );
-               $dbw->delete( 'job', array( 'job_id' => $row->job_id ), $fname );
+               $dbw->delete( 'job', array( 'job_id' => $row->job_id ), __METHOD__ );
                $affected = $dbw->affectedRows();
                $dbw->immediateCommit();
 
@@ -66,30 +53,30 @@ class Job {
                        // Failed, someone else beat us to it
                        // Try getting a random row
                        $row = $dbw->selectRow( 'job', array( 'MIN(job_id) as minjob',
-                               'MAX(job_id) as maxjob' ), '', $fname );
+                               'MAX(job_id) as maxjob' ), '', __METHOD__ );
                        if ( $row === false || is_null( $row->minjob ) || is_null( $row->maxjob ) ) {
                                // No jobs to get
-                               wfProfileOut( $fname );
+                               wfProfileOut( __METHOD__ );
                                return false;
                        }
                        // Get the random row
                        $row = $dbw->selectRow( 'job', '*',
-                               array( 'job_id' => mt_rand( $row->minjob, $row->maxjob ) ),     $fname );
+                               array( 'job_id' => mt_rand( $row->minjob, $row->maxjob ) ),     __METHOD__ );
                        if ( $row === false ) {
                                // Random job gone before we got the chance to select it
                                // Give up
-                               wfProfileOut( $fname );
+                               wfProfileOut( __METHOD__ );
                                return false;
                        }
                        // Delete the random row
-                       $dbw->delete( 'job', array( 'job_id' => $row->job_id ), $fname );
+                       $dbw->delete( 'job', array( 'job_id' => $row->job_id ), __METHOD__ );
                        $affected = $dbw->affectedRows();
                        $dbw->immediateCommit();
                        
                        if ( !$affected ) {
                                // Random job gone before we exclusively deleted it
                                // Give up
-                               wfProfileOut( $fname );
+                               wfProfileOut( __METHOD__ );
                                return false;
                        }
                }
@@ -99,20 +86,50 @@ class Job {
                $namespace = $row->job_namespace;
                $dbkey = $row->job_title;
                $title = Title::makeTitleSafe( $namespace, $dbkey );
-               $job = new Job( $row->job_cmd, $title, $row->job_params, $row->job_id );
+               $job = Job::factory( $row->job_cmd, $title, Job::extractBlob( $row->job_params ), $row->job_id );
                
                // Remove any duplicates it may have later in the queue
-               $dbw->delete( 'job', $job->insertFields(), $fname );
+               $dbw->delete( 'job', $job->insertFields(), __METHOD__ );
                
-               wfProfileOut( $fname );
+               wfProfileOut( __METHOD__ );
                return $job;
        }
 
+       /**
+        * Build the Job subclass instance that handles a given queue command.
+        *
+        * @param string $command Job type, as stored in the job_cmd field
+        * @param Title $title Title the job applies to
+        * @param mixed $params Unserialized job parameters, or false for none
+        * @param int $id Value of job_id for this job, 0 by default
+        * @return Job a RefreshLinksJob or HTMLCacheUpdateJob
+        * @throws MWException if $command is not a recognised job type
+        * @static
+        */
+       static function factory( $command, $title, $params = false, $id = 0 ) {
+               if ( $command == 'refreshLinks' ) {
+                       return new RefreshLinksJob( $title, $params, $id );
+               }
+               if ( $command == 'html_cache_update' ) {
+                       // This job type receives its parameters as separate constructor arguments
+                       return new HTMLCacheUpdateJob( $title, $params['table'],
+                               $params['start'], $params['end'], $id );
+               }
+               throw new MWException( "Invalid job command \"$command\"" );
+       }
+
+       /**
+        * Convert job parameters into the string stored in the job_params field.
+        *
+        * @param mixed $params Parameters to store, or false for none
+        * @return string serialize() output, or the empty string when $params is false
+        * @static
+        */
+       static function makeBlob( $params ) {
+               return ( $params === false ) ? '' : serialize( $params );
+       }
+
+       /**
+        * Reverse of makeBlob(): turn a stored job_params string back into parameters.
+        *
+        * @param string $blob Stored value; anything that casts to '' means "no parameters"
+        * @return mixed The unserialized parameters, or false for an empty blob
+        * @static
+        */
+       static function extractBlob( $blob ) {
+               if ( (string)$blob === '' ) {
+                       return false;
+               }
+               return unserialize( $blob );
+       }
+
        /*-------------------------------------------------------------------------
         * Non-static functions
         *------------------------------------------------------------------------*/
 
-       function Job( $command, $title, $params = '', $id = 0 ) {
+       function __construct( $command, $title, $params = false, $id = 0 ) {
                $this->command = $command;
                $this->title = $title;
                $this->params = $params;
@@ -127,20 +144,18 @@ class Job {
         * Insert a single job into the queue.
         */
        function insert() {
-               $fname = 'Job::insert';
-               
                $fields = $this->insertFields();
 
                $dbw =& wfGetDB( DB_MASTER );
                
                if ( $this->removeDuplicates ) {
-                       $res = $dbw->select( 'job', array( '1' ), $fields, $fname );
+                       $res = $dbw->select( 'job', array( '1' ), $fields, __METHOD__ );
                        if ( $dbw->numRows( $res ) ) {
                                return;
                        }
                }
                $fields['job_id'] = $dbw->nextSequenceValue( 'job_job_id_seq' );
-               $dbw->insert( 'job', $fields, $fname );
+               $dbw->insert( 'job', $fields, __METHOD__ );
        }
        
        protected function insertFields() {
@@ -148,7 +163,7 @@ class Job {
                        'job_cmd' => $this->command,
                        'job_namespace' => $this->title->getNamespace(),
                        'job_title' => $this->title->getDBkey(),
-                       'job_params' => $this->params
+                       'job_params' => Job::makeBlob( $this->params )
                );
        }
        
@@ -162,16 +177,14 @@ class Job {
         * @param $jobs array of Job objects
         */
        static function batchInsert( $jobs ) {
-               $fname = __CLASS__ . '::' . __FUNCTION__;
-               
                if( count( $jobs ) ) {
                        $dbw = wfGetDB( DB_MASTER );
                        $dbw->begin();
                        foreach( $jobs as $job ) {
                                $rows[] = $job->insertFields();
                        }
-                       $dbw->insert( 'job', $rows, $fname, 'IGNORE' );
-                       $dbw->immediateCommit();
+                       $dbw->insert( 'job', $rows, __METHOD__, 'IGNORE' );
+                       $dbw->commit();
                }
        }
 
@@ -179,35 +192,47 @@ class Job {
         * Run the job
         * @return boolean success
         */
-       function run() {
-               $fname = 'Job::run';
-               wfProfileIn( $fname );
-               switch ( $this->command ) {
-                       case 'refreshLinks':
-                               $retval = $this->refreshLinks();
-                               break;
-                       default:
-                               $retval = true;
-                               if( wfRunHooks( 'RunUnknownJob', array( &$this, &$retval ) ) ) {
-                                       $this->error = "Invalid job type {$this->command}, ignoring";
-                                       wfDebug( $this->error . "\n" );
-                                       $retval = false;
-                               } else {
-                                       $retval = true;
+       abstract function run();
+       
+       function toString() {
+               $paramString = '';
+               if ( $this->params ) {
+                       foreach ( $this->params as $key => $value ) {
+                               if ( $paramString != '' ) {
+                                       $paramString .= ' ';
                                }
+                               $paramString .= "$key=$value";
+                       }
+               }
+
+               if ( is_object( $this->title ) ) {
+                       $s = "{$this->command} " . $this->title->getPrefixedDBkey();
+                       if ( $paramString !== '' ) {
+                               $s .= ' ' . $paramString;
+                       }
+                       return $s;
+               } else {
+                       return "{$this->command} $paramString";
                }
-               wfProfileOut( $fname );
-               return $retval;
+       }
+
+       /**
+        * Get the contents of $this->error, which run() implementations set
+        * to a message before returning false.
+        *
+        * @return mixed the current value of $this->error
+        */
+       function getLastError() {
+               return $this->error;
+       }
+}
+
+class RefreshLinksJob extends Job {
+       /**
+        * @param Title $title Title whose links are to be refreshed
+        * @param mixed $params Job parameters, or false for none. The default is
+        *   false, matching Job::__construct() and Job::factory(), so that a job
+        *   constructed directly yields the same job_params blob (the empty
+        *   string) as one created through the factory. With a default of ''
+        *   the blob would be serialize('') instead, and duplicate detection,
+        *   which compares job_params, would treat the two as different jobs.
+        * @param int $id Value of job_id for this job, 0 by default
+        */
+       function __construct( $title, $params = false, $id = 0 ) {
+               parent::__construct( 'refreshLinks', $title, $params, $id );
        }
 
        /**
         * Run a refreshLinks job
         * @return boolean success
         */
-       function refreshLinks() {
+       function run() {
                global $wgParser;
-               $fname = 'Job::refreshLinks';
-               wfProfileIn( $fname );
+               wfProfileIn( __METHOD__ );
 
                # FIXME: $dbw never used.
                $dbw =& wfGetDB( DB_MASTER );
@@ -217,43 +242,28 @@ class Job {
                
                if ( is_null( $this->title ) ) {
                        $this->error = "refreshLinks: Invalid title";
-                       wfProfileOut( $fname );
+                       wfProfileOut( __METHOD__ );
                        return false;
                }
 
                $revision = Revision::newFromTitle( $this->title );
                if ( !$revision ) {
                        $this->error = 'refreshLinks: Article not found "' . $this->title->getPrefixedDBkey() . '"';
-                       wfProfileOut( $fname );
+                       wfProfileOut( __METHOD__ );
                        return false;
                }
 
-               wfProfileIn( "$fname-parse" );
+               wfProfileIn( __METHOD__.'-parse' );
                $options = new ParserOptions;
                $parserOutput = $wgParser->parse( $revision->getText(), $this->title, $options, true, true, $revision->getId() );
-               wfProfileOut( "$fname-parse" );
-               wfProfileIn( "$fname-update" );
+               wfProfileOut( __METHOD__.'-parse' );
+               wfProfileIn( __METHOD__.'-update' );
                $update = new LinksUpdate( $this->title, $parserOutput, false );
                $update->doUpdate();
-               wfProfileOut( "$fname-update" );
-               wfProfileOut( $fname );
+               wfProfileOut( __METHOD__.'-update' );
+               wfProfileOut( __METHOD__ );
                return true;
        }
-
-       function toString() {
-               if ( is_object( $this->title ) ) {
-                       $s = "{$this->command} " . $this->title->getPrefixedDBkey();
-                       if ( $this->params !== '' ) {
-                               $s .= ', ' . $this->params;
-                       }
-                       return $s;
-               } else {
-                       return "{$this->command} {$this->params}";
-               }
-       }
-
-       function getLastError() {
-               return $this->error;
-       }
 }
+
 ?>
index caa1c97..9e25bf0 100644 (file)
@@ -85,7 +85,7 @@ class LinksUpdate {
        function doIncrementalUpdate() {
                $fname = 'LinksUpdate::doIncrementalUpdate';
                wfProfileIn( $fname );
-
+               
                # Page links
                $existing = $this->getExistingLinks();
                $this->incrTableUpdate( 'pagelinks', 'pl', $this->getLinkDeletions( $existing ),
@@ -115,14 +115,6 @@ class LinksUpdate {
                $this->incrTableUpdate( 'templatelinks', 'tl', $this->getTemplateDeletions( $existing ),
                        $this->getTemplateInsertions( $existing ) );
 
-               # Refresh links of all pages including this page
-               if ( $this->mRecursive ) {
-                       $tlto = $this->mTitle->getTemplateLinksTo();
-                       if ( count( $tlto ) ) {
-                               Job::queueLinksJobs( $tlto );
-                       }
-               }
-
                # Category links
                $existing = $this->getExistingCategories();
                $this->incrTableUpdate( 'categorylinks', 'cl', $this->getCategoryDeletions( $existing ),
@@ -132,6 +124,12 @@ class LinksUpdate {
                $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
                $this->invalidateCategories( $categoryUpdates );
 
+               # Refresh links of all pages including this page
+               # This will be in a separate transaction
+               if ( $this->mRecursive ) {
+                       $this->queueRecursiveJobs();
+               }
+               
                wfProfileOut( $fname );
        }
 
@@ -150,14 +148,6 @@ class LinksUpdate {
                $existing = $this->getExistingImages();
                $imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing );
 
-               # Refresh links of all pages including this page
-               if ( $this->mRecursive ) {
-                       $tlto = $this->mTitle->getTemplateLinksTo();
-                       if ( count( $tlto ) ) {
-                               Job::queueLinksJobs( $tlto );
-                       }
-               }
-
                $this->dumbTableUpdate( 'pagelinks',     $this->getLinkInsertions(),     'pl_from' );
                $this->dumbTableUpdate( 'imagelinks',    $this->getImageInsertions(),    'il_from' );
                $this->dumbTableUpdate( 'categorylinks', $this->getCategoryInsertions(), 'cl_from' );
@@ -169,8 +159,46 @@ class LinksUpdate {
                $this->invalidateCategories( $categoryUpdates );
                $this->invalidateImageDescriptions( $imageUpdates );
 
+               # Refresh links of all pages including this page
+               # This will be in a separate transaction
+               if ( $this->mRecursive ) {
+                       $this->queueRecursiveJobs();
+               }
+
                wfProfileOut( $fname );
        }
+
+       /**
+        * Queue a refreshLinks job for every page that has a templatelinks
+        * row pointing at this title. Jobs are handed to Job::batchInsert()
+        * in groups of at most 100, so the full list of jobs is never held
+        * in memory at once.
+        */
+       function queueRecursiveJobs() {
+               wfProfileIn( __METHOD__ );
+
+               $maxPerInsert = 100;
+               $dbr =& wfGetDB( DB_SLAVE );
+               $conds = array(
+                       'page_id=tl_from',
+                       'tl_namespace' => $this->mTitle->getNamespace(),
+                       'tl_title' => $this->mTitle->getDBkey()
+               );
+               $res = $dbr->select( array( 'templatelinks', 'page' ),
+                       array( 'page_namespace', 'page_title' ), $conds, __METHOD__ );
+
+               $pending = array();
+               while ( $row = $dbr->fetchObject( $res ) ) {
+                       $pending[] = Job::factory( 'refreshLinks',
+                               Title::makeTitle( $row->page_namespace, $row->page_title ) );
+                       if ( count( $pending ) == $maxPerInsert ) {
+                               // Full group: flush it and start collecting the next one
+                               Job::batchInsert( $pending );
+                               $pending = array();
+                       }
+               }
+               // Flush the final, possibly empty, group
+               Job::batchInsert( $pending );
+
+               $dbr->freeResult( $res );
+               wfProfileOut( __METHOD__ );
+       }
        
        /**
         * Invalidate the cache of a list of pages from a single namespace
index 7ccdf5b..bcb02ce 100644 (file)
@@ -54,6 +54,10 @@ class SquidUpdate {
        }
 
        /* static */ function newFromTitles( &$titles, $urlArr = array() ) {
+               global $wgMaxSquidPurgeTitles;
+               if ( count( $titles ) > $wgMaxSquidPurgeTitles ) {
+                       $titles = array_slice( $titles, 0, $wgMaxSquidPurgeTitles );
+               }
                foreach ( $titles as $title ) {
                        $urlArr[] = $title->getInternalURL();
                }
@@ -77,8 +81,8 @@ class SquidUpdate {
        /* static */ function purge( $urlArr ) {
                global $wgSquidServers, $wgHTCPMulticastAddress, $wgHTCPPort;
 
-               if ( $wgSquidServers == 'echo' ) {
-                       echo implode("<br />\n", $urlArr);
+               if ( (@$wgSquidServers[0]) == 'echo' ) {
+                       echo implode("<br />\n", $urlArr) . "<br />\n";
                        return;
                }
 
index c377798..6c29bc6 100644 (file)
@@ -205,6 +205,21 @@ class Title {
                return $title;
        }
 
+       /**
+        * Make an array of titles from an array of IDs
+        *
+        * @param array $ids Page IDs to look up
+        * @return array Title objects, one per row found in the page table
+        *   for the given IDs; an empty array if $ids is empty
+        */
+       function newFromIDs( $ids ) {
+               $titles = array();
+               if ( !count( $ids ) ) {
+                       // An empty list would yield "page_id IN ()", which is not valid SQL
+                       return $titles;
+               }
+
+               $dbr =& wfGetDB( DB_SLAVE );
+               $res = $dbr->select( 'page', array( 'page_namespace', 'page_title' ),
+                       'page_id IN (' . $dbr->makeList( $ids ) . ')', __METHOD__ );
+
+               while ( $row = $dbr->fetchObject( $res ) ) {
+                       $titles[] = Title::makeTitle( $row->page_namespace, $row->page_title );
+               }
+               $dbr->freeResult( $res );
+               return $titles;
+       }
+
        /**
         * Create a new Title from a namespace index and a DB key.
         * It's assumed that $ns and $title are *valid*, for instance when
@@ -1572,6 +1587,9 @@ class Title {
         * Get an array of Title objects linking to this Title
         * Also stores the IDs in the link cache.
         *
+        * WARNING: do not use this function on arbitrary user-supplied titles!
+        * On heavily-used templates it will max out the memory.
+        *
         * @param string $options may be FOR UPDATE
         * @return array the Title objects linking here
         * @access public
@@ -1612,6 +1630,9 @@ class Title {
         * Get an array of Title objects using this Title as a template
         * Also stores the IDs in the link cache.
         *
+        * WARNING: do not use this function on arbitrary user-supplied titles!
+        * On heavily-used templates it will max out the memory.
+        *
         * @param string $options may be FOR UPDATE
         * @return array the Title objects linking here
         * @access public
@@ -1673,6 +1694,15 @@ class Title {
                );
        }
 
+       /**
+        * Send a squid purge for the URLs returned by getSquidURLs().
+        * Does nothing unless $wgUseSquid is enabled.
+        */
+       function purgeSquid() {
+               global $wgUseSquid;
+               if ( !$wgUseSquid ) {
+                       return;
+               }
+               $update = new SquidUpdate( $this->getSquidURLs() );
+               $update->doUpdate();
+       }
+
        /**
         * Move this page without authentication
         * @param Title &$nt the new page Title
@@ -1953,21 +1983,9 @@ class Title {
                                'pl_title'     => $nt->getDBkey() ),
                        $fname );
 
-               # Non-existent target may have had broken links to it; these must
-               # now be touched to update link coloring.
-               $nt->touchLinks();
-
                # Purge old title from squid
                # The new title, and links to the new title, are purged in Article::onArticleCreate()
-               $titles = $nt->getLinksTo();
-               if ( $wgUseSquid ) {
-                       $urls = $this->getSquidURLs();
-                       foreach ( $titles as $linkTitle ) {
-                               $urls[] = $linkTitle->getInternalURL();
-                       }
-                       $u = new SquidUpdate( $urls );
-                       $u->doUpdate();
-               }
+               $this->purgeSquid();
        }
 
        /**
@@ -2190,44 +2208,18 @@ class Title {
        }
 
        /**
-        * Update page_touched timestamps on pages linking to this title.
-        * In principal, this could be backgrounded and could also do squid
-        * purging.
+        * Update page_touched timestamps and send squid purge messages for
+        * pages linking to this title. May be sent to the job queue depending 
+        * on the number of links. Typically called on create and delete.
         */
        function touchLinks() {
-               $fname = 'Title::touchLinks';
-
-               $dbw =& wfGetDB( DB_MASTER );
-
-               $res = $dbw->select( 'pagelinks',
-                       array( 'pl_from' ),
-                       array(
-                               'pl_namespace' => $this->getNamespace(),
-                               'pl_title'     => $this->getDbKey() ),
-                       $fname );
-
-               $toucharr = array();
-               while( $row = $dbw->fetchObject( $res ) ) {
-                       $toucharr[] = $row->pl_from;
-               }
-               $dbw->freeResult( $res );
+               $u = new HTMLCacheUpdate( $this, 'pagelinks' );
+               $u->doUpdate();
 
-               if( $this->getNamespace() == NS_CATEGORY ) {
-                       // Categories show up in a separate set of links as well
-                       $res = $dbw->select( 'categorylinks',
-                               array( 'cl_from' ),
-                               array( 'cl_to' => $this->getDbKey() ),
-                               $fname );
-                       while( $row = $dbw->fetchObject( $res ) ) {
-                               $toucharr[] = $row->cl_from;
-                       }
-                       $dbw->freeResult( $res );
+               if ( $this->getNamespace() == NS_CATEGORY ) {
+                       $u = new HTMLCacheUpdate( $this, 'categorylinks' );
+                       $u->doUpdate();
                }
-
-               if (!count($toucharr))
-                       return;
-               $dbw->update( 'page', /* SET */ array( 'page_touched' => $dbw->timestamp() ),
-                                                       /* WHERE */ array( 'page_id' => $toucharr ),$fname);
        }
 
        function trackbackURL() {