* r41081 was causing the job queue to be flooded with tiny htmlCacheUpdate jobs which were less than the batch size and so, according to the original logic, should have been done immediately. This was causing template updates to be delayed even when the template has few backlinks. This is fixed.
* Introduced a shared cache called BacklinkCache with the main purpose of sharing data from these backlink queries, thus recovering the performance of r41081.
* Refactored backlink partitioning code, which in r40741 was copied from HTMLCacheUpdate to LinksUpdate with a bug intact. The bug caused every htmlCacheUpdate or refreshLinks2 job to be split into at least two pieces even when the number of rows is less than the batch size.
* Fixed a bug from r40741 causing refreshLinks2 jobs with end=false to be ignored.
* Made SquidUpdate::newFromTitles() accept a TitleArray
use correct message 'allpagesprefix' for input form label, replace _ with ' '
in next page link
* (bug 17506) Exceptions within exceptions now respect $wgShowExceptionDetails
+* Fixed excessive job queue utilisation
== API changes in 1.15 ==
* (bug 16858) Revamped list=deletedrevs to make listing deleted contributions
}
# Invalidate cache of this article and all pages using this article
- # as a template. Partly deferred. Leave templatelinks for editUpdates().
- Article::onArticleEdit( $this->mTitle, 'skiptransclusions' );
+ # as a template. Partly deferred.
+ Article::onArticleEdit( $this->mTitle );
# Update links tables, site stats, etc.
$this->editUpdates( $text, $summary, $isminor, $now, $revisionId, $changed );
} else {
}
# Update the links tables
- $u = new LinksUpdate( $this->mTitle, $editInfo->output, false );
- $u->setRecursiveTouch( $changed ); // refresh/invalidate including pages too
+ $u = new LinksUpdate( $this->mTitle, $editInfo->output );
$u->doUpdate();
wfRunHooks( 'ArticleEditUpdates', array( &$this, &$editInfo, $changed ) );
/**
* Purge caches on page update etc
*/
- public static function onArticleEdit( $title, $transclusions = 'transclusions' ) {
+ public static function onArticleEdit( $title, $flags = '' ) {
global $wgDeferredUpdateList;
// Invalidate caches of articles which include this page
- if( $transclusions !== 'skiptransclusions' )
- $wgDeferredUpdateList[] = new HTMLCacheUpdate( $title, 'templatelinks' );
+ $wgDeferredUpdateList[] = new HTMLCacheUpdate( $title, 'templatelinks' );
// Invalidate the caches of all pages which redirect here
$wgDeferredUpdateList[] = new HTMLCacheUpdate( $title, 'redirect' );
'AuthPlugin' => 'includes/AuthPlugin.php',
'AuthPluginUser' => 'includes/AuthPlugin.php',
'Autopromote' => 'includes/Autopromote.php',
+ 'BacklinkCache' => 'includes/BacklinkCache.php',
'BagOStuff' => 'includes/BagOStuff.php',
'Block' => 'includes/Block.php',
'CacheDependency' => 'includes/CacheDependency.php',
--- /dev/null
+<?php
+
+/**
+ * Class for fetching backlink lists, approximate backlink counts and partitions.
+ * Instances of this class should typically be fetched with $title->getBacklinkCache().
+ *
+ * Ideally you should only get your backlinks from here when you think there is some
+ * advantage in caching them. Otherwise it's just a waste of memory.
+ */
+class BacklinkCache {
+	/** Partition data: [table][batch size] => array( 'numRows' => ..., 'batches' => ... ) */
+	var $partitionCache = array();
+	/** Unranged query results, indexed by table name */
+	var $fullResultCache = array();
+	/** The title whose backlinks are fetched */
+	var $title;
+	/** Database object used for queries; filled in lazily by getDB() */
+	var $db;
+
+	/** Expiry value passed to $wgMemc->set() for partition entries (presumably seconds) */
+	const CACHE_EXPIRY = 3600;
+
+	/**
+	 * Create a new BacklinkCache
+	 * @param $title The title to fetch backlinks for
+	 */
+	function __construct( $title ) {
+		$this->title = $title;
+	}
+
+	/**
+	 * Clear locally stored data, including the database object
+	 */
+	function clear() {
+		$this->partitionCache = array();
+		$this->fullResultCache = array();
+		unset( $this->db );
+	}
+
+	/**
+	 * Set the Database object to use
+	 */
+	public function setDB( $db ) {
+		$this->db = $db;
+	}
+
+	/**
+	 * Get the Database object, defaulting to wfGetDB( DB_SLAVE ) if none was set
+	 */
+	protected function getDB() {
+		if ( !isset( $this->db ) ) {
+			$this->db = wfGetDB( DB_SLAVE );
+		}
+		return $this->db;
+	}
+
+	/**
+	 * Get the backlinks for a given table, ordered by the ID of the linking page.
+	 * The full list is cached in process memory only; a query restricted by
+	 * $startId or $endId always goes to the database.
+	 *
+	 * @param string $table
+	 * @param mixed $startId Lowest linking page ID to include, or false for no lower bound
+	 * @param mixed $endId Highest linking page ID to include, or false for no upper bound
+	 * @return TitleArray
+	 */
+	public function getLinks( $table, $startId = false, $endId = false ) {
+		// Validate the table before profiling starts, so that an invalid table
+		// does not leave an unbalanced wfProfileIn() behind.
+		// Use the from field in conditions and ordering rather than the joined page_id,
+		// because databases are stupid and don't necessarily propagate indexes.
+		$fromField = $this->getPrefix( $table ) . '_from';
+
+		wfProfileIn( __METHOD__ );
+
+		$tables = array( 'page', $table );
+		$fields = array( 'page_namespace', 'page_title', 'page_id' );
+		// partitionResult() derives batch boundaries from row offsets, which is
+		// only valid when the rows come back sorted by page ID.
+		$options = array( 'ORDER BY' => $fromField );
+
+		if ( $startId || $endId ) {
+			// Partial range, not cached
+			wfDebug( __METHOD__.": from DB (uncacheable range)\n" );
+			$conds = $this->getConditions( $table );
+			if ( $startId ) {
+				$conds[] = "$fromField >= " . intval( $startId );
+			}
+			if ( $endId ) {
+				$conds[] = "$fromField <= " . intval( $endId );
+			}
+			$res = $this->getDB()->select( $tables, $fields, $conds, __METHOD__, $options );
+			$ta = TitleArray::newFromResult( $res );
+			wfProfileOut( __METHOD__ );
+			return $ta;
+		}
+
+		if ( !isset( $this->fullResultCache[$table] ) ) {
+			wfDebug( __METHOD__.": from DB\n" );
+			$this->fullResultCache[$table] = $this->getDB()->select(
+				$tables,
+				$fields,
+				$this->getConditions( $table ),
+				__METHOD__,
+				$options );
+		}
+		$ta = TitleArray::newFromResult( $this->fullResultCache[$table] );
+		wfProfileOut( __METHOD__ );
+		return $ta;
+	}
+
+	/**
+	 * Get the field name prefix for a given table
+	 * @throws MWException if the table is not a known links table
+	 */
+	protected function getPrefix( $table ) {
+		static $prefixes = array(
+			'pagelinks' => 'pl',
+			'imagelinks' => 'il',
+			'categorylinks' => 'cl',
+			'templatelinks' => 'tl',
+			'redirect' => 'rd',
+		);
+		if ( isset( $prefixes[$table] ) ) {
+			return $prefixes[$table];
+		} else {
+			throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
+		}
+	}
+
+	/**
+	 * Get the SQL condition array for selecting backlinks, with a join on the page table
+	 * @throws MWException if the table is not a known links table
+	 */
+	protected function getConditions( $table ) {
+		$prefix = $this->getPrefix( $table );
+		switch ( $table ) {
+			case 'pagelinks':
+			case 'templatelinks':
+			case 'redirect':
+				// These tables identify the target by namespace and title
+				$conds = array(
+					"{$prefix}_namespace" => $this->title->getNamespace(),
+					"{$prefix}_title" => $this->title->getDBkey(),
+					"page_id={$prefix}_from"
+				);
+				break;
+			case 'imagelinks':
+				$conds = array(
+					'il_to' => $this->title->getDBkey(),
+					'page_id=il_from'
+				);
+				break;
+			case 'categorylinks':
+				$conds = array(
+					'cl_to' => $this->title->getDBkey(),
+					'page_id=cl_from',
+				);
+				break;
+			default:
+				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
+		}
+		return $conds;
+	}
+
+	/**
+	 * Get the approximate number of backlinks.
+	 * Answered from the full result cache or from any cached partition entry
+	 * when possible, otherwise the full list is fetched.
+	 */
+	public function getNumLinks( $table ) {
+		if ( isset( $this->fullResultCache[$table] ) ) {
+			return $this->fullResultCache[$table]->numRows();
+		}
+		if ( isset( $this->partitionCache[$table] ) ) {
+			// Every entry for the table carries the row count, whatever its batch size
+			$entry = reset( $this->partitionCache[$table] );
+			return $entry['numRows'];
+		}
+		$titleArray = $this->getLinks( $table );
+		return $titleArray->count();
+	}
+
+	/**
+	 * Partition the backlinks into batches.
+	 * Returns an array giving the start and end of each range. The first batch has
+	 * a start of false, and the last batch has an end of false. When there are no
+	 * backlinks, an empty array is returned.
+	 *
+	 * @param string $table The links table name
+	 * @param integer $batchSize
+	 * @return array
+	 */
+	public function partition( $table, $batchSize ) {
+		// Try cache
+		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
+			wfDebug( __METHOD__.": got from partition cache\n" );
+			return $this->partitionCache[$table][$batchSize]['batches'];
+		}
+
+		// Entries are stored only once fully computed, so that an exception
+		// part-way through cannot leave a bogus placeholder in the cache.
+
+		// Try full result cache
+		if ( isset( $this->fullResultCache[$table] ) ) {
+			$entry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
+			$this->partitionCache[$table][$batchSize] = $entry;
+			wfDebug( __METHOD__.": got from full result cache\n" );
+			return $entry['batches'];
+		}
+		// Try memcached
+		global $wgMemc;
+		$memcKey = wfMemcKey( 'backlinks', md5( $this->title->getPrefixedDBkey() ),
+			$table, $batchSize );
+		$memcValue = $wgMemc->get( $memcKey );
+		if ( is_array( $memcValue ) && isset( $memcValue['numRows'], $memcValue['batches'] ) ) {
+			$this->partitionCache[$table][$batchSize] = $memcValue;
+			wfDebug( __METHOD__.": got from memcached $memcKey\n" );
+			return $memcValue['batches'];
+		}
+		// Fetch from database
+		$this->getLinks( $table );
+		$entry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
+		$this->partitionCache[$table][$batchSize] = $entry;
+		// Save to memcached
+		$wgMemc->set( $memcKey, $entry, self::CACHE_EXPIRY );
+		wfDebug( __METHOD__.": got from database\n" );
+		return $entry['batches'];
+	}
+
+	/**
+	 * Partition a DB result with backlinks in it into batches.
+	 * The result must be ordered by page_id, as getLinks() provides.
+	 *
+	 * @param $res Result object providing numRows(), seek() and fetchObject()
+	 * @param integer $batchSize Maximum number of rows per batch, at least 1
+	 * @return array With keys 'numRows' and 'batches'; 'batches' is empty for an empty result
+	 * @throws MWException if the batch size is less than 1
+	 */
+	protected function partitionResult( $res, $batchSize ) {
+		if ( $batchSize < 1 ) {
+			throw new MWException( "Invalid batch size \"$batchSize\" in " . __CLASS__ );
+		}
+		$batches = array();
+		$numRows = $res->numRows();
+		$numBatches = intval( ceil( $numRows / $batchSize ) );
+		// No rows means no batches, so that callers do not queue empty jobs
+		for ( $i = 0; $i < $numBatches; $i++ ) {
+			if ( $i == 0 ) {
+				$start = false;
+			} else {
+				$rowNum = intval( $numRows * $i / $numBatches );
+				$res->seek( $rowNum );
+				$row = $res->fetchObject();
+				$start = $row->page_id;
+			}
+			if ( $i == $numBatches - 1 ) {
+				$end = false;
+			} else {
+				// End just below the page ID the next batch starts with
+				$rowNum = intval( $numRows * ( $i + 1 ) / $numBatches );
+				$res->seek( $rowNum );
+				$row = $res->fetchObject();
+				$end = $row->page_id - 1;
+			}
+			$batches[] = array( $start, $end );
+		}
+		return array( 'numRows' => $numRows, 'batches' => $batches );
+	}
+}
$this->mTable = $table;
$this->mRowsPerJob = $wgUpdateRowsPerJob;
$this->mRowsPerQuery = $wgUpdateRowsPerQuery;
+ $this->mCache = $this->mTitle->getBacklinkCache();
}
+ /**
+ * Invalidate the pages that link to the title through the configured table.
+ * When there are more backlinks than mRowsPerJob the work is handed to
+ * insertJobs(); a smaller non-empty set is invalidated immediately. The
+ * HTMLCacheUpdate::doUpdate hook is run in either case.
+ */
public function doUpdate() {
# Fetch the IDs
- $cond = $this->getToCondition();
- $dbr = wfGetDB( DB_SLAVE );
- $res = $dbr->select( $this->mTable, $this->getFromField(), $cond, __METHOD__ );
+ $numRows = $this->mCache->getNumLinks( $this->mTable );
- if ( $dbr->numRows( $res ) != 0 ) {
- if ( $dbr->numRows( $res ) > $this->mRowsPerJob ) {
- $this->insertJobs( $res );
+ if ( $numRows != 0 ) {
+ if ( $numRows > $this->mRowsPerJob ) {
+ $this->insertJobs();
} else {
- $this->invalidateIDs( $res );
+ $this->invalidate();
}
}
wfRunHooks( 'HTMLCacheUpdate::doUpdate', array($this->mTitle) );
}
- protected function insertJobs( ResultWrapper $res ) {
- $numRows = $res->numRows();
- $numBatches = ceil( $numRows / $this->mRowsPerJob );
- $realBatchSize = $numRows / $numBatches;
- $start = false;
- $jobs = array();
- do {
- for ( $i = 0; $i <= $realBatchSize - 1; $i++ ) {
- $row = $res->fetchRow();
- if ( $row ) {
- $id = $row[0];
- } else {
- $id = false;
- break;
- }
- }
-
+ /**
+ * Queue one HTMLCacheUpdateJob for each partition of the backlinks,
+ * using partitions of at most mRowsPerJob rows from the backlink cache.
+ * Nothing is queued when there are no partitions.
+ */
+ protected function insertJobs() {
+ $batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob );
+ if ( !$batches ) {
+ return;
+ }
+ # Initialise explicitly rather than relying on implicit array creation
+ $jobs = array();
+ foreach ( $batches as $batch ) {
$params = array(
'table' => $this->mTable,
- 'start' => $start,
- 'end' => ( $id !== false ? $id - 1 : false ),
+ 'start' => $batch[0],
+ 'end' => $batch[1],
);
$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
-
- $start = $id;
- } while ( $start );
-
- Job::batchInsert( $jobs );
- }
-
- protected function getPrefix() {
- static $prefixes = array(
- 'pagelinks' => 'pl',
- 'imagelinks' => 'il',
- 'categorylinks' => 'cl',
- 'templatelinks' => 'tl',
- 'redirect' => 'rd',
- );
-
- if ( is_null( $this->mPrefix ) ) {
- $this->mPrefix = $prefixes[$this->mTable];
- if ( is_null( $this->mPrefix ) ) {
- throw new MWException( "Invalid table type \"{$this->mTable}\" in " . __CLASS__ );
- }
}
- return $this->mPrefix;
- }
-
- public function getFromField() {
- return $this->getPrefix() . '_from';
+ Job::batchInsert( $jobs );
}
- public function getToCondition() {
- $prefix = $this->getPrefix();
- switch ( $this->mTable ) {
- case 'pagelinks':
- case 'templatelinks':
- case 'redirect':
- return array(
- "{$prefix}_namespace" => $this->mTitle->getNamespace(),
- "{$prefix}_title" => $this->mTitle->getDBkey()
- );
- case 'imagelinks':
- return array( 'il_to' => $this->mTitle->getDBkey() );
- case 'categorylinks':
- return array( 'cl_to' => $this->mTitle->getDBkey() );
- }
- throw new MWException( 'Invalid table type in ' . __CLASS__ );
- }
/**
- * Invalidate a set of IDs, right now
+ * Invalidate a set of pages, right now
+ *
+ * Sets page_touched on every page that links to the title through mTable,
+ * then purges those pages from squid and the file cache where enabled.
+ *
+ * @param $startId Mixed: lowest linking page ID to include, or false for no lower bound
+ * @param $endId Mixed: highest linking page ID to include, or false for no upper bound
*/
- public function invalidateIDs( ResultWrapper $res ) {
+ public function invalidate( $startId = false, $endId = false ) {
global $wgUseFileCache, $wgUseSquid;
- if ( $res->numRows() == 0 ) {
+ $titleArray = $this->mCache->getLinks( $this->mTable, $startId, $endId );
+ if ( $titleArray->count() == 0 ) {
return;
}
$dbw = wfGetDB( DB_MASTER );
$timestamp = $dbw->timestamp();
- $done = false;
-
- while ( !$done ) {
- # Get all IDs in this query into an array
- $ids = array();
- for ( $i = 0; $i < $this->mRowsPerQuery; $i++ ) {
- $row = $res->fetchRow();
- if ( $row ) {
- $ids[] = $row[0];
- } else {
- $done = true;
- break;
- }
- }
- if ( !count( $ids ) ) {
- break;
- }
+ # Get all IDs in this query into an array
+ $ids = array();
+ foreach ( $titleArray as $title ) {
+ $ids[] = $title->getArticleID();
+ }
+ # Update page_touched, at most mRowsPerQuery rows per statement, so
+ # that no single UPDATE has to carry the whole list of page IDs
+ $rowsPerQuery = max( 1, intval( $this->mRowsPerQuery ) );
+ foreach ( array_chunk( $ids, $rowsPerQuery ) as $batch ) {
+ $dbw->update( 'page',
+ array( 'page_touched' => $timestamp ),
+ array( 'page_id IN (' . $dbw->makeList( $batch ) . ')' ),
+ __METHOD__
+ );
+ }
- # Update page_touched
- $dbw->update( 'page',
- array( 'page_touched' => $timestamp ),
- array( 'page_id IN (' . $dbw->makeList( $ids ) . ')' ),
- __METHOD__
- );
+ # Update squid
+ if ( $wgUseSquid ) {
+ $u = SquidUpdate::newFromTitles( $titleArray );
+ $u->doUpdate();
+ }
- # Update squid
- if ( $wgUseSquid || $wgUseFileCache ) {
- $titles = Title::newFromIDs( $ids );
- if ( $wgUseSquid ) {
- $u = SquidUpdate::newFromTitles( $titles );
- $u->doUpdate();
- }
-
- # Update file cache
- if ( $wgUseFileCache ) {
- foreach ( $titles as $title ) {
- HTMLFileCache::clearFileCache( $title );
- }
- }
+ # Update file cache
+ if ( $wgUseFileCache ) {
+ foreach ( $titleArray as $title ) {
+ HTMLFileCache::clearFileCache( $title );
}
}
}
+ /**
+ * Run the job: invalidate the pages linking to the job's title through
+ * the job's table, restricted to the job's start and end values.
+ * @return Boolean: always true
+ */
public function run() {
$update = new HTMLCacheUpdate( $this->title, $this->table );
-
- $fromField = $update->getFromField();
- $conds = $update->getToCondition();
- if ( $this->start ) {
- $conds[] = "$fromField >= {$this->start}";
- }
- if ( $this->end ) {
- $conds[] = "$fromField <= {$this->end}";
- }
-
- $dbr = wfGetDB( DB_SLAVE );
- $res = $dbr->select( $this->table, $fromField, $conds, __METHOD__ );
- $update->invalidateIDs( $res );
-
+ $update->invalidate( $this->start, $this->end );
return true;
}
}
} elseif( $changed ) {
wfDebug( __METHOD__ . ": running onArticleEdit\n" );
- Article::onArticleEdit( $this->title, 'skiptransclusions' ); // leave templatelinks for editUpdates()
+ Article::onArticleEdit( $this->title );
wfDebug( __METHOD__ . ": running edit updates\n" );
$article->editUpdates(
$mProperties, //!< Map of arbitrary name to value
$mDb, //!< Database connection reference
$mOptions, //!< SELECT options to be used (array)
- $mRecursive, //!< Whether to queue jobs for recursive updates
- $mTouchTmplLinks; //!< Whether to queue HTMLCacheUpdate jobs IF recursive
+ $mRecursive; //!< Whether to queue jobs for recursive updates
/**@}}*/
/**
wfRunHooks( 'LinksUpdateConstructed', array( &$this ) );
}
-
- /**
- * Invalidate HTML cache of pages that include this page?
- */
- public function setRecursiveTouch( $val ) {
- $this->mTouchTmplLinks = (bool)$val;
- if( $val ) // Cannot invalidate without queueRecursiveJobs()
- $this->mRecursive = true;
- }
/**
* Update link tables with outgoing links from an updated article
$this->doIncrementalUpdate();
}
wfRunHooks( 'LinksUpdateComplete', array( &$this ) );
-
}
protected function doIncrementalUpdate() {
global $wgUpdateRowsPerJob;
wfProfileIn( __METHOD__ );
- $dbr = wfGetDB( DB_SLAVE );
- $res = $dbr->select( 'templatelinks',
- array( 'tl_from' ),
- array(
- 'tl_namespace' => $this->mTitle->getNamespace(),
- 'tl_title' => $this->mTitle->getDBkey()
- ), __METHOD__
- );
-
- $numRows = $res->numRows();
- if( !$numRows ) {
- wfProfileOut( __METHOD__ );
- return; // nothing to do
- }
- $numBatches = ceil( $numRows / $wgUpdateRowsPerJob );
- $realBatchSize = $numRows / $numBatches;
- $start = false;
+ $cache = $this->mTitle->getBacklinkCache();
+ $batches = $cache->partition( 'templatelinks', $wgUpdateRowsPerJob );
$jobs = array();
- do {
- for( $i = 0; $i <= $realBatchSize - 1; $i++ ) {
- $row = $res->fetchRow();
- if( $row ) {
- $id = $row[0];
- } else {
- $id = false;
- break;
- }
- }
+ foreach ( $batches as $batch ) {
+ list( $start, $end ) = $batch;
$params = array(
'start' => $start,
- 'end' => ( $id !== false ? $id - 1 : false ),
+ 'end' => $end,
);
$jobs[] = new RefreshLinksJob2( $this->mTitle, $params );
- # Hit page caches while we're at it if set to do so...
- if( $this->mTouchTmplLinks ) {
- $params['table'] = 'templatelinks';
- $jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
- }
- $start = $id;
- } while ( $start );
-
- $dbr->freeResult( $res );
-
+ }
Job::batchInsert( $jobs );
wfProfileOut( __METHOD__ );
wfProfileOut( __METHOD__ );
return false;
}
- $start = intval($this->params['start']);
- $end = intval($this->params['end']);
-
- $dbr = wfGetDB( DB_SLAVE );
- $res = $dbr->select( array( 'templatelinks', 'page' ),
- array( 'page_namespace', 'page_title' ),
- array(
- 'page_id=tl_from',
- "tl_from >= '$start'",
- "tl_from <= '$end'",
- 'tl_namespace' => $this->title->getNamespace(),
- 'tl_title' => $this->title->getDBkey()
- ), __METHOD__
- );
+ $titles = $this->title->getBacklinkCache()->getLinks(
+ 'templatelinks', $this->params['start'], $this->params['end']);
# Not suitable for page load triggered job running!
# Gracefully switch to refreshLinks jobs if this happens.
if( php_sapi_name() != 'cli' ) {
$jobs = array();
- while( $row = $dbr->fetchObject( $res ) ) {
- $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+ foreach ( $titles as $title ) {
$jobs[] = new RefreshLinksJob( $title, '' );
}
Job::batchInsert( $jobs );
return true;
}
# Re-parse each page that transcludes this page and update their tracking links...
- while( $row = $dbr->fetchObject( $res ) ) {
- $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+ foreach ( $titles as $title ) {
$revision = Revision::newFromTitle( $title );
if ( !$revision ) {
$this->error = 'refreshLinks: Article not found "' . $title->getPrefixedDBkey() . '"';
return new SquidUpdate( $blurlArr );
}
- static function newFromTitles( &$titles, $urlArr = array() ) {
+ /**
+ * Create a SquidUpdate from an array of Title objects, or a TitleArray object
+ *
+ * At most $wgMaxSquidPurgeTitles titles are converted to URLs; any further
+ * titles are ignored.
+ *
+ * @param $titles Mixed: array of Title objects, or a TitleArray
+ * @param $urlArr Array: URLs to purge in addition to those of the titles
+ * @return SquidUpdate
+ */
+ static function newFromTitles( $titles, $urlArr = array() ) {
global $wgMaxSquidPurgeTitles;
- if ( count( $titles ) > $wgMaxSquidPurgeTitles ) {
- $titles = array_slice( $titles, 0, $wgMaxSquidPurgeTitles );
- }
+ $i = 0;
foreach ( $titles as $title ) {
+ # Check the limit before appending, so that exactly
+ # $wgMaxSquidPurgeTitles titles are used, as array_slice() did
+ if ( $i++ >= $wgMaxSquidPurgeTitles ) {
+ break;
+ }
$urlArr[] = $title->getInternalURL();
}
return new SquidUpdate( $urlArr );
}
var $mLength = -1; ///< The page length, 0 for special pages
var $mRedirect = null; ///< Is the article at this title a redirect?
var $mNotificationTimestamp = array(); ///< Associative array of user ID -> timestamp/false
+ var $mBacklinkCache = null; ///< Cache of links to this title
//@}
return true;
}
+
+	/**
+	 * Return the BacklinkCache object for this title, creating it on first use
+	 */
+	function getBacklinkCache() {
+		if ( $this->mBacklinkCache !== null ) {
+			return $this->mBacklinkCache;
+		}
+		$this->mBacklinkCache = new BacklinkCache( $this );
+		return $this->mBacklinkCache;
+	}
}