'HTMLCacheUpdateJob' => 'includes/job/jobs/HTMLCacheUpdateJob.php',
'NullJob' => 'includes/job/jobs/NullJob.php',
'RefreshLinksJob' => 'includes/job/jobs/RefreshLinksJob.php',
- 'RefreshLinksJob2' => 'includes/job/jobs/RefreshLinksJob.php',
+ 'RefreshLinksJob2' => 'includes/job/jobs/RefreshLinksJob2.php',
'UploadFromUrlJob' => 'includes/job/jobs/UploadFromUrlJob.php',
'AssembleUploadChunksJob' => 'includes/job/jobs/AssembleUploadChunksJob.php',
'PublishStashedFileJob' => 'includes/job/jobs/PublishStashedFileJob.php',
+ # includes/job/utils
+ 'BacklinkJobUtils' => 'includes/job/utils/BacklinkJobUtils.php',
+
# includes/json
'FormatJson' => 'includes/json/FormatJson.php',
*/
$wgJobClasses = array(
'refreshLinks' => 'RefreshLinksJob',
- 'refreshLinks2' => 'RefreshLinksJob2',
+ 'refreshLinks2' => 'RefreshLinksJob2', // b/c
'htmlCacheUpdate' => 'HTMLCacheUpdateJob',
'sendMail' => 'EmaillingJob',
'enotifNotify' => 'EnotifNotifyJob',
*/
public function getLanguages() {
global $wgHideInterlanguageLinks;
+ if ( $wgHideInterlanguageLinks ) {
+ return array();
+ }
+
$out = $this->getOutput();
$userLang = $this->getLanguage();
- # Language links
- $language_urls = array();
-
- if ( !$wgHideInterlanguageLinks ) {
- foreach ( $out->getLanguageLinks() as $languageLinkText ) {
- $languageLinkParts = explode( ':', $languageLinkText, 2 );
- $class = 'interlanguage-link interwiki-' . $languageLinkParts[0];
- unset( $languageLinkParts );
- $languageLinkTitle = Title::newFromText( $languageLinkText );
- if ( $languageLinkTitle ) {
- $ilInterwikiCode = $languageLinkTitle->getInterwiki();
- $ilLangName = Language::fetchLanguageName( $ilInterwikiCode );
-
- if ( strval( $ilLangName ) === '' ) {
- $ilLangName = $languageLinkText;
- } else {
- $ilLangName = $this->formatLanguageName( $ilLangName );
- }
+ $languageLinks = array();
- // CLDR extension or similar is required to localize the language name;
- // otherwise we'll end up with the autonym again.
- $ilLangLocalName = Language::fetchLanguageName( $ilInterwikiCode, $userLang->getCode() );
+ foreach ( $out->getLanguageLinks() as $languageLinkText ) {
+ $languageLinkParts = explode( ':', $languageLinkText, 2 );
+ $class = 'interlanguage-link interwiki-' . $languageLinkParts[0];
+ unset( $languageLinkParts );
+ $languageLinkTitle = Title::newFromText( $languageLinkText );
+ if ( $languageLinkTitle ) {
+ $ilInterwikiCode = $languageLinkTitle->getInterwiki();
+ $ilLangName = Language::fetchLanguageName( $ilInterwikiCode );
- if ( $languageLinkTitle->getText() === '' ) {
- $ilTitle = wfMessage( 'interlanguage-link-title-langonly', $ilLangLocalName )->text();
- } else {
- $ilTitle = wfMessage( 'interlanguage-link-title', $languageLinkTitle->getText(),
- $ilLangLocalName )->text();
- }
+ if ( strval( $ilLangName ) === '' ) {
+ $ilLangName = $languageLinkText;
+ } else {
+ $ilLangName = $this->formatLanguageName( $ilLangName );
+ }
- $language_urls[] = array(
- 'href' => $languageLinkTitle->getFullURL(),
- 'text' => $ilLangName,
- 'title' => $ilTitle,
- 'class' => $class,
- 'lang' => wfBCP47( $ilInterwikiCode ),
- 'hreflang' => wfBCP47( $ilInterwikiCode ),
- );
+ // CLDR extension or similar is required to localize the language name;
+ // otherwise we'll end up with the autonym again.
+ $ilLangLocalName = Language::fetchLanguageName( $ilInterwikiCode, $userLang->getCode() );
+
+ if ( $languageLinkTitle->getText() === '' ) {
+ $ilTitle = wfMessage( 'interlanguage-link-title-langonly', $ilLangLocalName )->text();
+ } else {
+ $ilTitle = wfMessage( 'interlanguage-link-title', $languageLinkTitle->getText(),
+ $ilLangLocalName )->text();
}
+
+ $languageLinks[] = array(
+ 'href' => $languageLinkTitle->getFullURL(),
+ 'text' => $ilLangName,
+ 'title' => $ilTitle,
+ 'class' => $class,
+ 'lang' => wfBCP47( $ilInterwikiCode ),
+ 'hreflang' => wfBCP47( $ilInterwikiCode ),
+ );
}
}
- return $language_urls;
+
+ return $languageLinks;
}
protected function setupTemplateForOutput() {
if ( !isset( $this->readers[$code] ) ) {
$fileName = $this->getFileName( $code );
- if ( !file_exists( $fileName ) ) {
- $this->readers[$code] = false;
- } else {
- $this->readers[$code] = CdbReader::open( $fileName );
+ $this->readers[$code] = false;
+ if ( file_exists( $fileName ) ) {
+ try {
+ $this->readers[$code] = CdbReader::open( $fileName );
+			} catch ( CdbException $e ) {
+				wfDebug( __METHOD__ . ": unable to open cdb file for reading\n" );
+ }
}
}
if ( !$this->readers[$code] ) {
return null;
} else {
- $value = $this->readers[$code]->get( $key );
-
+ $value = false;
+ try {
+ $value = $this->readers[$code]->get( $key );
+ } catch ( CdbException $e ) {
+ wfDebug( __METHOD__ . ": CdbException caught, error message was "
+ . $e->getMessage() );
+ }
if ( $value === false ) {
return null;
}
$this->readers[$code]->close();
}
- $this->writer = CdbWriter::open( $this->getFileName( $code ) );
+ try {
+ $this->writer = CdbWriter::open( $this->getFileName( $code ) );
+ } catch ( CdbException $e ) {
+ throw new MWException( $e->getMessage() );
+ }
$this->currentLang = $code;
}
public function finishWrite() {
// Close the writer
- $this->writer->close();
+ try {
+ $this->writer->close();
+ } catch ( CdbException $e ) {
+ throw new MWException( $e->getMessage() );
+ }
$this->writer = null;
unset( $this->readers[$this->currentLang] );
$this->currentLang = null;
if ( is_null( $this->writer ) ) {
throw new MWException( __CLASS__ . ': must call startWrite() before calling set()' );
}
- $this->writer->set( $key, serialize( $value ) );
+ try {
+ $this->writer->set( $key, serialize( $value ) );
+ } catch ( CdbException $e ) {
+ throw new MWException( $e->getMessage() );
+ }
}
protected function getFileName( $code ) {
public static function queueRecursiveJobsForTable( Title $title, $table ) {
wfProfileIn( __METHOD__ );
if ( $title->getBacklinkCache()->hasLinks( $table ) ) {
- $job = new RefreshLinksJob2(
+ $job = new RefreshLinksJob(
$title,
array(
- 'table' => $table,
+ 'table' => $table,
+ 'recursive' => true,
) + Job::newRootJobParams( // "overall" refresh links job info
"refreshlinks:{$table}:{$title->getPrefixedText()}"
)
static $db, $site;
wfDebug( __METHOD__ . "( $prefix )\n" );
- if ( !$db ) {
- $db = CdbReader::open( $wgInterwikiCache );
- }
- /* Resolve site name */
- if ( $wgInterwikiScopes >= 3 && !$site ) {
- $site = $db->get( '__sites:' . wfWikiID() );
- if ( $site == '' ) {
- $site = $wgInterwikiFallbackSite;
+ $value = false;
+ try {
+ if ( !$db ) {
+ $db = CdbReader::open( $wgInterwikiCache );
+ }
+ /* Resolve site name */
+ if ( $wgInterwikiScopes >= 3 && !$site ) {
+ $site = $db->get( '__sites:' . wfWikiID() );
+ if ( $site == '' ) {
+ $site = $wgInterwikiFallbackSite;
+ }
}
- }
- $value = $db->get( wfMemcKey( $prefix ) );
- // Site level
- if ( $value == '' && $wgInterwikiScopes >= 3 ) {
- $value = $db->get( "_{$site}:{$prefix}" );
- }
- // Global Level
- if ( $value == '' && $wgInterwikiScopes >= 2 ) {
- $value = $db->get( "__global:{$prefix}" );
- }
- if ( $value == 'undef' ) {
- $value = '';
+ $value = $db->get( wfMemcKey( $prefix ) );
+ // Site level
+ if ( $value == '' && $wgInterwikiScopes >= 3 ) {
+ $value = $db->get( "_{$site}:{$prefix}" );
+ }
+ // Global Level
+ if ( $value == '' && $wgInterwikiScopes >= 2 ) {
+ $value = $db->get( "__global:{$prefix}" );
+ }
+ if ( $value == 'undef' ) {
+ $value = '';
+ }
+ } catch ( CdbException $e ) {
+ wfDebug( __METHOD__ . ": CdbException caught, error message was "
+ . $e->getMessage() );
}
return $value;
static $db, $site;
wfDebug( __METHOD__ . "()\n" );
- if ( !$db ) {
- $db = CdbReader::open( $wgInterwikiCache );
- }
- /* Resolve site name */
- if ( $wgInterwikiScopes >= 3 && !$site ) {
- $site = $db->get( '__sites:' . wfWikiID() );
- if ( $site == '' ) {
- $site = $wgInterwikiFallbackSite;
- }
- }
-
- // List of interwiki sources
- $sources = array();
- // Global Level
- if ( $wgInterwikiScopes >= 2 ) {
- $sources[] = '__global';
- }
- // Site level
- if ( $wgInterwikiScopes >= 3 ) {
- $sources[] = '_' . $site;
- }
- $sources[] = wfWikiID();
-
$data = array();
-
- foreach ( $sources as $source ) {
- $list = $db->get( "__list:{$source}" );
- foreach ( explode( ' ', $list ) as $iw_prefix ) {
- $row = $db->get( "{$source}:{$iw_prefix}" );
- if ( !$row ) {
- continue;
+ try {
+ if ( !$db ) {
+ $db = CdbReader::open( $wgInterwikiCache );
+ }
+ /* Resolve site name */
+ if ( $wgInterwikiScopes >= 3 && !$site ) {
+ $site = $db->get( '__sites:' . wfWikiID() );
+ if ( $site == '' ) {
+ $site = $wgInterwikiFallbackSite;
}
+ }
- list( $iw_local, $iw_url ) = explode( ' ', $row );
-
- if ( $local !== null && $local != $iw_local ) {
- continue;
+ // List of interwiki sources
+ $sources = array();
+ // Global Level
+ if ( $wgInterwikiScopes >= 2 ) {
+ $sources[] = '__global';
+ }
+ // Site level
+ if ( $wgInterwikiScopes >= 3 ) {
+ $sources[] = '_' . $site;
+ }
+ $sources[] = wfWikiID();
+
+ foreach ( $sources as $source ) {
+ $list = $db->get( "__list:{$source}" );
+ foreach ( explode( ' ', $list ) as $iw_prefix ) {
+ $row = $db->get( "{$source}:{$iw_prefix}" );
+ if ( !$row ) {
+ continue;
+ }
+
+ list( $iw_local, $iw_url ) = explode( ' ', $row );
+
+ if ( $local !== null && $local != $iw_local ) {
+ continue;
+ }
+
+ $data[$iw_prefix] = array(
+ 'iw_prefix' => $iw_prefix,
+ 'iw_url' => $iw_url,
+ 'iw_local' => $iw_local,
+ );
}
-
- $data[$iw_prefix] = array(
- 'iw_prefix' => $iw_prefix,
- 'iw_url' => $iw_url,
- 'iw_local' => $iw_local,
- );
}
+ } catch ( CdbException $e ) {
+ wfDebug( __METHOD__ . ": CdbException caught, error message was "
+ . $e->getMessage() );
}
ksort( $data );
*/
public function push( $jobs ) {
$jobs = is_array( $jobs ) ? $jobs : array( $jobs );
+ if ( !count( $jobs ) ) {
+ return true;
+ }
$jobsByType = array(); // (job type => list of jobs)
foreach ( $jobs as $job ) {
return $this->cache->get( 'isDeprioritized', $type );
}
if ( $type === 'refreshLinks2' ) {
- // Don't keep converting refreshLinks2 => refreshLinks jobs if the
+ // Don't keep converting refreshLinksPartition => refreshLinks jobs if the
// later jobs have not been done yet. This helps throttle queue spam.
- $deprioritized = !$this->get( 'refreshLinks' )->isEmpty();
+ // @TODO: this is mostly a WMF-specific hack and should be removed when
+ // refreshLinks2 jobs are drained.
+		$deprioritized = ( $this->get( 'refreshLinks' )->getSize() > 10000 );
$this->cache->set( 'isDeprioritized', $type, $deprioritized );
return $deprioritized;
<?php
/**
- * Job to update links for a given title.
+ * Job to update link tables for pages
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*/
/**
- * Background job to update links for a given title.
+ * Job to update link tables for pages
+ *
+ * This job comes in a few variants:
+ * - a) Recursive jobs to update links for backlink pages for a given title
+ * - b) Jobs to update links for a set of titles (the job title is ignored)
+ * - c) Jobs to update links for a single title (the job title)
*
* @ingroup JobQueue
*/
class RefreshLinksJob extends Job {
+ const VERSION = 1;
+
function __construct( $title, $params = '', $id = 0 ) {
parent::__construct( 'refreshLinks', $title, $params, $id );
- $this->removeDuplicates = true; // job is expensive
+ $this->params['version'] = self::VERSION;
+ // Base backlink update jobs and per-title update jobs can be de-duplicated.
+ // If template A changes twice before any jobs run, a clean queue will have:
+ // (A base, A base)
+ // The second job is ignored by the queue on insertion.
+ // Suppose, many pages use template A, and that template itself uses template B.
+ // An edit to both will first create two base jobs. A clean FIFO queue will have:
+ // (A base, B base)
+ // When these jobs run, the queue will have per-title and remnant partition jobs:
+ // (titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
+		// Some of these jobs will be the same, and will automatically be ignored by
+		// the queue upon insertion. Some title jobs will run before the duplicate is
+		// inserted, so the work will still be done twice in those cases. More titles
+		// can be de-duplicated as the remnant jobs continue to be broken down. This
+		// works best when, for a given $wgUpdateRowsPerJob, either the pages have few
+		// backlinks and/or the backlink sets for pages A and B are almost identical.
+ $this->removeDuplicates = !isset( $params['range'] )
+ && ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
}
- /**
- * Run a refreshLinks job
- * @return bool success
- */
function run() {
- $linkCache = LinkCache::singleton();
- $linkCache->clear();
+ global $wgUpdateRowsPerJob;
if ( is_null( $this->title ) ) {
- $this->error = "refreshLinks: Invalid title";
+ $this->setLastError( "Invalid page title" );
+ return false;
+ }
+
+ // Job to update all (or a range of) backlink pages for a page
+ if ( isset( $this->params['recursive'] ) ) {
+ // Carry over information for de-duplication
+ $extraParams = $this->getRootJobParams();
+ // Avoid slave lag when fetching templates.
+ // When the outermost job is run, we know that the caller that enqueued it must have
+ // committed the relevant changes to the DB by now. At that point, record the master
+ // position and pass it along as the job recursively breaks into smaller range jobs.
+ // Hopefully, when leaf jobs are popped, the slaves will have reached that position.
+ if ( isset( $this->params['masterPos'] ) ) {
+ $extraParams['masterPos'] = $this->params['masterPos'];
+ } elseif ( wfGetLB()->getServerCount() > 1 ) {
+ $extraParams['masterPos'] = wfGetLB()->getMasterPos();
+ } else {
+ $extraParams['masterPos'] = false;
+ }
+ // Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
+ // jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
+ $jobs = BacklinkJobUtils::partitionBacklinkJob(
+ $this,
+ $wgUpdateRowsPerJob,
+ 1, // job-per-title
+ array( 'params' => $extraParams )
+ );
+ JobQueueGroup::singleton()->push( $jobs );
+		// Job to update link tables for a set of titles
+ } elseif ( isset( $this->params['pages'] ) ) {
+ foreach ( $this->params['pages'] as $pageId => $nsAndKey ) {
+ list( $ns, $dbKey ) = $nsAndKey;
+ $this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
+ }
+ // Job to update link tables for a given title
+ } else {
+ $this->runForTitle( $this->mTitle );
+ }
+
+ return true;
+ }
+
+ protected function runForTitle( Title $title = null ) {
+ $linkCache = LinkCache::singleton();
+ $linkCache->clear();
+ if ( is_null( $title ) ) {
+ $this->setLastError( "refreshLinks: Invalid title" );
return false;
}
- # Wait for the DB of the current/next slave DB handle to catch up to the master.
- # This way, we get the correct page_latest for templates or files that just changed
- # milliseconds ago, having triggered this job to begin with.
+ // Wait for the DB of the current/next slave DB handle to catch up to the master.
+ // This way, we get the correct page_latest for templates or files that just changed
+ // milliseconds ago, having triggered this job to begin with.
if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
wfGetLB()->waitFor( $this->params['masterPos'] );
}
- $revision = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
+ $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
if ( !$revision ) {
- $this->error = 'refreshLinks: Article not found "' .
- $this->title->getPrefixedDBkey() . '"';
-
+ $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
return false; // XXX: what if it was just deleted?
}
- self::runForTitleInternal( $this->title, $revision, __METHOD__ );
-
- return true;
- }
-
- /**
- * @return array
- */
- public function getDeduplicationInfo() {
- $info = parent::getDeduplicationInfo();
- // Don't let highly unique "masterPos" values ruin duplicate detection
- if ( is_array( $info['params'] ) ) {
- unset( $info['params']['masterPos'] );
- }
-
- return $info;
- }
-
- /**
- * @param Title $title
- * @param Revision $revision
- * @param string $fname
- * @return void
- */
- public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
- wfProfileIn( $fname );
$content = $revision->getContent( Revision::RAW );
-
if ( !$content ) {
- // if there is no content, pretend the content is empty
+ // If there is no content, pretend the content is empty
$content = $revision->getContentHandler()->makeEmptyContent();
}
InfoAction::invalidateCache( $title );
- wfProfileOut( $fname );
- }
-}
-
-/**
- * Background job to update links for a given title.
- * Newer version for high use templates.
- *
- * @ingroup JobQueue
- */
-class RefreshLinksJob2 extends Job {
- function __construct( $title, $params, $id = 0 ) {
- parent::__construct( 'refreshLinks2', $title, $params, $id );
- // Base jobs for large templates can easily be de-duplicated
- $this->removeDuplicates = !isset( $params['start'] ) && !isset( $params['end'] );
- }
-
- /**
- * Run a refreshLinks2 job
- * @return bool success
- */
- function run() {
- global $wgUpdateRowsPerJob;
-
- $linkCache = LinkCache::singleton();
- $linkCache->clear();
-
- if ( is_null( $this->title ) ) {
- $this->error = "refreshLinks2: Invalid title";
-
- return false;
- }
-
- // Back compat for pre-r94435 jobs
- $table = isset( $this->params['table'] ) ? $this->params['table'] : 'templatelinks';
-
- // Avoid slave lag when fetching templates.
- // When the outermost job is run, we know that the caller that enqueued it must have
- // committed the relevant changes to the DB by now. At that point, record the master
- // position and pass it along as the job recursively breaks into smaller range jobs.
- // Hopefully, when leaf jobs are popped, the slaves will have reached that position.
- if ( isset( $this->params['masterPos'] ) ) {
- $masterPos = $this->params['masterPos'];
- } elseif ( wfGetLB()->getServerCount() > 1 ) {
- $masterPos = wfGetLB()->getMasterPos();
- } else {
- $masterPos = false;
- }
-
- $tbc = $this->title->getBacklinkCache();
-
- $jobs = array(); // jobs to insert
- if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
- # This is a partition job to trigger the insertion of leaf jobs...
- $jobs = array_merge( $jobs, $this->getSingleTitleJobs( $table, $masterPos ) );
- } else {
- # This is a base job to trigger the insertion of partitioned jobs...
- if ( $tbc->getNumLinks( $table, $wgUpdateRowsPerJob + 1 ) <= $wgUpdateRowsPerJob ) {
- # Just directly insert the single per-title jobs
- $jobs = array_merge( $jobs, $this->getSingleTitleJobs( $table, $masterPos ) );
- } else {
- # Insert the partition jobs to make per-title jobs
- foreach ( $tbc->partition( $table, $wgUpdateRowsPerJob ) as $batch ) {
- list( $start, $end ) = $batch;
- $jobs[] = new RefreshLinksJob2( $this->title,
- array(
- 'table' => $table,
- 'start' => $start,
- 'end' => $end,
- 'masterPos' => $masterPos,
- ) + $this->getRootJobParams() // carry over information for de-duplication
- );
- }
- }
- }
-
- if ( count( $jobs ) ) {
- JobQueueGroup::singleton()->push( $jobs );
- }
-
return true;
}
- /**
- * @param string $table
- * @param mixed $masterPos
- * @return array
- */
- protected function getSingleTitleJobs( $table, $masterPos ) {
- # The "start"/"end" fields are not set for the base jobs
- $start = isset( $this->params['start'] ) ? $this->params['start'] : false;
- $end = isset( $this->params['end'] ) ? $this->params['end'] : false;
- $titles = $this->title->getBacklinkCache()->getLinks( $table, $start, $end );
- # Convert into single page refresh links jobs.
- # This handles well when in sapi mode and is useful in any case for job
- # de-duplication. If many pages use template A, and that template itself
- # uses template B, then an edit to both will create many duplicate jobs.
- # Roughly speaking, for each page, one of the "RefreshLinksJob" jobs will
- # get run first, and when it does, it will remove the duplicates. Of course,
- # one page could have its job popped when the other page's job is still
- # buried within the logic of a refreshLinks2 job.
- $jobs = array();
- foreach ( $titles as $title ) {
- $jobs[] = new RefreshLinksJob( $title,
- array( 'masterPos' => $masterPos ) + $this->getRootJobParams()
- ); // carry over information for de-duplication
- }
-
- return $jobs;
- }
-
- /**
- * @return array
- */
public function getDeduplicationInfo() {
$info = parent::getDeduplicationInfo();
- // Don't let highly unique "masterPos" values ruin duplicate detection
if ( is_array( $info['params'] ) ) {
+ // Don't let highly unique "masterPos" values ruin duplicate detection
unset( $info['params']['masterPos'] );
+ // For per-pages jobs, the job title is that of the template that changed
+ // (or similar), so remove that since it ruins duplicate detection
+			if ( isset( $info['params']['pages'] ) ) {
+ unset( $info['namespace'] );
+ unset( $info['title'] );
+ }
}
return $info;
--- /dev/null
+<?php
+/**
+ * Job to update links for a given title.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup JobQueue
+ */
+
+/**
+ * Background job to update links for titles in certain backlink range by page ID.
+ * Newer version for high use templates. This is deprecated by RefreshLinksPartitionJob.
+ *
+ * @ingroup JobQueue
+ * @deprecated 1.23
+ */
+class RefreshLinksJob2 extends Job {
+ function __construct( $title, $params, $id = 0 ) {
+ parent::__construct( 'refreshLinks2', $title, $params, $id );
+ // Base jobs for large templates can easily be de-duplicated
+ $this->removeDuplicates = !isset( $params['start'] ) && !isset( $params['end'] );
+ }
+
+ /**
+ * Run a refreshLinks2 job
+ * @return boolean success
+ */
+ function run() {
+ global $wgUpdateRowsPerJob;
+
+ $linkCache = LinkCache::singleton();
+ $linkCache->clear();
+
+ if ( is_null( $this->title ) ) {
+ $this->error = "refreshLinks2: Invalid title";
+ return false;
+ }
+
+ // Back compat for pre-r94435 jobs
+ $table = isset( $this->params['table'] ) ? $this->params['table'] : 'templatelinks';
+
+ // Avoid slave lag when fetching templates.
+ // When the outermost job is run, we know that the caller that enqueued it must have
+ // committed the relevant changes to the DB by now. At that point, record the master
+ // position and pass it along as the job recursively breaks into smaller range jobs.
+ // Hopefully, when leaf jobs are popped, the slaves will have reached that position.
+ if ( isset( $this->params['masterPos'] ) ) {
+ $masterPos = $this->params['masterPos'];
+ } elseif ( wfGetLB()->getServerCount() > 1 ) {
+ $masterPos = wfGetLB()->getMasterPos();
+ } else {
+ $masterPos = false;
+ }
+
+ $tbc = $this->title->getBacklinkCache();
+
+ $jobs = array(); // jobs to insert
+ if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
+ # This is a partition job to trigger the insertion of leaf jobs...
+ $jobs = array_merge( $jobs, $this->getSingleTitleJobs( $table, $masterPos ) );
+ } else {
+ # This is a base job to trigger the insertion of partitioned jobs...
+ if ( $tbc->getNumLinks( $table, $wgUpdateRowsPerJob + 1 ) <= $wgUpdateRowsPerJob ) {
+ # Just directly insert the single per-title jobs
+ $jobs = array_merge( $jobs, $this->getSingleTitleJobs( $table, $masterPos ) );
+ } else {
+ # Insert the partition jobs to make per-title jobs
+ foreach ( $tbc->partition( $table, $wgUpdateRowsPerJob ) as $batch ) {
+ list( $start, $end ) = $batch;
+ $jobs[] = new RefreshLinksJob2( $this->title,
+ array(
+ 'table' => $table,
+ 'start' => $start,
+ 'end' => $end,
+ 'masterPos' => $masterPos,
+ ) + $this->getRootJobParams() // carry over information for de-duplication
+ );
+ }
+ }
+ }
+
+ if ( count( $jobs ) ) {
+ JobQueueGroup::singleton()->push( $jobs );
+ }
+
+ return true;
+ }
+
+ /**
+ * @param $table string
+ * @param $masterPos mixed
+ * @return Array
+ */
+ protected function getSingleTitleJobs( $table, $masterPos ) {
+ # The "start"/"end" fields are not set for the base jobs
+ $start = isset( $this->params['start'] ) ? $this->params['start'] : false;
+ $end = isset( $this->params['end'] ) ? $this->params['end'] : false;
+ $titles = $this->title->getBacklinkCache()->getLinks( $table, $start, $end );
+ # Convert into single page refresh links jobs.
+ # This handles well when in sapi mode and is useful in any case for job
+ # de-duplication. If many pages use template A, and that template itself
+ # uses template B, then an edit to both will create many duplicate jobs.
+ # Roughly speaking, for each page, one of the "RefreshLinksJob" jobs will
+ # get run first, and when it does, it will remove the duplicates. Of course,
+ # one page could have its job popped when the other page's job is still
+ # buried within the logic of a refreshLinks2 job.
+ $jobs = array();
+ foreach ( $titles as $title ) {
+ $jobs[] = new RefreshLinksJob( $title,
+ array( 'masterPos' => $masterPos ) + $this->getRootJobParams()
+ ); // carry over information for de-duplication
+ }
+ return $jobs;
+ }
+
+ /**
+ * @return Array
+ */
+ public function getDeduplicationInfo() {
+ $info = parent::getDeduplicationInfo();
+ // Don't let highly unique "masterPos" values ruin duplicate detection
+ if ( is_array( $info['params'] ) ) {
+ unset( $info['params']['masterPos'] );
+ }
+ return $info;
+ }
+}
--- /dev/null
+<?php
+/**
+ * Job to update links for a given title.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup JobQueue
+ * @author Aaron Schulz
+ */
+
+/**
+ * Class with Backlink related Job helper methods
+ *
+ * @ingroup JobQueue
+ * @since 1.23
+ */
+class BacklinkJobUtils {
+ /**
+ * Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition
+ * job that covers the remaining backlink range (if needed). Jobs for the first $bSize
+ * titles are collated ($cSize per job) into leaf jobs to do actual work. All the
+ * resulting jobs are of the same class as $job. No partition job is returned if the
+ * range covered by $job was less than $bSize, as the leaf jobs have full coverage.
+ *
+ * The leaf jobs have the 'pages' param set to a (<page ID>:(<namespace>,<DB key>),...)
+ * map so that the run() function knows what pages to act on. The leaf jobs will keep
+ * the same job title as the parent job (e.g. $job).
+ *
+ * The partition jobs have the 'range' parameter set to a map of the format
+ * (start:<integer>, end:<integer>, batchSize:<integer>, subranges:((<start>,<end>),...)),
+ * the 'table' parameter set to that of $job, and the 'recursive' parameter set to true.
+ * This method can be called on the resulting job to repeat the process again.
+ *
+ * The job provided ($job) must have the 'recursive' parameter set to true and the 'table'
+ * parameter must be set to a backlink table. The job title will be used as the title to
+ * find backlinks for. Any 'range' parameter must follow the same format as mentioned above.
+ * This should be managed by recursive calls to this method.
+ *
+ * The first jobs returned are always the leaf jobs. This lets the caller use push() to
+ * put them directly into the queue and works well if the queue is FIFO. In such a queue,
+ * the leaf jobs have to get finished first before anything can resolve the next partition
+ * job, which keeps the queue very small.
+ *
+ * $opts includes:
+ * - params : extra job parameters to include in each job
+ *
+ * @param Job $job
+ * @param int $bSize BacklinkCache partition size; usually $wgUpdateRowsPerJob
+ * @param int $cSize Max titles per leaf job; Usually 1 or a modest value
+ * @param array $opts Optional parameter map
+ * @return array List of Job objects
+ */
+ public static function partitionBacklinkJob( Job $job, $bSize, $cSize, $opts = array() ) {
+ $class = get_class( $job );
+ $title = $job->getTitle();
+ $params = $job->getParams();
+
+ if ( isset( $params['pages'] ) || empty( $params['recursive'] ) ) {
+ $ranges = array(); // sanity; this is a leaf node
+ wfWarn( __METHOD__ . " called on {$job->getType()} leaf job (explosive recursion)." );
+ } elseif ( isset( $params['range'] ) ) {
+ // This is a range job to trigger the insertion of partitioned/title jobs...
+ $ranges = $params['range']['subranges'];
+ $realBSize = $params['range']['batchSize'];
+ } else {
+ // This is a base job to trigger the insertion of partitioned jobs...
+ $ranges = $title->getBacklinkCache()->partition( $params['table'], $bSize );
+ $realBSize = $bSize;
+ }
+
+ $extraParams = isset( $opts['params'] ) ? $opts['params'] : array();
+
+ $jobs = array();
+ // Combine the first range (of size $bSize) backlinks into leaf jobs
+ if ( isset( $ranges[0] ) ) {
+ list( $start, $end ) = $ranges[0];
+ $titles = $title->getBacklinkCache()->getLinks( $params['table'], $start, $end );
+ foreach ( array_chunk( iterator_to_array( $titles ), $cSize ) as $titleBatch ) {
+ $pages = array();
+ foreach ( $titleBatch as $tl ) {
+ $pages[$tl->getArticleId()] = array( $tl->getNamespace(), $tl->getDBKey() );
+ }
+ $jobs[] = new $class(
+ $title, // maintain parent job title
+ array( 'pages' => $pages ) + $extraParams
+ );
+ }
+ }
+ // Take all of the remaining ranges and build a partition job from it
+ if ( isset( $ranges[1] ) ) {
+ $jobs[] = new $class(
+ $title, // maintain parent job title
+ array(
+ 'recursive' => true,
+ 'table' => $params['table'],
+ 'range' => array(
+ 'start' => $ranges[1][0],
+ 'end' => $ranges[count( $ranges ) - 1][1],
+ 'batchSize' => $realBSize,
+ 'subranges' => array_slice( $ranges, 1 )
+ ),
+ ) + $extraParams
+ );
+ }
+
+ return $jobs;
+ }
+}
// User suppression
if ( preg_match( '/^(block|suppress)\/(block|reblock)$/', $key ) ) {
if ( $skin ) {
- $params[1] = '<span class="blockExpiry" title="‎' . htmlspecialchars( $params[1] ) . '">' .
+ // Localize the duration, and add a tooltip
+ // in English to help visitors from other wikis.
+ // The lrm is needed to make sure that the number
+ // is shown on the correct side of the tooltip text.
+ $durationTooltip = '‎' . htmlspecialchars( $params[1] );
+ $params[1] = "<span class='blockExpiry' title='$durationTooltip'>" .
$wgLang->translateBlockExpiry( $params[1] ) . '</span>';
} else {
$params[1] = $wgContLang->translateBlockExpiry( $params[1] );
--- /dev/null
+<?php
+
+/**
+ * Integration test for BacklinkJobUtils::partitionBacklinkJob(): verifies that a
+ * recursive RefreshLinksJob over 20 backlinks partitions into leaf jobs plus a
+ * recursive remainder job, and that root-job params propagate to every sub-job.
+ *
+ * @group JobQueue
+ * @group medium
+ * @group Database
+ */
+class RefreshLinksPartitionTest extends MediaWikiTestCase {
+	function __construct( $name = null, array $data = array(), $dataName = '' ) {
+		parent::__construct( $name, $data, $dataName );
+
+		// Tables touched by the page edits and link refreshes below
+		$this->tablesUsed[] = 'page';
+		$this->tablesUsed[] = 'revision';
+		$this->tablesUsed[] = 'pagelinks';
+	}
+
+	/**
+	 * @dataProvider provider_backlinks
+	 * @param int $ns Namespace of the link target page
+	 * @param string $dbKey DB key of the link target page
+	 * @param array $pages List of (namespace, dbkey) pairs that link to the target
+	 */
+	public function testRefreshLinks( $ns, $dbKey, $pages ) {
+		$title = Title::makeTitle( $ns, $dbKey );
+
+		// Create each backlinking page with a single link to $title
+		foreach ( $pages as $page ) {
+			list( $bns, $bdbkey ) = $page;
+			$bpage = WikiPage::factory( Title::makeTitle( $bns, $bdbkey ) );
+			$content = ContentHandler::makeContent( "[[{$title->getPrefixedText()}]]", $bpage->getTitle() );
+			$bpage->doEditContent( $content, "test" );
+		}
+
+		$title->getBacklinkCache()->clear();
+		$this->assertEquals( 20, $title->getBacklinkCache()->getNumLinks( 'pagelinks' ),
+			'Correct number of backlinks' );
+
+		// Partition the base job (batch size 9, 1 title per leaf):
+		// expect 9 leaf jobs covering pages 0-8 plus one recursive remainder job
+		$job = new RefreshLinksJob( $title, array( 'recursive' => true, 'table' => 'pagelinks' )
+			+ Job::newRootJobParams( "refreshlinks:pagelinks:{$title->getPrefixedText()}" ) );
+		$extraParams = $job->getRootJobParams();
+		$jobs = BacklinkJobUtils::partitionBacklinkJob( $job, 9, 1, array( 'params' => $extraParams ) );
+
+		$this->assertEquals( 10, count( $jobs ), 'Correct number of sub-jobs' );
+		$this->assertEquals( $pages[0], current( $jobs[0]->params['pages'] ),
+			'First job is leaf job with proper title' );
+		$this->assertEquals( $pages[8], current( $jobs[8]->params['pages'] ),
+			'Last leaf job is leaf job with proper title' );
+		$this->assertEquals( true, isset( $jobs[9]->params['recursive'] ),
+			'Last job is recursive sub-job' );
+		$this->assertEquals( true, $jobs[9]->params['recursive'],
+			'Last job is recursive sub-job' );
+		$this->assertEquals( true, is_array( $jobs[9]->params['range'] ),
+			'Last job is recursive sub-job' );
+		$this->assertEquals( $title->getPrefixedText(), $jobs[0]->getTitle()->getPrefixedText(),
+			'Base job title retained in leaf job' );
+		$this->assertEquals( $title->getPrefixedText(), $jobs[9]->getTitle()->getPrefixedText(),
+			'Base job title retained in recursive sub-job' );
+		$this->assertEquals( $extraParams['rootJobSignature'], $jobs[0]->params['rootJobSignature'],
+			'Leaf job has root params' );
+		$this->assertEquals( $extraParams['rootJobSignature'], $jobs[9]->params['rootJobSignature'],
+			'Recursive sub-job has root params' );
+
+		// Second level of recursion: pages 9-17 as leaves plus another remainder job
+		$jobs2 = BacklinkJobUtils::partitionBacklinkJob( $jobs[9], 9, 1, array( 'params' => $extraParams ) );
+
+		$this->assertEquals( 10, count( $jobs2 ), 'Correct number of sub-jobs' );
+		$this->assertEquals( $pages[9], current( $jobs2[0]->params['pages'] ),
+			'First job is leaf job with proper title' );
+		$this->assertEquals( $pages[17], current( $jobs2[8]->params['pages'] ),
+			'Last leaf job is leaf job with proper title' );
+		$this->assertEquals( true, isset( $jobs2[9]->params['recursive'] ),
+			'Last job is recursive sub-job' );
+		$this->assertEquals( true, $jobs2[9]->params['recursive'],
+			'Last job is recursive sub-job' );
+		$this->assertEquals( true, is_array( $jobs2[9]->params['range'] ),
+			'Last job is recursive sub-job' );
+		$this->assertEquals( $extraParams['rootJobSignature'], $jobs2[0]->params['rootJobSignature'],
+			'Leaf job has root params' );
+		$this->assertEquals( $extraParams['rootJobSignature'], $jobs2[9]->params['rootJobSignature'],
+			'Recursive sub-job has root params' );
+
+		// Final level: only pages 18-19 remain, so recursion bottoms out in 2 leaf jobs
+		$jobs3 = BacklinkJobUtils::partitionBacklinkJob( $jobs2[9], 9, 1, array( 'params' => $extraParams ) );
+
+		$this->assertEquals( 2, count( $jobs3 ), 'Correct number of sub-jobs' );
+		$this->assertEquals( $pages[18], current( $jobs3[0]->params['pages'] ),
+			'First job is leaf job with proper title' );
+		$this->assertEquals( $extraParams['rootJobSignature'], $jobs3[0]->params['rootJobSignature'],
+			'Leaf job has root params' );
+		$this->assertEquals( $pages[19], current( $jobs3[1]->params['pages'] ),
+			'Last job is leaf job with proper title' );
+		$this->assertEquals( $extraParams['rootJobSignature'], $jobs3[1]->params['rootJobSignature'],
+			'Last leaf job has root params' );
+	}
+
+	/**
+	 * @return array Single case: target page "Bang" in NS 10 with 20 backlinking pages
+	 */
+	public static function provider_backlinks() {
+		$pages = array();
+		for ( $i = 0; $i < 20; ++$i ) {
+			$pages[] = array( 0, "Page-$i" );
+		}
+		return array(
+			array( 10, 'Bang', $pages )
+		);
+	}
+}