From d705ae970a466d70de343e6d6ffa04811ec9812a Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Fri, 23 Oct 2015 11:48:50 -0700 Subject: [PATCH] Fixes related to WikiPage::triggerOpportunisticLinksUpdate() * Focus on updating links that would *not* already be updated by jobs, not those that already *will* be updated. * Place the jobs into a dedicated queue so they don't wait behind jobs that actually have to parse every time. This helps avoid queue buildup. * Make Job::factory() set the command field to match the value it had when enqueued. This makes it easier to have the same job class used for multiple queues. * Given the above, remove the RefreshLinksJob 'prioritize' flag. This worked by overriding getType() so that the job went to a different queue. This required both the special type *and* the flag to be set if using JobSpecification or either ack() would route to the wrong queue and fail or the job would go in the regular queue. This was too messy and error prone. Cirrus jobs using the same pattern also had ack() failures for example. Change-Id: I5941cb62cdafde203fdee7e106894322ba87b48a --- includes/DefaultSettings.php | 1 + includes/deferred/LinksUpdate.php | 3 +- includes/jobqueue/Job.php | 9 +++-- includes/jobqueue/jobs/RefreshLinksJob.php | 28 +++++++++++++--- includes/page/WikiPage.php | 38 ++++++++++++++-------- 5 files changed, 58 insertions(+), 21 deletions(-) diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 189ce42fb9..3debaab15f 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -6736,6 +6736,7 @@ $wgJobClasses = array( 'ThumbnailRender' => 'ThumbnailRenderJob', 'recentChangesUpdate' => 'RecentChangesUpdateJob', 'refreshLinksPrioritized' => 'RefreshLinksJob', // for cascading protection + 'refreshLinksDynamic' => 'RefreshLinksJob', // for pages with dynamic content 'activityUpdateJob' => 'ActivityUpdateJob', 'enqueue' => 'EnqueueJob', // local queue for multi-DC setups 'null' => 'NullJob' diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index 242a1a5f22..93c75ee31f 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -267,7 +267,7 @@ class LinksUpdate extends SqlDataUpdate implements EnqueueableDataUpdate { // Which ever runs first generally no-ops the other one. $jobs = array(); foreach ( $bc->getCascadeProtectedLinks() as $title ) { - $jobs[] = new RefreshLinksJob( $title, array( 'prioritize' => true ) ); + $jobs[] = RefreshLinksJob::newPrioritized( $title, array() ); } JobQueueGroup::singleton()->push( $jobs ); } @@ -985,7 +985,6 @@ class LinksUpdate extends SqlDataUpdate implements EnqueueableDataUpdate { 'job' => new JobSpecification( 'refreshLinksPrioritized', array( - 'prioritize' => true, // Reuse the parser cache if it was saved 'rootJobTimestamp' => $this->mParserOutput->getCacheTime(), 'useRecursiveLinksUpdate' => $this->mRecursive diff --git a/includes/jobqueue/Job.php b/includes/jobqueue/Job.php index 3e23391cdf..2d13c7e5e5 100644 --- a/includes/jobqueue/Job.php +++ b/includes/jobqueue/Job.php @@ -64,12 +64,17 @@ abstract class Job implements IJobSpecification { */ public static function factory( $command, Title $title, $params = array() ) { global $wgJobClasses; + if ( isset( $wgJobClasses[$command] ) ) { $class = $wgJobClasses[$command]; - return new $class( $title, $params ); + $job = new $class( $title, $params ); + $job->command = $command; + + return $job; } - throw new MWException( "Invalid job command `{$command}`" ); + + throw new InvalidArgumentException( "Invalid job command '{$command}'" ); } /** diff --git a/includes/jobqueue/jobs/RefreshLinksJob.php b/includes/jobqueue/jobs/RefreshLinksJob.php index 1d59b324be..52078459ae 100644 --- a/includes/jobqueue/jobs/RefreshLinksJob.php +++ b/includes/jobqueue/jobs/RefreshLinksJob.php @@ -41,10 +41,6 @@ class RefreshLinksJob extends Job { function __construct( Title $title, array $params ) { parent::__construct( 'refreshLinks', $title, $params ); - // A separate type is used just for cascade-protected backlinks - if ( !empty( $this->params['prioritize'] ) ) { - $this->command .= 'Prioritized'; - } // Base backlink update jobs and per-title update jobs can be de-duplicated. // If template A changes twice before any jobs run, a clean queue will have: // (A base, A base) @@ -64,6 +60,30 @@ class RefreshLinksJob extends Job { && ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 ); } + /** + * @param Title $title + * @param array $params + * @return RefreshLinksJob + */ + public static function newPrioritized( Title $title, array $params ) { + $job = new self( $title, $params ); + $job->command = 'refreshLinksPrioritized'; + + return $job; + } + + /** + * @param Title $title + * @param array $params + * @return RefreshLinksJob + */ + public static function newDynamic( Title $title, array $params ) { + $job = new self( $title, $params ); + $job->command = 'refreshLinksDynamic'; + + return $job; + } + function run() { global $wgUpdateRowsPerJob; diff --git a/includes/page/WikiPage.php b/includes/page/WikiPage.php index 96b2e2777e..983b3bc671 100644 --- a/includes/page/WikiPage.php +++ b/includes/page/WikiPage.php @@ -3479,22 +3479,34 @@ class WikiPage implements Page, IDBAccessObject { return; } + $params = array( + 'isOpportunistic' => true, + 'rootJobTimestamp' => $parserOutput->getCacheTime() + ); + if ( $this->mTitle->areRestrictionsCascading() ) { // If the page is cascade protecting, the links should really be up-to-date - $params = array( 'prioritize' => true ); + JobQueueGroup::singleton()->lazyPush( + RefreshLinksJob::newPrioritized( $this->mTitle, $params ) + ); } elseif ( $parserOutput->hasDynamicContent() ) { - // Assume the output contains time/random based magic words - $params = array(); - } else { - // If the inclusions are deterministic, the edit-triggered link jobs are enough - return; - } - - // Check if the last link refresh was before page_touched - if ( $this->getLinksTimestamp() < $this->getTouched() ) { - $params['isOpportunistic'] = true; - $params['rootJobTimestamp'] = $parserOutput->getCacheTime(); - JobQueueGroup::singleton()->lazyPush( new RefreshLinksJob( $this->mTitle, $params ) ); + // Assume the output contains "dynamic" time/random based magic words. + // Only update pages that expired due to dynamic content and NOT due to edits + // to referenced templates/files. When the cache expires due to dynamic content, + // page_touched is unchanged. We want to avoid triggering redundant jobs due to + // views of pages that were just purged via HTMLCacheUpdateJob. In that case, the + // template/file edit already triggered recursive RefreshLinksJob jobs. + if ( $this->getLinksTimestamp() > $this->getTouched() ) { + // If a page is uncacheable, do not keep spamming a job for it. + // Although it would be de-duplicated, it would still waste I/O. + $cache = ObjectCache::getLocalClusterInstance(); + $key = $cache->makeKey( 'dynamic-linksupdate', 'last', $this->getId() ); + if ( $cache->add( $key, time(), 60 ) ) { + JobQueueGroup::singleton()->lazyPush( + RefreshLinksJob::newDynamic( $this->mTitle, $params ) + ); + } + } } } -- 2.20.1