Fixes related to WikiPage::triggerOpportunisticLinksUpdate()
authorAaron Schulz <aschulz@wikimedia.org>
Fri, 23 Oct 2015 18:48:50 +0000 (11:48 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Sat, 24 Oct 2015 00:10:12 +0000 (00:10 +0000)
* Focus on updating links that would *not* already be updated
  by jobs, not those that already *will* be updated.
* Place the jobs into a dedicated queue so they don't wait
  behind jobs that actually have to parse every time. This
  helps avoid queue buildup.
* Make Job::factory() set the command field to match the value
  it had when enqueued. This makes it easier to have the same
  job class used for multiple queues.
* Given the above, remove the RefreshLinksJob 'prioritize' flag.
  This worked by overriding getType() so that the job went to a
  different queue. This required both the special type *and* the
  flag to be set if using JobSpecification or either ack() would
  route to the wrong queue and fail or the job would go in the
  regular queue. This was too messy and error prone. Cirrus jobs
  using the same pattern also had ack() failures for example.

Change-Id: I5941cb62cdafde203fdee7e106894322ba87b48a

includes/DefaultSettings.php
includes/deferred/LinksUpdate.php
includes/jobqueue/Job.php
includes/jobqueue/jobs/RefreshLinksJob.php
includes/page/WikiPage.php

index 189ce42..3debaab 100644 (file)
@@ -6736,6 +6736,7 @@ $wgJobClasses = array(
        'ThumbnailRender' => 'ThumbnailRenderJob',
        'recentChangesUpdate' => 'RecentChangesUpdateJob',
        'refreshLinksPrioritized' => 'RefreshLinksJob', // for cascading protection
+       'refreshLinksDynamic' => 'RefreshLinksJob', // for pages with dynamic content
        'activityUpdateJob' => 'ActivityUpdateJob',
        'enqueue' => 'EnqueueJob', // local queue for multi-DC setups
        'null' => 'NullJob'
index 242a1a5..93c75ee 100644 (file)
@@ -267,7 +267,7 @@ class LinksUpdate extends SqlDataUpdate implements EnqueueableDataUpdate {
                // Which ever runs first generally no-ops the other one.
                $jobs = array();
                foreach ( $bc->getCascadeProtectedLinks() as $title ) {
-                       $jobs[] = new RefreshLinksJob( $title, array( 'prioritize' => true ) );
+                       $jobs[] = RefreshLinksJob::newPrioritized( $title, array() );
                }
                JobQueueGroup::singleton()->push( $jobs );
        }
@@ -985,7 +985,6 @@ class LinksUpdate extends SqlDataUpdate implements EnqueueableDataUpdate {
                        'job'  => new JobSpecification(
                                'refreshLinksPrioritized',
                                array(
-                                       'prioritize' => true,
                                        // Reuse the parser cache if it was saved
                                        'rootJobTimestamp' => $this->mParserOutput->getCacheTime(),
                                        'useRecursiveLinksUpdate' => $this->mRecursive
index 3e23391..2d13c7e 100644 (file)
@@ -64,12 +64,17 @@ abstract class Job implements IJobSpecification {
         */
        public static function factory( $command, Title $title, $params = array() ) {
                global $wgJobClasses;
+
                if ( isset( $wgJobClasses[$command] ) ) {
                        $class = $wgJobClasses[$command];
 
-                       return new $class( $title, $params );
+                       $job = new $class( $title, $params );
+                       $job->command = $command;
+
+                       return $job;
                }
-               throw new MWException( "Invalid job command `{$command}`" );
+
+               throw new InvalidArgumentException( "Invalid job command '{$command}'" );
        }
 
        /**
index 1d59b32..5207845 100644 (file)
@@ -41,10 +41,6 @@ class RefreshLinksJob extends Job {
 
        function __construct( Title $title, array $params ) {
                parent::__construct( 'refreshLinks', $title, $params );
-               // A separate type is used just for cascade-protected backlinks
-               if ( !empty( $this->params['prioritize'] ) ) {
-                       $this->command .= 'Prioritized';
-               }
                // Base backlink update jobs and per-title update jobs can be de-duplicated.
                // If template A changes twice before any jobs run, a clean queue will have:
                //              (A base, A base)
@@ -64,6 +60,30 @@ class RefreshLinksJob extends Job {
                        && ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
        }
 
+       /**
+        * @param Title $title
+        * @param array $params
+        * @return RefreshLinksJob
+        */
+       public static function newPrioritized( Title $title, array $params ) {
+               $job = new self( $title, $params );
+               $job->command = 'refreshLinksPrioritized';
+
+               return $job;
+       }
+
+       /**
+        * @param Title $title
+        * @param array $params
+        * @return RefreshLinksJob
+        */
+       public static function newDynamic( Title $title, array $params ) {
+               $job = new self( $title, $params );
+               $job->command = 'refreshLinksDynamic';
+
+               return $job;
+       }
+
        function run() {
                global $wgUpdateRowsPerJob;
 
index 96b2e27..983b3bc 100644 (file)
@@ -3479,22 +3479,34 @@ class WikiPage implements Page, IDBAccessObject {
                        return;
                }
 
+               $params = array(
+                       'isOpportunistic' => true,
+                       'rootJobTimestamp' => $parserOutput->getCacheTime()
+               );
+
                if ( $this->mTitle->areRestrictionsCascading() ) {
                        // If the page is cascade protecting, the links should really be up-to-date
-                       $params = array( 'prioritize' => true );
+                       JobQueueGroup::singleton()->lazyPush(
+                               RefreshLinksJob::newPrioritized( $this->mTitle, $params )
+                       );
                } elseif ( $parserOutput->hasDynamicContent() ) {
-                       // Assume the output contains time/random based magic words
-                       $params = array();
-               } else {
-                       // If the inclusions are deterministic, the edit-triggered link jobs are enough
-                       return;
-               }
-
-               // Check if the last link refresh was before page_touched
-               if ( $this->getLinksTimestamp() < $this->getTouched() ) {
-                       $params['isOpportunistic'] = true;
-                       $params['rootJobTimestamp'] = $parserOutput->getCacheTime();
-                       JobQueueGroup::singleton()->lazyPush( new RefreshLinksJob( $this->mTitle, $params ) );
+                       // Assume the output contains "dynamic" time/random based magic words.
+                       // Only update pages that expired due to dynamic content and NOT due to edits
+                       // to referenced templates/files. When the cache expires due to dynamic content,
+                       // page_touched is unchanged. We want to avoid triggering redundant jobs due to
+                       // views of pages that were just purged via HTMLCacheUpdateJob. In that case, the
+                       // template/file edit already triggered recursive RefreshLinksJob jobs.
+                       if ( $this->getLinksTimestamp() > $this->getTouched() ) {
+                               // If a page is uncacheable, do not keep spamming a job for it.
+                               // Although it would be de-duplicated, it would still waste I/O.
+                               $cache = ObjectCache::getLocalClusterInstance();
+                               $key = $cache->makeKey( 'dynamic-linksupdate', 'last', $this->getId() );
+                               if ( $cache->add( $key, time(), 60 ) ) {
+                                       JobQueueGroup::singleton()->lazyPush(
+                                               RefreshLinksJob::newDynamic( $this->mTitle, $params )
+                                       );
+                               }
+                       }
                }
        }