[JobQueue] Improved refreshLinks/htmlCacheUpdate job de-duplication.
[lhc/web/wiklou.git] / includes / job / JobQueueDB.php
index cbb2391..0c01db7 100644 (file)
@@ -165,6 +165,10 @@ class JobQueueDB extends JobQueue {
                                        );
                                        wfIncrStats( 'job-pop', $dbw->affectedRows() );
                                }
+                               // Flag this job as an old duplicate based on its "root" job...
+                               if ( $this->isRootJobOldDuplicate( $job ) ) {
+                                       $job = DuplicateJob::newFromJob( $job ); // convert to a no-op
+                               }
                                break; // done
                        } while( true );
                } catch ( DBError $e ) {
@@ -353,6 +357,62 @@ class JobQueueDB extends JobQueue {
                return true;
        }
 
+       /**
+        * @see JobQueue::doDeduplicateRootJob()
+        * @return bool Always true; the result of the deferred cache update is not propagated
+        */
+       protected function doDeduplicateRootJob( Job $job ) {
+               $params = $job->getParams();
+               if ( !isset( $params['rootJobSignature'] ) ) {
+                       throw new MWException( "Cannot register root job; missing 'rootJobSignature'." );
+               } elseif ( !isset( $params['rootJobTimestamp'] ) ) {
+                       throw new MWException( "Cannot register root job; missing 'rootJobTimestamp'." );
+               }
+               $key = $this->getRootJobCacheKey( $params['rootJobSignature'] );
+               // Callers should call batchInsert() and then this function so that if the insert
+               // fails, the de-duplication registration will be aborted. Since the insert is
+               // deferred till "transaction idle", do the same here, so that the ordering is
+               // maintained. Having only the de-duplication registration succeed would cause
+               // jobs to become no-ops without any actual jobs that made them redundant.
+               $this->getMasterDB()->onTransactionIdle( function() use ( $params, $key ) {
+                       global $wgMemc;
+
+                       $timestamp = $wgMemc->get( $key ); // last timestamp registered for this root job
+                       if ( $timestamp && $timestamp >= $params['rootJobTimestamp'] ) {
+                               return true; // an equally new or newer root job is already registered
+                       }
+
+                       // Update the timestamp of the last root job started at the location...
+                       return $wgMemc->set( $key, $params['rootJobTimestamp'], 14*86400 ); // 2 weeks
+               } );
+
+               return true;
+       }
+
+       /**
+        * Check if the "root" job of a given job has been superseded by a newer one
+        *
+        * @param $job Job
+        * @return bool True if the cache holds a newer timestamp for this job's root job signature
+        */
+       protected function isRootJobOldDuplicate( Job $job ) {
+               global $wgMemc;
+
+               $params = $job->getParams();
+               if ( !isset( $params['rootJobSignature'] ) ) {
+                       return false; // job has no de-duplication info
+               } elseif ( !isset( $params['rootJobTimestamp'] ) ) {
+                       trigger_error( "Cannot check root job; missing 'rootJobTimestamp'." );
+                       return false;
+               }
+
+               // Get the last time this root job was enqueued
+               $timestamp = $wgMemc->get( $this->getRootJobCacheKey( $params['rootJobSignature'] ) );
+
+               // Check if a new root job was started at the location after this one's...
+               return ( $timestamp && $timestamp > $params['rootJobTimestamp'] );
+       }
+
        /**
         * @see JobQueue::doWaitForBackups()
         * @return void
@@ -380,22 +440,22 @@ class JobQueueDB extends JobQueue {
         * @return array
         */
        protected function insertFields( Job $job ) {
-               // Rows that describe the nature of the job
-               $descFields = array(
+               $dbw = $this->getMasterDB();
+               return array(
+                       // Fields that describe the nature of the job
                        'job_cmd'       => $job->getType(),
                        'job_namespace' => $job->getTitle()->getNamespace(),
                        'job_title'     => $job->getTitle()->getDBkey(),
                        'job_params'    => self::makeBlob( $job->getParams() ),
-               );
-               // Additional job metadata
-               $dbw = $this->getMasterDB();
-               $metaFields = array(
+                       // Additional job metadata
                        'job_id'        => $dbw->nextSequenceValue( 'job_job_id_seq' ),
                        'job_timestamp' => $dbw->timestamp(),
-                       'job_sha1'      => wfBaseConvert( sha1( serialize( $descFields ) ), 16, 36, 32 ),
+                       'job_sha1'      => wfBaseConvert(
+                               sha1( serialize( $job->getDeduplicationInfo() ) ),
+                               16, 36, 31
+                       ),
                        'job_random'    => mt_rand( 0, self::MAX_JOB_RANDOM )
                );
-               return ( $descFields + $metaFields );
        }
 
        /**
@@ -406,6 +466,15 @@ class JobQueueDB extends JobQueue {
                return wfForeignMemcKey( $db, $prefix, 'jobqueue', $this->type, 'isempty' );
        }
 
+       /**
+        * @param string $signature Hash identifier of the root job
+        * @return string Cache key scoped to this queue's wiki and job type plus the signature
+        */
+       private function getRootJobCacheKey( $signature ) {
+               list( $db, $prefix ) = wfSplitWikiID( $this->wiki );
+               return wfForeignMemcKey( $db, $prefix, 'jobqueue', $this->type, 'rootjob', $signature );
+       }
+
        /**
         * @param $params
         * @return string