From 47e9cc7fe89fe4f33047e328b1036853cd28e3bd Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Thu, 6 Dec 2012 13:02:34 -0800 Subject: [PATCH] [JobQueue] Reduced the change of deadlocks in recycleStaleJobs(). * Do the DELETE by primary key to avoid gap locks. This should cut down on deadlocks, though their frequency is still fairly modest. * Also use GET_LOCK() to assure that two processes are not doing this update at the same time. Change-Id: Ic3de3b7cb3acd5294a151b4a00bfed65c33d0f1e --- includes/job/JobQueueDB.php | 50 +++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/includes/job/JobQueueDB.php b/includes/job/JobQueueDB.php index cf6f1b97cf..3e057eb16e 100644 --- a/includes/job/JobQueueDB.php +++ b/includes/job/JobQueueDB.php @@ -304,26 +304,42 @@ class JobQueueDB extends JobQueue { $dbw = $this->getMasterDB(); $count = 0; // affected rows - if ( $this->claimTTL > 0 ) { // re-try stale jobs... + if ( !$dbw->lock( "jobqueue-recycle-{$this->type}", __METHOD__, 1 ) ) { + return $count; // already in progress + } + + // Remove claims on jobs acquired for too long if enabled... + if ( $this->claimTTL > 0 ) { $claimCutoff = $dbw->timestamp( $now - $this->claimTTL ); - // Reset job_token for these jobs so that other runners will pick them up. - // Set the timestamp to the current time, as it is useful to now that the job - // was already tried before. - $dbw->update( 'job', - array( - 'job_token' => '', - 'job_token_timestamp' => $dbw->timestamp( $now ) ), // time of release + // Get the IDs of jobs that have be claimed but not finished after too long. + // These jobs can be recycled into the queue by expiring the claim. Selecting + // the IDs first means that the UPDATE can be done by primary key (less deadlocks). + $res = $dbw->select( 'job', 'job_id', array( 'job_cmd' => $this->type, "job_token != {$dbw->addQuotes( '' )}", // was acquired "job_token_timestamp < {$dbw->addQuotes( $claimCutoff )}", // stale - "job_attempts < {$dbw->addQuotes( self::MAX_ATTEMPTS )}" ), + "job_attempts < {$dbw->addQuotes( self::MAX_ATTEMPTS )}" ), // retries left __METHOD__ ); - $count += $dbw->affectedRows(); + $ids = array_map( function( $o ) { return $o->job_id; }, iterator_to_array( $res ) ); + if ( count( $ids ) ) { + // Reset job_token for these jobs so that other runners will pick them up. + // Set the timestamp to the current time, as it is useful to now that the job + // was already tried before (the timestamp becomes the "released" time). + $dbw->update( 'job', + array( + 'job_token' => '', + 'job_token_timestamp' => $dbw->timestamp( $now ) ), // time of release + array( + 'job_id' => $ids ), + __METHOD__ + ); + $count += $dbw->affectedRows(); + } } - // Just destroy stale jobs... + // Just destroy any stale jobs... $pruneCutoff = $dbw->timestamp( $now - self::MAX_AGE_PRUNE ); $conds = array( 'job_cmd' => $this->type, @@ -333,8 +349,16 @@ class JobQueueDB extends JobQueue { if ( $this->claimTTL > 0 ) { // only prune jobs attempted too many times... $conds[] = "job_attempts >= {$dbw->addQuotes( self::MAX_ATTEMPTS )}"; } - $dbw->delete( 'job', $conds, __METHOD__ ); - $count += $dbw->affectedRows(); + // Get the IDs of jobs that are considered stale and should be removed. Selecting + // the IDs first means that the UPDATE can be done by primary key (less deadlocks). + $res = $dbw->select( 'job', 'job_id', $conds, __METHOD__ ); + $ids = array_map( function( $o ) { return $o->job_id; }, iterator_to_array( $res ) ); + if ( count( $ids ) ) { + $dbw->delete( 'job', array( 'job_id' => $ids ), __METHOD__ ); + $count += $dbw->affectedRows(); + } + + $dbw->unlock( "jobqueue-recycle-{$this->type}", __METHOD__ ); return $count; } -- 2.20.1