Merge "Randomize the JobRunner slave lags checks a bit"
author: jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 24 Sep 2014 01:20:34 +0000 (01:20 +0000)
committer: Gerrit Code Review <gerrit@wikimedia.org>
Wed, 24 Sep 2014 01:20:34 +0000 (01:20 +0000)
1  2 
includes/jobqueue/JobRunner.php

@@@ -76,14 -76,6 +76,14 @@@ class JobRunner 
                        $this->runJobsLog( "Executed $count periodic queue task(s)." );
                }
  
 +              // Bail out if there is too much DB lag
 +              // @note: getLagTimes() has better caching than getMaxLag()
 +              $maxLag = max( wfGetLBFactory()->getMainLB( wfWikiID() )->getLagTimes() );
 +              if ( $maxLag >= 5 ) {
 +                      $response['reached'] = 'slave-lag-limit';
 +                      return $response;
 +              }
 +
                // Flush any pending DB writes for sanity
                wfGetLBFactory()->commitMasterChanges();
  
                $jobsRun = 0;
                $timeMsTotal = 0;
                $flags = JobQueueGroup::USE_CACHE;
+               $checkPeriod = 5.0; // seconds
+               $checkPhase = mt_rand( 0, 1000 * $checkPeriod ) / 1000; // avoid stampedes
                $startTime = microtime( true ); // time since jobs started running
-               $lastTime = microtime( true ); // time since last slave check
+               $lastTime = microtime( true ) - $checkPhase; // time since last slave check
                do {
                        // Sync the persistent backoffs with concurrent runners
                        $backoffs = $this->syncBackoffDeltas( $backoffs, $backoffDeltas, $wait );
                                        break;
                                }
  
 -                              // Don't let any of the main DB slaves get backed up
 +                              // Don't let any of the main DB slaves get backed up.
 +                              // This only waits for so long before exiting and letting
 +                              // other wikis in the farm (on different masters) get a chance.
                                $timePassed = microtime( true ) - $lastTime;
                                if ( $timePassed >= 5 || $timePassed < 0 ) {
 -                                      wfWaitForSlaves( $lastTime );
 +                                      if ( !wfWaitForSlaves( $lastTime, wfWikiID(), false, 5 ) ) {
 +                                              $response['reached'] = 'slave-lag-limit';
 +                                              break;
 +                                      }
                                        $lastTime = microtime( true );
                                }
                                // Don't let any queue slaves/backups fall behind