From: Aaron Schulz Date: Sat, 20 Sep 2014 20:53:16 +0000 (-0700) Subject: Slave lag check tweaks to JobRunner X-Git-Tag: 1.31.0-rc.0~13853 X-Git-Url: http://git.cyclocoop.org/%22.%20generer_url_ecrire%28%22sites_tous%22%2C%22%22%29.%20%22?a=commitdiff_plain;h=a55544180b2e4957aba30c20472e4bd208232ef3;p=lhc%2Fweb%2Fwiklou.git Slave lag check tweaks to JobRunner * Do not block forever, but wait up to 10 seconds. Likewise, check the lag times in memcached on startup. This at least lets runners avoid lagged wikis but still work on others. * Made a few small related documentation and code cleanups. Change-Id: Ic1339bab54cba6b6cbea7d97a80ff87c7c5c87af --- diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 490df24690..3306acda3e 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -3761,11 +3761,18 @@ function wfGetNull() { * @param float|null $ifWritesSince Only wait if writes were done since this UNIX timestamp * @param string|bool $wiki Wiki identifier accepted by wfGetLB * @param string|bool $cluster Cluster name accepted by LBFactory. Default: false. + * @param int|null $timeout Max wait time. Default: 1 day (cli), ~10 seconds (web) * @return bool Success (able to connect and no timeouts reached) */ -function wfWaitForSlaves( $ifWritesSince = false, $wiki = false, $cluster = false ) { +function wfWaitForSlaves( + $ifWritesSince = null, $wiki = false, $cluster = false, $timeout = null +) { // B/C: first argument used to be "max seconds of lag"; ignore such values - $ifWritesSince = ( $ifWritesSince > 1e9 ) ? $ifWritesSince : false; + $ifWritesSince = ( $ifWritesSince > 1e9 ) ? $ifWritesSince : null; + + if ( $timeout === null ) { + $timeout = ( PHP_SAPI === 'cli' ) ? 86400 : 10; + } if ( $cluster !== false ) { $lb = wfGetLBFactory()->getExternalLB( $cluster ); @@ -3787,7 +3794,7 @@ function wfWaitForSlaves( $ifWritesSince = false, $wiki = false, $cluster = fals // The DBMS may not support getMasterPos() or the whole // load balancer might be fake (e.g. $wgAllDBsAreLocalhost). if ( $pos !== false ) { - return $lb->waitForAll( $pos, PHP_SAPI === 'cli' ? 86400 : null ); + return $lb->waitForAll( $pos, $timeout ); } } diff --git a/includes/db/LoadBalancer.php b/includes/db/LoadBalancer.php index e517a0250c..f79fde0918 100644 --- a/includes/db/LoadBalancer.php +++ b/includes/db/LoadBalancer.php @@ -1128,7 +1128,7 @@ class LoadBalancer { * Results are cached for a short time in memcached, and indefinitely in the process cache * * @param string|bool $wiki - * @return array + * @return array Map of (server index => seconds) */ function getLagTimes( $wiki = false ) { # Try process cache diff --git a/includes/db/LoadMonitor.php b/includes/db/LoadMonitor.php index 7281485b47..b694a6fac3 100644 --- a/includes/db/LoadMonitor.php +++ b/includes/db/LoadMonitor.php @@ -48,7 +48,7 @@ interface LoadMonitor { * @param array $serverIndexes * @param string $wiki * - * @return array + * @return array Map of (server index => seconds) */ public function getLagTimes( $serverIndexes, $wiki ); } diff --git a/includes/jobqueue/JobRunner.php b/includes/jobqueue/JobRunner.php index 8cccedaf3c..a256c43327 100644 --- a/includes/jobqueue/JobRunner.php +++ b/includes/jobqueue/JobRunner.php @@ -76,6 +76,14 @@ class JobRunner { $this->runJobsLog( "Executed $count periodic queue task(s)." ); } + // Bail out if there is too much DB lag + // @note: getLagTimes() has better caching than getMaxLag() + $maxLag = max( wfGetLBFactory()->getMainLB( wfWikiID() )->getLagTimes() ); + if ( $maxLag >= 5 ) { + $response['reached'] = 'slave-lag-limit'; + return $response; + } + // Flush any pending DB writes for sanity wfGetLBFactory()->commitMasterChanges(); @@ -172,10 +180,15 @@ class JobRunner { break; } - // Don't let any of the main DB slaves get backed up + // Don't let any of the main DB slaves get backed up. + // This only waits for so long before exiting and letting + // other wikis in the farm (on different masters) get a chance. $timePassed = microtime( true ) - $lastTime; if ( $timePassed >= 5 || $timePassed < 0 ) { - wfWaitForSlaves( $lastTime ); + if ( !wfWaitForSlaves( $lastTime, wfWikiID(), false, 5 ) ) { + $response['reached'] = 'slave-lag-limit'; + break; + } $lastTime = microtime( true ); } // Don't let any queue slaves/backups fall behind