From 9cf6637a751d018bc2bec26ea0f1a9de936c2c9c Mon Sep 17 00:00:00 2001
From: Aaron Schulz
Date: Mon, 8 Jun 2015 12:08:01 -0700
Subject: [PATCH] Use APC for caching slave lag times

Bug: T101433
Change-Id: Ia37ce5a7aca8217ec57cd8d8c5e43472b3d8845b
---
 includes/db/LoadBalancer.php |  18 ++-----
 includes/db/LoadMonitor.php  | 108 ++++++++++++++++++++---------------
 2 files changed, 70 insertions(+), 56 deletions(-)

diff --git a/includes/db/LoadBalancer.php b/includes/db/LoadBalancer.php
index 128118781f..52dca08758 100644
--- a/includes/db/LoadBalancer.php
+++ b/includes/db/LoadBalancer.php
@@ -60,8 +60,6 @@ class LoadBalancer {
 	private $mLastError = 'Unknown error';
 	/** @var integer Total connections opened */
 	private $connsOpened = 0;
-	/** @var ProcessCacheLRU */
-	private $mProcCache;
 
 	/** @var integer Warn when this many connection are held */
 	const CONN_HELD_WARN_THRESHOLD = 10;
@@ -113,8 +111,6 @@ class LoadBalancer {
 				}
 			}
 		}
-
-		$this->mProcCache = new ProcessCacheLRU( 30 );
 	}
 
 	/**
@@ -1240,16 +1236,8 @@ class LoadBalancer {
 			return array( 0 => 0 ); // no replication = no lag
 		}
 
-		if ( $this->mProcCache->has( 'slave_lag', 'times', 1 ) ) {
-			return $this->mProcCache->get( 'slave_lag', 'times' );
-		}
-
 		# Send the request to the load monitor
-		$times = $this->getLoadMonitor()->getLagTimes( array_keys( $this->mServers ), $wiki );
-
-		$this->mProcCache->set( 'slave_lag', 'times', $times );
-
-		return $times;
+		return $this->getLoadMonitor()->getLagTimes( array_keys( $this->mServers ), $wiki );
 	}
 
 	/**
@@ -1276,8 +1264,10 @@ class LoadBalancer {
 
 	/**
 	 * Clear the cache for slag lag delay times
+	 *
+	 * This is only used for testing
 	 */
 	public function clearLagTimeCache() {
-		$this->mProcCache->clear( 'slave_lag' );
+		$this->getLoadMonitor()->clearCaches();
 	}
 }
diff --git a/includes/db/LoadMonitor.php b/includes/db/LoadMonitor.php
index 8c4d0ed2b7..7dc2da0647 100644
--- a/includes/db/LoadMonitor.php
+++ b/includes/db/LoadMonitor.php
@@ -75,13 +75,17 @@ class LoadMonitorMySQL implements LoadMonitor {
 	/** @var LoadBalancer */
 	public $parent;
 	/** @var BagOStuff */
-	protected $cache;
+	protected $srvCache;
+	/** @var BagOStuff */
+	protected $mainCache;
 
 	public function __construct( $parent ) {
 		global $wgMemc;
 
 		$this->parent = $parent;
-		$this->cache = $wgMemc ?: wfGetMainCache();
+
+		$this->srvCache = ObjectCache::newAccelerator( array(), 'hash' );
+		$this->mainCache = $wgMemc ?: wfGetMainCache();
 	}
 
 	public function scaleLoads( &$loads, $group = false, $wiki = false ) {
@@ -89,65 +93,85 @@ class LoadMonitorMySQL implements LoadMonitor {
 
 	public function getLagTimes( $serverIndexes, $wiki ) {
 		if ( count( $serverIndexes ) == 1 && reset( $serverIndexes ) == 0 ) {
-			// Single server only, just return zero without caching
+			# Single server only, just return zero without caching
 			return array( 0 => 0 );
 		}
 
-		$expiry = 5;
-		$requestRate = 10;
-
-		$cache = $this->cache;
-		$masterName = $this->parent->getServerName( 0 );
-		$memcKey = wfMemcKey( 'lag_times', $masterName );
-		$times = $cache->get( $memcKey );
-		if ( is_array( $times ) ) {
-			# Randomly recache with probability rising over $expiry
-			$elapsed = time() - $times['timestamp'];
-			$chance = max( 0, ( $expiry - $elapsed ) * $requestRate );
-			if ( mt_rand( 0, $chance ) != 0 ) {
-				unset( $times['timestamp'] ); // hide from caller
-
-				return $times;
-			}
-			wfIncrStats( 'lag_cache.miss.expired' );
-		} else {
-			wfIncrStats( 'lag_cache.miss.absent' );
+		$key = $this->getLagTimeCacheKey();
+		# Randomize TTLs to reduce stampedes (4.0 - 5.0 sec)
+		$ttl = mt_rand( 4e6, 5e6 ) / 1e6;
+		# Keep keys around longer as fallbacks
+		$staleTTL = 60;
+
+		# (a) Check the local APC cache
+		$value = $this->srvCache->get( $key );
+		if ( $value && $value['timestamp'] > ( microtime( true ) - $ttl ) ) {
+			wfDebugLog( 'replication', __FUNCTION__ . ": got lag times ($key) from local cache" );
+			return $value['lagTimes']; // cache hit
+		}
+		$staleValue = $value ?: false;
+
+		# (b) Check the shared cache and backfill APC
+		$value = $this->mainCache->get( $key );
+		if ( $value && $value['timestamp'] > ( microtime( true ) - $ttl ) ) {
+			$this->srvCache->set( $key, $value, $staleTTL );
+			wfDebugLog( 'replication', __FUNCTION__ . ": got lag times ($key) from main cache" );
+
+			return $value['lagTimes']; // cache hit
 		}
+		$staleValue = $value ?: $staleValue;
 
-		# Cache key missing or expired
-		if ( $cache->lock( $memcKey, 0, 10 ) ) {
+		# (c) Cache key missing or expired; regenerate and backfill
+		if ( $this->mainCache->lock( $key, 0, 10 ) ) {
 			# Let this process alone update the cache value
-			$unlocker = new ScopedCallback( function () use ( $cache, $memcKey ) {
-				$cache->unlock( $memcKey );
+			$cache = $this->mainCache;
+			$unlocker = new ScopedCallback( function () use ( $cache, $key ) {
+				$cache->unlock( $key );
 			} );
-		} elseif ( is_array( $times ) ) {
+		} elseif ( $staleValue ) {
 			# Could not acquire lock but an old cache exists, so use it
-			unset( $times['timestamp'] ); // hide from caller
-
-			return $times;
+			return $staleValue['lagTimes'];
 		}
 
-		$times = array();
+		$lagTimes = array();
 		foreach ( $serverIndexes as $i ) {
 			if ( $i == 0 ) { # Master
-				$times[$i] = 0;
+				$lagTimes[$i] = 0;
 			} elseif ( false !== ( $conn = $this->parent->getAnyOpenConnection( $i ) ) ) {
-				$times[$i] = $conn->getLag();
+				$lagTimes[$i] = $conn->getLag();
 			} elseif ( false !== ( $conn = $this->parent->openConnection( $i, $wiki ) ) ) {
-				$times[$i] = $conn->getLag();
-				// Close the connection to avoid sleeper connections piling up.
-				// Note that the caller will pick one of these DBs and reconnect,
-				// which is slightly inefficient, but this only matters for the lag
-				// time cache miss cache, which is far less common that cache hits.
+				$lagTimes[$i] = $conn->getLag();
+				# Close the connection to avoid sleeper connections piling up.
+				# Note that the caller will pick one of these DBs and reconnect,
+				# which is slightly inefficient, but this only matters for the lag
+				# time cache miss case, which is far less common than cache hits.
 				$this->parent->closeConnection( $conn );
 			}
 		}
 
 		# Add a timestamp key so we know when it was cached
-		$times['timestamp'] = time();
-		$cache->set( $memcKey, $times, $expiry + 10 );
-		unset( $times['timestamp'] ); // hide from caller
+		$value = array( 'lagTimes' => $lagTimes, 'timestamp' => microtime( true ) );
+		$this->mainCache->set( $key, $value, $staleTTL );
+		$this->srvCache->set( $key, $value, $staleTTL );
+		wfDebugLog( 'replication', __FUNCTION__ . ": re-calculated lag times ($key)" );
+
+		return $value['lagTimes'];
+	}
+
+	/**
+	 * Delete the cached lag times from both the server-local and main caches
+	 */
+	public function clearCaches() {
+		$key = $this->getLagTimeCacheKey();
+		$this->srvCache->delete( $key );
+		$this->mainCache->delete( $key );
+	}
 
-		return $times;
+	/**
+	 * Get the cache key for lag times, derived from the master server name
+	 */
+	private function getLagTimeCacheKey() {
+		# Lag is per-server, not per-DB, so key on the master DB name
+		return wfForeignMemcKey( $this->parent->getServerName( 0 ), '', 'lag_times' );
 	}
 }
-- 
2.20.1