From: Aaron Schulz Date: Fri, 4 Dec 2015 00:37:56 +0000 (-0800) Subject: Make getLagFromPtHeartbeat() always use the LB cluster master entry X-Git-Tag: 1.31.0-rc.0~8559^2 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/password.php?a=commitdiff_plain;h=9c0b55d926e51ef0146122cde888dfeaef38ccdb;p=lhc%2Fweb%2Fwiklou.git Make getLagFromPtHeartbeat() always use the LB cluster master entry Before, it just used the immediate master entry, which could be another slave. In that case, it may not even exists at all. Bug: T119648 Change-Id: Iea970b81ad2c9855aafcccf0bb0662fc0b3a8d4d --- diff --git a/includes/db/Database.php b/includes/db/Database.php index dea4a594f4..31e26531d8 100644 --- a/includes/db/Database.php +++ b/includes/db/Database.php @@ -148,6 +148,9 @@ abstract class DatabaseBase implements IDatabase { */ private $mTrxWriteDuration = 0.0; + /** @var IDatabase|null Lazy handle to the master DB this server replicates from */ + private $lazyMasterHandle; + /** * @since 1.21 * @var resource File handle for upgrade @@ -328,6 +331,25 @@ abstract class DatabaseBase implements IDatabase { } } + /** + * Set a lazy-connecting DB handle to the master DB (for replication status purposes) + * + * @param IDatabase $conn + * @since 1.27 + */ + public function setLazyMasterHandle( IDatabase $conn ) { + $this->lazyMasterHandle = $conn; + } + + /** + * @return IDatabase|null + * @see setLazyMasterHandle() + * @since 1.27 + */ + public function getLazyMasterHandle() { + return $this->lazyMasterHandle; + } + /** * @return TransactionProfiler */ diff --git a/includes/db/DatabaseMysqlBase.php b/includes/db/DatabaseMysqlBase.php index ca568add23..c3cdf35779 100644 --- a/includes/db/DatabaseMysqlBase.php +++ b/includes/db/DatabaseMysqlBase.php @@ -621,13 +621,20 @@ abstract class DatabaseMysqlBase extends Database { abstract protected function mysqlPing(); function getLag() { - if ( $this->lagDetectionMethod === 'pt-heartbeat' ) { + if ( $this->getLagDetectionMethod() === 'pt-heartbeat' ) { return $this->getLagFromPtHeartbeat(); } else { return $this->getLagFromSlaveStatus(); } } + /** + * @return string + */ + protected function getLagDetectionMethod() { + return $this->lagDetectionMethod; + } + /** * @return bool|int */ @@ -645,35 +652,82 @@ abstract class DatabaseMysqlBase extends Database { * @return bool|float */ protected function getLagFromPtHeartbeat() { - $key = wfMemcKey( 'mysql', 'master-server-id', $this->getServer() ); - $masterId = intval( $this->srvCache->get( $key ) ); - if ( !$masterId ) { - $res = $this->query( 'SHOW SLAVE STATUS', __METHOD__ ); - $row = $res ? $res->fetchObject() : false; - if ( $row && strval( $row->Master_Server_Id ) !== '' ) { - $masterId = intval( $row->Master_Server_Id ); - $this->srvCache->set( $key, $masterId, 30 ); - } + $masterInfo = $this->getMasterServerInfo(); + if ( !$masterInfo ) { + return false; // could not get master server ID } - if ( !$masterId ) { - return false; + list( $time, $nowUnix ) = $this->getHeartbeatData( $masterInfo['serverId'] ); + if ( $time !== null ) { + // @time is in ISO format like "2015-09-25T16:48:10.000510" + $dateTime = new DateTime( $time, new DateTimeZone( 'UTC' ) ); + $timeUnix = (int)$dateTime->format( 'U' ) + $dateTime->format( 'u' ) / 1e6; + + return max( $nowUnix - $timeUnix, 0.0 ); } + return false; + } + + protected function getMasterServerInfo() { + $cache = $this->srvCache; + $key = $cache->makeGlobalKey( + 'mysql', + 'master-info', + // Using one key for all cluster slaves is preferable + $this->getLBInfo( 'clusterMasterHost' ) ?: $this->getServer() + ); + + $that = $this; + return $cache->getWithSetCallback( + $key, + $cache::TTL_INDEFINITE, + function () use ( $that, $cache, $key ) { + // Get and leave a lock key in place for a short period + if ( !$cache->lock( $key, 0, 10 ) ) { + return false; // avoid master connection spike slams + } + + $conn = $that->getLazyMasterHandle(); + if ( !$conn ) { + return false; // something is misconfigured + } + + // Connect to and query the master; catch errors to avoid outages + try { + $res = $conn->query( 'SELECT @@server_id AS id', __METHOD__ ); + $row = $res ? $res->fetchObject() : false; + $id = $row ? (int)$row->id : 0; + } catch ( DBError $e ) { + $id = 0; + } + + // Cache the ID if it was retrieved + return $id ? array( 'serverId' => $id, 'asOf' => time() ) : false; + } + ); + } + + /** + * @param string $masterId Server ID + * @return array (heartbeat `ts` column value or null, UNIX timestamp) + * @see https://www.percona.com/doc/percona-toolkit/2.1/pt-heartbeat.html + */ + protected function getHeartbeatData( $masterId ) { + // Get the status row for this master; use the oldest for sanity in case the master + // has entries listed under different server IDs (which should really not happen). + // Note: this would use "MAX(TIMESTAMPDIFF(MICROSECOND,ts,UTC_TIMESTAMP(6)))" but the + // percision field is not supported in MySQL <= 5.5. $res = $this->query( - "SELECT TIMESTAMPDIFF(MICROSECOND,ts,UTC_TIMESTAMP(6)) AS Lag " . - "FROM heartbeat.heartbeat WHERE server_id = $masterId" + "SELECT ts FROM heartbeat.heartbeat WHERE server_id=" . intval( $masterId ) ); $row = $res ? $res->fetchObject() : false; - if ( $row ) { - return max( floatval( $row->Lag ) / 1e6, 0.0 ); - } - return false; + return array( $row ? $row->ts : null, microtime( true ) ); } public function getApproximateLagStatus() { - if ( $this->lagDetectionMethod === 'pt-heartbeat' ) { + if ( $this->getLagDetectionMethod() === 'pt-heartbeat' ) { // Disable caching since this is fast enough and we don't wan't // to be *too* pessimistic by having both the cache TTL and the // pt-heartbeat interval count as lag in getSessionLagStatus() diff --git a/includes/db/loadbalancer/LoadBalancer.php b/includes/db/loadbalancer/LoadBalancer.php index 19b2d1cdd4..03c74fc609 100644 --- a/includes/db/loadbalancer/LoadBalancer.php +++ b/includes/db/loadbalancer/LoadBalancer.php @@ -816,11 +816,14 @@ class LoadBalancer { $server['dbname'] = $dbNameOverride; } + // Let the handle know what the cluster master is (e.g. "db1052") + $masterName = $this->getServerName( 0 ); + $server['clusterMasterHost'] = $masterName; + // Log when many connection are made on requests if ( ++$this->connsOpened >= self::CONN_HELD_WARN_THRESHOLD ) { - $masterAddr = $this->getServerName( 0 ); wfDebugLog( 'DBPerformance', __METHOD__ . ": " . - "{$this->connsOpened}+ connections made (master=$masterAddr)\n" . + "{$this->connsOpened}+ connections made (master=$masterName)\n" . wfBacktrace( true ) ); } @@ -834,6 +837,9 @@ class LoadBalancer { } $db->setLBInfo( $server ); + $db->setLazyMasterHandle( + $this->getLazyConnectionRef( DB_MASTER, array(), $db->getWikiID() ) + ); return $db; } diff --git a/tests/phpunit/includes/db/DatabaseMysqlBaseTest.php b/tests/phpunit/includes/db/DatabaseMysqlBaseTest.php index 31e4f5b826..6403905e4e 100644 --- a/tests/phpunit/includes/db/DatabaseMysqlBaseTest.php +++ b/tests/phpunit/includes/db/DatabaseMysqlBaseTest.php @@ -256,4 +256,59 @@ class DatabaseMysqlBaseTest extends MediaWikiTestCase { $this->assertTrue( $pos2->hasReached( $pos1 ) ); $this->assertFalse( $pos1->hasReached( $pos2 ) ); } + + /** + * @dataProvider provideLagAmounts + */ + function testPtHeartbeat( $lag ) { + $db = $this->getMockBuilder( 'DatabaseMysql' ) + ->disableOriginalConstructor() + ->setMethods( array( + 'getLagDetectionMethod', 'getHeartbeatData', 'getMasterServerInfo' ) ) + ->getMock(); + + $db->expects( $this->any() ) + ->method( 'getLagDetectionMethod' ) + ->will( $this->returnValue( 'pt-heartbeat' ) ); + + $db->expects( $this->any() ) + ->method( 'getMasterServerInfo' ) + ->will( $this->returnValue( array( 'serverId' => 172, 'asOf' => time() ) ) ); + + // Fake the current time. + list( $nowSecFrac, $nowSec ) = explode( ' ', microtime() ); + $now = (float)$nowSec + (float)$nowSecFrac; + // Fake the heartbeat time. + // Work arounds for weak DataTime microseconds support. + $ptTime = $now - $lag; + $ptSec = (int)$ptTime; + $ptSecFrac = ( $ptTime - $ptSec ); + $ptDateTime = new DateTime( "@$ptSec" ); + $ptTimeISO = $ptDateTime->format( 'Y-m-d\TH:i:s' ); + $ptTimeISO .= ltrim( number_format( $ptSecFrac, 6 ), '0' ); + + $db->expects( $this->any() ) + ->method( 'getHeartbeatData' ) + ->with( 172 ) + ->will( $this->returnValue( array( $ptTimeISO, $now ) ) ); + + $db->setLBInfo( 'clusterMasterHost', 'db1052' ); + $lagEst = $db->getLag(); + + $this->assertGreaterThan( $lag - .010, $lagEst, "Correct heatbeat lag" ); + $this->assertLessThan( $lag + .010, $lagEst, "Correct heatbeat lag" ); + } + + function provideLagAmounts() { + return array( + array( 0 ), + array( 0.3 ), + array( 6.5 ), + array( 10.1 ), + array( 200.2 ), + array( 400.7 ), + array( 600.22 ), + array( 1000.77 ), + ); + } } diff --git a/tests/phpunit/includes/db/LBFactoryTest.php b/tests/phpunit/includes/db/LBFactoryTest.php index 519d3a0dfb..a64744555a 100644 --- a/tests/phpunit/includes/db/LBFactoryTest.php +++ b/tests/phpunit/includes/db/LBFactoryTest.php @@ -103,9 +103,13 @@ class LBFactoryTest extends MediaWikiTestCase { $dbw = $lb->getConnection( DB_MASTER ); $this->assertTrue( $dbw->getLBInfo( 'master' ), 'master shows as master' ); + $this->assertEquals( + $wgDBserver, $dbw->getLBInfo( 'clusterMasterHost' ), 'cluster master set' ); $dbr = $lb->getConnection( DB_SLAVE ); $this->assertTrue( $dbr->getLBInfo( 'slave' ), 'slave shows as slave' ); + $this->assertEquals( + $wgDBserver, $dbr->getLBInfo( 'clusterMasterHost' ), 'cluster master set' ); $factory->shutdown(); $lb->closeAll();