From a12ce8f7478d9125bf6b58d6be559d3436941df3 Mon Sep 17 00:00:00 2001
From: Tim Starling
Date: Wed, 1 Jun 2005 06:18:49 +0000
Subject: [PATCH] new load balancing algorithm

---
Reviewer notes (this section is ignored by git-am):

* Database::getLag() returns the Time column of the second 'system user'
  row of SHOW PROCESSLIST (assumed to be the slave SQL thread), or false
  when no such row is found. Database::getStatus() returns SHOW STATUS as
  an associative array keyed by Variable_name.
* wfReadOnly() and OutputPage::readOnlyPage() honour the new $wgReadOnly
  global in addition to $wgReadOnlyFile; $wgReadOnly doubles as the reason
  text shown to the user.
* LoadBalancer::getRandomNonLagged() drops servers whose cached lag exceeds
  their 'max lag' setting; getReaderIndex() sets $wgReadOnly to the
  'readonly_lag' message when that leaves no candidate, and sleeps/retries
  (bounded by mWaitTimeout) while the chosen server has more than
  'max threads' running threads.
* LoadBalancer::pickRandom() recomputes $sum after equalising all-zero
  weights; otherwise the random number is always scaled by 0 and the
  fallback is not random.
* LoadBalancer::waitFor() and doWait() bodies are commented out; doWait()
  now always returns true.
* Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch; check the context lines against the target tree
  before applying. The post-image blob id on the LoadBalancer.php index
  line predates the pickRandom()/wfDebug()/comment corrections.

 includes/Database.php        |  32 ++++++++
 includes/GlobalFunctions.php |   5 +-
 includes/LoadBalancer.php    | 148 +++++++++++++++++++++++++++++++++--
 includes/OutputPage.php      |   8 +-
 languages/Language.php       |   1 +
 5 files changed, 183 insertions(+), 11 deletions(-)

diff --git a/includes/Database.php b/includes/Database.php
index 0dce6a9b5a..935523118b 100644
--- a/includes/Database.php
+++ b/includes/Database.php
@@ -1447,6 +1447,38 @@ class Database {
 	function ping() {
 		return mysql_ping( $this->mConn );
 	}
+
+	/**
+	 * Get slave lag.
+	 * At the moment, this will only work if the DB user has the PROCESS privilege
+	 */
+	function getLag() {
+		$res = $this->query( 'SHOW PROCESSLIST' );
+		# Find slave SQL thread. Assumed to be the second one running, which is a bit
+		# dubious, but unfortunately there's no easy rigorous way
+		$slaveThreads = 0;
+		while ( $row = $this->fetchObject( $res ) ) {
+			if ( $row->User == 'system user' ) {
+				if ( ++$slaveThreads == 2 ) {
+					# This is it, return the time
+					return $row->Time;
+				}
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Get status information from SHOW STATUS in an associative array
+	 */
+	function getStatus() {
+		$res = $this->query( 'SHOW STATUS' );
+		$status = array();
+		while ( $row = $this->fetchObject( $res ) ) {
+			$status[$row->Variable_name] = $row->Value;
+		}
+		return $status;
+	}
 }
 
 /**
diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index af86f712cb..47646471b2 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -216,8 +216,11 @@ function logProfilingData() {
  * @return bool
  */
 function wfReadOnly() {
-	global $wgReadOnlyFile;
+	global $wgReadOnlyFile, $wgReadOnly;
 
+	if ( $wgReadOnly ) {
+		return true;
+	}
 	if ( '' == $wgReadOnlyFile ) {
 		return false;
 	}
diff --git a/includes/LoadBalancer.php b/includes/LoadBalancer.php
index abbf936e5b..c6f6f1521d 100644
--- a/includes/LoadBalancer.php
+++ b/includes/LoadBalancer.php
@@ -91,6 +91,14 @@ class LoadBalancer {
 		foreach ( $weights as $w ) {
 			$sum += $w;
 		}
+
+		if ( $sum == 0 ) {
+			# No loads on any of them: weight all servers equally and pick one at random
+			foreach ( $weights as $i => $w ) {
+				$weights[$i] = 1;
+			}
+			$sum = count( $weights );
+		}
 		$max = mt_getrandmax();
 		$rand = mt_rand(0, $max) / $max * $sum;
 
@@ -104,8 +112,44 @@ class LoadBalancer {
 		return $i;
 	}
 
+	function getRandomNonLagged( $loads ) {
+		# Unset excessively lagged servers
+		$lags = $this->getLagTimes();
+		foreach ( $lags as $i => $lag ) {
+			if ( isset( $this->mServers[$i]['max lag'] ) && $lag > $this->mServers[$i]['max lag'] ) {
+				unset( $loads[$i] );
+			}
+		}
+
+
+		# Find out if all the slaves with non-zero load are lagged
+		$sum = 0;
+		foreach ( $loads as $load ) {
+			$sum += $load;
+		}
+		if ( $sum == 0 ) {
+			# No appropriate DB servers except maybe the master and some slaves with zero load
+			# Do NOT use the master
+			# Instead, this function will return false, triggering read-only mode,
+			# and a lagged slave will be used instead.
+			unset ( $loads[0] );
+		}
+
+		if ( count( $loads ) == 0 ) {
+			return false;
+		}
+
+		wfDebug( var_export( $loads, true ) . "\n" );
+
+		# Return a random representative of the remainder
+		return $this->pickRandom( $loads );
+	}
+
+
 	function getReaderIndex()
 	{
+		global $wgMaxLag, $wgReadOnly;
+
 		$fname = 'LoadBalancer::getReaderIndex';
 		wfProfileIn( $fname );
 
@@ -119,8 +163,19 @@ class LoadBalancer {
 				# $loads is $this->mLoads except with elements knocked out if they
 				# don't work
 				$loads = $this->mLoads;
+				$done = false;
+				$totalElapsed = 0;
 				do {
-					$i = $this->pickRandom( $loads );
+					if ( $wgReadOnly ) {
+						$i = $this->pickRandom( $loads );
+					} else {
+						$i = $this->getRandomNonLagged( $loads );
+						if ( $i === false && count( $loads ) != 0 ) {
+							# All slaves lagged. Switch to read-only mode
+							$wgReadOnly = wfMsgNoDB( 'readonly_lag' );
+							$i = $this->pickRandom( $loads );
+						}
+					}
 					if ( $i !== false ) {
 						wfDebug( "Using reader #$i: {$this->mServers[$i]['host']}...\n" );
 						$this->openConnection( $i );
@@ -128,16 +183,32 @@ class LoadBalancer {
 						if ( !$this->isOpen( $i ) ) {
 							wfDebug( "Failed\n" );
 							unset( $loads[$i] );
-						} elseif ( isset( $this->mServers[$i]['slave pos'] ) ) {
-							wfDebug( "Lagged slave\n" );
-							$this->mLaggedSlaveMode = true;
+							$sleepTime = 0;
 						} else {
-							wfDebug( "OK\n" );
+							$status = $this->mConnections[$i]->getStatus();
+							if ( isset( $this->mServers[$i]['max threads'] ) &&
+							  $status['Threads_running'] > $this->mServers[$i]['max threads'] )
+							{
+								# Slave is overloaded (too many running threads), wait for a while
+								$sleepTime = 5000 * $status['Threads_connected'];
+
+								# If we reach the timeout and exit the loop, don't use it
+								$i = false;
+							} else {
+								$done = true;
+								$sleepTime = 0;
+							}
 						}
+					} else {
+						$sleepTime = 500000;
+					}
+					if ( $sleepTime ) {
+						$totalElapsed += $sleepTime;
+						usleep( $sleepTime );
 					}
-				} while ( $i !== false && !$this->isOpen( $i ) );
+				} while ( count( $loads ) && !$done && $totalElapsed / 1e6 < $this->mWaitTimeout );
 
-				if ( $this->isOpen( $i ) ) {
+				if ( $i !== false && $this->isOpen( $i ) ) {
 					$this->mReadIndex = $i;
 				} else {
 					$i = false;
@@ -167,6 +238,7 @@ class LoadBalancer {
 	 * Otherwise sets a variable telling it to wait if such a connection is opened
 	 */
 	function waitFor( $file, $pos ) {
+		/*
 		$fname = 'LoadBalancer::waitFor';
 		wfProfileIn( $fname );
 
@@ -187,12 +259,15 @@ class LoadBalancer {
 			}
 		}
 		wfProfileOut( $fname );
+		*/
 	}
 
 	/**
 	 * Wait for a given slave to catch up to the master pos stored in $this
 	 */
 	function doWait( $index ) {
+		return true;
+		/*
 		global $wgMemc;
 
 		$retVal = false;
@@ -228,7 +303,7 @@ class LoadBalancer {
 				wfDebug( "Done\n" );
 			}
 		}
-		return $retVal;
+		return $retVal;*/
 	}
 
 	/**
@@ -459,6 +534,63 @@ class LoadBalancer {
 		}
 		return $success;
 	}
+
+	/**
+	 * Get the hostname and lag time of the most-lagged slave
+	 * This is useful for maintenance scripts that need to throttle their updates
+	 */
+	function getMaxLag() {
+		$maxLag = -1;
+		$host = '';
+		foreach ( $this->mServers as $i => $conn ) {
+			if ( $this->openConnection( $i ) ) {
+				$lag = $this->mConnections[$i]->getLag();
+				if ( $lag > $maxLag ) {
+					$maxLag = $lag;
+					$host = $this->mServers[$i]['host'];
+				}
+			}
+		}
+		return array( $host, $maxLag );
+	}
+
+	/**
+	 * Get lag time for each DB
+	 * Results are cached for a short time in memcached
+	 */
+	function getLagTimes() {
+		$expiry = 5;
+		$requestRate = 10;
+
+		global $wgMemc;
+		$times = $wgMemc->get( 'lag_times' );
+		if ( $times ) {
+			# Randomly recache with probability rising over $expiry
+			$elapsed = time() - $times['timestamp'];
+			$chance = max( 0, ( $expiry - $elapsed ) * $requestRate );
+			if ( mt_rand( 0, $chance ) != 0 ) {
+				unset( $times['timestamp'] );
+				return $times;
+			}
+		}
+
+		# Cache key missing or expired
+
+		$times = array();
+		foreach ( $this->mServers as $i => $conn ) {
+			if ( $this->openConnection( $i ) ) {
+				$times[$i] = $this->mConnections[$i]->getLag();
+			}
+		}
+
+		# Add a timestamp key so we know when it was cached
+		$times['timestamp'] = time();
+		$wgMemc->set( 'lag_times', $times, $expiry );
+
+		# But don't give the timestamp to the caller
+		unset($times['timestamp']);
+		return $times;
+	}
 }
 
 ?>
diff --git a/includes/OutputPage.php b/includes/OutputPage.php
index 879f5e8f13..624ab6cf6a 100644
--- a/includes/OutputPage.php
+++ b/includes/OutputPage.php
@@ -611,7 +611,7 @@ class OutputPage {
 	}
 
 	function readOnlyPage( $source = null, $protected = false ) {
-		global $wgUser, $wgReadOnlyFile;
+		global $wgUser, $wgReadOnlyFile, $wgReadOnly;
 
 		$this->setRobotpolicy( 'noindex,nofollow' );
 		$this->setArticleRelated( false );
@@ -621,7 +621,11 @@ class OutputPage {
 			$this->addWikiText( wfMsg( 'protectedtext' ) );
 		} else {
 			$this->setPageTitle( wfMsg( 'readonly' ) );
-			$reason = file_get_contents( $wgReadOnlyFile );
+			if ( $wgReadOnly ) {
+				$reason = $wgReadOnly;
+			} else {
+				$reason = file_get_contents( $wgReadOnlyFile );
+			}
 			$this->addWikiText( wfMsg( 'readonlytext', $reason ) );
 		}
 
diff --git a/languages/Language.php b/languages/Language.php
index ce983a6265..2186247a16 100644
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -520,6 +520,7 @@ page that has been deleted.
 
 If this is not the case, you may have found a bug in the software.
 Please report this to an administrator, making note of the URL.",
+'readonly_lag' => "The database has been automatically locked while the slave database servers catch up to the master",
 'internalerror' => 'Internal error',
 'filecopyerror' => "Could not copy file \"$1\" to \"$2\".",
 'filerenameerror' => "Could not rename file \"$1\" to \"$2\".",
-- 
2.20.1