Improve getLagTimes.php output and add statsD flag
author Aaron Schulz <aschulz@wikimedia.org>
Wed, 26 Oct 2016 22:17:08 +0000 (15:17 -0700)
committer Aaron Schulz <aschulz@wikimedia.org>
Fri, 4 Nov 2016 21:43:07 +0000 (21:43 +0000)
* The script now lists all DBs in the LBFactory,
  not just those of the current wiki cluster.
* Add a --report option to send the metrics
  to statsD so that the MediaWiki view of lag can
  be measured, rather than just the DB-level view.
  This avoids some noise with depooled servers.

Bug: T149210
Change-Id: I6eae25e29aecf21251ad0eec53c56a86f35007f5

includes/libs/rdbms/lbfactory/ILBFactory.php
includes/libs/rdbms/lbfactory/LBFactoryMulti.php
includes/libs/rdbms/lbfactory/LBFactorySimple.php
includes/libs/rdbms/lbfactory/LBFactorySingle.php
maintenance/getLagTimes.php

index ff1bd43..5288c24 100644 (file)
@@ -106,6 +106,22 @@ interface ILBFactory {
         */
        public function getExternalLB( $cluster );
 
+       /**
+        * Get cached (tracked) load balancers for all main database clusters
+        *
+        * @return LoadBalancer[] Map of (cluster name => LoadBalancer), e.g. [ 'DEFAULT' => $lb ]
+        * @since 1.29
+        */
+       public function getAllMainLBs();
+
+       /**
+        * Get cached (tracked) load balancers for all external database clusters
+        *
+        * @return LoadBalancer[] Map of (cluster name => LoadBalancer); empty if there are none
+        * @since 1.29
+        */
+       public function getAllExternalLBs();
+
        /**
         * Execute a function for each tracked load balancer
         * The callback is called with the load balancer as the first parameter,
index a7cc16c..1d22873 100644 (file)
@@ -284,6 +284,42 @@ class LBFactoryMulti extends LBFactory {
                return $this->extLBs[$cluster];
        }
 
+       /**
+        * Get the main load balancer of each section named in sectionsByDB
+        *
+        * A section is resolved once, through the first database listed for it.
+        * NOTE(review): a section that no sectionsByDB entry points to is not returned;
+        * confirm whether such sections exist and should be covered.
+        *
+        * @return LoadBalancer[] Map of (section name => LoadBalancer)
+        */
+       public function getAllMainLBs() {
+               $lbsBySection = [];
+               foreach ( $this->sectionsByDB as $dbName => $sectionName ) {
+                       if ( isset( $lbsBySection[$sectionName] ) ) {
+                               // Already resolved through an earlier database of this section
+                               continue;
+                       }
+                       $lbsBySection[$sectionName] = $this->getMainLB( $dbName );
+               }
+
+               return $lbsBySection;
+       }
+
+       /**
+        * Get the load balancer of every cluster keyed in externalLoads
+        *
+        * @return LoadBalancer[] Map of (cluster name => LoadBalancer)
+        */
+       public function getAllExternalLBs() {
+               $lbsByCluster = [];
+               foreach ( $this->externalLoads as $clusterName => $ignored ) {
+                       $lbsByCluster[$clusterName] = $this->getExternalLB( $clusterName );
+               }
+
+               return $lbsByCluster;
+       }
+
        /**
         * Make a new load balancer object based on template and load array
         *
index 1e69d8f..5bf5032 100644 (file)
@@ -108,6 +108,31 @@ class LBFactorySimple extends LBFactory {
                return $this->extLBs[$cluster];
        }
 
+       /**
+        * Get the main load balancer under the only cluster name used here, 'DEFAULT'
+        *
+        * @return LoadBalancer[] Map of (cluster name => LoadBalancer)
+        */
+       public function getAllMainLBs() {
+               $mainLB = $this->getMainLB();
+
+               return [ 'DEFAULT' => $mainLB ];
+       }
+
+       /**
+        * Get the load balancer of every cluster keyed in externalClusters
+        *
+        * @return LoadBalancer[] Map of (cluster name => LoadBalancer)
+        */
+       public function getAllExternalLBs() {
+               $lbsByCluster = [];
+               foreach ( $this->externalClusters as $clusterName => $ignored ) {
+                       $lbsByCluster[$clusterName] = $this->getExternalLB( $clusterName );
+               }
+
+               return $lbsByCluster;
+       }
+
        private function newLoadBalancer( array $servers ) {
                $lb = new LoadBalancer( array_merge(
                        $this->baseLoadBalancerParams(),
index 9424614..819375d 100644 (file)
@@ -78,6 +78,20 @@ class LBFactorySingle extends LBFactory {
                throw new BadMethodCallException( "Method is not supported." );
        }
 
+       /**
+        * @return LoadBalancerSingle[] Map of (cluster name => LoadBalancer); only 'DEFAULT' => $this->lb
+        */
+       public function getAllMainLBs() {
+               return [ 'DEFAULT' => $this->lb ];
+       }
+
+       /**
+        * @return LoadBalancerSingle[] Map of (cluster name => LoadBalancer); always empty here
+        */
+       public function getAllExternalLBs() {
+               return [];
+       }
+
        /**
         * @param string|callable $callback
         * @param array $params
index c2c6958..677bfa2 100644 (file)
@@ -23,6 +23,8 @@
 
 require_once __DIR__ . '/Maintenance.php';
 
+use MediaWiki\MediaWikiServices;
+
 /**
  * Maintenance script that displays replication lag times.
  *
@@ -32,27 +34,60 @@ class GetLagTimes extends Maintenance {
        public function __construct() {
                parent::__construct();
                $this->addDescription( 'Dump replication lag times' );
+               $this->addOption( 'report', "Report the lag values to StatsD" );
        }
 
+       /**
+        * Print the replication lag of every server in each multi-server cluster
+        *
+        * All main and external load balancers known to the LBFactory are checked;
+        * load balancers with at most one server are skipped. With --report, each
+        * known lag value is also sent to StatsD as a gauge.
+        */
        public function execute() {
-               $lb = wfGetLB();
+               $services = MediaWikiServices::getInstance();
+               $lbFactory = $services->getDBLoadBalancerFactory();
+               $stats = $services->getStatsdDataFactory();
 
-               if ( $lb->getServerCount() == 1 ) {
-                       $this->error( "This script dumps replication lag times, but you don't seem to have\n"
-                               . "a multi-host db server configuration." );
-               } else {
+               // Keep (cluster name, LoadBalancer) pairs rather than merging both maps with
+               // "+": the array union would silently drop an external cluster whose name
+               // equals that of a main cluster.
+               $clusterLBs = [];
+               $lbMaps = [ $lbFactory->getAllMainLBs(), $lbFactory->getAllExternalLBs() ];
+               foreach ( $lbMaps as $lbsByCluster ) {
+                       foreach ( $lbsByCluster as $clusterName => $clusterLB ) {
+                               $clusterLBs[] = [ $clusterName, $clusterLB ];
+                       }
+               }
+
+               foreach ( $clusterLBs as $pair ) {
+                       list( $cluster, $lb ) = $pair;
+                       if ( $lb->getServerCount() <= 1 ) {
+                               continue;
+                       }
                        $lags = $lb->getLagTimes();
-                       foreach ( $lags as $n => $lag ) {
-                               $host = $lb->getServerName( $n );
+                       foreach ( $lags as $serverIndex => $lag ) {
+                               $host = $lb->getServerName( $serverIndex );
                                if ( IP::isValid( $host ) ) {
                                        $ip = $host;
                                        $host = gethostbyaddr( $host );
                                } else {
                                        $ip = gethostbyname( $host );
                                }
+
                                $starLen = min( intval( $lag ), 40 );
                                $stars = str_repeat( '*', $starLen );
+                               if ( $lag === false ) {
+                                       // getLagTimes() reports false when the lag of a server could not
+                                       // be estimated; say so instead of rendering it like "no lag"
+                                       $stars = 'replication stopped or errored';
+                               }
                                $this->output( sprintf( "%10s %20s %3d %s\n", $ip, $host, $lag, $stars ) );
+
+                               // An unknown lag is not reported: a gauge value would read as real data
+                               if ( $lag !== false && $this->hasOption( 'report' ) ) {
+                                       $stats->gauge( "loadbalancer.lag.$cluster.$host", $lag );
+                               }
                        }
                }
        }