From b203ec5fcd8a63019caeb802ce251d060d5a1eb6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Gerg=C5=91=20Tisza?=
Date: Tue, 12 Apr 2016 22:08:20 +0000
Subject: [PATCH] Make statsd sampling rates configurable

Change-Id: I6f5b0531ff5caf4b0fda38331fabad93a6a8a360
---
 includes/DefaultSettings.php                  |  8 +++++
 includes/GlobalFunctions.php                  |  1 +
 includes/libs/SamplingStatsdClient.php        | 32 +++++++++++++----
 .../libs/SamplingStatsdClientTest.php         | 35 +++++++++++++++----
 4 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index ae2aeaafc7..425e153d89 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -5901,6 +5901,14 @@ $wgStatsdServer = false;
  */
 $wgStatsdMetricPrefix = 'MediaWiki';
 
+/**
+ * Sampling rate for statsd metrics as an associative array of patterns and rates.
+ * Patterns are Unix shell patterns (e.g. 'MediaWiki.api.*').
+ * Rates are sampling probabilities (e.g. 0.1 means 1 in 10 events are sampled).
+ * @since 1.28
+ */
+$wgStatsdSamplingRates = [];
+
 /**
  * InfoAction retrieves a list of transclusion links (both to and from).
  * This number puts a limit on that query in the case of highly transcluded
diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index 8c55d9a929..d4226aa651 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -1182,6 +1182,7 @@ function wfLogProfilingData() {
 			$statsdPort = isset( $statsdServer[1] ) ? $statsdServer[1] : 8125;
 			$statsdSender = new SocketSender( $statsdHost, $statsdPort );
 			$statsdClient = new SamplingStatsdClient( $statsdSender, true, false );
+			$statsdClient->setSamplingRates( $config->get( 'StatsdSamplingRates' ) );
 			$statsdClient->send( $context->getStats()->getBuffer() );
 		} catch ( Exception $ex ) {
 			MWExceptionHandler::logException( $ex );
diff --git a/includes/libs/SamplingStatsdClient.php b/includes/libs/SamplingStatsdClient.php
index 2e780c9728..dd1976c06e 100644
--- a/includes/libs/SamplingStatsdClient.php
+++ b/includes/libs/SamplingStatsdClient.php
@@ -30,6 +30,19 @@ use Liuggio\StatsdClient\Entity\StatsdDataInterface;
  * @since 1.26
  */
 class SamplingStatsdClient extends StatsdClient {
+	protected $samplingRates = [];
+
+	/**
+	 * Sampling rates as an associative array of patterns and rates.
+	 * Patterns are Unix shell patterns (e.g. 'MediaWiki.api.*').
+	 * Rates are sampling probabilities (e.g. 0.1 means 1 in 10 events are sampled).
+	 * @param array $samplingRates
+	 * @since 1.28
+	 */
+	public function setSamplingRates( array $samplingRates ) {
+		$this->samplingRates = $samplingRates;
+	}
+
 	/**
 	 * Sets sampling rate for all items in $data.
 	 * The sample rate specified in a StatsdData entity overrides the sample rate specified here.
@@ -37,11 +50,18 @@ class SamplingStatsdClient extends StatsdClient {
 	 * {@inheritDoc}
 	 */
 	public function appendSampleRate( $data, $sampleRate = 1 ) {
-		if ( $sampleRate < 1 ) {
-			array_walk( $data, function( $item ) use ( $sampleRate ) {
+		$samplingRates = $this->samplingRates;
+		if ( !$samplingRates && $sampleRate !== 1 ) {
+			$samplingRates = [ '*' => $sampleRate ];
+		}
+		if ( $samplingRates ) {
+			array_walk( $data, function( $item ) use ( $samplingRates ) {
 				/** @var $item StatsdData */
-				if ( $item->getSampleRate() === 1 ) {
-					$item->setSampleRate( $sampleRate );
+				foreach ( $samplingRates as $pattern => $rate ) {
+					if ( fnmatch( $pattern, $item->getKey(), FNM_NOESCAPE ) ) {
+						$item->setSampleRate( $item->getSampleRate() * $rate );
+						break;
+					}
 				}
 			} );
 		}
@@ -74,9 +94,7 @@ class SamplingStatsdClient extends StatsdClient {
 		}
 
 		// add sampling
-		if ( $sampleRate < 1 ) {
-			$data = $this->appendSampleRate( $data, $sampleRate );
-		}
+		$data = $this->appendSampleRate( $data, $sampleRate );
 		$data = $this->sampleData( $data );
 
 		$data = array_map( 'strval', $data );
diff --git a/tests/phpunit/includes/libs/SamplingStatsdClientTest.php b/tests/phpunit/includes/libs/SamplingStatsdClientTest.php
index 1ebe55110f..9a489303a7 100644
--- a/tests/phpunit/includes/libs/SamplingStatsdClientTest.php
+++ b/tests/phpunit/includes/libs/SamplingStatsdClientTest.php
@@ -32,12 +32,35 @@ class SamplingStatsdClientTest extends PHPUnit_Framework_TestCase {
 
 		return [
 			// $data, $sampleRate, $seed, $expectWrite
-			[ $unsampled, 1, 0 /*0.44*/, $unsampled ],
-			[ $sampled, 1, 0 /*0.44*/, null ],
-			[ $sampled, 1, 4 /*0.03*/, $sampled ],
-			[ $unsampled, 0.1, 4 /*0.03*/, $sampled ],
-			[ $sampled, 0.5, 0 /*0.44*/, null ],
-			[ $sampled, 0.5, 4 /*0.03*/, $sampled ],
+			[ $unsampled, 1, 0 /*0.44*/, true ],
+			[ $sampled, 1, 0 /*0.44*/, false ],
+			[ $sampled, 1, 4 /*0.03*/, true ],
+			[ $unsampled, 0.1, 0 /*0.44*/, false ],
+			[ $sampled, 0.5, 0 /*0.44*/, false ],
+			[ $sampled, 0.5, 4 /*0.03*/, false ],
 		];
 	}
+
+	public function testSetSamplingRates() {
+		$matching = new StatsdData();
+		$matching->setKey( 'foo.bar' );
+		$matching->setValue( 1 );
+
+		$nonMatching = new StatsdData();
+		$nonMatching->setKey( 'oof.bar' );
+		$nonMatching->setValue( 1 );
+
+		$sender = $this->getMock( 'Liuggio\StatsdClient\Sender\SenderInterface' );
+		$sender->expects( $this->any() )->method( 'open' )->will( $this->returnValue( true ) );
+		$sender->expects( $this->once() )->method( 'write' )->with( $this->anything(),
+			$this->equalTo( $nonMatching ) );
+
+		$client = new SamplingStatsdClient( $sender );
+		$client->setSamplingRates( [ 'foo.*' => 0.2 ] );
+
+		mt_srand( 0 ); // next random is 0.44
+		$client->send( $matching );
+		mt_srand( 0 );
+		$client->send( $nonMatching );
+	}
 }
-- 
2.20.1