From: Ori Livneh Date: Tue, 27 Oct 2015 18:41:17 +0000 (-0700) Subject: Replace local RunningStat with wikimedia/running-stat library X-Git-Tag: 1.31.0-rc.0~9204 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/password.php?a=commitdiff_plain;h=e9d64a69bca485d4aae74a572dbe82b016c6b181;p=lhc%2Fweb%2Fwiklou.git Replace local RunningStat with wikimedia/running-stat library * mediawiki/vendor change: I66b419c8168df. * Add back-compat shim in includes/compat/RunningStatCompat.php. Change-Id: Ibad4dc0ce5aa8697791865ed8ea61a25bae7b3e0 --- diff --git a/autoload.php b/autoload.php index 731bdaaf5f..d7bccb363a 100644 --- a/autoload.php +++ b/autoload.php @@ -1076,7 +1076,7 @@ $wgAutoloadLocalClasses = array( 'RollbackEdits' => __DIR__ . '/maintenance/rollbackEdits.php', 'RowUpdateGenerator' => __DIR__ . '/includes/utils/RowUpdateGenerator.php', 'RunJobs' => __DIR__ . '/maintenance/runJobs.php', - 'RunningStat' => __DIR__ . '/includes/libs/RunningStat.php', + 'RunningStat' => __DIR__ . '/includes/compat/RunningStatCompat.php', 'SQLiteField' => __DIR__ . '/includes/db/DatabaseSqlite.php', 'SVGMetadataExtractor' => __DIR__ . '/includes/media/SVGMetadataExtractor.php', 'SVGReader' => __DIR__ . '/includes/media/SVGMetadataExtractor.php', diff --git a/composer.json b/composer.json index 489c29997c..e35612513e 100644 --- a/composer.json +++ b/composer.json @@ -31,6 +31,7 @@ "wikimedia/composer-merge-plugin": "1.2.1", "wikimedia/ip-set": "1.0.1", "wikimedia/relpath": "1.0.3", + "wikimedia/running-stat": "1.0.0", "wikimedia/utfnormal": "1.0.3", "wikimedia/wrappedstring": "2.0.0", "zordius/lightncandy": "0.21" diff --git a/includes/compat/RunningStatCompat.php b/includes/compat/RunningStatCompat.php new file mode 100644 index 0000000000..ac82f44d0e --- /dev/null +++ b/includes/compat/RunningStatCompat.php @@ -0,0 +1,28 @@ +. -define( 'NEGATIVE_INF', -INF ); - -/** - * Represents a running summary of a stream of numbers. - * - * RunningStat instances are accumulator-like objects that provide a set of - * continuously-updated summary statistics for a stream of numbers, without - * requiring that each value be stored. The measures it provides are the - * arithmetic mean, variance, standard deviation, and extrema (min and max); - * together they describe the central tendency and statistical dispersion of a - * set of values. - * - * One RunningStat instance can be merged into another; the resultant - * RunningStat has the state it would have had if it had accumulated each - * individual point. This allows data to be summarized in parallel and in - * stages without loss of fidelity. - * - * Based on a C++ implementation by John D. Cook: - * - * - * - * The in-line documentation for this class incorporates content from the - * English Wikipedia articles "Variance", "Algorithms for calculating - * variance", and "Standard deviation". - * - * @since 1.23 - */ -class RunningStat implements Countable { - - /** @var int Number of samples. **/ - public $n = 0; - - /** @var float The first moment (or mean, or expected value). **/ - public $m1 = 0.0; - - /** @var float The second central moment (or variance). **/ - public $m2 = 0.0; - - /** @var float The least value in the set. **/ - public $min = INF; - - /** @var float The greatest value in the set. **/ - public $max = NEGATIVE_INF; - - /** - * Count the number of accumulated values. - * @return int Number of values - */ - public function count() { - return $this->n; - } - - /** - * Add a number to the data set. - * @param int|float $x Value to add - */ - public function push( $x ) { - $x = (float) $x; - - $this->min = min( $this->min, $x ); - $this->max = max( $this->max, $x ); - - $n1 = $this->n; - $this->n += 1; - $delta = $x - $this->m1; - $delta_n = $delta / $this->n; - $this->m1 += $delta_n; - $this->m2 += $delta * $delta_n * $n1; - } - - /** - * Get the mean, or expected value. - * - * The arithmetic mean is the sum of all measurements divided by the number - * of observations in the data set. - * - * @return float Mean - */ - public function getMean() { - return $this->m1; - } - - /** - * Get the estimated variance. - * - * Variance measures how far a set of numbers is spread out. A small - * variance indicates that the data points tend to be very close to the - * mean (and hence to each other), while a high variance indicates that the - * data points are very spread out from the mean and from each other. - * - * @return float Estimated variance - */ - public function getVariance() { - if ( $this->n === 0 ) { - // The variance of the empty set is undefined. - return NAN; - } elseif ( $this->n === 1 ) { - return 0.0; - } else { - return $this->m2 / ( $this->n - 1.0 ); - } - } - - /** - * Get the estimated standard deviation. - * - * The standard deviation of a statistical population is the square root of - * its variance. It shows how much variation from the mean exists. In - * addition to expressing the variability of a population, the standard - * deviation is commonly used to measure confidence in statistical conclusions. - * - * @return float Estimated standard deviation - */ - public function getStdDev() { - return sqrt( $this->getVariance() ); - } - - /** - * Merge another RunningStat instance into this instance. - * - * This instance then has the state it would have had if all the data had - * been accumulated by it alone. - * - * @param RunningStat RunningStat instance to merge into this one - */ - public function merge( RunningStat $other ) { - // If the other RunningStat is empty, there's nothing to do. - if ( $other->n === 0 ) { - return; - } - - // If this RunningStat is empty, copy values from other RunningStat. - if ( $this->n === 0 ) { - $this->n = $other->n; - $this->m1 = $other->m1; - $this->m2 = $other->m2; - $this->min = $other->min; - $this->max = $other->max; - return; - } - - $n = $this->n + $other->n; - $delta = $other->m1 - $this->m1; - $delta2 = $delta * $delta; - - $this->m1 = ( ( $this->n * $this->m1 ) + ( $other->n * $other->m1 ) ) / $n; - $this->m2 = $this->m2 + $other->m2 + ( $delta2 * $this->n * $other->n / $n ); - $this->min = min( $this->min, $other->min ); - $this->max = max( $this->max, $other->max ); - $this->n = $n; - } -} diff --git a/includes/libs/Xhprof.php b/includes/libs/Xhprof.php index 5ed67c7323..eaf15df6b5 100644 --- a/includes/libs/Xhprof.php +++ b/includes/libs/Xhprof.php @@ -18,6 +18,8 @@ * @file */ +use RunningStat\RunningStat; + /** * Convenience class for working with XHProf * . XHProf can be installed as a PECL diff --git a/tests/phpunit/includes/libs/RunningStatTest.php b/tests/phpunit/includes/libs/RunningStatTest.php deleted file mode 100644 index 35a8e4ffac..0000000000 --- a/tests/phpunit/includes/libs/RunningStatTest.php +++ /dev/null @@ -1,79 +0,0 @@ -points as $point ) { - $rstat->push( $point ); - } - - $mean = array_sum( $this->points ) / count( $this->points ); - $variance = array_sum( array_map( function ( $x ) use ( $mean ) { - return pow( $mean - $x, 2 ); - }, $this->points ) ) / ( count( $rstat ) - 1 ); - $stddev = sqrt( $variance ); - - $this->assertEquals( count( $rstat ), count( $this->points ) ); - $this->assertEquals( $rstat->min, min( $this->points ) ); - $this->assertEquals( $rstat->max, max( $this->points ) ); - $this->assertEquals( $rstat->getMean(), $mean ); - $this->assertEquals( $rstat->getVariance(), $variance ); - $this->assertEquals( $rstat->getStdDev(), $stddev ); - } - - /** - * When one RunningStat instance is merged into another, the state of the - * target RunningInstance should have the state that it would have had if - * all the data had been accumulated by it alone. - * @covers RunningStat::merge - * @covers RunningStat::count - */ - public function testRunningStatMerge() { - $expected = new RunningStat(); - - foreach ( $this->points as $point ) { - $expected->push( $point ); - } - - // Split the data into two sets - $sets = array_chunk( $this->points, floor( count( $this->points ) / 2 ) ); - - // Accumulate the first half into one RunningStat object - $first = new RunningStat(); - foreach ( $sets[0] as $point ) { - $first->push( $point ); - } - - // Accumulate the second half into another RunningStat object - $second = new RunningStat(); - foreach ( $sets[1] as $point ) { - $second->push( $point ); - } - - // Merge the second RunningStat object into the first - $first->merge( $second ); - - $this->assertEquals( count( $first ), count( $this->points ) ); - $this->assertEquals( $first, $expected ); - } -}