Replace local RunningStat with wikimedia/running-stat library
author Ori Livneh <ori@wikimedia.org>
Tue, 27 Oct 2015 18:41:17 +0000 (11:41 -0700)
committer BryanDavis <bdavis@wikimedia.org>
Tue, 27 Oct 2015 19:16:58 +0000 (19:16 +0000)
* mediawiki/vendor change: I66b419c8168df.
* Add back-compat shim in includes/compat/RunningStatCompat.php.

Change-Id: Ibad4dc0ce5aa8697791865ed8ea61a25bae7b3e0

autoload.php
composer.json
includes/compat/RunningStatCompat.php [new file with mode: 0644]
includes/libs/RunningStat.php [deleted file]
includes/libs/Xhprof.php
tests/phpunit/includes/libs/RunningStatTest.php [deleted file]

diff --git a/autoload.php b/autoload.php
index 731bdaa..d7bccb3 100644 (file)
@@ -1076,7 +1076,7 @@ $wgAutoloadLocalClasses = array(
        'RollbackEdits' => __DIR__ . '/maintenance/rollbackEdits.php',
        'RowUpdateGenerator' => __DIR__ . '/includes/utils/RowUpdateGenerator.php',
        'RunJobs' => __DIR__ . '/maintenance/runJobs.php',
-       'RunningStat' => __DIR__ . '/includes/libs/RunningStat.php',
+       'RunningStat' => __DIR__ . '/includes/compat/RunningStatCompat.php',
        'SQLiteField' => __DIR__ . '/includes/db/DatabaseSqlite.php',
        'SVGMetadataExtractor' => __DIR__ . '/includes/media/SVGMetadataExtractor.php',
        'SVGReader' => __DIR__ . '/includes/media/SVGMetadataExtractor.php',
diff --git a/composer.json b/composer.json
index 489c299..e356125 100644 (file)
@@ -31,6 +31,7 @@
                "wikimedia/composer-merge-plugin": "1.2.1",
                "wikimedia/ip-set": "1.0.1",
                "wikimedia/relpath": "1.0.3",
+               "wikimedia/running-stat": "1.0.0",
                "wikimedia/utfnormal": "1.0.3",
                "wikimedia/wrappedstring": "2.0.0",
                "zordius/lightncandy": "0.21"
diff --git a/includes/compat/RunningStatCompat.php b/includes/compat/RunningStatCompat.php
new file mode 100644 (file)
index 0000000..ac82f44
--- /dev/null
@@ -0,0 +1,28 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Backward-compatibility alias for RunningStat, which was moved out
+ * into an external library and namespaced.
+ *
+ * @deprecated since 1.27 use RunningStat\RunningStat directly
+ */
+class RunningStat extends RunningStat\RunningStat {
+}
diff --git a/includes/libs/RunningStat.php b/includes/libs/RunningStat.php
deleted file mode 100644 (file)
index 8bd4656..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-<?php
-/**
- * Compute running mean, variance, and extrema of a stream of numbers.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Profiler
- */
-
-// Needed due to PHP non-bug <https://bugs.php.net/bug.php?id=49828>.
-define( 'NEGATIVE_INF', -INF );
-
-/**
- * Represents a running summary of a stream of numbers.
- *
- * RunningStat instances are accumulator-like objects that provide a set of
- * continuously-updated summary statistics for a stream of numbers, without
- * requiring that each value be stored. The measures it provides are the
- * arithmetic mean, variance, standard deviation, and extrema (min and max);
- * together they describe the central tendency and statistical dispersion of a
- * set of values.
- *
- * One RunningStat instance can be merged into another; the resultant
- * RunningStat has the state it would have had if it had accumulated each
- * individual point. This allows data to be summarized in parallel and in
- * stages without loss of fidelity.
- *
- * Based on a C++ implementation by John D. Cook:
- *  <http://www.johndcook.com/standard_deviation.html>
- *  <http://www.johndcook.com/skewness_kurtosis.html>
- *
- * The in-line documentation for this class incorporates content from the
- * English Wikipedia articles "Variance", "Algorithms for calculating
- * variance", and "Standard deviation".
- *
- * @since 1.23
- */
-class RunningStat implements Countable {
-
-       /** @var int Number of samples. **/
-       public $n = 0;
-
-       /** @var float The first moment (or mean, or expected value). **/
-       public $m1 = 0.0;
-
-       /** @var float The second central moment (or variance). **/
-       public $m2 = 0.0;
-
-       /** @var float The least value in the set. **/
-       public $min = INF;
-
-       /** @var float The greatest value in the set. **/
-       public $max = NEGATIVE_INF;
-
-       /**
-        * Count the number of accumulated values.
-        * @return int Number of values
-        */
-       public function count() {
-               return $this->n;
-       }
-
-       /**
-        * Add a number to the data set.
-        * @param int|float $x Value to add
-        */
-       public function push( $x ) {
-               $x = (float) $x;
-
-               $this->min = min( $this->min, $x );
-               $this->max = max( $this->max, $x );
-
-               $n1 = $this->n;
-               $this->n += 1;
-               $delta = $x - $this->m1;
-               $delta_n = $delta / $this->n;
-               $this->m1 += $delta_n;
-               $this->m2 += $delta * $delta_n * $n1;
-       }
-
-       /**
-        * Get the mean, or expected value.
-        *
-        * The arithmetic mean is the sum of all measurements divided by the number
-        * of observations in the data set.
-        *
-        * @return float Mean
-        */
-       public function getMean() {
-               return $this->m1;
-       }
-
-       /**
-        * Get the estimated variance.
-        *
-        * Variance measures how far a set of numbers is spread out. A small
-        * variance indicates that the data points tend to be very close to the
-        * mean (and hence to each other), while a high variance indicates that the
-        * data points are very spread out from the mean and from each other.
-        *
-        * @return float Estimated variance
-        */
-       public function getVariance() {
-               if ( $this->n === 0 ) {
-                       // The variance of the empty set is undefined.
-                       return NAN;
-               } elseif ( $this->n === 1 ) {
-                       return 0.0;
-               } else {
-                       return $this->m2 / ( $this->n - 1.0 );
-               }
-       }
-
-       /**
-        * Get the estimated standard deviation.
-        *
-        * The standard deviation of a statistical population is the square root of
-        * its variance. It shows how much variation from the mean exists. In
-        * addition to expressing the variability of a population, the standard
-        * deviation is commonly used to measure confidence in statistical conclusions.
-        *
-        * @return float Estimated standard deviation
-        */
-       public function getStdDev() {
-               return sqrt( $this->getVariance() );
-       }
-
-       /**
-        * Merge another RunningStat instance into this instance.
-        *
-        * This instance then has the state it would have had if all the data had
-        * been accumulated by it alone.
-        *
-        * @param RunningStat RunningStat instance to merge into this one
-        */
-       public function merge( RunningStat $other ) {
-               // If the other RunningStat is empty, there's nothing to do.
-               if ( $other->n === 0 ) {
-                       return;
-               }
-
-               // If this RunningStat is empty, copy values from other RunningStat.
-               if ( $this->n === 0 ) {
-                       $this->n = $other->n;
-                       $this->m1 = $other->m1;
-                       $this->m2 = $other->m2;
-                       $this->min = $other->min;
-                       $this->max = $other->max;
-                       return;
-               }
-
-               $n = $this->n + $other->n;
-               $delta = $other->m1 - $this->m1;
-               $delta2 = $delta * $delta;
-
-               $this->m1 = ( ( $this->n * $this->m1 ) + ( $other->n * $other->m1 ) ) / $n;
-               $this->m2 = $this->m2 + $other->m2 + ( $delta2 * $this->n * $other->n / $n );
-               $this->min = min( $this->min, $other->min );
-               $this->max = max( $this->max, $other->max );
-               $this->n = $n;
-       }
-}
diff --git a/includes/libs/Xhprof.php b/includes/libs/Xhprof.php
index 5ed67c7..eaf15df 100644 (file)
@@ -18,6 +18,8 @@
  * @file
  */
 
+use RunningStat\RunningStat;
+
 /**
  * Convenience class for working with XHProf
  * <https://github.com/phacility/xhprof>. XHProf can be installed as a PECL
diff --git a/tests/phpunit/includes/libs/RunningStatTest.php b/tests/phpunit/includes/libs/RunningStatTest.php
deleted file mode 100644 (file)
index 35a8e4f..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-<?php
-/**
- * PHP Unit tests for RunningStat class.
- * @covers RunningStat
- */
-class RunningStatTest extends PHPUnit_Framework_TestCase {
-
-       public $points = array(
-               49.7168, 74.3804,  7.0115, 96.5769, 34.9458,
-               36.9947, 33.8926, 89.0774, 23.7745, 73.5154,
-               86.1322, 53.2124, 16.2046, 73.5130, 10.4209,
-               42.7299, 49.3330, 47.0215, 34.9950, 18.2914,
-       );
-
-       /**
-        * Verify that the statistical moments and extrema computed by RunningStat
-        * match expected values.
-        * @covers RunningStat::push
-        * @covers RunningStat::count
-        * @covers RunningStat::getMean
-        * @covers RunningStat::getVariance
-        * @covers RunningStat::getStdDev
-        */
-       public function testRunningStatAccuracy() {
-               $rstat = new RunningStat();
-               foreach ( $this->points as $point ) {
-                       $rstat->push( $point );
-               }
-
-               $mean = array_sum( $this->points ) / count( $this->points );
-               $variance = array_sum( array_map( function ( $x ) use ( $mean ) {
-                       return pow( $mean - $x, 2 );
-               }, $this->points ) ) / ( count( $rstat ) - 1 );
-               $stddev = sqrt( $variance );
-
-               $this->assertEquals( count( $rstat ), count( $this->points ) );
-               $this->assertEquals( $rstat->min, min( $this->points ) );
-               $this->assertEquals( $rstat->max, max( $this->points ) );
-               $this->assertEquals( $rstat->getMean(), $mean );
-               $this->assertEquals( $rstat->getVariance(), $variance );
-               $this->assertEquals( $rstat->getStdDev(), $stddev );
-       }
-
-       /**
-        * When one RunningStat instance is merged into another, the state of the
-        * target RunningInstance should have the state that it would have had if
-        * all the data had been accumulated by it alone.
-        * @covers RunningStat::merge
-        * @covers RunningStat::count
-        */
-       public function testRunningStatMerge() {
-               $expected = new RunningStat();
-
-               foreach ( $this->points as $point ) {
-                       $expected->push( $point );
-               }
-
-               // Split the data into two sets
-               $sets = array_chunk( $this->points, floor( count( $this->points ) / 2 ) );
-
-               // Accumulate the first half into one RunningStat object
-               $first = new RunningStat();
-               foreach ( $sets[0] as $point ) {
-                       $first->push( $point );
-               }
-
-               // Accumulate the second half into another RunningStat object
-               $second = new RunningStat();
-               foreach ( $sets[1] as $point ) {
-                       $second->push( $point );
-               }
-
-               // Merge the second RunningStat object into the first
-               $first->merge( $second );
-
-               $this->assertEquals( count( $first ), count( $this->points ) );
-               $this->assertEquals( $first, $expected );
-       }
-}