Merge "Rewrite HashRing to use consistent hashing"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Fri, 8 Jun 2018 05:25:25 +0000 (05:25 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Fri, 8 Jun 2018 05:25:25 +0000 (05:25 +0000)
includes/libs/HashRing.php
includes/poolcounter/PoolCounterRedis.php
tests/phpunit/includes/libs/HashRingTest.php

index 3b9c24d..501cbcd 100644 (file)
 /**
  * Convenience class for weighted consistent hash rings
  *
+ * This deterministically maps "keys" to a set of "locations" while avoiding clumping
+ *
+ * Each location is represented by a number of nodes on a ring proportionate to the ratio
+ * of its weight compared to the total location weight. Node positions are deterministically
+ * derived from the hash of the location name. Nodes are responsible for the portion of the
+ * ring, counter-clockwise, up until the next node. Locations are responsible for all portions
+ * of the ring that the location's nodes are responsible for.
+ *
+ * A location that is temporarily "ejected" is said to be absent from the "live" ring.
+ * If no location ejections are active, then the base ring and live ring are identical.
+ *
  * @since 1.22
  */
-class HashRing {
-       /** @var array (location => weight) */
-       protected $sourceMap = [];
-       /** @var array (location => (start, end)) */
-       protected $ring = [];
+class HashRing implements Serializable {
+       /** @var string Hashing algorithm for hash() */
+       protected $algo;
+       /** @var int[] Non-empty (location => integer weight) */
+       protected $weightByLocation;
+       /** @var int[] Map of (location => UNIX timestamp) */
+       protected $ejectExpiryByLocation;
 
-       /** @var HashRing|null */
+       /** @var array[] Non-empty list of (KEY_POS => float position, KEY_LOCATION => location) */
+       protected $baseRing;
+       /** @var array[] Non-empty list of (KEY_POS => float position, KEY_LOCATION => location) */
        protected $liveRing;
-       /** @var array (location => UNIX timestamp) */
-       protected $ejectionExpiries = [];
-       /** @var int UNIX timestamp */
-       protected $ejectionNextExpiry = INF;
 
-       const RING_SIZE = 268435456; // 2^28
+       /** @var int|null Number of nodes scanned to place an item last time */
+       private $lastNodeScanSize;
+
+       /** @var float Number of positions on the ring */
+       const RING_SIZE = 4294967296.0; // 2^32
+       /** @var integer Overall number of node groups per server */
+       const HASHES_PER_LOCATION = 40;
+       /** @var integer Number of nodes in a node group */
+       const SECTORS_PER_HASH = 4;
+
+       const KEY_POS = 0;
+       const KEY_LOCATION = 1;
+
+       /** @var int Consider all locations */
+       const RING_ALL = 0;
+       /** @var int Only consider "live" locations */
+       const RING_LIVE = 1;
 
        /**
-        * @param array $map (location => weight)
+        * Make a consistent hash ring given a set of locations and their weight values
+        *
+        * @param int[] $map Map of (location => weight)
+        * @param string $algo Hashing algorithm listed in hash_algos() [optional]
+        * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
+        * @since 1.31
         */
-       public function __construct( array $map ) {
-               $map = array_filter( $map, function ( $w ) {
-                       return $w > 0;
-               } );
-               if ( !count( $map ) ) {
-                       throw new UnexpectedValueException( "Ring is empty or all weights are zero." );
-               }
-               $this->sourceMap = $map;
-               // Sort the locations based on the hash of their names
-               $hashes = [];
-               foreach ( $map as $location => $weight ) {
-                       $hashes[$location] = sha1( $location );
-               }
-               uksort( $map, function ( $a, $b ) use ( $hashes ) {
-                       return strcmp( $hashes[$a], $hashes[$b] );
-               } );
-               // Fit the map to weight-proportionate one with a space of size RING_SIZE
-               $sum = array_sum( $map );
-               $standardMap = [];
-               foreach ( $map as $location => $weight ) {
-                       $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE );
+       public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
+               $this->init( $map, $algo, $ejections );
+       }
+
+       /**
+        * @param int[] $map Map of (location => integer)
+        * @param string $algo Hashing algorithm
+        * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
+        */
+       protected function init( array $map, $algo, array $ejections ) {
+               if ( !in_array( $algo, hash_algos(), true ) ) {
+                       throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
                }
-               // Build a ring of RING_SIZE spots, with each location at a spot in location hash order
-               $index = 0;
-               foreach ( $standardMap as $location => $weight ) {
-                       // Location covers half-closed interval [$index,$index + $weight)
-                       $this->ring[$location] = [ $index, $index + $weight ];
-                       $index += $weight;
+
+               $weightByLocation = array_filter( $map );
+               if ( $weightByLocation === [] ) {
+                       throw new UnexpectedValueException( "No locations with non-zero weight." );
+               } elseif ( min( $map ) < 0 ) {
+                       throw new InvalidArgumentException( "Location weight cannot be negative." );
                }
-               // Make sure the last location covers what is left
-               end( $this->ring );
-               $this->ring[key( $this->ring )][1] = self::RING_SIZE;
+
+               $this->algo = $algo;
+               $this->weightByLocation = $weightByLocation;
+               $this->ejectExpiryByLocation = $ejections;
+               $this->baseRing = $this->buildLocationRing( $this->weightByLocation, $this->algo );
        }
 
        /**
@@ -82,11 +107,10 @@ class HashRing {
         *
         * @param string $item
         * @return string Location
+        * @throws UnexpectedValueException
         */
        final public function getLocation( $item ) {
-               $locations = $this->getLocations( $item, 1 );
-
-               return $locations[0];
+               return $this->getLocations( $item, 1 )[0];
        }
 
        /**
@@ -94,46 +118,56 @@ class HashRing {
         *
         * @param string $item
         * @param int $limit Maximum number of locations to return
-        * @return array List of locations
+        * @param int $from One of the RING_* class constants
+        * @return string[] List of locations
+        * @throws UnexpectedValueException
         */
-       public function getLocations( $item, $limit ) {
-               $locations = [];
-               $primaryLocation = null;
-               $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits
-               foreach ( $this->ring as $location => $range ) {
-                       if ( count( $locations ) >= $limit ) {
-                               break;
-                       }
-                       // The $primaryLocation is the location the item spot is in.
-                       // After that is reached, keep appending the next locations.
-                       if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) {
-                               if ( $primaryLocation === null ) {
-                                       $primaryLocation = $location;
-                               }
-                               $locations[] = $location;
-                       }
+       public function getLocations( $item, $limit, $from = self::RING_ALL ) {
+               if ( $from === self::RING_ALL ) {
+                       $ring = $this->baseRing;
+               } elseif ( $from === self::RING_LIVE ) {
+                       $ring = $this->getLiveRing();
+               } else {
+                       throw new InvalidArgumentException( "Invalid ring source specified." );
+               }
+
+               // Locate this item's position on the hash ring
+               $position = $this->getItemPosition( $item );
+
+               // Guess a nearby node, relying on the node list being ordered by position and on
+               // nodes having roughly equal expected size (the variance shrinks as node counts grow)
+               $guessIndex = $this->guessNodeIndexForPosition( $position, $ring );
+
+               // Find the index of the node within which this item resides
+               $itemNodeIndex = $this->findNodeIndexForPosition( $position, $guessIndex, $ring );
+               if ( $itemNodeIndex === null ) {
+                       throw new RuntimeException( __METHOD__ . ": no place for '$item' ($position)" );
                }
-               // If more locations are requested, wrap-around and keep adding them
-               reset( $this->ring );
+
+               $locations = [];
+               $currentIndex = $itemNodeIndex;
                while ( count( $locations ) < $limit ) {
-                       $location = key( $this->ring );
-                       if ( $location === $primaryLocation ) {
-                               break; // don't go in circles
+                       $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
+                       if ( !in_array( $nodeLocation, $locations, true ) ) {
+                               // Ignore other nodes for the same locations already added
+                               $locations[] = $nodeLocation;
+                       }
+                       $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
+                       if ( $currentIndex === $itemNodeIndex ) {
+                               break; // all nodes visited
                        }
-                       $locations[] = $location;
-                       next( $this->ring );
                }
 
                return $locations;
        }
 
        /**
-        * Get the map of locations to weight (ignores 0-weight items)
+        * Get the map of locations to weight (does not include zero weight items)
         *
-        * @return array
+        * @return int[]
         */
        public function getLocationWeights() {
-               return $this->sourceMap;
+               return $this->weightByLocation;
        }
 
        /**
@@ -142,48 +176,282 @@ class HashRing {
         * @param string $location
         * @param int $ttl Seconds
         * @return bool Whether some non-ejected locations are left
+        * @throws UnexpectedValueException
         */
        public function ejectFromLiveRing( $location, $ttl ) {
-               if ( !isset( $this->sourceMap[$location] ) ) {
+               if ( !isset( $this->weightByLocation[$location] ) ) {
                        throw new UnexpectedValueException( "No location '$location' in the ring." );
                }
-               $expiry = time() + $ttl;
-               $this->liveRing = null; // stale
-               $this->ejectionExpiries[$location] = $expiry;
-               $this->ejectionNextExpiry = min( $expiry, $this->ejectionNextExpiry );
 
-               return ( count( $this->ejectionExpiries ) < count( $this->sourceMap ) );
+               $expiry = $this->getCurrentTime() + $ttl;
+               $this->ejectExpiryByLocation[$location] = $expiry;
+
+               $this->liveRing = null; // invalidate ring cache
+
+               return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
+       }
+
+       /**
+        * Look up the single "live" ring location that an item maps to
+        *
+        * @param string $item
+        * @return string Location
+        * @throws UnexpectedValueException
+        */
+       final public function getLiveLocation( $item ) {
+               $liveLocations = $this->getLocations( $item, 1, self::RING_LIVE );
+
+               return $liveLocations[0];
+       }
+
+       /**
+        * Look up the "live" ring location that an item maps to, followed by the next ones
+        *
+        * @param string $item
+        * @param int $limit Maximum number of locations to return
+        * @return string[] List of locations
+        * @throws UnexpectedValueException
+        */
+       final public function getLiveLocations( $item, $limit ) {
+               $liveLocations = $this->getLocations( $item, $limit, self::RING_LIVE );
+
+               return $liveLocations;
+       }
+
+       /**
+        * Get the (location => weight) map without the locations that are currently ejected
+        *
+        * A location counts as ejected only while its expiry timestamp is still in the future.
+        * The ejection map itself is left untouched by this method.
+        *
+        * @return int[]
+        */
+       public function getLiveLocationWeights() {
+               $now = $this->getCurrentTime();
+
+               $liveWeights = $this->weightByLocation;
+               foreach ( $this->ejectExpiryByLocation as $location => $expiry ) {
+                       if ( $expiry > $now ) {
+                               // Ejection is still active; drop this location from the result
+                               unset( $liveWeights[$location] );
+                       }
+               }
+
+               return $liveWeights;
+       }
+
+       /**
+        * Estimate the index of the ring node closest to a position
+        *
+        * The first estimate scales the position linearly onto the node list. It is then
+        * corrected by the distance between that node and the position, measured in units
+        * of the average gap between nodes. This is only a starting point for a linear scan;
+        * the value returned does not affect which node is ultimately selected.
+        *
+        * @param float $position
+        * @param array[] $ring Either the base or live ring
+        * @return int Index within [0, count( $ring ) - 1]
+        */
+       private function guessNodeIndexForPosition( $position, $ring ) {
+               $nodeCount = count( $ring );
+               $maxIndex = $nodeCount - 1;
+
+               $arcRatio = $position / self::RING_SIZE; // range is [0.0, 1.0)
+               $guessIndex = intval( $maxIndex * $arcRatio );
+
+               $displacement = $ring[$guessIndex][self::KEY_POS] - $position;
+               $aveSize = self::RING_SIZE / $nodeCount;
+               $guessIndex -= intval( $displacement / $aveSize );
+
+               // The displacement is linear, not modular, so an out-of-range estimate means the
+               // target sits at that end of the list. Clamp rather than wrap: wrapping would put
+               // the guess at the opposite end and force a scan over nearly the whole ring.
+               return max( 0, min( $guessIndex, $maxIndex ) );
+       }
+
+       /**
+        * Scan outward from a guessed node index to find the node that owns a position
+        *
+        * If the guessed node is at/after $position, nodes are visited in decreasing index
+        * order; otherwise they are visited in increasing index order. The number of nodes
+        * visited is recorded in $this->lastNodeScanSize.
+        *
+        * @param float $position
+        * @param int $guessIndex Node index to start scanning
+        * @param array[] $ring Either the base or live ring
+        * @return int|null Both loops only exit once an index has been chosen
+        */
+       private function findNodeIndexForPosition( $position, $guessIndex, $ring ) {
+               $mainNodeIndex = null; // first matching node index
+
+               $this->lastNodeScanSize = 0;
+
+               if ( $ring[$guessIndex][self::KEY_POS] >= $position ) {
+                       // Walk the nodes counter-clockwise until reaching a node at/before $position
+                       do {
+                               $priorIndex = $guessIndex;
+                               $guessIndex = $this->getPrevClockwiseNodeIndex( $guessIndex, $ring );
+                               $nodePosition = $ring[$guessIndex][self::KEY_POS];
+                               // A larger index after stepping back means the walk wrapped past index 0
+                               if ( $nodePosition < $position || $guessIndex > $priorIndex ) {
+                                       $mainNodeIndex = $priorIndex; // includes roll-over case
+                               } elseif ( $nodePosition === $position ) {
+                                       // Exact hit on the previous node
+                                       $mainNodeIndex = $guessIndex;
+                               }
+                               ++$this->lastNodeScanSize;
+                       } while ( $mainNodeIndex === null );
+               } else {
+                       // Walk the nodes clockwise until reaching a node at/after $position
+                       do {
+                               $priorIndex = $guessIndex;
+                               $guessIndex = $this->getNextClockwiseNodeIndex( $guessIndex, $ring );
+                               $nodePosition = $ring[$guessIndex][self::KEY_POS];
+                               // A smaller index after stepping forward means the walk wrapped to index 0
+                               if ( $nodePosition >= $position || $guessIndex < $priorIndex ) {
+                                       $mainNodeIndex = $guessIndex; // includes roll-over case
+                               }
+                               ++$this->lastNodeScanSize;
+                       } while ( $mainNodeIndex === null );
+               }
+
+               return $mainNodeIndex;
+       }
+
+       /**
+        * Build the position-ordered node list for a set of weighted locations
+        *
+        * Each location gets intval( weight ratio * HASHES_PER_LOCATION * location count )
+        * node groups. Every group contributes the four positions derived from one hash of
+        * "{location}-{group index}". If two nodes land on the same position, only the one
+        * whose node name compares greater (PHP ">") is kept.
+        *
+        * @param int[] $weightByLocation Map of (location => weight)
+        * @param string $algo Hashing algorithm (not read here; positions use $this->algo)
+        * @return array[] List of (KEY_POS => float, KEY_LOCATION => string) in position order
+        * @throws UnexpectedValueException
+        */
+       private function buildLocationRing( array $weightByLocation, $algo ) {
+               $locationCount = count( $weightByLocation );
+               $totalWeight = array_sum( $weightByLocation );
+
+               $ring = [];
+               // Key for the next entry of $ring. This is tracked explicitly because, once a
+               // duplicate has been unset(), count( $ring ) - 1 is no longer the last key used.
+               $nextIndex = 0;
+               // Assign nodes to all locations based on location weight
+               $claimed = []; // (position as string => (node, index))
+               foreach ( $weightByLocation as $location => $weight ) {
+                       $ratio = $weight / $totalWeight;
+                       // There are $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
+                       // assign a few groups of nodes to this location based on its weight.
+                       $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
+                       for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
+                               // For efficiency, get 4 points per hash call and 4X node count.
+                               // If $algo is MD5, then this matches that of libketama.
+                               // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
+                               $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
+                               foreach ( $positions as $gi => $position ) {
+                                       $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
+                                       $posKey = (string)$position; // large integer
+                                       if ( isset( $claimed[$posKey] ) ) {
+                                               // Disallow duplicates for sanity (name decides precedence)
+                                               if ( $claimed[$posKey]['node'] > $node ) {
+                                                       continue;
+                                               }
+                                               // The new node wins; drop the node that held this position
+                                               unset( $ring[$claimed[$posKey]['index']] );
+                                       }
+                                       $ring[$nextIndex] = [
+                                               self::KEY_POS => $position,
+                                               self::KEY_LOCATION => $location
+                                       ];
+                                       $claimed[$posKey] = [ 'node' => $node, 'index' => $nextIndex ];
+                                       ++$nextIndex;
+                               }
+                       }
+               }
+               // Sort the locations into clockwise order based on the hash ring position
+               // (usort() also re-indexes the list, closing any gaps left by unset() above)
+               usort( $ring, function ( $a, $b ) {
+                       if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
+                               throw new UnexpectedValueException( 'Duplicate node positions.' );
+                       }
+
+                       return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
+               } );
+
+               return $ring;
+       }
+
+       /**
+        * Map an item key to its position on the ring
+        *
+        * The position is the first four bytes of the key's raw hash, read as an unsigned
+        * little-endian 32-bit integer.
+        *
+        * @param string $item Key
+        * @return float Ring position; integral number in [0, self::RING_SIZE - 1]
+        * @throws UnexpectedValueException If the hash is shorter than four bytes
+        */
+       private function getItemPosition( $item ) {
+               // If $algo is MD5, then this matches that of with libketama.
+               // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
+               $digest = hash( $this->algo, (string)$item, true );
+               $octets = substr( $digest, 0, 4 );
+               if ( strlen( $octets ) != 4 ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
+               }
+
+               $unpacked = unpack( 'V', $octets );
+               // unpack() may yield a negative int on 32-bit builds; '%u' renders it unsigned
+               $unsigned = sprintf( '%u', $unpacked[1] );
+
+               return (float)$unsigned;
+       }
+
+       /**
+        * Derive four ring positions from one hash of a node group name
+        *
+        * The first 16 bytes of the raw hash are read as four unsigned little-endian
+        * 32-bit integers.
+        *
+        * @param string $nodeGroupName
+        * @return float[] Four ring positions on [0, self::RING_SIZE - 1]
+        * @throws UnexpectedValueException If the hash is shorter than 16 bytes
+        */
+       private function getNodePositionQuartet( $nodeGroupName ) {
+               $digest = hash( $this->algo, (string)$nodeGroupName, true );
+               $octets = substr( $digest, 0, 16 );
+               if ( strlen( $octets ) != 16 ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
+               }
+
+               // unpack() keys its result from 1; array_values() gives a plain 0-based list
+               return array_map(
+                       function ( $signed ) {
+                               return (float)sprintf( '%u', $signed );
+                       },
+                       array_values( unpack( 'V4', $octets ) )
+               );
+       }
+
+       /**
+        * Get the index that follows a node index, wrapping from the last node to the first
+        *
+        * @param int $i Valid index for a node in the ring
+        * @param array[] $ring Either the base or live ring
+        * @return int Valid index for a node in the ring
+        * @throws UnexpectedValueException If $i is not an index of $ring
+        */
+       private function getNextClockwiseNodeIndex( $i, $ring ) {
+               if ( !isset( $ring[$i] ) ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
+               }
+
+               if ( $i + 1 < count( $ring ) ) {
+                       return $i + 1;
+               }
+
+               return 0; // stepped past the last node
+       }
+
+       /**
+        * Get the index that precedes a node index, wrapping from the first node to the last
+        *
+        * @param int $i Valid index for a node in the ring
+        * @param array[] $ring Either the base or live ring
+        * @return int Valid index for a node in the ring
+        * @throws UnexpectedValueException If $i is not an index of $ring
+        */
+       private function getPrevClockwiseNodeIndex( $i, $ring ) {
+               if ( !isset( $ring[$i] ) ) {
+                       throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
+               }
+
+               if ( $i - 1 < 0 ) {
+                       return count( $ring ) - 1; // stepped before the first node
+               }
+
+               return $i - 1;
+       }
 
        /**
         * Get the "live" hash ring (which does not include ejected locations)
         *
-        * @return HashRing
+        * @return array[]
         * @throws UnexpectedValueException
         */
        protected function getLiveRing() {
-               $now = time();
-               if ( $this->liveRing === null || $this->ejectionNextExpiry <= $now ) {
-                       $this->ejectionExpiries = array_filter(
-                               $this->ejectionExpiries,
+               if ( !$this->ejectExpiryByLocation ) {
+                       return $this->baseRing; // nothing ejected
+               }
+
+               $now = $this->getCurrentTime();
+
+               if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
+                       // Live ring needs to be regenerated...
+                       $this->ejectExpiryByLocation = array_filter(
+                               $this->ejectExpiryByLocation,
                                function ( $expiry ) use ( $now ) {
                                        return ( $expiry > $now );
                                }
                        );
-                       if ( count( $this->ejectionExpiries ) ) {
-                               $map = array_diff_key( $this->sourceMap, $this->ejectionExpiries );
-                               $this->liveRing = count( $map ) ? new self( $map ) : false;
-
-                               $this->ejectionNextExpiry = min( $this->ejectionExpiries );
-                       } else { // common case; avoid recalculating ring
-                               $this->liveRing = clone $this;
-                               $this->liveRing->ejectionExpiries = [];
-                               $this->liveRing->ejectionNextExpiry = INF;
-                               $this->liveRing->liveRing = null;
-
-                               $this->ejectionNextExpiry = INF;
+
+                       if ( count( $this->ejectExpiryByLocation ) ) {
+                               // Some locations are still ejected from the ring
+                               $liveRing = [];
+                               foreach ( $this->baseRing as $i => $nodeInfo ) {
+                                       $location = $nodeInfo[self::KEY_LOCATION];
+                                       if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
+                                               $liveRing[] = $nodeInfo;
+                                       }
+                               }
+                       } else {
+                               $liveRing = $this->baseRing;
                        }
+
+                       $this->liveRing = $liveRing;
                }
+
                if ( !$this->liveRing ) {
                        throw new UnexpectedValueException( "The live ring is currently empty." );
                }
@@ -192,35 +460,33 @@ class HashRing {
        }
 
        /**
-        * Get the location of an item on the "live" ring
-        *
-        * @param string $item
-        * @return string Location
-        * @throws UnexpectedValueException
+        * @return int UNIX timestamp
         */
-       public function getLiveLocation( $item ) {
-               return $this->getLiveRing()->getLocation( $item );
+       protected function getCurrentTime() {
+               return time();
        }
 
        /**
-        * Get the location of an item on the "live" ring, as well as the next locations
-        *
-        * @param string $item
-        * @param int $limit Maximum number of locations to return
-        * @return array List of locations
-        * @throws UnexpectedValueException
+        * @return int|null
         */
-       public function getLiveLocations( $item, $limit ) {
-               return $this->getLiveRing()->getLocations( $item, $limit );
+       public function getLastNodeScanSize() {
+               return $this->lastNodeScanSize;
        }
 
-       /**
-        * Get the map of "live" locations to weight (ignores 0-weight items)
-        *
-        * @return array
-        * @throws UnexpectedValueException
-        */
-       public function getLiveLocationWeights() {
-               return $this->getLiveRing()->getLocationWeights();
+       /**
+        * Serialize the ring's configuration: algorithm, location weights and ejections
+        *
+        * The node lists are not included; unserialize() rebuilds them through init().
+        *
+        * @return string
+        */
+       public function serialize() {
+               $state = [
+                       'algorithm' => $this->algo,
+                       'locations' => $this->weightByLocation,
+                       'ejections' => $this->ejectExpiryByLocation
+               ];
+
+               return serialize( $state );
+       }
+
+       /**
+        * Restore the ring from the string produced by serialize()
+        *
+        * @param string $serialized
+        * @throws UnexpectedValueException If the payload lacks the expected structure
+        */
+       public function unserialize( $serialized ) {
+               // NOTE(review): PHP's unserialize() should only ever be given trusted input
+               $data = unserialize( $serialized );
+               if (
+                       !is_array( $data ) ||
+                       !isset( $data['locations'], $data['algorithm'], $data['ejections'] ) ||
+                       !is_array( $data['locations'] ) ||
+                       !is_array( $data['ejections'] )
+               ) {
+                       // The payload is PHP-serialized data, not JSON
+                       throw new UnexpectedValueException(
+                               __METHOD__ . ": unable to decode serialized data."
+                       );
+               }
+
+               $this->init( $data['locations'], $data['algorithm'], $data['ejections'] );
+       }
 }
index 9515f25..f5fa4c7 100644 (file)
@@ -85,7 +85,9 @@ class PoolCounterRedis extends PoolCounter {
                parent::__construct( $conf, $type, $key );
 
                $this->serversByLabel = $conf['servers'];
-               $this->ring = new HashRing( array_fill_keys( array_keys( $conf['servers'] ), 100 ) );
+
+               $serverLabels = array_keys( $conf['servers'] );
+               $this->ring = new HashRing( array_fill_keys( $serverLabels, 10 ) );
 
                $conf['redisConfig']['serializer'] = 'none'; // for use with Lua
                $this->pool = RedisConnectionPool::singleton( $conf['redisConfig'] );
index ba28828..1e51aa8 100644 (file)
@@ -2,44 +2,74 @@
 
 /**
  * @group HashRing
+ * @covers HashRing
  */
 class HashRingTest extends PHPUnit\Framework\TestCase {
 
        use MediaWikiCoversValidator;
 
-       /**
-        * @covers HashRing
-        */
-       public function testHashRing() {
-               $ring = new HashRing( [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3 ] );
+       public function testHashRingSerialize() {
+               $weights = [ 's1' => 3, 's2' => 10, 's3' => 2, 's4' => 10, 's5' => 2, 's6' => 3 ];
+               $original = new HashRing( $weights, 'md5' );
+
+               // Round-trip the ring through PHP serialization
+               $restored = unserialize( serialize( $original ) );
+
+               foreach ( range( 0, 99 ) as $n ) {
+                       $key = "hello$n";
+                       $this->assertEquals(
+                               $original->getLocation( $key ),
+                               $restored->getLocation( $key ),
+                               'Items placed at proper locations'
+                       );
+               }
+       }
+
+       public function testHashRingMapping() {
+               // SHA-1 based and weighted
+               $ring = new HashRing(
+                       [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3, 's7' => 0 ],
+                       'sha1'
+               );
+
+               $this->assertEquals(
+                       [ 's1' => 1, 's2' => 1, 's3' => 2, 's4' => 2, 's5' => 2, 's6' => 3 ],
+                       $ring->getLocationWeights(),
+                       'Normalized location weights'
+               );
+
+               $this->assertEquals( null, $ring->getLastNodeScanSize() );
 
                $locations = [];
-               for ( $i = 0; $i < 20; $i++ ) {
+               for ( $i = 0; $i < 25; $i++ ) {
                        $locations[ "hello$i"] = $ring->getLocation( "hello$i" );
                }
                $expectedLocations = [
-                       "hello0" => "s5",
+                       "hello0" => "s4",
                        "hello1" => "s6",
-                       "hello2" => "s2",
-                       "hello3" => "s5",
+                       "hello2" => "s3",
+                       "hello3" => "s6",
                        "hello4" => "s6",
                        "hello5" => "s4",
-                       "hello6" => "s5",
+                       "hello6" => "s3",
                        "hello7" => "s4",
-                       "hello8" => "s5",
-                       "hello9" => "s5",
+                       "hello8" => "s3",
+                       "hello9" => "s3",
                        "hello10" => "s3",
-                       "hello11" => "s6",
-                       "hello12" => "s1",
-                       "hello13" => "s3",
-                       "hello14" => "s3",
+                       "hello11" => "s5",
+                       "hello12" => "s4",
+                       "hello13" => "s5",
+                       "hello14" => "s2",
                        "hello15" => "s5",
-                       "hello16" => "s4",
-                       "hello17" => "s6",
-                       "hello18" => "s6",
-                       "hello19" => "s3"
+                       "hello16" => "s6",
+                       "hello17" => "s5",
+                       "hello18" => "s1",
+                       "hello19" => "s1",
+                       "hello20" => "s6",
+                       "hello21" => "s5",
+                       "hello22" => "s3",
+                       "hello23" => "s4",
+                       "hello24" => "s1"
                ];
-
                $this->assertEquals( $expectedLocations, $locations, 'Items placed at proper locations' );
 
                $locations = [];
@@ -48,12 +78,273 @@ class HashRingTest extends PHPUnit\Framework\TestCase {
                }
 
                $expectedLocations = [
-                       "hello0" => [ "s5", "s6" ],
-                       "hello1" => [ "s6", "s4" ],
-                       "hello2" => [ "s2", "s1" ],
-                       "hello3" => [ "s5", "s6" ],
-                       "hello4" => [ "s6", "s4" ],
+                       "hello0" => [ "s4", "s5" ],
+                       "hello1" => [ "s6", "s5" ],
+                       "hello2" => [ "s3", "s1" ],
+                       "hello3" => [ "s6", "s5" ],
+                       "hello4" => [ "s6", "s3" ],
                ];
                $this->assertEquals( $expectedLocations, $locations, 'Items placed at proper locations' );
        }
+
+       /**
+        * Look up 10000 keys on a 'whirlpool' ring and assert the exact number
+        * of keys that landed on each location.
+        *
+        * @dataProvider providor_getHashLocationWeights
+        * @param int[] $locations Map of (location => weight) passed to HashRing
+        * @param int[] $expectedHits Map of (location => expected number of keys)
+        */
+       public function testHashRingRatios( $locations, $expectedHits ) {
+               $ring = new HashRing( $locations, 'whirlpool' );
+
+               // Every configured location starts with a zero counter
+               $hitsByLocation = [];
+               foreach ( array_keys( $locations ) as $name ) {
+                       $hitsByLocation[$name] = 0;
+               }
+
+               $keyIndex = 0;
+               while ( $keyIndex < 10000 ) {
+                       $chosen = $ring->getLocation( "key-$keyIndex" );
+                       $hitsByLocation[$chosen] += 1;
+                       ++$keyIndex;
+               }
+
+               $this->assertEquals( $expectedHits, $hitsByLocation );
+       }
+
+       /**
+        * Data provider for testHashRingRatios().
+        *
+        * @return array[] List of cases; each case is
+        *  ( (location => weight) map, (location => expected hit count) map )
+        */
+       public static function providor_getHashLocationWeights() {
+               $weights = [ 'big' => 10, 'medium' => 5, 'small' => 1 ];
+               $hits = [ 'big' => 6037, 'medium' => 3314, 'small' => 649 ];
+
+               return [ [ $weights, $hits ] ];
+       }
+
+       /**
+        * Request up to three locations for each of 1000 keys on a 'sha1' ring
+        * and assert how many times each location was returned overall.
+        *
+        * @dataProvider providor_getHashLocationWeights2
+        * @param int[] $locations Map of (location => weight) passed to HashRing
+        * @param int[] $expected Map of (location => expected number of appearances)
+        */
+       public function testHashRingRatios2( $locations, $expected ) {
+               $ring = new HashRing( $locations, 'sha1' );
+
+               // Every configured location starts with a zero counter
+               $appearances = [];
+               foreach ( array_keys( $locations ) as $name ) {
+                       $appearances[$name] = 0;
+               }
+
+               foreach ( range( 0, 999 ) as $n ) {
+                       $picked = $ring->getLocations( "key-$n", 3 );
+                       foreach ( $picked as $name ) {
+                               $appearances[$name]++;
+                       }
+               }
+
+               $this->assertEquals( $expected, $appearances );
+       }
+
+       /**
+        * Data provider for testHashRingRatios2().
+        *
+        * @return array[] List of cases; each case is
+        *  ( (location => weight) map, (location => expected appearance count) map )
+        */
+       public static function providor_getHashLocationWeights2() {
+               $weights = [ 'big1' => 10, 'big2' => 10, 'big3' => 10, 'small1' => 1, 'small2' => 1 ];
+               $appearances = [ 'big1' => 929, 'big2' => 899, 'big3' => 887, 'small1' => 143, 'small2' => 142 ];
+
+               return [ [ $weights, $appearances ] ];
+       }
+
+       /**
+        * Build an 'md5' ring of 128 equally weighted locations and check the
+        * minimum, maximum and mean of getLastNodeScanSize() over 1000 lookups.
+        */
+       public function testBigHashRingRatios() {
+               $weights = [];
+               foreach ( range( 0, 127 ) as $serverIndex ) {
+                       $weights["server$serverIndex"] = 100;
+               }
+               $ring = new HashRing( $weights, 'md5' );
+
+               $scanSizes = [];
+               foreach ( range( 0, 999 ) as $itemIndex ) {
+                       // The returned location is discarded; only the scan size of the lookup is recorded
+                       $ring->getLocation( "item$itemIndex" );
+                       $scanSizes[] = $ring->getLastNodeScanSize();
+               }
+
+               $smallest = min( $scanSizes );
+               $largest = max( $scanSizes );
+               $mean = array_sum( $scanSizes ) / count( $scanSizes );
+
+               $this->assertEquals( 1, $smallest );
+               $this->assertEquals( 24, $largest );
+               // For comparison: log2( 140 * 128 ) = 14.129 (e.g. a divide & conquer search)
+               $this->assertEquals( 4.4, round( $mean, 1 ) );
+       }
+
+       /**
+        * Eject 's3' and 's6' from the live ring, then check that the live weights
+        * omit them, that live lookups never return them, and that keys whose base
+        * location was not ejected resolve identically on the base and live rings.
+        */
+       public function testHashRingEjection() {
+               $weights = [ 's1' => 5, 's2' => 5, 's3' => 10, 's4' => 10, 's5' => 5, 's6' => 5 ];
+               $ejected = [ 's3', 's6' ];
+               $ring = new HashRing( $weights, 'md5' );
+
+               $ring->ejectFromLiveRing( 's3', 30 );
+               $ring->ejectFromLiveRing( 's6', 15 );
+
+               $this->assertEquals(
+                       [ 's1' => 5, 's2' => 5, 's4' => 10, 's5' => 5 ],
+                       $ring->getLiveLocationWeights(),
+                       'Live location weights'
+               );
+
+               foreach ( range( 0, 99 ) as $n ) {
+                       $key = "key-$n";
+
+                       foreach ( $ejected as $gone ) {
+                               $this->assertNotEquals( $gone, $ring->getLiveLocation( $key ), 'ejected' );
+                       }
+
+                       if ( in_array( $ring->getLocation( $key ), $ejected, true ) ) {
+                               // The base location is ejected, so base and live results are not compared
+                               continue;
+                       }
+
+                       $label = "Live ring otherwise matches (#$n)";
+
+                       $baseLocation = $ring->getLocation( $key );
+                       $liveLocation = $ring->getLiveLocation( $key );
+                       $this->assertEquals( $baseLocation, $liveLocation, $label );
+
+                       $baseList = $ring->getLocations( $key, 1 );
+                       $liveList = $ring->getLiveLocations( $key, 1 );
+                       $this->assertEquals( $baseList, $liveList, $label );
+               }
+       }
+
+       /**
+        * Two rings built from the same locations, listed in opposite order, must
+        * resolve keys 0..99 to the same location.
+        *
+        * NOTE(review): locations 0 and 6497 were presumably picked because their
+        * node positions collide under the default hash; confirm against HashRing.
+        */
+       public function testHashRingCollision() {
+               $forward = new HashRing( [ 0 => 1, 6497 => 1 ] );
+               $reversed = new HashRing( [ 6497 => 1, 0 => 1 ] );
+
+               $key = 0;
+               do {
+                       $this->assertEquals( $forward->getLocation( $key ), $reversed->getLocation( $key ) );
+                       ++$key;
+               } while ( $key < 100 );
+       }
+
+       /**
+        * Compare lookups on an 'md5' ring against reference output that, per the
+        * inline comments, was generated by the libketama C test program.
+        *
+        * The first 100 keys are compared line by line so that a failure produces a
+        * readable diff; the first 1e5 keys are compared through an MD5 digest of
+        * the generated text.
+        */
+       public function testHashRingKetamaMode() {
+               // Same as https://github.com/RJ/ketama/blob/master/ketama.servers
+               $map = [
+                       '10.0.1.1:11211' => 600,
+                       '10.0.1.2:11211' => 300,
+                       '10.0.1.3:11211' => 200,
+                       '10.0.1.4:11211' => 350,
+                       '10.0.1.5:11211' => 1000,
+                       '10.0.1.6:11211' => 800,
+                       '10.0.1.7:11211' => 950,
+                       '10.0.1.8:11211' => 100
+               ];
+               $ring = new HashRing( $map, 'md5' );
+               // The wrapper is used to read $baseRing and call the position/index helpers below
+               $wrapper = \Wikimedia\TestingAccessWrapper::newFromObject( $ring );
+
+               // Builds one "<item position> <node position> <location>" line for each
+               // integer key in [0, $count) and returns them joined, preceded by a newline
+               $ketama_test = function ( $count ) use ( $wrapper ) {
+                       $baseRing = $wrapper->baseRing;
+
+                       $lines = [];
+                       for ( $key = 0; $key < $count; ++$key ) {
+                               $location = $wrapper->getLocation( $key );
+
+                               // Repeat the lookup step by step to learn which node position was selected
+                               $itemPos = $wrapper->getItemPosition( $key );
+                               $guess = $wrapper->guessNodeIndexForPosition( $itemPos, $baseRing );
+                               $nodeIndex = $wrapper->findNodeIndexForPosition( $itemPos, $guess, $baseRing );
+                               $nodePos = $baseRing[$nodeIndex][HashRing::KEY_POS];
+
+                               // %u prints both positions as unsigned integers
+                               $lines[] = sprintf( "%u %u %s\n", $itemPos, $nodePos, $location );
+                       }
+
+                       return "\n" . implode( '', $lines );
+               };
+
+               // Known correct values generated from C code:
+               // https://github.com/RJ/ketama/blob/master/libketama/ketama_test.c
+               $expected = <<<EOT
+
+2216742351 2217271743 10.0.1.1:11211
+943901380 949045552 10.0.1.5:11211
+2373066440 2374693370 10.0.1.6:11211
+2127088620 2130338203 10.0.1.6:11211
+2046197672 2051996197 10.0.1.7:11211
+2134629092 2135172435 10.0.1.1:11211
+470382870 472541453 10.0.1.7:11211
+1608782991 1609789509 10.0.1.3:11211
+2516119753 2520092206 10.0.1.2:11211
+3465331781 3466294492 10.0.1.4:11211
+1749342675 1753760600 10.0.1.5:11211
+1136464485 1137779711 10.0.1.1:11211
+3620997826 3621580689 10.0.1.7:11211
+283385029 285581365 10.0.1.6:11211
+2300818346 2302165654 10.0.1.5:11211
+2132603803 2134614475 10.0.1.8:11211
+2962705863 2969767984 10.0.1.2:11211
+786427760 786565633 10.0.1.5:11211
+4095887727 4096760944 10.0.1.6:11211
+2906459679 2906987515 10.0.1.6:11211
+137884056 138922607 10.0.1.4:11211
+81549628 82491298 10.0.1.6:11211
+3530020790 3530525869 10.0.1.6:11211
+4231817527 4234960467 10.0.1.7:11211
+2011099423 2014738083 10.0.1.7:11211
+107620750 120968799 10.0.1.6:11211
+3979113294 3981926993 10.0.1.4:11211
+273671938 276355738 10.0.1.4:11211
+4032816947 4033300359 10.0.1.5:11211
+464234862 466093615 10.0.1.1:11211
+3007059764 3007671127 10.0.1.5:11211
+542337729 542491760 10.0.1.7:11211
+4040385635 4044064727 10.0.1.5:11211
+3319802648 3320661601 10.0.1.7:11211
+1032153571 1035085391 10.0.1.1:11211
+3543939100 3545608820 10.0.1.5:11211
+3876899353 3885324049 10.0.1.2:11211
+3771318181 3773259708 10.0.1.8:11211
+3457906597 3459285639 10.0.1.5:11211
+3028975062 3031083168 10.0.1.7:11211
+244467158 250943416 10.0.1.5:11211
+1604785716 1609789509 10.0.1.3:11211
+3905343649 3905751132 10.0.1.1:11211
+1713497623 1725056963 10.0.1.5:11211
+1668356087 1668827816 10.0.1.5:11211
+3427369836 3438933308 10.0.1.1:11211
+2515850457 2520092206 10.0.1.2:11211
+3886138983 3887390208 10.0.1.1:11211
+4019334756 4023153300 10.0.1.8:11211
+1170561012 1170785765 10.0.1.7:11211
+1841809344 1848425105 10.0.1.6:11211
+973223976 973369204 10.0.1.1:11211
+358093210 359562433 10.0.1.6:11211
+378350808 380841931 10.0.1.5:11211
+4008477862 4012085095 10.0.1.7:11211
+1027226549 1028630030 10.0.1.6:11211
+2386583967 2387706118 10.0.1.1:11211
+522892146 524831677 10.0.1.7:11211
+3779194982 3788912803 10.0.1.5:11211
+3764731657 3771312500 10.0.1.7:11211
+184756999 187529415 10.0.1.6:11211
+838351231 845886003 10.0.1.3:11211
+2827220548 2828019973 10.0.1.6:11211
+3604721411 3607668249 10.0.1.6:11211
+472866282 475506254 10.0.1.5:11211
+2752268796 2754833471 10.0.1.5:11211
+1791464754 1795042583 10.0.1.7:11211
+3029359475 3031083168 10.0.1.7:11211
+3633378211 3639985542 10.0.1.6:11211
+3148267284 3149217023 10.0.1.6:11211
+163887996 166705043 10.0.1.7:11211
+3642803426 3649125922 10.0.1.7:11211
+3901799218 3902199881 10.0.1.7:11211
+418045394 425867331 10.0.1.6:11211
+346775981 348578169 10.0.1.6:11211
+368352208 372224616 10.0.1.7:11211
+2643711995 2644259911 10.0.1.5:11211
+2032983336 2033860601 10.0.1.6:11211
+3567842357 3572867530 10.0.1.2:11211
+1024982737 1028630030 10.0.1.6:11211
+933966832 938106828 10.0.1.7:11211
+2102520899 2103402846 10.0.1.7:11211
+3537205399 3538094881 10.0.1.7:11211
+2311233534 2314593262 10.0.1.1:11211
+2500514664 2503565236 10.0.1.7:11211
+1091958846 1093484995 10.0.1.6:11211
+3984972691 3987453644 10.0.1.1:11211
+2669994439 2670911201 10.0.1.4:11211
+2846111786 2846115813 10.0.1.5:11211
+1805010806 1808593732 10.0.1.8:11211
+1587024774 1587746378 10.0.1.5:11211
+3214549588 3215619351 10.0.1.2:11211
+1965214866 1970922428 10.0.1.7:11211
+1038671000 1040777775 10.0.1.7:11211
+820820468 823114475 10.0.1.6:11211
+2722835329 2723166435 10.0.1.5:11211
+1602053414 1604196066 10.0.1.5:11211
+1330835426 1335097278 10.0.1.5:11211
+556547565 557075710 10.0.1.4:11211
+2977587884 2978402952 10.0.1.1:11211
+
+EOT;
+
+               // Small run compared as text so that a mismatch shows the offending lines
+               $this->assertEquals( $expected, $ketama_test( 100 ), 'Ketama mode (diff check)' );
+
+               // Hash of known correct values from C code
+               // (1e5 is a float literal; the closure only uses it as the loop's upper bound)
+               $this->assertEquals(
+                       'c69ac9eb7a8a630c0cded201cefeaace',
+                       md5( $ketama_test( 1e5 ) ),
+                       'Ketama mode (large, MD5 check)'
+               );
+
+               // Slower, full upstream MD5 check, manually verified 3/21/2018
+               // $this->assertEquals( '5672b131391f5aa2b280936aec1eea74', md5( $ketama_test( 1e6 ) ) );
+       }
 }