Fix wfMessage() annotation
[lhc/web/wiklou.git] / includes / libs / HashRing.php
1 <?php
2 /**
3 * Convenience class for weighted consistent hash rings.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * Convenience class for weighted consistent hash rings
25 *
26 * This deterministically maps "keys" to a set of "locations" while avoiding clumping
27 *
28 * Each location is represented by a number of nodes on a ring proportionate to the ratio
29 * of its weight compared to the total location weight. Note positions are deterministically
30 * derived from the hash of the location name. Nodes are responsible for the portion of the
31 * ring, counter-clockwise, up until the next node. Locations are responsible for all portions
32 * of the ring that the location's nodes are responsible for.
33 *
34 * A location that is temporarily "ejected" is said to be absent from the "live" ring.
35 * If no location ejections are active, then the base ring and live ring are identical.
36 *
37 * @since 1.22
38 */
39 class HashRing implements Serializable {
40 /** @var string Hashing algorithm for hash() */
41 protected $algo;
42 /** @var int[] Non-empty (location => integer weight) */
43 protected $weightByLocation;
44 /** @var int[] Map of (location => UNIX timestamp) */
45 protected $ejectExpiryByLocation;
46
47 /** @var array[] Non-empty list of (float, node name, location name) */
48 protected $baseRing;
49 /** @var array[] Non-empty list of (float, node name, location name) */
50 protected $liveRing;
51
52 /** @var int|null Number of nodes scanned to place an item last time */
53 private $lastNodeScanSize;
54
55 /** @var float Number of positions on the ring */
56 const RING_SIZE = 4294967296.0; // 2^32
57 /** @var integer Overall number of node groups per server */
58 const HASHES_PER_LOCATION = 40;
59 /** @var integer Number of nodes in a node group */
60 const SECTORS_PER_HASH = 4;
61
62 const KEY_POS = 0;
63 const KEY_LOCATION = 1;
64
65 /** @var int Consider all locations */
66 const RING_ALL = 0;
67 /** @var int Only consider "live" locations */
68 const RING_LIVE = 1;
69
70 /**
71 * Make a consistent hash ring given a set of locations and their weight values
72 *
73 * @param int[] $map Map of (location => weight)
74 * @param string $algo Hashing algorithm listed in hash_algos() [optional]
75 * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
76 * @since 1.31
77 */
78 public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
79 $this->init( $map, $algo, $ejections );
80 }
81
82 /**
83 * @param int[] $map Map of (location => integer)
84 * @param string $algo Hashing algorithm
85 * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires
86 */
87 protected function init( array $map, $algo, array $ejections ) {
88 if ( !in_array( $algo, hash_algos(), true ) ) {
89 throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
90 }
91
92 $weightByLocation = array_filter( $map );
93 if ( $weightByLocation === [] ) {
94 throw new UnexpectedValueException( "No locations with non-zero weight." );
95 } elseif ( min( $map ) < 0 ) {
96 throw new InvalidArgumentException( "Location weight cannot be negative." );
97 }
98
99 $this->algo = $algo;
100 $this->weightByLocation = $weightByLocation;
101 $this->ejectExpiryByLocation = $ejections;
102 $this->baseRing = $this->buildLocationRing( $this->weightByLocation, $this->algo );
103 }
104
105 /**
106 * Get the location of an item on the ring
107 *
108 * @param string $item
109 * @return string Location
110 * @throws UnexpectedValueException
111 */
112 final public function getLocation( $item ) {
113 return $this->getLocations( $item, 1 )[0];
114 }
115
116 /**
117 * Get the location of an item on the ring, as well as the next locations
118 *
119 * @param string $item
120 * @param int $limit Maximum number of locations to return
121 * @param int $from One of the RING_* class constants
122 * @return string[] List of locations
123 * @throws UnexpectedValueException
124 */
125 public function getLocations( $item, $limit, $from = self::RING_ALL ) {
126 if ( $from === self::RING_ALL ) {
127 $ring = $this->baseRing;
128 } elseif ( $from === self::RING_LIVE ) {
129 $ring = $this->getLiveRing();
130 } else {
131 throw new InvalidArgumentException( "Invalid ring source specified." );
132 }
133
134 // Locate this item's position on the hash ring
135 $position = $this->getItemPosition( $item );
136
137 // Guess a nearby node based on the node list being ordered and the probabilistic
138 // expected size of nodes being equal, varying less when with higher node counts
139 $guessIndex = $this->guessNodeIndexForPosition( $position, $ring );
140
141 // Find the index of the node within which this item resides
142 $itemNodeIndex = $this->findNodeIndexForPosition( $position, $guessIndex, $ring );
143 if ( $itemNodeIndex === null ) {
144 throw new RuntimeException( __METHOD__ . ": no place for '$item' ($position)" );
145 }
146
147 $locations = [];
148 $currentIndex = $itemNodeIndex;
149 while ( count( $locations ) < $limit ) {
150 $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
151 if ( !in_array( $nodeLocation, $locations, true ) ) {
152 // Ignore other nodes for the same locations already added
153 $locations[] = $nodeLocation;
154 }
155 $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
156 if ( $currentIndex === $itemNodeIndex ) {
157 break; // all nodes visited
158 }
159 }
160
161 return $locations;
162 }
163
164 /**
165 * Get the map of locations to weight (does not include zero weight items)
166 *
167 * @return int[]
168 */
169 public function getLocationWeights() {
170 return $this->weightByLocation;
171 }
172
173 /**
174 * Remove a location from the "live" hash ring
175 *
176 * @param string $location
177 * @param int $ttl Seconds
178 * @return bool Whether some non-ejected locations are left
179 * @throws UnexpectedValueException
180 */
181 public function ejectFromLiveRing( $location, $ttl ) {
182 if ( !isset( $this->weightByLocation[$location] ) ) {
183 throw new UnexpectedValueException( "No location '$location' in the ring." );
184 }
185
186 $expiry = $this->getCurrentTime() + $ttl;
187 $this->ejectExpiryByLocation[$location] = $expiry;
188
189 $this->liveRing = null; // invalidate ring cache
190
191 return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
192 }
193
194 /**
195 * Get the location of an item on the "live" ring
196 *
197 * @param string $item
198 * @return string Location
199 * @throws UnexpectedValueException
200 */
201 final public function getLiveLocation( $item ) {
202 return $this->getLocations( $item, 1, self::RING_LIVE )[0];
203 }
204
205 /**
206 * Get the location of an item on the "live" ring, as well as the next locations
207 *
208 * @param string $item
209 * @param int $limit Maximum number of locations to return
210 * @return string[] List of locations
211 * @throws UnexpectedValueException
212 */
213 final public function getLiveLocations( $item, $limit ) {
214 return $this->getLocations( $item, $limit, self::RING_LIVE );
215 }
216
217 /**
218 * Get the map of "live" locations to weight (does not include zero weight items)
219 *
220 * @return int[]
221 * @throws UnexpectedValueException
222 */
223 public function getLiveLocationWeights() {
224 $now = $this->getCurrentTime();
225
226 return array_diff_key(
227 $this->weightByLocation,
228 array_filter(
229 $this->ejectExpiryByLocation,
230 function ( $expiry ) use ( $now ) {
231 return ( $expiry > $now );
232 }
233 )
234 );
235 }
236
237 /**
238 * @param float $position
239 * @param array[] $ring Either the base or live ring
240 * @return int
241 */
242 private function guessNodeIndexForPosition( $position, $ring ) {
243 $arcRatio = $position / self::RING_SIZE; // range is [0.0, 1.0)
244 $maxIndex = count( $ring ) - 1;
245 $guessIndex = intval( $maxIndex * $arcRatio );
246
247 $displacement = $ring[$guessIndex][self::KEY_POS] - $position;
248 $aveSize = self::RING_SIZE / count( $ring );
249 $shift = intval( $displacement / $aveSize );
250
251 $guessIndex -= $shift;
252 if ( $guessIndex < 0 ) {
253 $guessIndex = max( $maxIndex + $guessIndex, 0 ); // roll-over
254 } elseif ( $guessIndex > $maxIndex ) {
255 $guessIndex = min( $guessIndex - $maxIndex, 0 ); // roll-over
256 }
257
258 return $guessIndex;
259 }
260
261 /**
262 * @param float $position
263 * @param int $guessIndex Node index to start scanning
264 * @param array[] $ring Either the base or live ring
265 * @return int|null
266 */
267 private function findNodeIndexForPosition( $position, $guessIndex, $ring ) {
268 $mainNodeIndex = null; // first matching node index
269
270 $this->lastNodeScanSize = 0;
271
272 if ( $ring[$guessIndex][self::KEY_POS] >= $position ) {
273 // Walk the nodes counter-clockwise until reaching a node at/before $position
274 do {
275 $priorIndex = $guessIndex;
276 $guessIndex = $this->getPrevClockwiseNodeIndex( $guessIndex, $ring );
277 $nodePosition = $ring[$guessIndex][self::KEY_POS];
278 if ( $nodePosition < $position || $guessIndex > $priorIndex ) {
279 $mainNodeIndex = $priorIndex; // includes roll-over case
280 } elseif ( $nodePosition === $position ) {
281 $mainNodeIndex = $guessIndex;
282 }
283 ++$this->lastNodeScanSize;
284 } while ( $mainNodeIndex === null );
285 } else {
286 // Walk the nodes clockwise until reaching a node at/after $position
287 do {
288 $priorIndex = $guessIndex;
289 $guessIndex = $this->getNextClockwiseNodeIndex( $guessIndex, $ring );
290 $nodePosition = $ring[$guessIndex][self::KEY_POS];
291 if ( $nodePosition >= $position || $guessIndex < $priorIndex ) {
292 $mainNodeIndex = $guessIndex; // includes roll-over case
293 }
294 ++$this->lastNodeScanSize;
295 } while ( $mainNodeIndex === null );
296 }
297
298 return $mainNodeIndex;
299 }
300
301 /**
302 * @param int[] $weightByLocation
303 * @param string $algo Hashing algorithm
304 * @return array[]
305 */
306 private function buildLocationRing( array $weightByLocation, $algo ) {
307 $locationCount = count( $weightByLocation );
308 $totalWeight = array_sum( $weightByLocation );
309
310 $ring = [];
311 // Assign nodes to all locations based on location weight
312 $claimed = []; // (position as string => (node, index))
313 foreach ( $weightByLocation as $location => $weight ) {
314 $ratio = $weight / $totalWeight;
315 // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
316 // assign a few groups of nodes to this location based on its weight.
317 $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
318 for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
319 // For efficiency, get 4 points per hash call and 4X node count.
320 // If $algo is MD5, then this matches that of with libketama.
321 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
322 $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
323 foreach ( $positions as $gi => $position ) {
324 $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
325 $posKey = (string)$position; // large integer
326 if ( isset( $claimed[$posKey] ) ) {
327 // Disallow duplicates for sanity (name decides precedence)
328 if ( $claimed[$posKey]['node'] > $node ) {
329 continue;
330 } else {
331 unset( $ring[$claimed[$posKey]['index']] );
332 }
333 }
334 $ring[] = [
335 self::KEY_POS => $position,
336 self::KEY_LOCATION => $location
337 ];
338 $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ];
339 }
340 }
341 }
342 // Sort the locations into clockwise order based on the hash ring position
343 usort( $ring, function ( $a, $b ) {
344 if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
345 throw new UnexpectedValueException( 'Duplicate node positions.' );
346 }
347
348 return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
349 } );
350
351 return $ring;
352 }
353
354 /**
355 * @param string $item Key
356 * @return float Ring position; integral number in [0, self::RING_SIZE - 1]
357 */
358 private function getItemPosition( $item ) {
359 // If $algo is MD5, then this matches that of with libketama.
360 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
361 $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 );
362 if ( strlen( $octets ) != 4 ) {
363 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
364 }
365
366 return (float)sprintf( '%u', unpack( 'V', $octets )[1] );
367 }
368
369 /**
370 * @param string $nodeGroupName
371 * @return float[] Four ring positions on [0, self::RING_SIZE - 1]
372 */
373 private function getNodePositionQuartet( $nodeGroupName ) {
374 $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 );
375 if ( strlen( $octets ) != 16 ) {
376 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
377 }
378
379 $positions = [];
380 foreach ( unpack( 'V4', $octets ) as $signed ) {
381 $positions[] = (float)sprintf( '%u', $signed );
382 }
383
384 return $positions;
385 }
386
387 /**
388 * @param int $i Valid index for a node in the ring
389 * @param array[] $ring Either the base or live ring
390 * @return int Valid index for a node in the ring
391 */
392 private function getNextClockwiseNodeIndex( $i, $ring ) {
393 if ( !isset( $ring[$i] ) ) {
394 throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
395 }
396
397 $next = $i + 1;
398
399 return ( $next < count( $ring ) ) ? $next : 0;
400 }
401
402 /**
403 * @param int $i Valid index for a node in the ring
404 * @param array[] $ring Either the base or live ring
405 * @return int Valid index for a node in the ring
406 */
407 private function getPrevClockwiseNodeIndex( $i, $ring ) {
408 if ( !isset( $ring[$i] ) ) {
409 throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
410 }
411
412 $prev = $i - 1;
413
414 return ( $prev >= 0 ) ? $prev : count( $ring ) - 1;
415 }
416
417 /**
418 * Get the "live" hash ring (which does not include ejected locations)
419 *
420 * @return array[]
421 * @throws UnexpectedValueException
422 */
423 protected function getLiveRing() {
424 if ( !$this->ejectExpiryByLocation ) {
425 return $this->baseRing; // nothing ejected
426 }
427
428 $now = $this->getCurrentTime();
429
430 if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
431 // Live ring needs to be regerenated...
432 $this->ejectExpiryByLocation = array_filter(
433 $this->ejectExpiryByLocation,
434 function ( $expiry ) use ( $now ) {
435 return ( $expiry > $now );
436 }
437 );
438
439 if ( count( $this->ejectExpiryByLocation ) ) {
440 // Some locations are still ejected from the ring
441 $liveRing = [];
442 foreach ( $this->baseRing as $i => $nodeInfo ) {
443 $location = $nodeInfo[self::KEY_LOCATION];
444 if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
445 $liveRing[] = $nodeInfo;
446 }
447 }
448 } else {
449 $liveRing = $this->baseRing;
450 }
451
452 $this->liveRing = $liveRing;
453 }
454
455 if ( !$this->liveRing ) {
456 throw new UnexpectedValueException( "The live ring is currently empty." );
457 }
458
459 return $this->liveRing;
460 }
461
462 /**
463 * @return int UNIX timestamp
464 */
465 protected function getCurrentTime() {
466 return time();
467 }
468
469 /**
470 * @return int|null
471 */
472 public function getLastNodeScanSize() {
473 return $this->lastNodeScanSize;
474 }
475
476 public function serialize() {
477 return serialize( [
478 'algorithm' => $this->algo,
479 'locations' => $this->weightByLocation,
480 'ejections' => $this->ejectExpiryByLocation
481 ] );
482 }
483
484 public function unserialize( $serialized ) {
485 $data = unserialize( $serialized );
486 if ( is_array( $data ) ) {
487 $this->init( $data['locations'], $data['algorithm'], $data['ejections'] );
488 } else {
489 throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." );
490 }
491 }
492 }