3 * This file deals with UID generation.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @author Aaron Schulz
23 use Wikimedia\Assert\Assert
;
26 * Class for getting statistically unique IDs
/** Flag: get randomness from fast and insecure sources */
const QUICK_RAND = 1;
/** Flag: use an APC like in-memory counter if available */
const QUICK_VOLATILE = 2;

/** @var UIDGenerator|null Shared instance; null until first requested */
protected static $instance = null;

/** @var string Local file path where the node ID is cached */
protected $nodeIdFile;
/** @var string Node ID in binary (32 bits) */
protected $nodeId32;
/** @var string Node ID in binary (48 bits) */
protected $nodeId48;

/** @var string Local file path of the lock file used for 88-bit UIDs */
protected $lockFile88;
/** @var string Local file path of the lock file used for 128-bit UIDs */
protected $lockFile128;

/** @var array Open file handles, keyed by file path (cache) */
protected $fileHandles = array();
/**
 * Determine the node ID of this machine and derive the per-size state.
 *
 * The node ID is read from a cache file in the temp directory when that file
 * holds 12 hex digits. Otherwise it is taken from a network interface MAC
 * address, falling back to a random value with the multicast bit set, and is
 * then written back to the cache file.
 */
protected function __construct() {
	$this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';

	// Start from an empty ID so the validation below is well-defined even
	// when the cache file is missing or cannot be read.
	$nodeId = '';
	if ( is_file( $this->nodeIdFile ) ) {
		$nodeId = (string)file_get_contents( $this->nodeIdFile );
	}

	// Try to get some ID that uniquely identifies this machine (RFC 4122)...
	if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
		MediaWiki\suppressWarnings();
		if ( wfIsWindows() ) {
			// http://technet.microsoft.com/en-us/library/bb490913.aspx
			$csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
			$line = substr( $csv, 0, strcspn( $csv, "\n" ) );
			$info = str_getcsv( $line );
			$nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
		} elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
			// See http://linux.die.net/man/8/ifconfig
			$m = array();
			preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
				wfShellExec( '/sbin/ifconfig -a' ), $m );
			$nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
		}
		MediaWiki\restoreWarnings();

		if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
			// No usable MAC address; fall back to a random node ID
			$nodeId = MWCryptRand::generateHex( 12, true );
			$nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
		}
		file_put_contents( $this->nodeIdFile, $nodeId ); // cache
	}

	$this->nodeId32 = Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
	$this->nodeId48 = Wikimedia\base_convert( $nodeId, 16, 2, 48 );

	// If different processes run as different users, they may have different temp dirs.
	// This is dealt with by initializing the clock sequence number and counters randomly.
	$this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
	$this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
}
/**
 * Get the shared generator, creating it on first use.
 *
 * @todo: move to MW-specific factory class and inject temp dir
 * @return UIDGenerator
 */
protected static function singleton() {
	if ( self::$instance !== null ) {
		return self::$instance;
	}

	self::$instance = new self();

	return self::$instance;
}
/**
 * Get a statistically unique 88-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as values for the shard key of distributed data.
 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param int $base Specifies a base other than 10 (2 to 36)
 * @return string Number
 * @throws RuntimeException
 */
public static function newTimestampedUID88( $base = 10 ) {
	Assert::parameterType( 'integer', $base, '$base' );
	Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
	Assert::parameter( $base >= 2, '$base', 'must be >= 2' );

	$generator = self::singleton();
	// One possible clock sequence value and a 10 bit (1024 value) counter
	$stamp = $generator->getTimestampAndDelay( 'lockFile88', 1, 1024 );
	$binaryId = $generator->getTimestampedID88( $stamp );

	return Wikimedia\base_convert( $binaryId, 2, $base );
}
/**
 * Assemble the 88-bit binary ID: 46 bit timestamp, 10 bit counter, 32 bit node ID.
 *
 * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
 * @return string 88 bits, as a string of '0'/'1' characters
 * @throws RuntimeException If the assembled string is not exactly 88 bits long
 */
protected function getTimestampedID88( array $info ) {
	list( $time, $counter ) = $info;
	// Take the 46 bit "milliseconds since epoch" value
	$id_bin = $this->millisecondsSinceEpochBinary( $time );
	// Add a 10 bit counter resulting in 56 bits total
	$id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
	// Add the 32 bit node ID resulting in 88 bits total
	$id_bin .= $this->nodeId32;
	// Any component wider than its slot makes the total length wrong
	if ( strlen( $id_bin ) !== 88 ) {
		throw new RuntimeException( "Detected overflow for millisecond timestamp." );
	}

	// The caller converts this binary string to the requested base
	return $id_bin;
}
/**
 * Get a statistically unique 128-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param int $base Specifies a base other than 10 (2 to 36)
 * @return string Number
 * @throws RuntimeException
 */
public static function newTimestampedUID128( $base = 10 ) {
	Assert::parameterType( 'integer', $base, '$base' );
	Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
	Assert::parameter( $base >= 2, '$base', 'must be >= 2' );

	$generator = self::singleton();
	// 14 bit (16384 value) clock sequence and 20 bit (1048576 value) counter
	$stamp = $generator->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
	$binaryId = $generator->getTimestampedID128( $stamp );

	return Wikimedia\base_convert( $binaryId, 2, $base );
}
/**
 * Assemble the 128-bit binary ID: 46 bit timestamp, 20 bit counter,
 * 14 bit clock sequence number, 48 bit node ID.
 *
 * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
 * @return string 128 bits, as a string of '0'/'1' characters
 * @throws RuntimeException If the assembled string is not exactly 128 bits long
 */
protected function getTimestampedID128( array $info ) {
	list( $time, $counter, $clkSeq ) = $info;
	// Take the 46 bit "milliseconds since epoch" value
	$id_bin = $this->millisecondsSinceEpochBinary( $time );
	// Add a 20 bit counter resulting in 66 bits total
	$id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
	// Add a 14 bit clock sequence number resulting in 80 bits total
	$id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
	// Add the 48 bit node ID resulting in 128 bits total
	$id_bin .= $this->nodeId48;
	// Any component wider than its slot makes the total length wrong
	if ( strlen( $id_bin ) !== 128 ) {
		throw new RuntimeException( "Detected overflow for millisecond timestamp." );
	}

	// The caller converts this binary string to the requested base
	return $id_bin;
}
/**
 * Return an RFC4122 compliant v4 UUID
 *
 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
 * @return string Five hyphen-separated hex groups (8-4-4-4-12)
 * @throws RuntimeException
 */
public static function newUUIDv4( $flags = 0 ) {
	// 31 hex digits of randomness; the 32nd digit is the fixed version nibble
	if ( $flags & self::QUICK_RAND ) {
		$hex = wfRandomString( 31 );
	} else {
		$hex = MWCryptRand::generateHex( 31 );
	}

	// "time_low" (32 bits)
	$timeLow = substr( $hex, 0, 8 );
	// "time_mid" (16 bits)
	$timeMid = substr( $hex, 8, 4 );
	// "time_hi_and_version" (16 bits)
	$timeHiAndVersion = '4' . substr( $hex, 12, 3 );
	// "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
	$variantDigit = dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) );
	$clockSeq = $variantDigit . $hex[16] . substr( $hex, 17, 2 );
	// "node" (48 bits)
	$node = substr( $hex, 19, 12 );

	return implode( '-', array( $timeLow, $timeMid, $timeHiAndVersion, $clockSeq, $node ) );
}
/**
 * Return an RFC4122 compliant v4 UUID
 *
 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
 * @return string 32 hex characters with no hyphens
 * @throws RuntimeException
 */
public static function newRawUUIDv4( $flags = 0 ) {
	$hyphenated = self::newUUIDv4( $flags );

	return str_replace( '-', '', $hyphenated );
}
/**
 * Return an ID that is sequential *only* for this node and bucket
 *
 * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols.
 * If UIDGenerator::QUICK_VOLATILE is used the counter might reset on server restart.
 *
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param int $bits Bit size (<=48) of resulting numbers before wrap-around
 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
 * @return float Integer value as float
 * @throws RuntimeException
 */
public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
	// Request a batch of one and hand back its only element
	$batch = self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags );

	return current( $batch );
}
/**
 * Return IDs that are sequential *only* for this node and bucket
 *
 * @see UIDGenerator::newSequentialPerNodeID()
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
 * @param int $count Number of IDs to return (1 to 10000)
 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
 * @return array Ordered list of float integer values
 * @throws RuntimeException
 */
public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
	return self::singleton()->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
}
/**
 * Return IDs that are sequential *only* for this node and bucket
 *
 * The persistent counter lives either in the local server cache (when
 * UIDGenerator::QUICK_VOLATILE is set and PHP is not running in CLI mode)
 * or in a per-bucket file in the temp directory guarded by an exclusive lock.
 *
 * @see UIDGenerator::newSequentialPerNodeID()
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
 * @param int $count Number of IDs to return (1 to 10000)
 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
 * @return array Ordered list of float integer values
 * @throws RuntimeException
 */
protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
	if ( $count <= 0 ) {
		return array(); // nothing to do
	} elseif ( $count > 10000 ) {
		throw new RuntimeException( "Number of requested IDs ($count) is too high." );
	} elseif ( $bits < 16 || $bits > 48 ) {
		throw new RuntimeException( "Requested bit size ($bits) is out of range." );
	}

	$counter = null; // post-increment persistent counter value

	// Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
	// Counter values would not survive across script instances in CLI mode.
	$cache = null;
	if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
		$cache = ObjectCache::getLocalServerInstance();
	}
	if ( $cache ) {
		$counter = $cache->incr( $bucket, $count );
		if ( $counter === false ) {
			// Key did not exist yet; seed it with this batch
			if ( !$cache->add( $bucket, (int)$count ) ) {
				throw new RuntimeException( 'Unable to set value to ' . get_class( $cache ) );
			}
			$counter = $count;
		}
	}

	// Note: use of fmod() avoids "division by zero" on 32 bit machines
	if ( $counter === null ) {
		$path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
		// Get the UID lock file handle
		if ( isset( $this->fileHandles[$path] ) ) {
			$handle = $this->fileHandles[$path];
		} else {
			$handle = fopen( $path, 'cb+' );
			$this->fileHandles[$path] = $handle ?: null; // cache
		}
		// Acquire the UID lock file
		if ( $handle === false ) {
			throw new RuntimeException( "Could not open '{$path}'." );
		} elseif ( !flock( $handle, LOCK_EX ) ) {
			// Close the handle and forget it so later calls reopen the file
			// instead of reusing a closed resource.
			fclose( $handle );
			unset( $this->fileHandles[$path] );
			throw new RuntimeException( "Could not acquire '{$path}'." );
		}
		// Fetch the counter value and increment it...
		// The handle may be a cached one whose position is at the end of the file
		rewind( $handle );
		// An empty or new file yields false from fgets(); treat that as zero
		$counter = floor( (float)trim( (string)fgets( $handle ) ) ) + $count; // fetch as float
		// Write back the new counter value
		ftruncate( $handle, 0 );
		rewind( $handle );
		// Format explicitly: the default float-to-string conversion rounds to
		// 14 significant digits, which would corrupt counters near 2^48.
		fwrite( $handle, sprintf( '%.0F', fmod( $counter, pow( 2, 48 ) ) ) ); // wrap-around as needed
		fflush( $handle );
		// Release the UID lock file
		flock( $handle, LOCK_UN );
	}

	$ids = array();
	$divisor = pow( 2, $bits );
	$currentId = floor( $counter - $count ); // pre-increment counter value
	for ( $i = 0; $i < $count; ++$i ) {
		$ids[] = fmod( ++$currentId, $divisor );
	}

	return $ids;
}
/**
 * Get a (time,counter,clock sequence) where (time,counter) is higher
 * than any previous (time,counter) value for the given clock sequence.
 * This is useful for making UIDs sequential on a per-node basis.
 *
 * State is kept in the lock file as a single line of the form
 * "<clk seq> <sec> <msec> <counter> <offset>" and is read and rewritten
 * while holding an exclusive lock on that file.
 *
 * @param string $lockFile Name of the property holding the local lock file path
 * @param int $clockSeqSize The number of possible clock sequence values
 * @param int $counterSize The number of possible counter values
 * @return array (result of UIDGenerator::millitime(), counter, clock sequence)
 * @throws RuntimeException
 */
protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
	// Get the UID lock file handle
	$path = $this->$lockFile;
	if ( isset( $this->fileHandles[$path] ) ) {
		$handle = $this->fileHandles[$path];
	} else {
		$handle = fopen( $path, 'cb+' );
		$this->fileHandles[$path] = $handle ?: null; // cache
	}
	// Acquire the UID lock file
	if ( $handle === false ) {
		throw new RuntimeException( "Could not open '{$this->$lockFile}'." );
	} elseif ( !flock( $handle, LOCK_EX ) ) {
		// Close the handle and forget it so later calls reopen the file
		// instead of reusing a closed resource.
		fclose( $handle );
		unset( $this->fileHandles[$path] );
		throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." );
	}
	// Get the current timestamp, clock sequence number, last time, and counter.
	// The handle may be a cached one whose position is at the end of the file.
	rewind( $handle );
	// "<clk seq> <sec> <msec> <counter> <offset>"; fgets() gives false on an empty file
	$data = explode( ' ', (string)fgets( $handle ) );
	$clockChanged = false; // clock set back significantly?
	if ( count( $data ) == 5 ) { // last UID info already initialized
		$clkSeq = (int)$data[0] % $clockSeqSize;
		$prevTime = array( (int)$data[1], (int)$data[2] );
		$offset = (int)$data[4] % $counterSize; // random counter offset
		$counter = 0; // counter for UIDs with the same timestamp
		// Delay until the clock reaches the time of the last ID.
		// This detects any microtime() drift among processes.
		$time = $this->timeWaitUntil( $prevTime );
		if ( !$time ) { // too long to delay?
			$clockChanged = true; // bump clock sequence number
			$time = self::millitime();
		} elseif ( $time == $prevTime ) {
			// Bump the counter if there are timestamp collisions
			$counter = (int)$data[3] % $counterSize;
			if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
				flock( $handle, LOCK_UN ); // abort
				throw new RuntimeException( "Counter overflow for timestamp value." );
			}
		}
	} else { // last UID info not initialized
		$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
		$counter = 0;
		$offset = mt_rand( 0, $counterSize - 1 );
		$time = self::millitime();
	}
	// microtime() and gettimeofday() can drift from time() at least on Windows.
	// The drift is immediate for processes running while the system clock changes.
	// time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
	if ( abs( time() - $time[0] ) >= 2 ) {
		// We don't want processes using too high or low timestamps to avoid duplicate
		// UIDs and clock sequence number churn. This process should just be restarted.
		flock( $handle, LOCK_UN ); // abort
		throw new RuntimeException( "Process clock is outdated or drifted." );
	}
	// If microtime() is synced and a clock change was detected, then the clock went back
	if ( $clockChanged ) {
		// Bump the clock sequence number and also randomize the counter offset,
		// which is useful for UIDs that do not include the clock sequence number.
		$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
		$offset = mt_rand( 0, $counterSize - 1 );
		trigger_error( "Clock was set back; sequence number incremented." );
	}
	// Update the (clock sequence number, timestamp, counter)
	ftruncate( $handle, 0 );
	rewind( $handle );
	fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
	fflush( $handle );
	// Release the UID lock file
	flock( $handle, LOCK_UN );

	return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
}
/**
 * Wait till the current timestamp reaches $time and return the current
 * timestamp. This returns false if it would have to wait more than 10ms.
 *
 * The wait is a polling loop on UIDGenerator::millitime(); it does not sleep.
 *
 * @param array $time Result of UIDGenerator::millitime()
 * @return array|bool UIDGenerator::millitime() result or false
 */
protected function timeWaitUntil( array $time ) {
	do {
		$ct = self::millitime();
		// Arrays of equal size compare element by element, so this is a
		// (seconds, milliseconds) ordering.
		// See http://php.net/manual/en/language.operators.comparison.php
		if ( $ct >= $time ) {
			return $ct; // current timestamp has reached (or passed) $time
		}
	} while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );

	// The remaining gap exceeds 10ms; let the caller handle it
	return false;
}
/**
 * Render a (seconds, milliseconds) pair as a 46 character binary string.
 *
 * The value is converted to base 2 and the last 46 characters are kept.
 * NOTE(review): presumably the fourth argument to Wikimedia\base_convert is a
 * minimum width used for zero padding -- confirm against that helper.
 *
 * @param array $time Result of UIDGenerator::millitime()
 * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
 * @throws RuntimeException
 */
protected function millisecondsSinceEpochBinary( array $time ) {
	$millis = 1000 * $time[0] + $time[1];
	if ( $millis > pow( 2, 52 ) ) {
		throw new RuntimeException( __METHOD__ .
			': sorry, this function doesn\'t work after the year 144680' );
	}

	$bits = Wikimedia\base_convert( $millis, 10, 2, 46 );

	return substr( $bits, -46 );
}
/**
 * Split the current microtime() reading into whole seconds and milliseconds.
 *
 * @return array (current time in seconds, milliseconds since then)
 */
protected static function millitime() {
	// microtime() returns "<fraction of a second> <whole seconds>"
	$parts = explode( ' ', microtime() );
	$fraction = $parts[0];
	$seconds = $parts[1];

	return array( (int)$seconds, (int)( $fraction * 1000 ) );
}
/**
 * Delete all cache files that have been created.
 *
 * This is a cleanup method primarily meant to be used from unit tests to
 * avoid polluting the local filesystem. If used outside of a unit test
 * environment it should be used with caution as it may destroy state saved
 * in the files.
 *
 * Every cached file handle is closed, its file removed, and its cache entry
 * dropped; the node ID cache file is removed as well.
 *
 * @see unitTestTearDown
 */
protected function deleteCacheFiles() {
	foreach ( $this->fileHandles as $path => $handle ) {
		// Entries are null when the file could not be opened
		if ( $handle !== null ) {
			fclose( $handle );
		}
		if ( is_file( $path ) ) {
			unlink( $path );
		}
		unset( $this->fileHandles[$path] );
	}
	if ( is_file( $this->nodeIdFile ) ) {
		unlink( $this->nodeIdFile );
	}
}
/**
 * Cleanup resources when tearing down after a unit test.
 *
 * This is a cleanup method primarily meant to be used from unit tests to
 * avoid polluting the local filesystem. If used outside of a unit test
 * environment it should be used with caution as it may destroy state saved
 * in the files.
 *
 * @see deleteCacheFiles
 */
public static function unitTestTearDown() {
	self::singleton()->deleteCacheFiles();
}
/**
 * Close every file handle that is still cached when the generator is destroyed.
 */
function __destruct() {
	foreach ( $this->fileHandles as $handle ) {
		// Skip empty entries (null is stored when a file could not be opened)
		if ( $handle ) {
			fclose( $handle );
		}
	}
}