Merge "API tests to verify basic query functionality (list & props)"
[lhc/web/wiklou.git] / includes / UIDGenerator.php
1 <?php
2 /**
3 * This file deals with UID generation.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @author Aaron Schulz
22 */
23
24 /**
25 * Class for getting statistically unique IDs
26 *
27 * @since 1.21
28 */
29 class UIDGenerator {
30 /** @var UIDGenerator */
31 protected static $instance = null;
32
33 protected $nodeId32; // string; node ID in binary (32 bits)
34 protected $nodeId48; // string; node ID in binary (48 bits)
35
36 protected $lockFile88; // string; local file path
37 protected $lockFile128; // string; local file path
38
39 /** @var Array */
40 protected $fileHandles = array(); // cache file handles
41
42 const QUICK_RAND = 1; // get randomness from fast and unsecure sources
43
/**
 * Determine this machine's node ID and the lock file paths.
 *
 * The node ID is read from a cache file in the temp directory when that
 * file holds exactly 12 hex digits. Otherwise it is taken from the output
 * of `getmac` (Windows) or `/sbin/ifconfig -a`, with random hex digits as
 * the last resort, and the result is written back to the cache file.
 */
protected function __construct() {
	$nodeIdPattern = '/^[0-9a-f]{12}$/i';
	$cacheFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';

	$nodeId = '';
	if ( is_file( $cacheFile ) ) {
		$nodeId = file_get_contents( $cacheFile );
	}

	// Try to get some ID that uniquely identifies this machine (RFC 4122)...
	if ( !preg_match( $nodeIdPattern, $nodeId ) ) {
		wfSuppressWarnings();
		if ( wfIsWindows() ) {
			// http://technet.microsoft.com/en-us/library/bb490913.aspx
			$output = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
			// Only the first line of the CSV output is inspected
			$firstLine = substr( $output, 0, strcspn( $output, "\n" ) );
			$fields = str_getcsv( $firstLine );
			$nodeId = isset( $fields[0] ) ? str_replace( '-', '', $fields[0] ) : '';
		} elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
			// See http://linux.die.net/man/8/ifconfig
			$matches = array();
			preg_match(
				'/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
				wfShellExec( '/sbin/ifconfig -a' ),
				$matches
			);
			$nodeId = isset( $matches[1] ) ? str_replace( ':', '', $matches[1] ) : '';
		}
		wfRestoreWarnings();

		if ( !preg_match( $nodeIdPattern, $nodeId ) ) {
			// Nothing usable was found; fall back to random hex digits
			$nodeId = MWCryptRand::generateHex( 12, true );
			$nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
		}

		file_put_contents( $cacheFile, $nodeId ); // cache
	}

	// The 32 bit form is derived from a hash of the node ID; the 48 bit form is the ID itself
	$this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
	$this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );

	// If different processes run as different users, they may have different temp dirs.
	// This is dealt with by initializing the clock sequence number and counters randomly.
	$this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
	$this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
}
76
/**
 * Get the shared generator instance, creating it on first use.
 *
 * @return UIDGenerator
 */
protected static function singleton() {
	if ( self::$instance !== null ) {
		return self::$instance;
	}
	self::$instance = new self();

	return self::$instance;
}
86
/**
 * Get a statistically unique 88-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as values for the shard key of distributed data.
 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored as "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param $base integer Base of the returned number (2 to 36); defaults to 10
 * @return string Number
 * @throws MWException If $base is not an integer between 2 and 36
 */
public static function newTimestampedUID88( $base = 10 ) {
	if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
		throw new MWException( "Base must be an integer between 2 and 36" );
	}
	$gen = self::singleton();
	// One possible clock sequence value and 1024 counter values (10 bits)
	$time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );
	// The ID is built as a binary string and then converted to the requested base
	return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
}
110
/**
 * Assemble the 88 bits of a timestamped UID as a binary string.
 *
 * @param $info array Result of getTimestampAndDelay(); only the first two
 *   entries (UIDGenerator::millitime() result, counter) are used
 * @return string 88 bits
 * @throws MWException If the assembled string is not exactly 88 bits long
 */
protected function getTimestampedID88( array $info ) {
	list( $time, $counter ) = $info;

	$parts = array(
		// The 46 MSBs of "milliseconds since epoch"
		$this->millisecondsSinceEpochBinary( $time ),
		// A 10 bit counter, for 56 bits so far
		str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ),
		// The 32 bit node ID, for 88 bits in total
		$this->nodeId32,
	);
	$bits = implode( '', $parts );

	// Any part wider than expected shows up as a wrong total length
	if ( strlen( $bits ) !== 88 ) {
		throw new MWException( "Detected overflow for millisecond timestamp." );
	}

	return $bits;
}
129
/**
 * Get a statistically unique 128-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param $base integer Base of the returned number (2 to 36); defaults to 10
 * @return string Number
 * @throws MWException If $base is not an integer between 2 and 36
 */
public static function newTimestampedUID128( $base = 10 ) {
	$baseIsValid = is_integer( $base ) && $base >= 2 && $base <= 36;
	if ( !$baseIsValid ) {
		throw new MWException( "Base must be an integer between 2 and 36" );
	}

	$generator = self::singleton();
	// 16384 clock sequence values (14 bits) and 1048576 counter values (20 bits)
	$info = $generator->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
	$bits = $generator->getTimestampedID128( $info );

	return wfBaseConvert( $bits, 2, $base );
}
152
/**
 * Assemble the 128 bits of a timestamped UID as a binary string.
 *
 * @param $info array (UIDGenerator::millitime() result, counter, clock sequence)
 * @return string 128 bits
 * @throws MWException If the assembled string is not exactly 128 bits long
 */
protected function getTimestampedID128( array $info ) {
	list( $time, $counter, $clkSeq ) = $info;

	$parts = array(
		// The 46 MSBs of "milliseconds since epoch"
		$this->millisecondsSinceEpochBinary( $time ),
		// A 20 bit counter, for 66 bits so far
		str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ),
		// A 14 bit clock sequence number, for 80 bits so far
		str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ),
		// The 48 bit node ID, for 128 bits in total
		$this->nodeId48,
	);
	$bits = implode( '', $parts );

	// Any part wider than expected shows up as a wrong total length
	if ( strlen( $bits ) !== 128 ) {
		throw new MWException( "Detected overflow for millisecond timestamp." );
	}

	return $bits;
}
173
/**
 * Return an RFC4122 compliant v4 UUID
 *
 * 31 random hex digits are used: the version digit is fixed to "4" and
 * only two bits of one digit are kept for the variant field.
 *
 * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
 * @return string Five hyphen-separated groups of 8, 4, 4, 4 and 12 hex digits
 * @throws MWException
 */
public static function newUUIDv4( $flags = 0 ) {
	if ( $flags & self::QUICK_RAND ) {
		$hex = wfRandomString( 31 );
	} else {
		$hex = MWCryptRand::generateHex( 31 );
	}

	// "time_low" (32 bits)
	$timeLow = substr( $hex, 0, 8 );
	// "time_mid" (16 bits)
	$timeMid = substr( $hex, 8, 4 );
	// "time_hi_and_version" (16 bits)
	$timeHiAndVersion = '4' . substr( $hex, 12, 3 );
	// "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
	$variantDigit = dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) );
	$clockSeq = $variantDigit . $hex[16] . substr( $hex, 17, 2 );
	// "node" (48 bits)
	$node = substr( $hex, 19, 12 );

	return implode( '-', array( $timeLow, $timeMid, $timeHiAndVersion, $clockSeq, $node ) );
}
199
/**
 * Return an RFC4122 compliant v4 UUID with the hyphens removed
 *
 * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
 * @return string 32 hex characters with no hyphens
 * @throws MWException
 */
public static function newRawUUIDv4( $flags = 0 ) {
	$uuid = self::newUUIDv4( $flags );

	// Joining the hyphen-separated groups drops every hyphen
	return implode( '', explode( '-', $uuid ) );
}
210
/**
 * Get a (time,counter,clock sequence) where (time,counter) is higher
 * than any previous (time,counter) value for the given clock sequence.
 * This is useful for making UIDs sequential on a per-node basis.
 *
 * The state of the last generated ID is kept in the lock file, which is
 * held under an exclusive lock while it is read and rewritten.
 *
 * @param $lockFile string Name of the property holding the local lock file path
 * @param $clockSeqSize integer The number of possible clock sequence values
 * @param $counterSize integer The number of possible counter values
 * @return Array (result of UIDGenerator::millitime(), counter, clock sequence)
 * @throws MWException If the lock file cannot be opened or locked, the counter
 *   overflows for a timestamp, or the process clock disagrees with time()
 */
protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
	// Get the UID lock file handle
	if ( isset( $this->fileHandles[$lockFile] ) ) {
		$handle = $this->fileHandles[$lockFile];
	} else {
		$handle = fopen( $this->$lockFile, 'cb+' );
		if ( $handle === false ) {
			// Do not cache the failure: cached entries are expected to be
			// open handles that can later be passed to fclose()
			throw new MWException( "Could not open '{$this->$lockFile}'." );
		}
		$this->fileHandles[$lockFile] = $handle; // cache
	}
	// Acquire the UID lock file
	if ( !flock( $handle, LOCK_EX ) ) {
		throw new MWException( "Could not acquire '{$this->$lockFile}'." );
	}
	// Get the current timestamp, clock sequence number, last time, and counter
	rewind( $handle );
	$data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
	$clockChanged = false; // clock set back significantly?
	if ( count( $data ) == 5 ) { // last UID info already initialized
		$clkSeq = (int) $data[0] % $clockSeqSize;
		$prevTime = array( (int) $data[1], (int) $data[2] );
		$offset = (int) $data[4] % $counterSize; // random counter offset
		$counter = 0; // counter for UIDs with the same timestamp
		// Delay until the clock reaches the time of the last ID.
		// This detects any microtime() drift among processes.
		$time = $this->timeWaitUntil( $prevTime );
		if ( !$time ) { // too long to delay?
			$clockChanged = true; // bump clock sequence number
			$time = self::millitime();
		} elseif ( $time == $prevTime ) {
			// Bump the counter if there are timestamp collisions
			$counter = (int) $data[3] % $counterSize;
			if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
				flock( $handle, LOCK_UN ); // abort
				throw new MWException( "Counter overflow for timestamp value." );
			}
		}
	} else { // last UID info not initialized
		$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
		$counter = 0;
		$offset = mt_rand( 0, $counterSize - 1 );
		$time = self::millitime();
	}
	// microtime() and gettimeofday() can drift from time() at least on Windows.
	// The drift is immediate for processes running while the system clock changes.
	// time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
	if ( abs( time() - $time[0] ) >= 2 ) {
		// We don't want processes using too high or low timestamps to avoid duplicate
		// UIDs and clock sequence number churn. This process should just be restarted.
		flock( $handle, LOCK_UN ); // abort
		throw new MWException( "Process clock is outdated or drifted." );
	}
	// If microtime() is synced and a clock change was detected, then the clock went back
	if ( $clockChanged ) {
		// Bump the clock sequence number and also randomize the counter offset,
		// which is useful for UIDs that do not include the clock sequence number.
		$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
		$offset = mt_rand( 0, $counterSize - 1 );
		trigger_error( "Clock was set back; sequence number incremented." );
	}
	// Update the (clock sequence number, timestamp, counter)
	ftruncate( $handle, 0 );
	rewind( $handle );
	fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
	fflush( $handle );
	// Release the UID lock file
	flock( $handle, LOCK_UN );

	// The stored counter is shifted by the random offset, wrapping within the counter range
	return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
}
292
/**
 * Busy-wait until the current timestamp reaches $time and return the
 * current timestamp. This returns false if it would have to wait more than 10ms.
 *
 * @param $time array Result of UIDGenerator::millitime()
 * @return Array|bool UIDGenerator::millitime() result or false
 */
protected function timeWaitUntil( array $time ) {
	while ( true ) {
		$now = self::millitime();
		// Arrays of equal size are compared element by element; see
		// http://php.net/manual/en/language.operators.comparison.php
		if ( $now >= $time ) {
			return $now; // current timestamp has reached $time
		}
		$remainingMs = ( $time[0] - $now[0] )*1000 + ( $time[1] - $now[1] );
		if ( $remainingMs > 10 ) {
			return false; // too far in the future to wait for
		}
	}
}
310
/**
 * Convert a (seconds, milliseconds) pair to a fixed-width binary string.
 *
 * The millisecond total is taken modulo 2^46. Native integers are used
 * when they are 64 bits wide; otherwise gmp or bcmath does the arithmetic.
 *
 * @param $time array Result of UIDGenerator::millitime()
 * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
 * @throws MWException If integers are 32 bit and neither gmp nor bcmath is loaded
 */
protected function millisecondsSinceEpochBinary( array $time ) {
	list( $sec, $msec ) = $time;

	if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers
		$wrapped = ( 1000 * $sec + $msec ) % pow( 2, 46 );
		return str_pad( decbin( $wrapped ), 46, '0', STR_PAD_LEFT );
	}

	if ( extension_loaded( 'gmp' ) ) {
		$total = gmp_add( gmp_mul( (string) $sec, (string) 1000 ), (string) $msec );
		$wrapped = gmp_mod( $total, gmp_pow( '2', '46' ) ); // wrap around
		return str_pad( gmp_strval( $wrapped, 2 ), 46, '0', STR_PAD_LEFT );
	}

	if ( extension_loaded( 'bcmath' ) ) {
		$total = bcadd( bcmul( $sec, 1000 ), $msec );
		$wrapped = bcmod( $total, bcpow( 2, 46 ) ); // wrap around
		return wfBaseConvert( $wrapped, 10, 2, 46 );
	}

	throw new MWException( 'bcmath or gmp extension required for 32 bit machines.' );
}
337
/**
 * Read the current time from microtime().
 *
 * @return Array (current time in seconds, milliseconds since then)
 */
protected static function millitime() {
	// microtime() yields "<fraction of a second> <whole seconds>"
	$parts = explode( ' ', microtime() );
	$seconds = (int) $parts[1];
	$milliseconds = (int) ( $parts[0] * 1000 );

	return array( $seconds, $milliseconds );
}
345
/**
 * Close the cached lock file handles.
 */
public function __destruct() {
	foreach ( $this->fileHandles as $handle ) {
		// Only close entries that are still open stream resources;
		// fclose() rejects anything else
		if ( is_resource( $handle ) ) {
			fclose( $handle );
		}
	}
}
349 }