3 * This file deals with UID generation.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @author Aaron Schulz
25 * Class for getting statistically unique IDs
30 /** @var UIDGenerator */
31 protected static $instance = null;
33 protected $nodeId24; // string; node ID in binary (24 bits)
34 protected $nodeId32; // string; node ID in binary (32 bits)
35 protected $nodeId48; // string; node ID in binary (48 bits)
36 protected $threadId; // integer; thread ID or random integer
38 protected $lockFile64; // string; local file path
39 protected $lockFile88; // string; local file path
40 protected $lockFile128; // string; local file path
42 const EPOCH_MS_40
= 1325376000; // "January 1, 2012"
43 const EPOCH_US_56
= 1325376000; // "January 1, 2012"
45 protected function __construct() {
46 # Try to get some ID that uniquely identifies this machine...
48 if ( wfIsWindows() ) {
49 # http://technet.microsoft.com/en-us/library/bb490913.aspx
50 $csv = trim( `getmac
/NH
/FO CSV`
);
51 $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
52 $info = str_getcsv( $line );
53 $hostId = count( $info ) ?
str_replace( '-', '', $info[0] ) : '';
55 # See http://linux.die.net/man/1/hostid
56 $hostId = trim( `hostid`
);
59 $hostId = strtolower( $hostId );
60 if ( preg_match( '/^[0-9a-f]{8}|[0-9a-f]{12}$/', $hostId ) ) {
61 $this->nodeId24
= wfBaseConvert( substr( sha1( $hostId ), 0, 6 ), 16, 2, 24 );
62 $this->nodeId32
= wfBaseConvert( substr( sha1( $hostId ), 0, 8 ), 16, 2, 32 );
63 $this->nodeId48
= wfBaseConvert( $hostId, 16, 2, 48 );
64 } else { // fallback to host name
65 $hash = sha1( wfHostname() . ':' . php_uname( 's' ) . ':' . php_uname( 'm' ) );
66 $this->nodeId24
= wfBaseConvert( substr( $hash, 0, 6 ), 16, 2, 24 );
67 $this->nodeId32
= wfBaseConvert( substr( $hash, 0, 8 ), 16, 2, 32 );
68 $this->nodeId48
= wfBaseConvert( substr( $hash, 0, 12 ), 16, 2, 48 );
70 # UID128 should be unique without having to check. The flock() calls may not be effective
71 # on operating systems when PHP is running in a threaded server model. In such cases, the
72 # Zend Thread Safety extension should be installed per php.net guidelines. This extension
73 # gives us zend_thread_id(), which we include in the UID to avoid collisions.
74 # See http://www.php.net/manual/en/install.unix.apache2.php.
75 $this->threadId
= function_exists( 'zend_thread_id' ) ?
zend_thread_id() : 0;
76 $this->lockFile64
= wfTempDir() . '/mw-' . __CLASS__
. '-UID-64.lock';
77 $this->lockFile88
= wfTempDir() . '/mw-' . __CLASS__
. '-UID-88.lock';
78 $this->lockFile128
= wfTempDir() . '/mw-' . __CLASS__
. '-UID-128.lock';
82 * @return UIDGenerator
84 protected static function singleton() {
85 if ( self
::$instance === null ) {
86 self
::$instance = new self();
88 return self
::$instance;
92 * Get a statistically unique 64-bit unsigned integer ID string.
93 * The bits of the UID are prefixed with the time (down to the millisecond).
95 * These IDs are suitable as values for the shard column of sharded DB tables.
96 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
97 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
98 * They can also be stored efficiently as a "BIGINT UNSIGNED" in MySQL.
100 * UID generation is serialized on each server (as the node ID is for the whole machine).
103 * @throws MWException
105 public static function timestampedUID64() {
106 $generator = self
::singleton();
108 // Acquire the UID lock file
109 $handle = fopen( $generator->lockFile64
, 'a' );
110 if ( $handle === false ) {
111 throw new MWException( "Could not open '{$this->lockFile64}'." );
112 } elseif ( !flock( $handle, LOCK_EX
) ) {
113 fclose( $handle ); // bail out
114 throw new MWException( 'Could not acquire local UID64 lock.' );
116 // Get the current UNIX timestamp on this server
117 $time = self
::millitime(); // millisecond precision
118 // Delay so the next process on this server will generate higher UIDs
119 while ( self
::lessThanOrEqualTime( self
::millitime(), $time ) );
120 // Release the UID lock file
121 flock( $handle, LOCK_UN
);
124 // Generate a UID that is higher than the last one from this server
125 return $generator->newTimestampedID64( $time );
129 * @param $time array Result of UIDGenerator::millitime()
132 protected function newTimestampedID64( array $time ) {
133 list( $sec, $msec ) = $time;
134 $sec = $sec - self
::EPOCH_MS_40
; // start from epoch
135 # Take the 40 MSBs of "milliseconds since epoch" (rolls over in 2046)
136 if ( PHP_INT_SIZE
=== 8 ) { // 64 bit integers
137 $ts = ( 1e3
* $sec +
$msec );
138 $id_bin = str_pad( decbin( $ts %
pow( 2, 40 ) ), 40, '0', STR_PAD_LEFT
);
139 } elseif ( extension_loaded( 'bcmath' ) ) { // 32 bit integers
140 $ts = bcadd( bcmul( $sec, 1e3
), $msec );
141 $id_bin = wfBaseConvert( bcmod( $ts, bcpow( 2, 40 ) ), 10, 2, 40 );
143 throw new MWException( 'bcmath extension required for 32 bit machines.' );
145 # Add the 24 bit node ID resulting in 64 bits total
146 $id_bin .= $this->nodeId24
;
147 # Convert to a 1-20 digit integer string
148 if ( strlen( $id_bin ) !== 64 ) {
149 throw new MWException( "Detected overflow for millisecond timestamp." );
151 return wfBaseConvert( $id_bin, 2, 10 );
155 * Get a statistically unique 88-bit unsigned integer ID string.
156 * The bits of the UID are prefixed with the time (down to the microsecond).
158 * These IDs are suitable as values for the shard column of sharded DB tables.
159 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
160 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
161 * They can also be stored reasonably as a "DECIMAL(27) UNSIGNED" in MySQL.
163 * UID generation is serialized on each server (as the node ID is for the whole machine).
166 * @throws MWException
168 public static function timestampedUID88() {
169 $generator = self
::singleton();
171 // Acquire the UID lock file
172 $handle = fopen( $generator->lockFile88
, 'a' );
173 if ( $handle === false ) {
174 throw new MWException( "Could not open '{$this->lockFile88}'." );
175 } elseif ( !flock( $handle, LOCK_EX
) ) {
176 fclose( $handle ); // bail out
177 throw new MWException( 'Could not acquire local UID88 lock.' );
179 // Get the current UNIX timestamp on this server
180 $time = self
::microtime(); // microsecond precision
181 // Delay so the next process on this server will generate higher UIDs
182 while ( self
::lessThanOrEqualTime( self
::microtime(), $time ) );
183 // Release the UID lock file
184 flock( $handle, LOCK_UN
);
187 // Generate a UID that is higher than the last one from this server
188 return $generator->newTimestampedID88( $time );
192 * @param $time array Result of UIDGenerator::microtime()
195 protected function newTimestampedID88( array $time ) {
196 list( $sec, $usec ) = $time;
197 $sec = $sec - self
::EPOCH_US_56
; // start from epoch
198 # Take the 56 MSBs of "microseconds since epoch" (rolls over in 4354)
199 if ( PHP_INT_SIZE
=== 8 ) { // 64 bit integers
200 $ts = ( 1e6
* $sec +
$usec );
201 $id_bin = str_pad( decbin( $ts %
pow( 2, 56 ) ), 56, '0', STR_PAD_LEFT
);
202 } elseif ( extension_loaded( 'bcmath' ) ) { // 32 bit integers
203 $ts = bcadd( bcmul( $sec, 1e6
), $usec );
204 $id_bin = wfBaseConvert( bcmod( $ts, bcpow( 2, 56 ) ), 10, 2, 56 );
206 throw new MWException( 'bcmath extension required for 32 bit machines.' );
208 # Add the 32 bit node ID resulting in 88 bits total
209 $id_bin .= $this->nodeId32
;
210 # Convert to a 1-27 digit integer string
211 if ( strlen( $id_bin ) !== 88 ) {
212 throw new MWException( "Detected overflow for microsecond timestamp." );
214 return wfBaseConvert( $id_bin, 2, 10 );
218 * Get a statistically unique 128-bit unsigned integer ID string.
219 * The bits of the UID are prefixed with the time (down to the microsecond).
221 * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
222 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
223 * They can also be stored reasonably as a "DECIMAL(39) UNSIGNED" in MySQL.
225 * UID generation is serialized on each server (as the node ID is for the whole machine).
228 * @throws MWException
230 public static function timestampedUID128() {
231 $generator = self
::singleton();
233 // Acquire the UID lock file
234 $handle = fopen( $generator->lockFile128
, 'a' );
235 if ( $handle === false ) {
236 throw new MWException( "Could not open '{$this->lockFile128}'." );
237 } elseif ( !flock( $handle, LOCK_EX
) ) {
238 fclose( $handle ); // bail out
239 throw new MWException( 'Could not acquire local UID128 lock.' );
241 // Get the current UNIX timestamp on this server
242 $time = self
::microtime(); // microsecond precision
243 // Delay so the next process on this server will generate higher UIDs
244 while ( self
::lessThanOrEqualTime( self
::microtime(), $time ) );
245 // Release the UID lock file
246 flock( $handle, LOCK_UN
);
249 // Generate a UID that is higher than the last one from this server
250 return $generator->newTimestampedID128( $time );
254 * @param $time array Result of UIDGenerator::microtime()
257 protected function newTimestampedID128( array $time ) {
258 list( $sec, $usec ) = $time;
259 $sec = $sec - self
::EPOCH_US_56
; // start from epoch
260 $rand = mt_rand( 0, 255 );
261 $tid = $this->threadId ?
$this->threadId
: mt_rand( 0, 65535 );
262 # Take the 56 MSBs of "microseconds since epoch" (rolls over in 4354)
263 if ( PHP_INT_SIZE
=== 8 ) { // 64 bit integers
264 $ts = ( 1e6
* $sec +
$usec );
265 $id_bin = str_pad( decbin( $ts %
pow( 2, 56 ) ), 56, '0', STR_PAD_LEFT
);
266 } elseif ( extension_loaded( 'bcmath' ) ) { // 32 bit integers
267 $ts = bcadd( bcmul( $sec, 1e6
), $usec );
268 $id_bin = wfBaseConvert( bcmod( $ts, bcpow( 2, 56 ) ), 10, 2, 56 );
270 throw new MWException( 'bcmath extension required for 32 bit machines.' );
272 # Add on 8 bits of randomness to make 64 bits total (2^8 = 256)
273 $id_bin .= str_pad( decbin( $rand ), 8, '0', STR_PAD_LEFT
);
274 # Add on 16 bits of thread ID or randomness to make 80 bits total (2^16 = 65536)
275 $id_bin .= str_pad( decbin( $tid %
65536 ), 16, '0', STR_PAD_LEFT
);
276 # Add the 48 bit node ID resulting in 128 bits total
277 $id_bin .= $this->nodeId48
;
278 # Convert to a 1-39 digit integer string
279 if ( strlen( $id_bin ) !== 128 ) {
280 throw new MWException( "Detected overflow for microsecond timestamp." );
282 return wfBaseConvert( $id_bin, 2, 10 );
286 * @return array (current time in seconds, milliseconds since then)
288 protected static function millitime() {
289 list( $usec, $sec ) = explode( ' ', microtime() );
290 return array( (int)$sec, (int)( $usec * 1e3
) );
294 * @return array (current time in seconds, microseconds since then)
296 protected static function microtime() {
297 list( $usec, $sec ) = explode( ' ', microtime() );
298 return array( (int)$sec, (int)( $usec * 1e6
) );
302 * Returns true if time $t1 is <= time $t2
304 * @param $t1 array Result of UIDGenerator::microtime() or UIDGenerator::millitime()
305 * @param $t2 array Result of UIDGenerator::microtime() or UIDGenerator::millitime()
307 protected static function lessThanOrEqualTime( array $t1, array $t2 ) {
308 return ( $t1[0] < $t2[0] ||
( $t1[0] === $t2[0] && $t1[1] <= $t2[1] ) );