And more documentation. Yaaaay
[lhc/web/wiklou.git] / includes / Cdb_PHP.php
1 <?php
2 /**
3 * This is a port of D.J. Bernstein's CDB to PHP. It's based on the copy that
4 * appears in PHP 5.3. Changes are:
5 * * Error returns replaced with exceptions
6 * * Exception thrown if sizes or offsets are between 2GB and 4GB
7 * * Some variables renamed
8 *
9 * @file
10 */
11
12 /**
13 * Common functions for readers and writers
14 */
class CdbFunctions {
	/**
	 * Take a modulo of a signed integer as if it were an unsigned integer.
	 * $b must be less than 0x40000000 and greater than 0.
	 *
	 * @param $a The 32-bit value to reduce; bit 31 may be set
	 * @param $b The modulus
	 * @return int
	 */
	public static function unsignedMod( $a, $b ) {
		if ( $a & 0x80000000 ) {
			// Reduce the low 31 bits, then add a value congruent to 2^31
			// modulo $b, written as 2 * (2^30 % $b).
			$m = ( $a & 0x7fffffff ) % $b + 2 * ( 0x40000000 % $b );
			return $m % $b;
		} else {
			return $a % $b;
		}
	}

	/**
	 * Shift a signed integer right as if it were unsigned.
	 *
	 * @param $a The 32-bit value to shift; bit 31 may be set
	 * @param $b The number of bits to shift by
	 * @return int
	 */
	public static function unsignedShiftRight( $a, $b ) {
		if ( $b == 0 ) {
			return $a;
		}
		if ( $a & 0x80000000 ) {
			// Shift the low 31 bits, then OR in the position that bit 31
			// lands on after the shift (bit 31 - $b).
			return ( ( $a & 0x7fffffff ) >> $b ) | ( 0x40000000 >> ( $b - 1 ) );
		} else {
			return $a >> $b;
		}
	}

	/**
	 * The CDB hash function.
	 *
	 * Starts from 5381 and, for each byte, replaces the hash with
	 * ( hash * 33 ) XOR byte, truncated to 32 bits.
	 *
	 * @param $s The key to hash. Non-string values are hashed by their
	 *   string representation.
	 * @return int
	 */
	public static function hash( $s ) {
		// Cast first: indexing a non-string (e.g. an integer key) with
		// $s[$i] does not yield its characters, which would produce a hash
		// that differs from the one computed for the same key as a string.
		$s = strval( $s );
		$len = strlen( $s );
		$h = 5381;
		for ( $i = 0; $i < $len; $i++ ) {
			$h5 = ( $h << 5 ) & 0xffffffff;
			// Do a 32-bit sum of $h and $h5 (i.e. $h * 33).
			// Inlined here for speed.
			// The low 30 bits are added directly; the two top bits are
			// rebuilt from the carry out of bit 29 and the top bits of
			// both operands.
			$sum = ( $h & 0x3fffffff ) + ( $h5 & 0x3fffffff );
			$h =
				(
					( $sum & 0x40000000 ? 1 : 0 )
					+ ( $h & 0x80000000 ? 2 : 0 )
					+ ( $h & 0x40000000 ? 1 : 0 )
					+ ( $h5 & 0x80000000 ? 2 : 0 )
					+ ( $h5 & 0x40000000 ? 1 : 0 )
				) << 30
				| ( $sum & 0x3fffffff );
			$h ^= ord( $s[$i] );
			$h &= 0xffffffff;
		}
		return $h;
	}
}
75
76 /**
77 * CDB reader class
78 */
class CdbReader_PHP extends CdbReader {
	/** The file handle */
	var $handle;

	/** Number of hash slots searched under this key */
	var $loop;

	/** The hash of the key being searched; initialized if loop is nonzero */
	var $khash;

	/** File position of the next slot to examine; initialized if loop is nonzero */
	var $kpos;

	/** File position of the hashtable for this key; initialized if loop is nonzero */
	var $hpos;

	/** Number of slots in that hashtable; initialized if loop is nonzero */
	var $hslots;

	/** File position of the found value; initialized if findNext() returns true */
	var $dpos;

	/** Length of the found value; initialized if findNext() returns true */
	var $dlen;

	/**
	 * Open a CDB file for reading.
	 *
	 * @throws MWException if the file cannot be opened
	 * @param $fileName Path of the CDB file
	 */
	function __construct( $fileName ) {
		$this->handle = fopen( $fileName, 'rb' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open CDB file "' . $fileName . '"' );
		}
		$this->findStart();
	}

	/**
	 * Close the file handle, if it is still set, and forget it.
	 */
	function close() {
		if( isset( $this->handle ) ) {
			fclose( $this->handle );
		}
		unset( $this->handle );
	}

	/**
	 * Look up a key.
	 *
	 * @param $key The key to look up; it is converted to a string
	 * @return string|bool The stored value, or false if the key is not found
	 */
	public function get( $key ) {
		// strval is required
		if ( $this->find( strval( $key ) ) ) {
			return $this->read( $this->dlen, $this->dpos );
		} else {
			return false;
		}
	}

	/**
	 * Check whether the bytes stored at $pos are exactly $key.
	 *
	 * @param $key The key to compare against
	 * @param $pos File position to read strlen( $key ) bytes from
	 * @return bool
	 */
	protected function match( $key, $pos ) {
		$buf = $this->read( strlen( $key ), $pos );
		return $buf === $key;
	}

	/**
	 * Reset the search state so the next findNext() starts a new lookup.
	 */
	protected function findStart() {
		$this->loop = 0;
	}

	/**
	 * Read exactly $length bytes starting at file position $pos.
	 *
	 * @throws MWException if the seek fails or fewer bytes are available
	 * @param $length Number of bytes to read
	 * @param $pos File position to read from
	 * @return string
	 */
	protected function read( $length, $pos ) {
		if ( fseek( $this->handle, $pos ) == -1 ) {
			// This can easily happen if the internal pointers are incorrect
			throw new MWException( __METHOD__.': seek failed, file may be corrupted.' );
		}

		if ( $length == 0 ) {
			return '';
		}

		$buf = fread( $this->handle, $length );
		if ( $buf === false || strlen( $buf ) !== $length ) {
			throw new MWException( __METHOD__.': read from CDB file failed, file may be corrupted' );
		}
		return $buf;
	}

	/**
	 * Unpack an unsigned integer and throw an exception if it needs more than 31 bits
	 *
	 * @throws MWException if the value does not fit in 31 bits
	 * @param $s A string whose first 4 bytes hold a little-endian integer
	 * @return int
	 */
	protected function unpack31( $s ) {
		$data = unpack( 'V', $s );
		// Where PHP integers are 32 bits wide, unpack() returns values with
		// bit 31 set as negative numbers, so check for those as well.
		if ( $data[1] < 0 || $data[1] > 0x7fffffff ) {
			throw new MWException( __METHOD__.': error in CDB file, integer too big' );
		}
		return $data[1];
	}

	/**
	 * Unpack a 32-bit signed integer
	 *
	 * The value is assembled from two little-endian 16-bit halves.
	 *
	 * @param $s A string whose first 4 bytes hold the integer
	 * @return int
	 */
	protected function unpackSigned( $s ) {
		$data = unpack( 'va/vb', $s );
		return $data['a'] | ( $data['b'] << 16 );
	}

	/**
	 * Find the next record matching $key, continuing from the current
	 * search state. On success, dpos and dlen describe the value.
	 *
	 * @throws MWException if the file cannot be read or contains an
	 *   integer that does not fit in 31 bits
	 * @param $key The key to search for (a string)
	 * @return bool True if a matching record was found
	 */
	protected function findNext( $key ) {
		if ( !$this->loop ) {
			// Start of a new search: the low 8 bits of the hash select one
			// of the 8-byte entries at the start of the file, each holding
			// a hashtable position followed by its slot count.
			$u = CdbFunctions::hash( $key );
			$buf = $this->read( 8, ( $u << 3 ) & 2047 );
			$this->hslots = $this->unpack31( substr( $buf, 4 ) );
			if ( !$this->hslots ) {
				return false;
			}
			$this->hpos = $this->unpack31( substr( $buf, 0, 4 ) );
			$this->khash = $u;
			// The remaining hash bits pick the first slot to examine
			$u = CdbFunctions::unsignedShiftRight( $u, 8 );
			$u = CdbFunctions::unsignedMod( $u, $this->hslots );
			$u <<= 3;
			$this->kpos = $this->hpos + $u;
		}

		while ( $this->loop < $this->hslots ) {
			// Each slot is 8 bytes: a hash followed by a record position
			$buf = $this->read( 8, $this->kpos );
			$pos = $this->unpack31( substr( $buf, 4 ) );
			if ( !$pos ) {
				// A zero position marks an empty slot, which ends the search
				return false;
			}
			$this->loop += 1;
			$this->kpos += 8;
			if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) {
				// Wrap around to the first slot of the table
				$this->kpos = $this->hpos;
			}
			$u = $this->unpackSigned( substr( $buf, 0, 4 ) );
			if ( $u === $this->khash ) {
				// The record starts with the key length and data length,
				// followed by the key and then the data.
				$buf = $this->read( 8, $pos );
				$keyLen = $this->unpack31( substr( $buf, 0, 4 ) );
				if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) {
					// Found
					$this->dlen = $this->unpack31( substr( $buf, 4 ) );
					$this->dpos = $pos + 8 + $keyLen;
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Search for $key from the beginning.
	 *
	 * @param $key The key to search for (a string)
	 * @return bool True if a matching record was found
	 */
	protected function find( $key ) {
		$this->findStart();
		return $this->findNext( $key );
	}
}
243
244 /**
245 * CDB writer class
246 */
class CdbWriter_PHP extends CdbWriter {
	/**
	 * The handle of the temporary file being written, the final file name,
	 * and the name of the temporary file.
	 */
	var $handle, $realFileName, $tmpFileName;

	/** List of array( 'h' => hash, 'p' => record position ), one per record */
	var $hplist;

	/** Number of records added so far, and the tracked write position */
	var $numentries, $pos;

	/**
	 * Start writing a new CDB file. Data goes to a temporary file next to
	 * $fileName, which close() moves into place.
	 *
	 * @throws MWException if the temporary file cannot be opened or seeked
	 * @param $fileName Path of the CDB file to create
	 */
	function __construct( $fileName ) {
		$this->realFileName = $fileName;
		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
		$this->handle = fopen( $this->tmpFileName, 'wb' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open CDB file for write "' . $fileName . '"' );
		}
		$this->hplist = array();
		$this->numentries = 0;
		$this->pos = 2048; // leaving space for the pointer array, 256 * 8
		if ( fseek( $this->handle, $this->pos ) == -1 ) {
			throw new MWException( __METHOD__.': fseek failed' );
		}
	}

	/**
	 * Finish the file if close() has not been called yet.
	 */
	function __destruct() {
		if ( isset( $this->handle ) ) {
			$this->close();
		}
	}

	/**
	 * Add a record. Empty keys are silently ignored.
	 *
	 * @throws MWException on write failure or if the file grows too large
	 * @param $key The key; it is converted to a string
	 * @param $value The value to store
	 * @return
	 */
	public function set( $key, $value ) {
		// Cast once so that the length, the bytes written and the hash all
		// describe the same string. CdbReader_PHP::get() looks keys up by
		// strval(), so an integer key must be hashed as a string here too.
		$key = strval( $key );
		if ( $key === '' ) {
			// DBA cross-check hack
			return;
		}
		$this->addbegin( strlen( $key ), strlen( $value ) );
		$this->write( $key );
		$this->write( $value );
		$this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) );
	}

	/**
	 * Write the hashtables, close the temporary file and move it to the
	 * real file name.
	 *
	 * @throws MWException
	 */
	public function close() {
		$this->finish();
		if( isset($this->handle) ) {
			fclose( $this->handle );
		}
		// Forget the handle before moving the file: if the move fails, the
		// destructor must not call close() again on an already closed handle.
		unset( $this->handle );
		// NOTE(review): presumably rename() cannot replace an existing file
		// on Windows, hence the explicit unlink -- confirm.
		if ( wfIsWindows() && file_exists($this->realFileName) ) {
			unlink( $this->realFileName );
		}
		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
			throw new MWException( 'Unable to move the new CDB file into place.' );
		}
	}

	/**
	 * Write a string at the current file position.
	 *
	 * @throws MWException if not all bytes were written
	 * @param $buf The bytes to write
	 */
	protected function write( $buf ) {
		$len = fwrite( $this->handle, $buf );
		if ( $len !== strlen( $buf ) ) {
			throw new MWException( 'Error writing to CDB file.' );
		}
	}

	/**
	 * Advance the tracked write position by $len.
	 *
	 * @throws MWException if the new position does not fit in 31 bits
	 * @param $len Number of bytes to advance by
	 */
	protected function posplus( $len ) {
		$newpos = $this->pos + $len;
		if ( $newpos > 0x7fffffff ) {
			throw new MWException( 'A value in the CDB file is too large' );
		}
		$this->pos = $newpos;
	}

	/**
	 * Register a record that has just been written: remember its hash and
	 * start position, then advance the tracked position past it.
	 *
	 * @throws MWException if the file grows too large
	 * @param $keylen Length of the key in bytes
	 * @param $datalen Length of the value in bytes
	 * @param $h Hash of the key
	 */
	protected function addend( $keylen, $datalen, $h ) {
		// $this->pos has not been advanced for this record yet, so it is
		// still the position of the record's header.
		$this->hplist[] = array(
			'h' => $h,
			'p' => $this->pos
		);

		$this->numentries++;
		$this->posplus( 8 );
		$this->posplus( $keylen );
		$this->posplus( $datalen );
	}

	/**
	 * Write the 8-byte record header holding the key and data lengths.
	 *
	 * @throws MWException if a length does not fit in 31 bits, or on
	 *   write failure
	 * @param $keylen Length of the key in bytes
	 * @param $datalen Length of the value in bytes
	 */
	protected function addbegin( $keylen, $datalen ) {
		if ( $keylen > 0x7fffffff ) {
			throw new MWException( __METHOD__.': key length too long' );
		}
		if ( $datalen > 0x7fffffff ) {
			throw new MWException( __METHOD__.': data length too long' );
		}
		$buf = pack( 'VV', $keylen, $datalen );
		$this->write( $buf );
	}

	/**
	 * Write the 256 hashtables after the records, then write the pointer
	 * array describing them at the start of the file.
	 *
	 * @throws MWException on write failure or if the file grows too large
	 */
	protected function finish() {
		// Hack for DBA cross-check
		$this->hplist = array_reverse( $this->hplist );

		// Calculate the number of items that will be in each hashtable
		$counts = array_fill( 0, 256, 0 );
		foreach ( $this->hplist as $item ) {
			++ $counts[ 255 & $item['h'] ];
		}

		// Fill in $starts with the *end* indexes
		$starts = array();
		$pos = 0;
		for ( $i = 0; $i < 256; ++$i ) {
			$pos += $counts[$i];
			$starts[$i] = $pos;
		}

		// Excessively clever and indulgent code to simultaneously fill $packedTables
		// with the packed hashtables, and adjust the elements of $starts
		// to actually point to the starts instead of the ends.
		$packedTables = array_fill( 0, $this->numentries, false );
		foreach ( $this->hplist as $item ) {
			$packedTables[--$starts[255 & $item['h']]] = $item;
		}

		$final = '';
		for ( $i = 0; $i < 256; ++$i ) {
			$count = $counts[$i];

			// The size of the hashtable will be double the item count.
			// The rest of the slots will be empty.
			$len = $count + $count;
			// Pointer array entry: where this table starts, and its slot count
			$final .= pack( 'VV', $this->pos, $len );

			$hashtable = array();
			for ( $u = 0; $u < $len; ++$u ) {
				$hashtable[$u] = array( 'h' => 0, 'p' => 0 );
			}

			// Fill the hashtable, using the next empty slot if the hashed slot
			// is taken.
			for ( $u = 0; $u < $count; ++$u ) {
				$hp = $packedTables[$starts[$i] + $u];
				$where = CdbFunctions::unsignedMod(
					CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len );
				while ( $hashtable[$where]['p'] ) {
					if ( ++$where == $len ) {
						$where = 0;
					}
				}
				$hashtable[$where] = $hp;
			}

			// Write the hashtable
			for ( $u = 0; $u < $len; ++$u ) {
				// The hash is packed as two 16-bit halves, low half first
				$buf = pack( 'vvV',
					$hashtable[$u]['h'] & 0xffff,
					CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ),
					$hashtable[$u]['p'] );
				$this->write( $buf );
				$this->posplus( 8 );
			}
		}

		// Write the pointer array at the start of the file
		rewind( $this->handle );
		if ( ftell( $this->handle ) != 0 ) {
			throw new MWException( __METHOD__.': Error rewinding to start of file' );
		}
		$this->write( $final );
	}
}