From d93ea4874e95243e097636579a7be8b5047a0f5f Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sat, 20 Jun 2009 15:59:56 +0000 Subject: [PATCH] Added PHP port of CDB, with abstraction layer. Tested for correctness with a differential fuzz tester, not yet benchmarked. The idea is to open up new applications for CDB, and benefit both shell and shared hosting users. Ported existing uses of CDB to the new abstraction layer. --- RELEASE-NOTES | 3 + includes/AutoLoader.php | 7 + includes/Cdb.php | 147 +++++++++++++ includes/Cdb_PHP.php | 388 ++++++++++++++++++++++++++++++++++ includes/Interwiki.php | 10 +- maintenance/dumpInterwiki.inc | 2 +- maintenance/dumpInterwiki.php | 2 +- 7 files changed, 552 insertions(+), 7 deletions(-) create mode 100644 includes/Cdb.php create mode 100644 includes/Cdb_PHP.php diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 67badd74de..1ac000528e 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -89,6 +89,9 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * Add $wgRevokePermissions as a means of restricting a group's rights. The syntax is identical to $wgGroupPermissions, but users in these groups will have these rights stripped from them. +* Added a PHP port of CDB (constant database), for improved local caching when + the DBA extension is not available. 
+ === Bug fixes in 1.16 === diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index ce86237d3f..482c34bace 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -27,6 +27,13 @@ $wgAutoloadLocalClasses = array( 'Categoryfinder' => 'includes/Categoryfinder.php', 'CategoryPage' => 'includes/CategoryPage.php', 'CategoryViewer' => 'includes/CategoryPage.php', + 'CdbFunctions' => 'includes/Cdb_PHP.php', + 'CdbReader' => 'includes/Cdb.php', + 'CdbReader_DBA' => 'includes/Cdb.php', + 'CdbReader_PHP' => 'includes/Cdb_PHP.php', + 'CdbWriter' => 'includes/Cdb.php', + 'CdbWriter_DBA' => 'includes/Cdb.php', + 'CdbWriter_PHP' => 'includes/Cdb_PHP.php', 'ChangesList' => 'includes/ChangesList.php', 'ChangesFeed' => 'includes/ChangesFeed.php', 'ChangeTags' => 'includes/ChangeTags.php', diff --git a/includes/Cdb.php b/includes/Cdb.php new file mode 100644 index 0000000000..20cb7e3e6e --- /dev/null +++ b/includes/Cdb.php @@ -0,0 +1,147 @@ +handle = dba_open( $fileName, 'r-', 'cdb' ); + if ( !$this->handle ) { + throw new MWException( 'Unable to open DB file "' . $fileName . '"' ); + } + } + + function close() { + dba_close( $this->handle ); + unset( $this->handle ); + } + + function get( $key ) { + return dba_fetch( $key, $this->handle ); + } +} + + +/** + * Writer class which uses the DBA extension + */ +class CdbWriter_DBA { + var $handle, $realFileName, $tmpFileName; + + function __construct( $fileName ) { + $this->realFileName = $fileName; + $this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff ); + $this->handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' ); + if ( !$this->handle ) { + throw new MWException( 'Unable to open DB file for write "' . $fileName . 
'"' ); + } + } + + /** + * Insert a key/value pair with dba_insert() and return its result. + */ + function set( $key, $value ) { + return dba_insert( $key, $value, $this->handle ); + } + + /** + * Close the temporary DB file and move it over the real file name. + * Throws MWException if the rename fails. + */ + function close() { + dba_close( $this->handle ); + /* Forget the handle right away, so that __destruct() cannot run close() a second time if the rename below throws. */ + unset( $this->handle ); + /* Only remove the old file when there is one; unlink() on a missing file raises a warning. */ + if ( wfIsWindows() && file_exists( $this->realFileName ) ) { + unlink( $this->realFileName ); + } + if ( !rename( $this->tmpFileName, $this->realFileName ) ) { + throw new MWException( 'Unable to move the new CDB file into place.' ); + } + } + + function __destruct() { + if ( isset( $this->handle ) ) { + $this->close(); + } + } +} + diff --git a/includes/Cdb_PHP.php b/includes/Cdb_PHP.php new file mode 100644 index 0000000000..eb2d31762c --- /dev/null +++ b/includes/Cdb_PHP.php @@ -0,0 +1,388 @@ +>) are all implemented as a simple wrapper around the + * underlying C operator. The algorithm here uses a binary view of addition + * to simulate 32-bit addition using 31-bit registers. + */ + public static function sumWithOverflow( $a, $b ) { + $sum = $a + $b; + if ( is_float( $sum ) ) { + // Use the plus operator to do a sum of the lowest 30 bits to produce a 31-bit result + $lowA = $a & 0x3fffffff; + $lowB = $b & 0x3fffffff; + $sum = $lowA + $lowB; + + // Strip off the carry bit + $carry = ($sum & 0x40000000) >> 30; + $sum = $sum & 0x3fffffff; + + // Get the last two bits + $highA = self::unsignedShiftRight( $a, 30 ); + $highB = self::unsignedShiftRight( $b, 30 ); + + // Add with carry + $highSum = $carry + $highA + $highB; + + // Recombine + $sum = $sum | ( $highSum << 30 ); + } + return $sum; + } + + /** + * Take a modulo of a signed integer as if it were an unsigned integer. 
+ * $b must be less than 0x40000000 and greater than 0 + */ + public static function unsignedMod( $a, $b ) { + if ( $a < 0 ) { + $m = ( $a & 0x7fffffff ) % $b + 2 * ( 0x40000000 % $b ); + return $m % $b; + } else { + return $a % $b; + } + } + + /** + * Shift a signed integer right as if it were unsigned + */ + public static function unsignedShiftRight( $a, $b ) { + if ( $b == 0 ) { + return $a; + } + if ( $a < 0 ) { + /* Clear the sign bit, shift, then put the old sign bit back at its shifted position (bit 31 - $b). */ + return ( ( $a & 0x7fffffff ) >> $b ) | ( 0x40000000 >> ( $b - 1 ) ); + } else { + return $a >> $b; + } + } + + /** + * Hash a string: start at 5381 and, for each byte, set h = ( h + ( h << 5 ) ) ^ byte, + * keeping only 32 bits of the result. + * + * On 32-bit PHP the result may have the sign bit set (negative); on 64-bit PHP it is + * in the range 0..0xffffffff. That is the same form CdbReader_PHP::unpackSigned() + * produces for a stored hash on each platform, so the reader's strict comparison works on both. + * + * @param $s String to hash + * @return Integer + */ + public static function hash( $s ) { + $h = 5381; + $len = strlen( $s ); + if ( PHP_INT_SIZE > 4 ) { + /* Integers wider than 32 bits never wrap by themselves, so mask every step back down to 32 bits. */ + for ( $i = 0; $i < $len; $i++ ) { + $h = ( ( $h + ( $h << 5 ) ) & 0xffffffff ) ^ ord( $s[$i] ); + } + } else { + for ( $i = 0; $i < $len; $i++ ) { + $h = self::sumWithOverflow( $h, $h << 5 ) ^ ord( $s[$i] ); + } + } + return $h; + } +} + +/** + * CDB reader class + */ +class CdbReader_PHP extends CdbReader { + /** The file handle */ + var $handle; + + /* number of hash slots searched under this key */ + var $loop; + + /* initialized if loop is nonzero */ + var $khash; + + /* initialized if loop is nonzero */ + var $kpos; + + /* initialized if loop is nonzero */ + var $hpos; + + /* initialized if loop is nonzero */ + var $hslots; + + /* initialized if findNext() returns true */ + var $dpos; + + /* initialized if findNext() returns true */ + var $dlen; + + function __construct( $fileName ) { + $this->handle = fopen( $fileName, 'rb' ); + if ( !$this->handle ) { + throw new MWException( 'Unable to open DB file "' . $fileName . 
'"' ); + } + $this->findStart(); + } + + function close() { + fclose( $this->handle ); + unset( $this->handle ); + } + + /** + * Look up a key. + * @return The stored value as a string, or false if the key is not found + */ + public function get( $key ) { + // strval is required + if ( $this->find( strval( $key ) ) ) { + return $this->read( $this->dlen, $this->dpos ); + } else { + return false; + } + } + + /** + * Check whether the bytes at file offset $pos are equal to $key + */ + protected function match( $key, $pos ) { + $buf = $this->read( strlen( $key ), $pos ); + return $buf === $key; + } + + protected function findStart() { + $this->loop = 0; + } + + /** + * Read exactly $length bytes starting at file offset $pos. + * Throws MWException if the seek fails or fewer bytes than requested are read. + */ + protected function read( $length, $pos ) { + if ( fseek( $this->handle, $pos ) == -1 ) { + // This can easily happen if the internal pointers are incorrect + throw new MWException( __METHOD__.': seek failed, file may be corrupted.' ); + } + + if ( $length == 0 ) { + return ''; + } + + $buf = fread( $this->handle, $length ); + if ( $buf === false || strlen( $buf ) !== $length ) { + throw new MWException( __METHOD__.': read from cdb file failed, file may be corrupted' ); + } + return $buf; + } + + /** + * Unpack an unsigned integer and throw an exception if it needs more than 31 bits + */ + protected function unpack31( $s ) { + $data = unpack( 'V', $s ); + /* With 32-bit integers unpack() hands back values above 0x7fffffff as negative numbers, so test the sign as well as the upper bound. */ + if ( $data[1] < 0 || $data[1] > 0x7fffffff ) { + throw new MWException( __METHOD__.': error in CDB file, integer too big' ); + } + return $data[1]; + } + + /** + * Unpack a 32-bit signed integer + */ + protected function unpackSigned( $s ) { + $data = unpack( 'va/vb', $s ); + return $data['a'] | ( $data['b'] << 16 ); + } + + protected function findNext( $key ) { + if ( !$this->loop ) { + $u = CdbFunctions::hash( $key ); + $buf = $this->read( 8, ( $u << 3 ) & 2047 ); + $this->hslots = $this->unpack31( substr( $buf, 4 ) ); + if ( !$this->hslots ) { + return false; + } + $this->hpos = $this->unpack31( substr( $buf, 0, 4 ) ); + $this->khash = $u; + $u = CdbFunctions::unsignedShiftRight( $u, 8 ); + $u = CdbFunctions::unsignedMod( $u, $this->hslots ); + $u <<= 3; + $this->kpos = $this->hpos + $u; + } + + while ( $this->loop < $this->hslots ) { + $buf = 
$this->read( 8, $this->kpos ); + $pos = $this->unpack31( substr( $buf, 4 ) ); + if ( !$pos ) { + return false; + } + $this->loop += 1; + $this->kpos += 8; + if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) { + $this->kpos = $this->hpos; + } + $u = $this->unpackSigned( substr( $buf, 0, 4 ) ); + if ( $u === $this->khash ) { + $buf = $this->read( 8, $pos ); + $keyLen = $this->unpack31( substr( $buf, 0, 4 ) ); + if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) { + // Found + $this->dlen = $this->unpack31( substr( $buf, 4 ) ); + $this->dpos = $pos + 8 + $keyLen; + return true; + } + } + } + return false; + } + + /** + * Restart the search and look for the first record stored under $key. + * On success dpos and dlen describe the record's data. + * @return Boolean: true if a record was found + */ + protected function find( $key ) { + $this->findStart(); + return $this->findNext( $key ); + } +} + +/** + * CDB writer class + */ +class CdbWriter_PHP extends CdbWriter { + var $handle, $realFileName, $tmpFileName; + + /* list of array( 'h' => hash, 'p' => record offset ), one per record written */ + var $hplist; + /* spelled in lower case because that is the name every method of this class reads and writes */ + var $numentries, $pos; + + function __construct( $fileName ) { + $this->realFileName = $fileName; + $this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff ); + $this->handle = fopen( $this->tmpFileName, 'wb' ); + if ( !$this->handle ) { + throw new MWException( 'Unable to open DB file for write "' . $fileName . 
'"' ); + } + $this->hplist = array(); + $this->numentries = 0; + $this->pos = 2048; // leaving space for the pointer array, 256 * 8 + if ( fseek( $this->handle, $this->pos ) == -1 ) { + throw new MWException( __METHOD__.': fseek failed' ); + } + } + + function __destruct() { + if ( isset( $this->handle ) ) { + $this->close(); + } + } + + /** + * Append a record to the file. Keys that convert to the empty string are silently skipped. + */ + public function set( $key, $value ) { + /* Hash and write the string form of the key, the same form CdbReader_PHP::get() looks up; hashing a non-string key byte by byte would give a different hash. */ + $key = strval( $key ); + if ( $key === '' ) { + // DBA cross-check hack + return; + } + $this->addbegin( strlen( $key ), strlen( $value ) ); + $this->write( $key ); + $this->write( $value ); + $this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) ); + } + + /** + * Write the hash tables, close the temporary file and move it over the real file name. + * Throws MWException if the rename fails. + */ + public function close() { + $this->finish(); + fclose( $this->handle ); + /* Forget the handle right away, so that __destruct() cannot run close() a second time if the rename below throws. */ + unset( $this->handle ); + /* Only remove the old file when there is one; unlink() on a missing file raises a warning. */ + if ( wfIsWindows() && file_exists( $this->realFileName ) ) { + unlink( $this->realFileName ); + } + if ( !rename( $this->tmpFileName, $this->realFileName ) ) { + throw new MWException( 'Unable to move the new CDB file into place.' ); + } + } + + protected function write( $buf ) { + $len = fwrite( $this->handle, $buf ); + if ( $len !== strlen( $buf ) ) { + throw new MWException( 'Error writing to CDB file.' 
); + } + } + + /** + * Advance the tracked write offset by $len. + * Throws MWException if the new offset would exceed 0x7fffffff. + */ + protected function posplus( $len ) { + $newpos = $this->pos + $len; + if ( $newpos > 0x7fffffff ) { + throw new MWException( 'A value in the CDB file is too large' ); + } + $this->pos = $newpos; + } + + /** + * Remember the hash and start offset of the record that was just written, + * then advance the tracked offset past its 8-byte header, key and data. + */ + protected function addend( $keylen, $datalen, $h ) { + $this->hplist[] = array( + 'h' => $h, + 'p' => $this->pos + ); + + $this->numentries++; + $this->posplus( 8 ); + $this->posplus( $keylen ); + $this->posplus( $datalen ); + } + + /** + * Write the 8-byte record header: key length and data length, each packed with 'V'. + * Throws MWException if either length exceeds 0x7fffffff. + */ + protected function addbegin( $keylen, $datalen ) { + if ( $keylen > 0x7fffffff ) { + throw new MWException( __METHOD__.': key length too long' ); + } + if ( $datalen > 0x7fffffff ) { + throw new MWException( __METHOD__.': data length too long' ); + } + $buf = pack( 'VV', $keylen, $datalen ); + $this->write( $buf ); + } + + /** + * Build the 256 hash tables from the collected hash/offset list, append them + * to the file, then write the pointer array at the start of the file. + */ + protected function finish() { + // Hack for DBA cross-check + $this->hplist = array_reverse( $this->hplist ); + + // Calculate the number of items that will be in each hashtable + $counts = array_fill( 0, 256, 0 ); + foreach ( $this->hplist as $item ) { + ++ $counts[ 255 & $item['h'] ]; + } + + // Fill in $starts with the *end* indexes + $starts = array(); + $pos = 0; + for ( $i = 0; $i < 256; ++$i ) { + $pos += $counts[$i]; + $starts[$i] = $pos; + } + + // Excessively clever and indulgent code to simultaneously fill $packedTables + // with the packed hashtables, and adjust the elements of $starts + // to actually point to the starts instead of the ends. + /* array_fill() rejects a count of zero before PHP 5.6, so an empty database starts from an empty array instead. */ + $packedTables = $this->numentries ? array_fill( 0, $this->numentries, false ) : array(); + foreach ( $this->hplist as $item ) { + $packedTables[--$starts[255 & $item['h']]] = $item; + } + + $final = ''; + for ( $i = 0; $i < 256; ++$i ) { + $count = $counts[$i]; + + // The size of the hashtable will be double the item count. + // The rest of the slots will be empty. 
+ $len = $count + $count; + /* Pointer-array entry for table $i: the file offset where the table starts, then its slot count. */ + $final .= pack( 'VV', $this->pos, $len ); + + /* Start with every slot empty; a zero 'p' marks an unused slot. */ + $hashtable = array(); + for ( $u = 0; $u < $len; ++$u ) { + $hashtable[$u] = array( 'h' => 0, 'p' => 0 ); + } + + // Fill the hashtable, using the next empty slot if the hashed slot + // is taken. + for ( $u = 0; $u < $count; ++$u ) { + $hp = $packedTables[$starts[$i] + $u]; + $where = CdbFunctions::unsignedMod( + CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len ); + while ( $hashtable[$where]['p'] ) + if ( ++$where == $len ) + $where = 0; + $hashtable[$where] = $hp; + } + + // Write the hashtable + /* Each slot is 8 bytes: the hash as two 16-bit halves (low half first), then the record offset packed with 'V'. */ + for ( $u = 0; $u < $len; ++$u ) { + $buf = pack( 'vvV', + $hashtable[$u]['h'] & 0xffff, + CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ), + $hashtable[$u]['p'] ); + $this->write( $buf ); + $this->posplus( 8 ); + } + } + + // Write the pointer array at the start of the file + rewind( $this->handle ); + if ( ftell( $this->handle ) != 0 ) { + throw new MWException( __METHOD__.': Error rewinding to start of file' ); + } + $this->write( $final ); + } +} diff --git a/includes/Interwiki.php b/includes/Interwiki.php index f42f7e3af4..5ca926297a 100644 --- a/includes/Interwiki.php +++ b/includes/Interwiki.php @@ -104,24 +104,24 @@ class Interwiki { wfDebug( __METHOD__ . "( $prefix )\n" ); if( !$db ) { - $db = dba_open( $wgInterwikiCache, 'r', 'cdb' ); + $db = CdbReader::open( $wgInterwikiCache ); } /* Resolve site name */ if( $wgInterwikiScopes>=3 && !$site ) { - $site = dba_fetch( '__sites:' . wfWikiID(), $db ); + $site = $db->get( '__sites:' . 
wfWikiID() ); if ( $site == '' ) { $site = $wgInterwikiFallbackSite; } } - $value = dba_fetch( wfMemcKey( $prefix ), $db ); + $value = $db->get( wfMemcKey( $prefix ) ); // Site level if ( $value == '' && $wgInterwikiScopes >= 3 ) { - $value = dba_fetch( "_{$site}:{$prefix}", $db ); + $value = $db->get( "_{$site}:{$prefix}" ); } // Global Level if ( $value == '' && $wgInterwikiScopes >= 2 ) { - $value = dba_fetch( "__global:{$prefix}", $db ); + $value = $db->get( "__global:{$prefix}" ); } if ( $value == 'undef' ) $value = ''; diff --git a/maintenance/dumpInterwiki.inc b/maintenance/dumpInterwiki.inc index 481e21cce3..031d0444ef 100644 --- a/maintenance/dumpInterwiki.inc +++ b/maintenance/dumpInterwiki.inc @@ -201,7 +201,7 @@ function makeLink( $entry, $source ) { array_key_exists($entry['iw_prefix'],$prefixRewrites[$source])) $entry['iw_prefix'] = $prefixRewrites[$source][$entry['iw_prefix']]; if ($dbFile) - dba_insert("{$source}:{$entry['iw_prefix']}", trim("{$entry['iw_local']} {$entry['iw_url']}"),$dbFile); + $dbFile->set( "{$source}:{$entry['iw_prefix']}", trim("{$entry['iw_local']} {$entry['iw_url']}") ); else print "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n"; diff --git a/maintenance/dumpInterwiki.php b/maintenance/dumpInterwiki.php index f5f22f3cfe..16bf4db004 100644 --- a/maintenance/dumpInterwiki.php +++ b/maintenance/dumpInterwiki.php @@ -19,7 +19,7 @@ chdir( $oldCwd ); # Output if ( isset( $options['o'] ) ) { # To database specified with -o - $dbFile = dba_open( $options['o'], "n", "cdb_make" ); + $dbFile = CdbWriter::open( $options['o'] ); } getRebuildInterwikiDump(); -- 2.20.1