From 2946cdcaaa8402713f70a169a2fbad340a12430a Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 3 Oct 2008 00:00:24 +0000 Subject: [PATCH] Backing r41531 out for now ("Concept for diff-based compression using the new xdiff beta") Looks cool, but this changes the return type of ConcatenatedGzipHistoryBlob::addItem() from a stub object to a hash value, which will break its usages in ConcatenatedGzipHistoryBlob::setText() and maintenance/storage/compressOld.inc. --- includes/AutoLoader.php | 1 - includes/HistoryBlob.php | 220 +++++++----------------- maintenance/storage/testCompression.php | 70 -------- 3 files changed, 61 insertions(+), 230 deletions(-) delete mode 100644 maintenance/storage/testCompression.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 77baf6089c..c1f9a2ef2f 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -34,7 +34,6 @@ $wgAutoloadLocalClasses = array( 'Credits' => 'includes/Credits.php', 'DBABagOStuff' => 'includes/BagOStuff.php', 'DependencyWrapper' => 'includes/CacheDependency.php', - 'DiffHistoryBlob' => 'includes/HistoryBlob.php', 'DjVuImage' => 'includes/DjVuImage.php', 'DoubleReplacer' => 'includes/StringUtils.php', 'DoubleRedirectJob' => 'includes/DoubleRedirectJob.php', diff --git a/includes/HistoryBlob.php b/includes/HistoryBlob.php index 2281e7b5bb..3772926dfd 100644 --- a/includes/HistoryBlob.php +++ b/includes/HistoryBlob.php @@ -1,33 +1,41 @@ uncompress(); + $this->mItems['meta'] = $metaData; + } + + /** @todo document */ + public function getMeta() { + $this->uncompress(); + return $this->mItems['meta']; + } + + /** @todo document */ public function addItem( $text ) { $this->uncompress(); $hash = md5( $text ); $this->mItems[$hash] = $text; $this->mSize += strlen( $text ); - return $hash; + $stub = new HistoryBlobStub( $hash ); + return $stub; } + /** @todo document */ public function getItem( $hash ) { $this->uncompress(); if ( array_key_exists( $hash, $this->mItems ) ) { @@ -70,28 +97,29 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob } } + /** @todo document */ public function setText( $text ) { $this->uncompress(); $stub = $this->addItem( $text ); $this->mDefaultHash = $stub->mHash; } + /** @todo document */ public function getText() { $this->uncompress(); return $this->getItem( $this->mDefaultHash ); } - /** - * Remove an item - */ + # HistoryBlob implemented. + + + /** @todo document */ public function removeItem( $hash ) { $this->mSize -= strlen( $this->mItems[$hash] ); unset( $this->mItems[$hash] ); } - /** - * Compress the bulk data in the object - */ + /** @todo document */ public function compress() { if ( !$this->mCompressed ) { $this->mItems = gzdeflate( serialize( $this->mItems ) ); @@ -99,9 +127,7 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob } } - /** - * Uncompress bulk data - */ + /** @todo document */ public function uncompress() { if ( $this->mCompressed ) { $this->mItems = unserialize( gzinflate( $this->mItems ) ); @@ -110,18 +136,19 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob } + /** @todo document */ function __sleep() { $this->compress(); return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' ); } + /** @todo document */ function __wakeup() { $this->uncompress(); } /** - * Helper function for compression jobs - * Returns true until the object is "full" and ready to be committed + * Determines if this object is happy */ public function isHappy( $maxFactor, $factorThreshold ) { if ( count( $this->mItems ) == 0 ) { @@ -157,15 +184,12 @@ $wgBlobCache = array(); /** - * Pointer object for an item within a CGZ blob stored in the text table. + * @todo document (needs one-sentence top-level class description + some function descriptions). */ class HistoryBlobStub { var $mOldId, $mHash, $mRef; - /** - * @param string $hash The content hash of the text - * @param integer $oldid The old_id for the CGZ object - */ + /** @todo document */ function HistoryBlobStub( $hash = '', $oldid = 0 ) { $this->mHash = $hash; } @@ -192,6 +216,7 @@ class HistoryBlobStub { return $this->mRef; } + /** @todo document */ function getText() { $fname = 'HistoryBlobStub::getText'; global $wgBlobCache; @@ -239,9 +264,7 @@ class HistoryBlobStub { return $obj->getItem( $this->mHash ); } - /** - * Get the content hash - */ + /** @todo document */ function getHash() { return $this->mHash; } @@ -259,9 +282,7 @@ class HistoryBlobStub { class HistoryBlobCurStub { var $mCurId; - /** - * @param integer $curid The cur_id pointed to - */ + /** @todo document */ function HistoryBlobCurStub( $curid = 0 ) { $this->mCurId = $curid; } @@ -274,6 +295,7 @@ class HistoryBlobCurStub { $this->mCurId = $id; } + /** @todo document */ function getText() { $dbr = wfGetDB( DB_SLAVE ); $row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) ); @@ -283,123 +305,3 @@ class HistoryBlobCurStub { return $row->cur_text; } } - -/** - * Diff-based history compression - * Requires xdiff 1.5+ and zlib - */ -class DiffHistoryBlob implements HistoryBlob { - /** Uncompressed item cache */ - var $mItems = array(); - - /** - * Array of diffs, where $this->mDiffs[0] is the diff between - * $this->mDiffs[0] and $this->mDiffs[1] - */ - var $mDiffs = array(); - - /** - * The key for getText() - */ - var $mDefaultKey; - - /** - * Compressed storage - */ - var $mCompressed; - - /** - * True if the object is locked against further writes - */ - var $mFrozen = false; - - - function __construct() { - if ( !function_exists( 'xdiff_string_bdiff' ) ){ - throw new MWException( "Need xdiff 1.5+ support to read or write DiffHistoryBlob\n" ); - } - if ( !function_exists( 'gzdeflate' ) ) { - throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" ); - } - } - - function addItem( $text ) { - if ( $this->mFrozen ) { - throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" ); - } - - $this->mItems[] = $text; - $i = count( $this->mItems ) - 1; - if ( $i > 0 ) { - # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff - # "String is not zero-terminated" - wfSuppressWarnings(); - $this->mDiffs[] = xdiff_string_bdiff( $this->mItems[$i-1], $text ) . ''; - wfRestoreWarnings(); - } - return $i; - } - - function getItem( $key ) { - if ( $key > count( $this->mDiffs ) + 1 ) { - return false; - } - $key = intval( $key ); - if ( $key == 0 ) { - return $this->mItems[0]; - } - - $last = count( $this->mItems ) - 1; - for ( $i = $last + 1; $i <= $key; $i++ ) { - # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff - # "String is not zero-terminated" - wfSuppressWarnings(); - $this->mItems[$i] = xdiff_string_bpatch( $this->mItems[$i - 1], $this->mDiffs[$i - 1] ) . ''; - wfRestoreWarnings(); - } - return $this->mItems[$key]; - } - - function setText( $text ) { - $this->mDefaultKey = $this->addItem( $text ); - } - - function getText() { - return $this->getItem( $this->mDefaultKey ); - } - - function __sleep() { - if ( !isset( $this->mItems[0] ) ) { - // Empty object - $info = false; - } else { - $info = array( - 'base' => $this->mItems[0], - 'diffs' => $this->mDiffs - ); - } - if ( isset( $this->mDefaultKey ) ) { - $info['default'] = $this->mDefaultKey; - } - $this->mCompressed = gzdeflate( serialize( $info ) ); - return array( 'mCompressed' ); - } - - function __wakeup() { - // addItem() doesn't work if mItems is partially filled from mDiffs - $this->mFrozen = true; - $info = unserialize( gzinflate( $this->mCompressed ) ); - unset( $this->mCompressed ); - - if ( !$info ) { - // Empty object - return; - } - - if ( isset( $info['default'] ) ) { - $this->mDefaultKey = $info['default']; - } - $this->mItems[0] = $info['base']; - $this->mDiffs = $info['diffs']; - } -} diff --git a/maintenance/storage/testCompression.php b/maintenance/storage/testCompression.php deleted file mode 100644 index eaf7e35fa7..0000000000 --- a/maintenance/storage/testCompression.php +++ /dev/null @@ -1,70 +0,0 @@ -] [--start=] [--limit=] \n"; - exit( 1 ); -} - -$title = Title::newFromText( $args[0] ); -if ( isset( $options['start'] ) ) { - $start = wfTimestamp( TS_MW, strtotime( $options['start'] ) ); - echo "Starting from " . $wgLang->timeanddate( $start ) . "\n"; -} else { - $start = '19700101000000'; -} -$limit = isset( $options['limit'] ) ? $options['limit'] : 10; -$type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob'; - - -$dbr = wfGetDB( DB_SLAVE ); -$res = $dbr->select( - array( 'page', 'revision', 'text' ), - '*', - array( - 'page_namespace' => $title->getNamespace(), - 'page_title' => $title->getDBkey(), - 'page_id=rev_page', - 'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ), - 'rev_text_id=old_id' - ), __FILE__, array( 'LIMIT' => $limit ) -); - -$blob = new $type; -$hashes = array(); -$keys = array(); -$uncompressedSize = 0; -$t = -microtime( true ); -foreach ( $res as $row ) { - $revision = new Revision( $row ); - $text = $revision->getText(); - $uncompressedSize += strlen( $text ); - $hashes[$row->rev_id] = md5( $text ); - $keys[$row->rev_id] = $blob->addItem( $text ); -} - -$serialized = serialize( $blob ); -$t += microtime( true ); - -printf( "Compression ratio for %d revisions: %5.2f, %s -> %s\n", - $res->numRows(), - $uncompressedSize / strlen( $serialized ), - $wgLang->formatSize( $uncompressedSize ), - $wgLang->formatSize( strlen( $serialized ) ) -); -printf( "Compression time: %5.2f ms\n", $t * 1000 ); - -$t = -microtime( true ); -$blob = unserialize( $serialized ); -foreach ( $keys as $id => $key ) { - $text = $blob->getItem( $key ); - if ( md5( $text ) != $hashes[$id] ) { - echo "Content hash mismatch for rev_id $id\n"; - #var_dump( $text ); - } -} -$t += microtime( true ); -printf( "Decompression time: %5.2f ms\n", $t * 1000 ); - -- 2.20.1