From 7ede3c619a96a409af12a9cc4dd542e252813917 Mon Sep 17 00:00:00 2001 From: Umherirrender Date: Mon, 25 Mar 2019 20:29:47 +0100 Subject: [PATCH] Move HistoryBlob classes to own files Move all into own subfolder Change-Id: Iccf1bb9a2c8927c1b4dc0952d59d745109a71f76 --- .phpcs.xml | 1 - autoload.php | 16 +- .../ConcatenatedGzipHistoryBlob.php | 146 ++++++++ .../DiffHistoryBlob.php} | 334 ------------------ includes/historyblob/HistoryBlob.php | 67 ++++ includes/historyblob/HistoryBlobCurStub.php | 73 ++++ includes/historyblob/HistoryBlobStub.php | 150 ++++++++ 7 files changed, 444 insertions(+), 343 deletions(-) create mode 100644 includes/historyblob/ConcatenatedGzipHistoryBlob.php rename includes/{HistoryBlob.php => historyblob/DiffHistoryBlob.php} (54%) create mode 100644 includes/historyblob/HistoryBlob.php create mode 100644 includes/historyblob/HistoryBlobCurStub.php create mode 100644 includes/historyblob/HistoryBlobStub.php diff --git a/.phpcs.xml b/.phpcs.xml index d1e54a706c..ce0eac42a3 100644 --- a/.phpcs.xml +++ b/.phpcs.xml @@ -215,7 +215,6 @@ */includes/Feed\.php */includes/filerepo/file/LocalFile\.php */includes/gallery/PackedOverlayImageGallery\.php - */includes/HistoryBlob\.php */includes/htmlform/HTMLFormElement\.php */includes/libs/filebackend/FileBackendStore\.php */includes/libs/filebackend/FSFileBackend\.php diff --git a/autoload.php b/autoload.php index 94a99faaa7..5f031d03ad 100644 --- a/autoload.php +++ b/autoload.php @@ -298,7 +298,7 @@ $wgAutoloadLocalClasses = [ 'ComposerVendorHtaccessCreator' => __DIR__ . '/includes/composer/ComposerVendorHtaccessCreator.php', 'ComposerVersionNormalizer' => __DIR__ . '/includes/composer/ComposerVersionNormalizer.php', 'CompressOld' => __DIR__ . '/maintenance/storage/compressOld.php', - 'ConcatenatedGzipHistoryBlob' => __DIR__ . '/includes/HistoryBlob.php', + 'ConcatenatedGzipHistoryBlob' => __DIR__ . '/includes/historyblob/ConcatenatedGzipHistoryBlob.php', 'Config' => __DIR__ . '/includes/config/Config.php', 'ConfigException' => __DIR__ . '/includes/config/ConfigException.php', 'ConfigFactory' => __DIR__ . '/includes/config/ConfigFactory.php', @@ -398,7 +398,7 @@ $wgAutoloadLocalClasses = [ 'Diff' => __DIR__ . '/includes/diff/DairikiDiff.php', 'DiffEngine' => __DIR__ . '/includes/diff/DiffEngine.php', 'DiffFormatter' => __DIR__ . '/includes/diff/DiffFormatter.php', - 'DiffHistoryBlob' => __DIR__ . '/includes/HistoryBlob.php', + 'DiffHistoryBlob' => __DIR__ . '/includes/historyblob/DiffHistoryBlob.php', 'DiffOp' => __DIR__ . '/includes/diff/DairikiDiff.php', 'DiffOpAdd' => __DIR__ . '/includes/diff/DairikiDiff.php', 'DiffOpChange' => __DIR__ . '/includes/diff/DairikiDiff.php', @@ -628,9 +628,9 @@ $wgAutoloadLocalClasses = [ 'HashSiteStore' => __DIR__ . '/includes/site/HashSiteStore.php', 'HashtableReplacer' => __DIR__ . '/includes/libs/replacers/HashtableReplacer.php', 'HistoryAction' => __DIR__ . '/includes/actions/HistoryAction.php', - 'HistoryBlob' => __DIR__ . '/includes/HistoryBlob.php', - 'HistoryBlobCurStub' => __DIR__ . '/includes/HistoryBlob.php', - 'HistoryBlobStub' => __DIR__ . '/includes/HistoryBlob.php', + 'HistoryBlob' => __DIR__ . '/includes/historyblob/HistoryBlob.php', + 'HistoryBlobCurStub' => __DIR__ . '/includes/historyblob/HistoryBlobCurStub.php', + 'HistoryBlobStub' => __DIR__ . '/includes/historyblob/HistoryBlobStub.php', 'HistoryPager' => __DIR__ . '/includes/actions/pagers/HistoryPager.php', 'Hooks' => __DIR__ . '/includes/Hooks.php', 'Html' => __DIR__ . '/includes/Html.php', @@ -1707,8 +1707,8 @@ $wgAutoloadLocalClasses = [ 'ZhConverter' => __DIR__ . '/languages/classes/LanguageZh.php', 'ZipDirectoryReader' => __DIR__ . '/includes/utils/ZipDirectoryReader.php', 'ZipDirectoryReaderError' => __DIR__ . '/includes/utils/ZipDirectoryReaderError.php', - 'concatenatedgziphistoryblob' => __DIR__ . '/includes/HistoryBlob.php', - 'historyblobcurstub' => __DIR__ . '/includes/HistoryBlob.php', - 'historyblobstub' => __DIR__ . '/includes/HistoryBlob.php', + 'concatenatedgziphistoryblob' => __DIR__ . '/includes/historyblob/ConcatenatedGzipHistoryBlob.php', + 'historyblobcurstub' => __DIR__ . '/includes/historyblob/HistoryBlobCurStub.php', + 'historyblobstub' => __DIR__ . '/includes/historyblob/HistoryBlobStub.php', 'profile_point' => __DIR__ . '/profileinfo.php', ]; diff --git a/includes/historyblob/ConcatenatedGzipHistoryBlob.php b/includes/historyblob/ConcatenatedGzipHistoryBlob.php new file mode 100644 index 0000000000..f6ca2f5a36 --- /dev/null +++ b/includes/historyblob/ConcatenatedGzipHistoryBlob.php @@ -0,0 +1,146 @@ +uncompress(); + $hash = md5( $text ); + if ( !isset( $this->mItems[$hash] ) ) { + $this->mItems[$hash] = $text; + $this->mSize += strlen( $text ); + } + return $hash; + } + + /** + * @param string $hash + * @return array|bool + */ + public function getItem( $hash ) { + $this->uncompress(); + if ( array_key_exists( $hash, $this->mItems ) ) { + return $this->mItems[$hash]; + } else { + return false; + } + } + + /** + * @param string $text + * @return void + */ + public function setText( $text ) { + $this->uncompress(); + $this->mDefaultHash = $this->addItem( $text ); + } + + /** + * @return array|bool + */ + public function getText() { + $this->uncompress(); + return $this->getItem( $this->mDefaultHash ); + } + + /** + * Remove an item + * + * @param string $hash + */ + public function removeItem( $hash ) { + $this->mSize -= strlen( $this->mItems[$hash] ); + unset( $this->mItems[$hash] ); + } + + /** + * Compress the bulk data in the object + */ + public function compress() { + if ( !$this->mCompressed ) { + $this->mItems = gzdeflate( serialize( $this->mItems ) ); + $this->mCompressed = true; + } + } + + /** + * Uncompress bulk data + */ + public function uncompress() { + if ( $this->mCompressed ) { + $this->mItems = unserialize( gzinflate( $this->mItems ) ); + $this->mCompressed = false; + } + } + + /** + * @return array + */ + function __sleep() { + $this->compress(); + return [ 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' ]; + } + + function __wakeup() { + $this->uncompress(); + } + + /** + * Helper function for compression jobs + * Returns true until the object is "full" and ready to be committed + * + * @return bool + */ + public function isHappy() { + return $this->mSize < $this->mMaxSize + && count( $this->mItems ) < $this->mMaxCount; + } +} + +// phpcs:ignore Generic.CodeAnalysis.UnconditionalIfStatement.Found +if ( false ) { + // Blobs generated by MediaWiki < 1.5 on PHP 4 were serialized with the + // class name coerced to lowercase. We can improve efficiency by adding + // autoload entries for the lowercase variants of these classes (T166759). + // The code below is never executed, but it is picked up by the AutoloadGenerator + // parser, which scans for class_alias() calls. + class_alias( ConcatenatedGzipHistoryBlob::class, 'concatenatedgziphistoryblob' ); +} diff --git a/includes/HistoryBlob.php b/includes/historyblob/DiffHistoryBlob.php similarity index 54% rename from includes/HistoryBlob.php rename to includes/historyblob/DiffHistoryBlob.php index bca6c7e5bc..8d92fe5312 100644 --- a/includes/HistoryBlob.php +++ b/includes/historyblob/DiffHistoryBlob.php @@ -20,328 +20,6 @@ * @file */ -/** - * Base class for general text storage via the "object" flag in old_flags, or - * two-part external storage URLs. Used for represent efficient concatenated - * storage, and migration-related pointer objects. - */ -interface HistoryBlob { - /** - * Adds an item of text, returns a stub object which points to the item. - * You must call setLocation() on the stub object before storing it to the - * database - * - * @param string $text - * - * @return string The key for getItem() - */ - function addItem( $text ); - - /** - * Get item by key, or false if the key is not present - * - * @param string $key - * - * @return string|bool - */ - function getItem( $key ); - - /** - * Set the "default text" - * This concept is an odd property of the current DB schema, whereby each text item has a revision - * associated with it. The default text is the text of the associated revision. There may, however, - * be other revisions in the same object. - * - * Default text is not required for two-part external storage URLs. - * - * @param string $text - */ - function setText( $text ); - - /** - * Get default text. This is called from Revision::getRevisionText() - * - * @return string - */ - function getText(); -} - -/** - * Concatenated gzip (CGZ) storage - * Improves compression ratio by concatenating like objects before gzipping - */ -class ConcatenatedGzipHistoryBlob implements HistoryBlob { - public $mVersion = 0, $mCompressed = false, $mItems = [], $mDefaultHash = ''; - public $mSize = 0; - public $mMaxSize = 10000000; - public $mMaxCount = 100; - - public function __construct() { - if ( !function_exists( 'gzdeflate' ) ) { - throw new MWException( "Need zlib support to read or write this " - . "kind of history object (ConcatenatedGzipHistoryBlob)\n" ); - } - } - - /** - * @param string $text - * @return string - */ - public function addItem( $text ) { - $this->uncompress(); - $hash = md5( $text ); - if ( !isset( $this->mItems[$hash] ) ) { - $this->mItems[$hash] = $text; - $this->mSize += strlen( $text ); - } - return $hash; - } - - /** - * @param string $hash - * @return array|bool - */ - public function getItem( $hash ) { - $this->uncompress(); - if ( array_key_exists( $hash, $this->mItems ) ) { - return $this->mItems[$hash]; - } else { - return false; - } - } - - /** - * @param string $text - * @return void - */ - public function setText( $text ) { - $this->uncompress(); - $this->mDefaultHash = $this->addItem( $text ); - } - - /** - * @return array|bool - */ - public function getText() { - $this->uncompress(); - return $this->getItem( $this->mDefaultHash ); - } - - /** - * Remove an item - * - * @param string $hash - */ - public function removeItem( $hash ) { - $this->mSize -= strlen( $this->mItems[$hash] ); - unset( $this->mItems[$hash] ); - } - - /** - * Compress the bulk data in the object - */ - public function compress() { - if ( !$this->mCompressed ) { - $this->mItems = gzdeflate( serialize( $this->mItems ) ); - $this->mCompressed = true; - } - } - - /** - * Uncompress bulk data - */ - public function uncompress() { - if ( $this->mCompressed ) { - $this->mItems = unserialize( gzinflate( $this->mItems ) ); - $this->mCompressed = false; - } - } - - /** - * @return array - */ - function __sleep() { - $this->compress(); - return [ 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' ]; - } - - function __wakeup() { - $this->uncompress(); - } - - /** - * Helper function for compression jobs - * Returns true until the object is "full" and ready to be committed - * - * @return bool - */ - public function isHappy() { - return $this->mSize < $this->mMaxSize - && count( $this->mItems ) < $this->mMaxCount; - } -} - -/** - * Pointer object for an item within a CGZ blob stored in the text table. - */ -class HistoryBlobStub { - /** - * @var array One-step cache variable to hold base blobs; operations that - * pull multiple revisions may often pull multiple times from the same - * blob. By keeping the last-used one open, we avoid redundant - * unserialization and decompression overhead. - */ - protected static $blobCache = []; - - /** @var int */ - public $mOldId; - - /** @var string */ - public $mHash; - - /** @var string */ - public $mRef; - - /** - * @param string $hash The content hash of the text - * @param int $oldid The old_id for the CGZ object - */ - function __construct( $hash = '', $oldid = 0 ) { - $this->mHash = $hash; - } - - /** - * Sets the location (old_id) of the main object to which this object - * points - * @param int $id - */ - function setLocation( $id ) { - $this->mOldId = $id; - } - - /** - * Sets the location (old_id) of the referring object - * @param string $id - */ - function setReferrer( $id ) { - $this->mRef = $id; - } - - /** - * Gets the location of the referring object - * @return string - */ - function getReferrer() { - return $this->mRef; - } - - /** - * @return string|false - */ - function getText() { - if ( isset( self::$blobCache[$this->mOldId] ) ) { - $obj = self::$blobCache[$this->mOldId]; - } else { - $dbr = wfGetDB( DB_REPLICA ); - $row = $dbr->selectRow( - 'text', - [ 'old_flags', 'old_text' ], - [ 'old_id' => $this->mOldId ] - ); - - if ( !$row ) { - return false; - } - - $flags = explode( ',', $row->old_flags ); - if ( in_array( 'external', $flags ) ) { - $url = $row->old_text; - $parts = explode( '://', $url, 2 ); - if ( !isset( $parts[1] ) || $parts[1] == '' ) { - return false; - } - $row->old_text = ExternalStore::fetchFromURL( $url ); - - } - - if ( !in_array( 'object', $flags ) ) { - return false; - } - - if ( in_array( 'gzip', $flags ) ) { - // This shouldn't happen, but a bug in the compress script - // may at times gzip-compress a HistoryBlob object row. - $obj = unserialize( gzinflate( $row->old_text ) ); - } else { - $obj = unserialize( $row->old_text ); - } - - if ( !is_object( $obj ) ) { - // Correct for old double-serialization bug. - $obj = unserialize( $obj ); - } - - // Save this item for reference; if pulling many - // items in a row we'll likely use it again. - $obj->uncompress(); - self::$blobCache = [ $this->mOldId => $obj ]; - } - - return $obj->getItem( $this->mHash ); - } - - /** - * Get the content hash - * - * @return string - */ - function getHash() { - return $this->mHash; - } -} - -/** - * To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the - * leftover cur table as the backend. This avoids expensively copying hundreds - * of megabytes of data during the conversion downtime. - * - * Serialized HistoryBlobCurStub objects will be inserted into the text table - * on conversion if $wgLegacySchemaConversion is set to true. - */ -class HistoryBlobCurStub { - /** @var int */ - public $mCurId; - - /** - * @param int $curid The cur_id pointed to - */ - function __construct( $curid = 0 ) { - $this->mCurId = $curid; - } - - /** - * Sets the location (cur_id) of the main object to which this object - * points - * - * @param int $id - */ - function setLocation( $id ) { - $this->mCurId = $id; - } - - /** - * @return string|bool - */ - function getText() { - $dbr = wfGetDB( DB_REPLICA ); - $row = $dbr->selectRow( 'cur', [ 'cur_text' ], [ 'cur_id' => $this->mCurId ] ); - if ( !$row ) { - return false; - } - return $row->cur_text; - } -} - /** * Diff-based history compression * Requires xdiff 1.5+ and zlib @@ -697,15 +375,3 @@ class DiffHistoryBlob implements HistoryBlob { } } - -// phpcs:ignore Generic.CodeAnalysis.UnconditionalIfStatement.Found -if ( false ) { - // Blobs generated by MediaWiki < 1.5 on PHP 4 were serialized with the - // class name coerced to lowercase. We can improve efficiency by adding - // autoload entries for the lowercase variants of these classes (T166759). - // The code below is never executed, but it is picked up by the AutoloadGenerator - // parser, which scans for class_alias() calls. - class_alias( ConcatenatedGzipHistoryBlob::class, 'concatenatedgziphistoryblob' ); - class_alias( HistoryBlobCurStub::class, 'historyblobcurstub' ); - class_alias( HistoryBlobStub::class, 'historyblobstub' ); -} diff --git a/includes/historyblob/HistoryBlob.php b/includes/historyblob/HistoryBlob.php new file mode 100644 index 0000000000..36c7c8f75e --- /dev/null +++ b/includes/historyblob/HistoryBlob.php @@ -0,0 +1,67 @@ +mCurId = $curid; + } + + /** + * Sets the location (cur_id) of the main object to which this object + * points + * + * @param int $id + */ + function setLocation( $id ) { + $this->mCurId = $id; + } + + /** + * @return string|bool + */ + function getText() { + $dbr = wfGetDB( DB_REPLICA ); + $row = $dbr->selectRow( 'cur', [ 'cur_text' ], [ 'cur_id' => $this->mCurId ] ); + if ( !$row ) { + return false; + } + return $row->cur_text; + } +} + +// phpcs:ignore Generic.CodeAnalysis.UnconditionalIfStatement.Found +if ( false ) { + // Blobs generated by MediaWiki < 1.5 on PHP 4 were serialized with the + // class name coerced to lowercase. We can improve efficiency by adding + // autoload entries for the lowercase variants of these classes (T166759). + // The code below is never executed, but it is picked up by the AutoloadGenerator + // parser, which scans for class_alias() calls. + class_alias( HistoryBlobCurStub::class, 'historyblobcurstub' ); +} diff --git a/includes/historyblob/HistoryBlobStub.php b/includes/historyblob/HistoryBlobStub.php new file mode 100644 index 0000000000..4995d3b3f0 --- /dev/null +++ b/includes/historyblob/HistoryBlobStub.php @@ -0,0 +1,150 @@ +mHash = $hash; + } + + /** + * Sets the location (old_id) of the main object to which this object + * points + * @param int $id + */ + function setLocation( $id ) { + $this->mOldId = $id; + } + + /** + * Sets the location (old_id) of the referring object + * @param string $id + */ + function setReferrer( $id ) { + $this->mRef = $id; + } + + /** + * Gets the location of the referring object + * @return string + */ + function getReferrer() { + return $this->mRef; + } + + /** + * @return string|false + */ + function getText() { + if ( isset( self::$blobCache[$this->mOldId] ) ) { + $obj = self::$blobCache[$this->mOldId]; + } else { + $dbr = wfGetDB( DB_REPLICA ); + $row = $dbr->selectRow( + 'text', + [ 'old_flags', 'old_text' ], + [ 'old_id' => $this->mOldId ] + ); + + if ( !$row ) { + return false; + } + + $flags = explode( ',', $row->old_flags ); + if ( in_array( 'external', $flags ) ) { + $url = $row->old_text; + $parts = explode( '://', $url, 2 ); + if ( !isset( $parts[1] ) || $parts[1] == '' ) { + return false; + } + $row->old_text = ExternalStore::fetchFromURL( $url ); + + } + + if ( !in_array( 'object', $flags ) ) { + return false; + } + + if ( in_array( 'gzip', $flags ) ) { + // This shouldn't happen, but a bug in the compress script + // may at times gzip-compress a HistoryBlob object row. + $obj = unserialize( gzinflate( $row->old_text ) ); + } else { + $obj = unserialize( $row->old_text ); + } + + if ( !is_object( $obj ) ) { + // Correct for old double-serialization bug. + $obj = unserialize( $obj ); + } + + // Save this item for reference; if pulling many + // items in a row we'll likely use it again. + $obj->uncompress(); + self::$blobCache = [ $this->mOldId => $obj ]; + } + + return $obj->getItem( $this->mHash ); + } + + /** + * Get the content hash + * + * @return string + */ + function getHash() { + return $this->mHash; + } +} + +// phpcs:ignore Generic.CodeAnalysis.UnconditionalIfStatement.Found +if ( false ) { + // Blobs generated by MediaWiki < 1.5 on PHP 4 were serialized with the + // class name coerced to lowercase. We can improve efficiency by adding + // autoload entries for the lowercase variants of these classes (T166759). + // The code below is never executed, but it is picked up by the AutoloadGenerator + // parser, which scans for class_alias() calls. + class_alias( HistoryBlobStub::class, 'historyblobstub' ); +} -- 2.20.1