From 49a9bca8dde6006f3e744cd20ff2f618ae1a5cda Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 4 Jul 2019 13:54:20 +0200 Subject: [PATCH] PopulateContentTables: compute sha1 and length if needed. PopulateContentTables tries to copy rev_sha1 to content_sha1 and rev_len to content_size, but when updating directly from an old version of MediaWiki, these fields may be empty or NULL. To ensure that the content table gets the correct values, we have to compute them on the fly. Note that PopulateRevisionSha1 runs *after* the normal database updates. So even though it was introduced in 1.19, it will run after the schema updates for 1.32, which include PopulateContentTables. This means that PopulateContentTables can't rely on rev_sha1 to have been filled in already. But per I0c22286a16d7b, it also means that PopulateRevisionSha1 can make use of the hash in content_sha1 instead of re-calculating. Bug: T217831 Bug: T200653 Change-Id: I69e91a812ad5f038562220b02c3634589667cdb6 --- maintenance/populateContentTables.php | 44 ++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/maintenance/populateContentTables.php b/maintenance/populateContentTables.php index f3e373a5f1..c84f3de54e 100644 --- a/maintenance/populateContentTables.php +++ b/maintenance/populateContentTables.php @@ -21,6 +21,7 @@ use MediaWiki\MediaWikiServices; use MediaWiki\Revision\SlotRecord; +use MediaWiki\Storage\BlobStore; use MediaWiki\Storage\NameTableStore; use MediaWiki\Storage\SqlBlobStore; use Wikimedia\Assert\Assert; @@ -41,6 +42,9 @@ class PopulateContentTables extends Maintenance { /** @var NameTableStore */ private $contentModelStore; + /** @var BlobStore */ + private $blobStore; + /** @var int */ private $mainRoleId; @@ -67,6 +71,7 @@ class PopulateContentTables extends Maintenance { private function initServices() { $this->dbw = $this->getDB( DB_MASTER ); $this->contentModelStore = MediaWikiServices::getInstance()->getContentModelStore(); + $this->blobStore = 
MediaWikiServices::getInstance()->getBlobStore(); $this->mainRoleId = MediaWikiServices::getInstance()->getSlotRoleStore() ->acquireId( SlotRecord::MAIN ); } @@ -262,13 +267,16 @@ class PopulateContentTables extends Maintenance { Assert::invariant( $revisionId !== null, 'rev_id must not be null' ); - $modelId = $this->contentModelStore->acquireId( $this->getContentModel( $row ) ); + $model = $this->getContentModel( $row ); + $modelId = $this->contentModelStore->acquireId( $model ); $address = SqlBlobStore::makeAddressFromTextId( $row->text_id ); $key = "{$modelId}:{$address}"; $contentKeys[$revisionId] = $key; if ( !isset( $map[$key] ) ) { + $this->fillMissingFields( $row, $model, $address ); + $map[$key] = false; $contentRows[] = [ 'content_size' => (int)$row->len, @@ -345,6 +353,40 @@ class PopulateContentTables extends Maintenance { private function writeln( $msg ) { $this->output( "$msg\n" ); } + + /** + * Compute any missing fields in $row. + * The way the missing values are computed must correspond to the way this is done in SlotRecord. + * + * @param object $row Row to be modified in place; content_model, len and sha1 are filled in when unset (sha1 also when empty) + * @param string $model Content model ID; stored as content_model if missing and used to look up the ContentHandler when len must be computed + * @param string $address Blob address handed to the blob store to load the raw blob + */ + private function fillMissingFields( $row, $model, $address ) { + if ( !isset( $row->content_model ) ) { + // just for completeness + $row->content_model = $model; + } + + if ( isset( $row->len ) && isset( $row->sha1 ) && $row->sha1 !== '' ) { + // Both len and sha1 are already present, so there is no need to load the blob. + return; + } + + $blob = $this->blobStore->getBlob( $address ); + + if ( !isset( $row->len ) ) { + // NOTE: The nominal size of the content may not be the length of the raw blob. + $handler = ContentHandler::getForModelID( $model ); + $content = $handler->unserializeContent( $blob ); + + $row->len = $content->getSize(); + } + + if ( !isset( $row->sha1 ) || $row->sha1 === '' ) { + $row->sha1 = SlotRecord::base36Sha1( $blob ); + } + } } $maintClass = 'PopulateContentTables'; -- 2.20.1