From d2b000414e96a3d1ae3fb191cfd0f08291ccfb8c Mon Sep 17 00:00:00 2001 From: Bill Pirkle Date: Wed, 12 Jun 2019 22:39:43 -0500 Subject: [PATCH] Update populateRevisionSha1.php for MCR schema changes Maintenance script populateRevisionSha1.php was using the deprecated Revision class. Instead, use the new RevisionRecord class if possible, and custom code otherwise. Bug: T217829 Change-Id: I0c22286a16d7b243306447d8948428702269a4bd --- maintenance/populateRevisionSha1.php | 105 ++++++++++++++------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/maintenance/populateRevisionSha1.php b/maintenance/populateRevisionSha1.php index 9662044acc..f91a5b6ed3 100644 --- a/maintenance/populateRevisionSha1.php +++ b/maintenance/populateRevisionSha1.php @@ -24,6 +24,8 @@ require_once __DIR__ . '/Maintenance.php'; +use MediaWiki\MediaWikiServices; + /** * Maintenance script that fills the rev_sha1 and ar_sha1 columns of revision * and archive tables for revisions created before MW 1.19. @@ -50,17 +52,22 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { $this->fatalError( "archive table does not exist" ); } elseif ( !$db->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) { $this->output( "rev_sha1 column does not exist\n\n", true ); - return false; } + $revStore = MediaWikiServices::getInstance()->getRevisionStore(); + $this->output( "Populating rev_sha1 column\n" ); - $rc = $this->doSha1Updates( 'revision', 'rev_id', Revision::getQueryInfo(), 'rev' ); + $rc = $this->doSha1Updates( $revStore, 'revision', 'rev_id', + $revStore->getQueryInfo(), 'rev' + ); $this->output( "Populating ar_sha1 column\n" ); - $ac = $this->doSha1Updates( 'archive', 'ar_rev_id', Revision::getArchiveQueryInfo(), 'ar' ); + $ac = $this->doSha1Updates( $revStore, 'archive', 'ar_rev_id', + $revStore->getArchiveQueryInfo(), 'ar' + ); $this->output( "Populating ar_sha1 column legacy rows\n" ); - $ac += $this->doSha1LegacyUpdates(); + $ac += $this->doSha1LegacyUpdates( $revStore ); $this->output( "rev_sha1 and ar_sha1 population complete " . "[$rc revision rows, $ac archive rows].\n" ); @@ -69,13 +76,14 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { } /** + * @param MediaWiki\Revision\RevisionStore $revStore * @param string $table * @param string $idCol * @param array $queryInfo * @param string $prefix * @return int Rows changed */ - protected function doSha1Updates( $table, $idCol, $queryInfo, $prefix ) { + protected function doSha1Updates( $revStore, $table, $idCol, $queryInfo, $prefix ) { $db = $this->getDB( DB_MASTER ); $batchSize = $this->getBatchSize(); $start = $db->selectField( $table, "MIN($idCol)", '', __METHOD__ ); @@ -93,6 +101,7 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { $blockEnd = $start + $batchSize - 1; while ( $blockEnd <= $end ) { $this->output( "...doing $idCol from $blockStart to $blockEnd\n" ); + $cond = "$idCol BETWEEN " . (int)$blockStart . " AND " . (int)$blockEnd . " AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''"; $res = $db->select( @@ -101,7 +110,7 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { $this->beginTransaction( $db, __METHOD__ ); foreach ( $res as $row ) { - if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) { + if ( $this->upgradeRow( $revStore, $row, $table, $idCol, $prefix ) ) { $count++; } } @@ -115,19 +124,21 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { } /** + * @param MediaWiki\Revision\RevisionStore $revStore + * @param string $emptySha1 * @return int */ - protected function doSha1LegacyUpdates() { + protected function doSha1LegacyUpdates( $revStore ) { $count = 0; $db = $this->getDB( DB_MASTER ); - $arQuery = Revision::getArchiveQueryInfo(); + $arQuery = $revStore->getArchiveQueryInfo(); $res = $db->select( $arQuery['tables'], $arQuery['fields'], [ 'ar_rev_id IS NULL', 'ar_sha1' => '' ], __METHOD__, [], $arQuery['joins'] ); $updateSize = 0; $this->beginTransaction( $db, __METHOD__ ); foreach ( $res as $row ) { - if ( $this->upgradeLegacyArchiveRow( $row ) ) { + if ( $this->upgradeLegacyArchiveRow( $revStore, $row ) ) { ++$count; } if ( ++$updateSize >= 100 ) { @@ -143,75 +154,67 @@ class PopulateRevisionSha1 extends LoggedUpdateMaintenance { } /** + * @param MediaWiki\Revision\RevisionStore $revStore * @param stdClass $row * @param string $table * @param string $idCol * @param string $prefix * @return bool */ - protected function upgradeRow( $row, $table, $idCol, $prefix ) { + protected function upgradeRow( $revStore, $row, $table, $idCol, $prefix ) { $db = $this->getDB( DB_MASTER ); + + // Create a revision and use it to get the sha1 from the content table, if possible. try { $rev = ( $table === 'archive' ) - ? Revision::newFromArchiveRow( $row ) - : new Revision( $row ); - $text = $rev->getSerializedData(); + ? $revStore->newRevisionFromArchiveRow( $row ) + : $revStore->newRevisionFromRow( $row ); + $sha1 = $rev->getSha1(); } catch ( Exception $e ) { $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" ); - - return false; // T24624? + return false; // T24624? T22757? } - if ( !is_string( $text ) ) { - # This should not happen, but sometimes does (T22757) - $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" ); - return false; - } else { - $db->update( $table, - [ "{$prefix}_sha1" => Revision::base36Sha1( $text ) ], - [ $idCol => $row->$idCol ], - __METHOD__ - ); + $db->update( $table, + [ "{$prefix}_sha1" => $sha1 ], + [ $idCol => $row->$idCol ], + __METHOD__ + ); - return true; - } + return true; } /** + * @param MediaWiki\Revision\RevisionStore $revStore * @param stdClass $row * @return bool */ - protected function upgradeLegacyArchiveRow( $row ) { + protected function upgradeLegacyArchiveRow( $revStore, $row ) { $db = $this->getDB( DB_MASTER ); + + // Create a revision and use it to get the sha1 from the content table, if possible. try { - $rev = Revision::newFromArchiveRow( $row ); + $rev = $revStore->newRevisionFromArchiveRow( $row ); + $sha1 = $rev->getSha1(); } catch ( Exception $e ) { $this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" ); - - return false; // T24624? + return false; // T24624? T22757? } - $text = $rev->getSerializedData(); - if ( !is_string( $text ) ) { - # This should not happen, but sometimes does (T22757) - $this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" ); - return false; - } else { - # Archive table as no PK, but (NS,title,time) should be near unique. - # Any duplicates on those should also have duplicated text anyway. - $db->update( 'archive', - [ 'ar_sha1' => Revision::base36Sha1( $text ) ], - [ - 'ar_namespace' => $row->ar_namespace, - 'ar_title' => $row->ar_title, - 'ar_timestamp' => $row->ar_timestamp, - 'ar_len' => $row->ar_len // extra sanity - ], - __METHOD__ - ); + # Archive table has no PK, but (NS,title,time) should be near unique. + # Any duplicates on those should also have duplicated text anyway. + $db->update( 'archive', + [ 'ar_sha1' => $sha1 ], + [ + 'ar_namespace' => $row->ar_namespace, + 'ar_title' => $row->ar_title, + 'ar_timestamp' => $row->ar_timestamp, + 'ar_len' => $row->ar_len // extra sanity + ], + __METHOD__ + ); - return true; - } + return true; } } -- 2.20.1