From 606872436bd27a669a6dfc23901923a87d0b6fa4 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Thu, 11 Aug 2011 21:52:54 +0000 Subject: [PATCH] * Added rev_sha1 and ar_sha1 columns to revision/archive tables (useful for bug 25312) * Created a script to populate these fields (doesn't handle archive rows without ar_rev_id set though) --- includes/AutoLoader.php | 1 + includes/installer/MysqlUpdater.php | 13 ++++ maintenance/archives/patch-ar_sha1.sql | 3 + maintenance/archives/patch-rev_sha1.sql | 3 + maintenance/populateRevisionSha1.php | 96 +++++++++++++++++++++++++ maintenance/tables.sql | 10 ++- 6 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 maintenance/archives/patch-ar_sha1.sql create mode 100644 maintenance/archives/patch-rev_sha1.sql create mode 100644 maintenance/populateRevisionSha1.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 9ebd53afe9..1e12280296 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -838,6 +838,7 @@ $wgAutoloadLocalClasses = array( 'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php', 'PopulateParentId' => 'maintenance/populateParentId.php', 'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php', + 'PopulateRevisionSha1' => 'maintenance/populateRevisionSha1.php', 'SevenZipStream' => 'maintenance/7zip.inc', 'Sqlite' => 'maintenance/sqlite.inc', 'UpdateCollation' => 'maintenance/updateCollation.php', diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index 7550230589..f95351f162 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -186,6 +186,9 @@ class MysqlUpdater extends DatabaseUpdater { // 1.19 array( 'addTable', 'config', 'patch-config.sql' ), array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'), + array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ), + array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' ), + array( 
'doPopulateRevSha1' ) ); } @@ -855,4 +858,14 @@ class MysqlUpdater extends DatabaseUpdater { $this->applyPatch( 'patch-user-newtalk-timestamp-null.sql' ); $this->output( "done.\n" ); } + + protected function doPopulateRevSha1() { + if ( $this->updateRowExists( 'populate rev_sha1' ) ) { + $this->output( "...rev_sha1/ar_sha1 columns already populated.\n" ); + return; + } + + $task = $this->maintenance->runChild( 'PopulateRevisionSha1' ); + $task->execute(); + } } diff --git a/maintenance/archives/patch-ar_sha1.sql b/maintenance/archives/patch-ar_sha1.sql new file mode 100644 index 0000000000..1c7d8e917d --- /dev/null +++ b/maintenance/archives/patch-ar_sha1.sql @@ -0,0 +1,3 @@ +-- Adding ar_sha1 field +ALTER TABLE /*$wgDBprefix*/archive + ADD ar_sha1 varbinary(32) NOT NULL default ''; diff --git a/maintenance/archives/patch-rev_sha1.sql b/maintenance/archives/patch-rev_sha1.sql new file mode 100644 index 0000000000..0100c36562 --- /dev/null +++ b/maintenance/archives/patch-rev_sha1.sql @@ -0,0 +1,3 @@ +-- Adding rev_sha1 field +ALTER TABLE /*$wgDBprefix*/revision + ADD rev_sha1 varbinary(32) NOT NULL default ''; diff --git a/maintenance/populateRevisionSha1.php b/maintenance/populateRevisionSha1.php new file mode 100644 index 0000000000..0eca1f6f60 --- /dev/null +++ b/maintenance/populateRevisionSha1.php @@ -0,0 +1,96 @@ +mDescription = "Populates the rev_sha1 and ar_sha1 fields"; + $this->setBatchSize( 150 ); + } + + public function execute() { + $db = wfGetDB( DB_MASTER ); + + $this->output( "Populating rev_sha1 column\n" ); + $this->doSha1Updates( $db, 'revision', 'rev_id', 'rev' ); + + $this->output( "Populating ar_sha1 column\n" ); + $this->doSha1Updates( $db, 'archive', 'ar_rev_id', 'ar' ); + + if ( $db->insert( + 'updatelog', + array( 'ul_key' => 'populate rev_sha1' ), + __METHOD__, + 'IGNORE' + ) + ) { + $this->output( "rev_sha1 and ar_sha1 population complete.\n" ); + return true; + } else { + $this->output( "Could not insert rev_sha1 population row.\n" ); + 
return false; + } + } + + protected function doSha1Updates( $db, $table, $idCol, $prefix ) { + $start = $db->selectField( $table, "MIN($idCol)", "$idCol IS NOT NULL", __METHOD__ ); + if ( !$start ) { + $this->output( "Nothing to do.\n" ); + return true; + } + $end = $db->selectField( $table, "MAX($idCol)", "$idCol IS NOT NULL", __METHOD__ ); + + # Do remaining chunk + $end += $this->mBatchSize - 1; + $blockStart = $start; + $blockEnd = $start + $this->mBatchSize - 1; + while ( $blockEnd <= $end ) { + $this->output( "...doing $idCol from $blockStart to $blockEnd\n" ); + $cond = "$idCol BETWEEN $blockStart AND $blockEnd + AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''"; /* '' is the column default, so only rows still lacking a hash are selected */ + $res = $db->select( $table, '*', $cond, __METHOD__ ); + + $db->begin(); + foreach ( $res as $row ) { + if ( $table === 'archive' ) { + $rev = Revision::newFromArchiveRow( $row ); + } else { + $rev = new Revision( $row ); + } + $db->update( $table, + array( "{$prefix}_sha1" => Revision::base36Sha1( $rev->getRawText() ) ), + array( $idCol => $row->$idCol ), + __METHOD__ ); + } + $db->commit(); + + $blockStart += $this->mBatchSize; + $blockEnd += $this->mBatchSize; + wfWaitForSlaves(); + } + } +} + +$maintClass = "PopulateRevisionSha1"; +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 2ab431f49a..e8d17395da 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -317,7 +317,10 @@ CREATE TABLE /*_*/revision ( -- Key to revision.rev_id -- This field is used to add support for a tree structure (The Adjacency List Model) - rev_parent_id int unsigned default NULL + rev_parent_id int unsigned default NULL, + + -- SHA-1 text content hash in base-36 + rev_sha1 varbinary(32) NOT NULL default '' ) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024; -- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit @@ -424,7 +427,10 @@ CREATE TABLE /*_*/archive ( ar_page_id int unsigned, -- Original 
previous revision - ar_parent_id int unsigned default NULL + ar_parent_id int unsigned default NULL, + + -- SHA-1 text content hash in base-36 + ar_sha1 varbinary(32) NOT NULL default '' ) /*$wgDBTableOptions*/; CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp); -- 2.20.1