* Added rev_sha1 and ar_sha1 columns to revision/archive tables (useful for bug 25312)
authorAaron Schulz <aaron@users.mediawiki.org>
Thu, 11 Aug 2011 21:52:54 +0000 (21:52 +0000)
committerAaron Schulz <aaron@users.mediawiki.org>
Thu, 11 Aug 2011 21:52:54 +0000 (21:52 +0000)
* Created a script to populate these fields (doesn't handle archive rows without ar_rev_id set though)

includes/AutoLoader.php
includes/installer/MysqlUpdater.php
maintenance/archives/patch-ar_sha1.sql [new file with mode: 0644]
maintenance/archives/patch-rev_sha1.sql [new file with mode: 0644]
maintenance/populateRevisionSha1.php [new file with mode: 0644]
maintenance/tables.sql

index 9ebd53a..1e12280 100644 (file)
@@ -838,6 +838,7 @@ $wgAutoloadLocalClasses = array(
        'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php',
        'PopulateParentId' => 'maintenance/populateParentId.php',
        'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php',
+       'PopulateRevisionSha1' => 'maintenance/populateRevisionSha1.php',
        'SevenZipStream' => 'maintenance/7zip.inc',
        'Sqlite' => 'maintenance/sqlite.inc',
        'UpdateCollation' => 'maintenance/updateCollation.php',
index 7550230..f95351f 100644 (file)
@@ -186,6 +186,9 @@ class MysqlUpdater extends DatabaseUpdater {
                        // 1.19
                        array( 'addTable', 'config',                            'patch-config.sql' ),
                        array( 'addIndex', 'logging',       'type_action',      'patch-logging-type-action-index.sql'),
+                       array( 'addField', 'revision',      'rev_sha1',         'patch-rev_sha1.sql' ),
+                       array( 'addField', 'archive',       'ar_sha1',          'patch-ar_sha1.sql' ),
+                       array( 'doPopulateRevSha1' )
                );
        }
 
@@ -855,4 +858,14 @@ class MysqlUpdater extends DatabaseUpdater {
                $this->applyPatch( 'patch-user-newtalk-timestamp-null.sql' );
                $this->output( "done.\n" );
        }
+
+       /**
+        * Fill the rev_sha1/ar_sha1 columns by running the PopulateRevisionSha1
+        * maintenance script, unless updateRowExists() reports that the
+        * 'populate rev_sha1' key has already been recorded.
+        */
+       protected function doPopulateRevSha1() {
+               $alreadyDone = $this->updateRowExists( 'populate rev_sha1' );
+               if ( !$alreadyDone ) {
+                       // Not recorded as done yet: hand over to the child maintenance script.
+                       $this->maintenance->runChild( 'PopulateRevisionSha1' )->execute();
+                       return;
+               }
+
+               $this->output( "...rev_sha1/ar_sha1 columns already populated.\n" );
+       }
 }
diff --git a/maintenance/archives/patch-ar_sha1.sql b/maintenance/archives/patch-ar_sha1.sql
new file mode 100644 (file)
index 0000000..1c7d8e9
--- /dev/null
@@ -0,0 +1,3 @@
+-- Add ar_sha1 to the archive table: SHA-1 text content hash in base-36, defaulting to the empty string
+ALTER TABLE /*$wgDBprefix*/archive
+  ADD ar_sha1 varbinary(32) NOT NULL default '';
diff --git a/maintenance/archives/patch-rev_sha1.sql b/maintenance/archives/patch-rev_sha1.sql
new file mode 100644 (file)
index 0000000..0100c36
--- /dev/null
@@ -0,0 +1,3 @@
+-- Add rev_sha1 to the revision table: SHA-1 text content hash in base-36, defaulting to the empty string
+ALTER TABLE /*$wgDBprefix*/revision
+  ADD rev_sha1 varbinary(32) NOT NULL default '';
diff --git a/maintenance/populateRevisionSha1.php b/maintenance/populateRevisionSha1.php
new file mode 100644 (file)
index 0000000..0eca1f6
--- /dev/null
@@ -0,0 +1,96 @@
+<?php
+/**
+ * Fills the rev_sha1 and ar_sha1 columns of revision & archive tables.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/**
+ * Maintenance script that fills the rev_sha1 and ar_sha1 columns with the
+ * base-36 SHA-1 of each revision's raw text.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateRevisionSha1 extends Maintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
+               $this->setBatchSize( 150 );
+       }
+
+       /**
+        * Populate revision.rev_sha1 and archive.ar_sha1, then record completion
+        * under the 'populate rev_sha1' key in the updatelog table.
+        *
+        * @return bool True if the updatelog insert succeeded, false otherwise
+        */
+       public function execute() {
+               $db = wfGetDB( DB_MASTER );
+
+               $this->output( "Populating rev_sha1 column\n" );
+               $this->doSha1Updates( $db, 'revision', 'rev_id', 'rev' );
+
+               $this->output( "Populating ar_sha1 column\n" );
+               $this->doSha1Updates( $db, 'archive', 'ar_rev_id', 'ar' );
+
+               if ( $db->insert(
+                               'updatelog',
+                               array( 'ul_key' => 'populate rev_sha1' ),
+                               __METHOD__,
+                               'IGNORE'
+                       )
+               ) {
+                       $this->output( "rev_sha1 and ar_sha1 population complete.\n" );
+                       return true;
+               } else {
+                       $this->output( "Could not insert rev_sha1 population row.\n" );
+                       return false;
+               }
+       }
+
+       /**
+        * Hash the text of every row of $table whose {$prefix}_sha1 is still the
+        * empty string, walking the $idCol range in batches of mBatchSize ids.
+        * Each batch is written inside its own begin()/commit() pair. Rows whose
+        * $idCol is NULL never match the range condition and are left untouched.
+        *
+        * @param $db Database connection obtained from wfGetDB( DB_MASTER )
+        * @param $table String: table to update ('revision' or 'archive')
+        * @param $idCol String: id column to batch on ('rev_id' or 'ar_rev_id')
+        * @param $prefix String: column name prefix ('rev' or 'ar')
+        * @return bool Always true
+        */
+       protected function doSha1Updates( $db, $table, $idCol, $prefix ) {
+               $start = $db->selectField( $table, "MIN($idCol)", "$idCol IS NOT NULL", __METHOD__ );
+               if ( !$start ) {
+                       $this->output( "Nothing to do.\n" );
+                       return true;
+               }
+               $end = $db->selectField( $table, "MAX($idCol)", "$idCol IS NOT NULL", __METHOD__ );
+
+               # Pad the upper bound so that the last, partial batch is still visited
+               $end += $this->mBatchSize - 1;
+               $blockStart = $start;
+               $blockEnd = $start + $this->mBatchSize - 1;
+               while ( $blockEnd <= $end ) {
+                       $this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
+                       # The sha1 columns are declared NOT NULL default '', so an unpopulated
+                       # row holds '' rather than NULL; only those rows need hashing.
+                       $cond = "$idCol BETWEEN $blockStart AND $blockEnd
+                               AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
+                       $res = $db->select( $table, '*', $cond, __METHOD__ );
+
+                       $db->begin();
+                       foreach ( $res as $row ) {
+                               if ( $table === 'archive' ) {
+                                       $rev = Revision::newFromArchiveRow( $row );
+                               } else {
+                                       $rev = new Revision( $row );
+                               }
+                               $text = $rev->getRawText();
+                               if ( !is_string( $text ) ) {
+                                       # NOTE(review): assumes getRawText() yields a non-string when the
+                                       # text cannot be loaded - confirm. Hashing such a value would store
+                                       # a bogus hash, so the row is reported and left unpopulated.
+                                       $this->output( "Text of revision with {$idCol}={$row->$idCol} unavailable!\n" );
+                                       continue;
+                               }
+                               $db->update( $table,
+                                       array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
+                                       array( $idCol => $row->$idCol ),
+                                       __METHOD__ );
+                       }
+                       $db->commit();
+
+                       $blockStart += $this->mBatchSize;
+                       $blockEnd += $this->mBatchSize;
+                       wfWaitForSlaves();
+               }
+               return true;
+       }
+}
+
+// Name of the maintenance class defined in this file; presumably consumed by the
+// script pulled in through RUN_MAINTENANCE_IF_MAIN below - TODO confirm.
+$maintClass = "PopulateRevisionSha1";
+require_once( RUN_MAINTENANCE_IF_MAIN );
index 2ab431f..e8d1739 100644 (file)
@@ -317,7 +317,10 @@ CREATE TABLE /*_*/revision (
 
   -- Key to revision.rev_id
   -- This field is used to add support for a tree structure (The Adjacency List Model)
-  rev_parent_id int unsigned default NULL
+  rev_parent_id int unsigned default NULL,
+
+  -- SHA-1 text content hash in base-36
+  rev_sha1 varbinary(32) NOT NULL default ''
 
 ) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024;
 -- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit
@@ -424,7 +427,10 @@ CREATE TABLE /*_*/archive (
   ar_page_id int unsigned,
 
   -- Original previous revision
-  ar_parent_id int unsigned default NULL
+  ar_parent_id int unsigned default NULL,
+
+  -- SHA-1 text content hash in base-36
+  ar_sha1 varbinary(32) NOT NULL default ''
 ) /*$wgDBTableOptions*/;
 
 CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp);