From c326cab51655700edce42633d7d775e0d8508ef6 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Thu, 27 Oct 2011 18:44:10 +0000 Subject: [PATCH] Reinstated r94289 et all - rev_sha1/ar_sha1 field for bug 21860 --- includes/AutoLoader.php | 1 + includes/Revision.php | 50 +++++++++-- includes/WikiPage.php | 3 +- includes/installer/DatabaseUpdater.php | 4 +- includes/installer/MysqlUpdater.php | 3 +- includes/installer/SqliteUpdater.php | 2 + includes/specials/SpecialUndelete.php | 5 +- maintenance/archives/patch-ar_sha1.sql | 3 + maintenance/archives/patch-rev_sha1.sql | 3 + maintenance/populateRevisionSha1.php | 108 ++++++++++++++++++++++++ maintenance/tables.sql | 10 ++- 11 files changed, 178 insertions(+), 14 deletions(-) create mode 100644 maintenance/archives/patch-ar_sha1.sql create mode 100644 maintenance/archives/patch-rev_sha1.sql create mode 100644 maintenance/populateRevisionSha1.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index d5b20fcc80..2f072121db 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -865,6 +865,7 @@ $wgAutoloadLocalClasses = array( 'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php', 'PopulateParentId' => 'maintenance/populateParentId.php', 'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php', + 'PopulateRevisionSha1' => 'maintenance/populateRevisionSha1.php', 'SevenZipStream' => 'maintenance/7zip.inc', 'Sqlite' => 'maintenance/sqlite.inc', 'UpdateCollation' => 'maintenance/updateCollation.php', diff --git a/includes/Revision.php b/includes/Revision.php index ec0508c3ff..f75442ca05 100644 --- a/includes/Revision.php +++ b/includes/Revision.php @@ -13,6 +13,7 @@ class Revision { protected $mTimestamp; protected $mDeleted; protected $mSize; + protected $mSha1; protected $mParentId; protected $mComment; protected $mText; @@ -122,7 +123,8 @@ class Revision { 'minor_edit' => $row->ar_minor_edit, 'text_id' => isset( $row->ar_text_id ) ? 
$row->ar_text_id : null, 'deleted' => $row->ar_deleted, - 'len' => $row->ar_len + 'len' => $row->ar_len, + 'sha1' => $row->ar_sha1 ); if ( isset( $row->ar_text ) && !$row->ar_text_id ) { // Pre-1.5 ar_text row @@ -313,7 +315,8 @@ class Revision { 'rev_minor_edit', 'rev_deleted', 'rev_len', - 'rev_parent_id' + 'rev_parent_id', + 'rev_sha1' ); } @@ -375,6 +378,12 @@ class Revision { $this->mSize = intval( $row->rev_len ); } + if ( !isset( $row->rev_sha1 ) ) { + $this->mSha1 = null; + } else { + $this->mSha1 = $row->rev_sha1; + } + if( isset( $row->page_latest ) ) { $this->mCurrent = ( $row->rev_id == $row->page_latest ); $this->mTitle = Title::newFromRow( $row ); @@ -402,7 +411,7 @@ class Revision { $this->mOrigUserText = $row->rev_user_text; } elseif( is_array( $row ) ) { // Build a new revision to be saved... - global $wgUser; + global $wgUser; // ugh $this->mId = isset( $row['id'] ) ? intval( $row['id'] ) : null; $this->mPage = isset( $row['page'] ) ? intval( $row['page'] ) : null; @@ -414,6 +423,7 @@ class Revision { $this->mDeleted = isset( $row['deleted'] ) ? intval( $row['deleted'] ) : 0; $this->mSize = isset( $row['len'] ) ? intval( $row['len'] ) : null; $this->mParentId = isset( $row['parent_id'] ) ? intval( $row['parent_id'] ) : null; + $this->mSha1 = isset( $row['sha1'] ) ? strval( $row['sha1'] ) : null; // Enforce spacing trimming on supplied text $this->mComment = isset( $row['comment'] ) ? trim( strval( $row['comment'] ) ) : null; @@ -422,9 +432,13 @@ class Revision { $this->mTitle = null; # Load on demand if needed $this->mCurrent = false; - # If we still have no len_size, see it we have the text to figure it out + # If we still have no length, see if we have the text to figure it out if ( !$this->mSize ) { - $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText ); + $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText ); + } + # Same for sha1 + if ( $this->mSha1 === null ) { + $this->mSha1 = is_null( $this->mText ) ? 
null : self::base36Sha1( $this->mText ); } } else { throw new MWException( 'Revision constructor passed invalid row format.' ); @@ -468,6 +482,15 @@ class Revision { return $this->mSize; } + /** + * Returns the base36 sha1 of the text in this revision, or null if unknown. + * + * @return String|null + */ + public function getSha1() { + return $this->mSha1; + } + /** * Returns the title of the page associated with this entry. * @@ -938,8 +961,12 @@ class Revision { 'rev_timestamp' => $dbw->timestamp( $this->mTimestamp ), 'rev_deleted' => $this->mDeleted, 'rev_len' => $this->mSize, - 'rev_parent_id' => is_null($this->mParentId) ? - $this->getPreviousRevisionId( $dbw ) : $this->mParentId + 'rev_parent_id' => is_null( $this->mParentId ) + ? $this->getPreviousRevisionId( $dbw ) + : $this->mParentId, + 'rev_sha1' => is_null( $this->mSha1 ) + ? Revision::base36Sha1( $this->mText ) + : $this->mSha1 ), __METHOD__ ); @@ -951,6 +978,15 @@ class Revision { return $this->mId; } + /** + * Get the base 36 SHA-1 value for a string of text + * @param $text String + * @return String + */ + public static function base36Sha1( $text ) { + return wfBaseConvert( sha1( $text ), 16, 36, 31 ); + } + /** * Lazy-load the revision's text. * Currently hardcoded to the 'text' table storage engine. diff --git a/includes/WikiPage.php b/includes/WikiPage.php index 3462427bbd..4bc787370a 100644 --- a/includes/WikiPage.php +++ b/includes/WikiPage.php @@ -1667,7 +1667,8 @@ class WikiPage extends Page { 'ar_flags' => '\'\'', // MySQL's "strict mode"... 
'ar_len' => 'rev_len', 'ar_page_id' => 'page_id', - 'ar_deleted' => $bitfield + 'ar_deleted' => $bitfield, + 'ar_sha1' => 'rev_sha1' ), array( 'page_id' => $id, 'page_id = rev_page' diff --git a/includes/installer/DatabaseUpdater.php b/includes/installer/DatabaseUpdater.php index e7ecb6fa33..80dbe30ac6 100644 --- a/includes/installer/DatabaseUpdater.php +++ b/includes/installer/DatabaseUpdater.php @@ -41,7 +41,9 @@ abstract class DatabaseUpdater { protected $postDatabaseUpdateMaintenance = array( 'DeleteDefaultMessages', - 'PopulateRevisionLength' + 'PopulateRevisionLength', + 'PopulateRevisionSha1', + 'PopulateImageSha1' ); /** diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index cd6de36468..30715fdf46 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -187,7 +187,8 @@ class MysqlUpdater extends DatabaseUpdater { array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'), array( 'doMigrateUserOptions' ), array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ), - + array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ), + array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' ) ); } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index 04b6a31330..19e12c5f4c 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -65,6 +65,8 @@ class SqliteUpdater extends DatabaseUpdater { array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'), array( 'doMigrateUserOptions' ), array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ), + array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ), + array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' ) ); } diff --git a/includes/specials/SpecialUndelete.php b/includes/specials/SpecialUndelete.php index 5ee965ece5..7342f3f8d1 100644 --- 
a/includes/specials/SpecialUndelete.php +++ b/includes/specials/SpecialUndelete.php @@ -116,7 +116,7 @@ class PageArchive { $res = $dbr->select( 'archive', array( 'ar_minor_edit', 'ar_timestamp', 'ar_user', 'ar_user_text', - 'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id' + 'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id', 'ar_sha1' ), array( 'ar_namespace' => $this->title->getNamespace(), 'ar_title' => $this->title->getDBkey() ), @@ -460,7 +460,8 @@ class PageArchive { 'ar_text_id', 'ar_deleted', 'ar_page_id', - 'ar_len' ), + 'ar_len', + 'ar_sha1' ), /* WHERE */ array( 'ar_namespace' => $this->title->getNamespace(), 'ar_title' => $this->title->getDBkey(), diff --git a/maintenance/archives/patch-ar_sha1.sql b/maintenance/archives/patch-ar_sha1.sql new file mode 100644 index 0000000000..1c7d8e917d --- /dev/null +++ b/maintenance/archives/patch-ar_sha1.sql @@ -0,0 +1,3 @@ +-- Adding ar_sha1 field +ALTER TABLE /*$wgDBprefix*/archive + ADD ar_sha1 varbinary(32) NOT NULL default ''; diff --git a/maintenance/archives/patch-rev_sha1.sql b/maintenance/archives/patch-rev_sha1.sql new file mode 100644 index 0000000000..0100c36562 --- /dev/null +++ b/maintenance/archives/patch-rev_sha1.sql @@ -0,0 +1,3 @@ +-- Adding rev_sha1 field +ALTER TABLE /*$wgDBprefix*/revision + ADD rev_sha1 varbinary(32) NOT NULL default ''; diff --git a/maintenance/populateRevisionSha1.php b/maintenance/populateRevisionSha1.php new file mode 100644 index 0000000000..386a0a64b2 --- /dev/null +++ b/maintenance/populateRevisionSha1.php @@ -0,0 +1,108 @@ +mDescription = "Populates the rev_sha1 and ar_sha1 fields"; + $this->setBatchSize( 200 ); + } + + protected function getUpdateKey() { + return 'populate rev_sha1'; + } + + protected function doDBUpdates() { + $db = $this->getDB( DB_MASTER ); + if ( !$db->tableExists( 'revision' ) ) { + $this->error( "revision table does not exist", true ); + } + if ( !$db->tableExists( 'archive' ) ) { + $this->error( "archive table does not exist", true ); + } + + 
$this->output( "Populating rev_sha1 column\n" ); + $rc = $this->doSha1Updates( $db, 'revision', 'rev_id', 'rev' ); + + $this->output( "Populating ar_sha1 column\n" ); + $ac = $this->doSha1Updates( $db, 'archive', 'ar_rev_id', 'ar' ); + + $this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" ); + return true; + } + + /** + * @return Integer Number of rows updated (0 when the table is empty) + */ + protected function doSha1Updates( $db, $table, $idCol, $prefix ) { + $start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ ); + $end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ ); + if ( !$start || !$end ) { + $this->output( "...$table table seems to be empty.\n" ); + return 0; + } + + $count = 0; + # Pad $end by one batch so the loop below also covers the final, partial chunk + $end += $this->mBatchSize - 1; + $blockStart = $start; + $blockEnd = $start + $this->mBatchSize - 1; + while ( $blockEnd <= $end ) { + $this->output( "...doing $idCol from $blockStart to $blockEnd\n" ); + $cond = "$idCol BETWEEN $blockStart AND $blockEnd + AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''"; + $res = $db->select( $table, '*', $cond, __METHOD__ ); + + $db->begin(); + foreach ( $res as $row ) { + if ( $table === 'archive' ) { + $rev = Revision::newFromArchiveRow( $row ); + } else { + $rev = new Revision( $row ); + } + $text = $rev->getRawText(); + if ( !is_string( $text ) ) { + # This should not happen, but sometimes does (bug 20757) + $this->output( "Text of revision {$row->$idCol} unavailable!\n" ); + } else { + $db->update( $table, + array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ), + array( $idCol => $row->$idCol ), + __METHOD__ ); + $count++; + } + } + $db->commit(); + + $blockStart += $this->mBatchSize; + $blockEnd += $this->mBatchSize; + wfWaitForSlaves(); + } + return $count; + } +} + +$maintClass = "PopulateRevisionSha1"; +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 4b469b4d37..f42b9a6ae8 100644 --- 
a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -311,7 +311,10 @@ CREATE TABLE /*_*/revision ( -- Key to revision.rev_id -- This field is used to add support for a tree structure (The Adjacency List Model) - rev_parent_id int unsigned default NULL + rev_parent_id int unsigned default NULL, + + -- SHA-1 text content hash in base-36 + rev_sha1 varbinary(32) NOT NULL default '' ) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024; -- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit @@ -418,7 +421,10 @@ CREATE TABLE /*_*/archive ( ar_page_id int unsigned, -- Original previous revision - ar_parent_id int unsigned default NULL + ar_parent_id int unsigned default NULL, + + -- SHA-1 text content hash in base-36 + ar_sha1 varbinary(32) NOT NULL default '' ) /*$wgDBTableOptions*/; CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp); -- 2.20.1