From f78faf456695531f7512f98355ef8c499176570a Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Thu, 4 Jan 2018 14:35:13 -0500 Subject: [PATCH] Populate ar_rev_id and make it non-nullable Revisions deleted before MediaWiki 1.5 do not have a value in this field. This is going to be a problem for migration to the MCR schema, so provide a maintenance script to clean this up. Then, for good measure, change the schema to make the field non-nullable. Bug: T182678 Change-Id: Ie2e11f12a30f379db32c3e074658012c6f93adb0 --- autoload.php | 1 + includes/installer/DatabaseUpdater.php | 19 ++ includes/installer/MssqlUpdater.php | 1 + includes/installer/MysqlUpdater.php | 1 + includes/installer/OracleUpdater.php | 1 + includes/installer/PostgresUpdater.php | 1 + includes/installer/SqliteUpdater.php | 1 + .../archives/patch-ar_rev_id-not-null.sql | 3 + .../archives/patch-ar_rev_id-not-null.sql | 1 + maintenance/mssql/tables.sql | 2 +- .../archives/patch-ar_rev_id-not-null.sql | 5 + maintenance/oracle/tables.sql | 2 +- maintenance/populateArchiveRevId.php | 177 ++++++++++++++++++ .../archives/patch-ar_rev_id-not-null.sql | 3 + maintenance/postgres/tables.sql | 2 +- .../archives/patch-ar_rev_id-not-null.sql | 49 +++++ maintenance/tables.sql | 2 +- 17 files changed, 267 insertions(+), 4 deletions(-) create mode 100644 maintenance/archives/patch-ar_rev_id-not-null.sql create mode 100644 maintenance/mssql/archives/patch-ar_rev_id-not-null.sql create mode 100644 maintenance/oracle/archives/patch-ar_rev_id-not-null.sql create mode 100644 maintenance/populateArchiveRevId.php create mode 100644 maintenance/postgres/archives/patch-ar_rev_id-not-null.sql create mode 100644 maintenance/sqlite/archives/patch-ar_rev_id-not-null.sql diff --git a/autoload.php b/autoload.php index 126362c2e6..15c9ed281d 100644 --- a/autoload.php +++ b/autoload.php @@ -1159,6 +1159,7 @@ $wgAutoloadLocalClasses = [ 'PoolCounterWorkViaCallback' => __DIR__ . '/includes/poolcounter/PoolCounterWorkViaCallback.php', 'PoolCounter_Stub' => __DIR__ . '/includes/poolcounter/PoolCounter.php', 'PoolWorkArticleView' => __DIR__ . '/includes/poolcounter/PoolWorkArticleView.php', + 'PopulateArchiveRevId' => __DIR__ . '/maintenance/populateArchiveRevId.php', 'PopulateBacklinkNamespace' => __DIR__ . '/maintenance/populateBacklinkNamespace.php', 'PopulateCategory' => __DIR__ . '/maintenance/populateCategory.php', 'PopulateContentModel' => __DIR__ . '/maintenance/populateContentModel.php', diff --git a/includes/installer/DatabaseUpdater.php b/includes/installer/DatabaseUpdater.php index 0d554544ac..7a1aba636c 100644 --- a/includes/installer/DatabaseUpdater.php +++ b/includes/installer/DatabaseUpdater.php @@ -1263,4 +1263,23 @@ abstract class DatabaseUpdater { $this->output( "done.\n" ); } + /** + * Populate ar_rev_id, then make it not nullable + * @since 1.31 + */ + protected function populateArchiveRevId() { + $info = $this->db->fieldInfo( 'archive', 'ar_rev_id', __METHOD__ ); + if ( !$info ) { + throw new MWException( 'Missing ar_rev_id field of archive table. Should not happen.' ); + } + if ( $info->isNullable() ) { + $this->output( "Populating ar_rev_id.\n" ); + $task = $this->maintenance->runChild( 'PopulateArchiveRevId', 'populateArchiveRevId.php' ); + if ( $task->execute() ) { + $this->applyPatch( 'patch-ar_rev_id-not-null.sql', false, + 'Making ar_rev_id not nullable' ); + } + } + } + } diff --git a/includes/installer/MssqlUpdater.php b/includes/installer/MssqlUpdater.php index 694cd294c5..3a965d4618 100644 --- a/includes/installer/MssqlUpdater.php +++ b/includes/installer/MssqlUpdater.php @@ -121,6 +121,7 @@ class MssqlUpdater extends DatabaseUpdater { [ 'migrateActors' ], [ 'modifyField', 'revision', 'rev_text_id', 'patch-rev_text_id-default.sql' ], [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ], + [ 'populateArchiveRevId' ], ]; } diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index 9427596565..caa9a67682 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -341,6 +341,7 @@ class MysqlUpdater extends DatabaseUpdater { [ 'migrateActors' ], [ 'modifyField', 'revision', 'rev_text_id', 'patch-rev_text_id-default.sql' ], [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ], + [ 'populateArchiveRevId' ], ]; } diff --git a/includes/installer/OracleUpdater.php b/includes/installer/OracleUpdater.php index cb0399c6c0..6a1d220acf 100644 --- a/includes/installer/OracleUpdater.php +++ b/includes/installer/OracleUpdater.php @@ -141,6 +141,7 @@ class OracleUpdater extends DatabaseUpdater { [ 'addTable', 'actor', 'patch-actor-table.sql' ], [ 'migrateActors' ], [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ], + [ 'populateArchiveRevId' ], // KEEP THIS AT THE BOTTOM!! [ 'doRebuildDuplicateFunction' ], diff --git a/includes/installer/PostgresUpdater.php b/includes/installer/PostgresUpdater.php index c9a50863fe..45bfa22bdb 100644 --- a/includes/installer/PostgresUpdater.php +++ b/includes/installer/PostgresUpdater.php @@ -534,6 +534,7 @@ class PostgresUpdater extends DatabaseUpdater { [ 'addPgIndex', 'logging', 'logging_actor_time', '( log_actor, log_timestamp )' ], [ 'migrateActors' ], [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ], + [ 'populateArchiveRevId' ], ]; } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index 7a2cc0d803..09a52bb67a 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -205,6 +205,7 @@ class SqliteUpdater extends DatabaseUpdater { [ 'migrateActors' ], [ 'modifyField', 'revision', 'rev_text_id', 'patch-rev_text_id-default.sql' ], [ 'modifyTable', 'site_stats', 'patch-site_stats-modify.sql' ], + [ 'populateArchiveRevId' ], ]; } diff --git a/maintenance/archives/patch-ar_rev_id-not-null.sql b/maintenance/archives/patch-ar_rev_id-not-null.sql new file mode 100644 index 0000000000..8418f20db4 --- /dev/null +++ b/maintenance/archives/patch-ar_rev_id-not-null.sql @@ -0,0 +1,3 @@ +-- T182678: Make ar_rev_id not nullable +ALTER TABLE /*_*/archive + CHANGE COLUMN ar_rev_id ar_rev_id int unsigned NOT NULL; diff --git a/maintenance/mssql/archives/patch-ar_rev_id-not-null.sql b/maintenance/mssql/archives/patch-ar_rev_id-not-null.sql new file mode 100644 index 0000000000..d287f49c5c --- /dev/null +++ b/maintenance/mssql/archives/patch-ar_rev_id-not-null.sql @@ -0,0 +1 @@ +ALTER TABLE /*_*/archive ALTER COLUMN ar_rev_id INT NOT NULL; diff --git a/maintenance/mssql/tables.sql b/maintenance/mssql/tables.sql index 977666d47b..50a5b9fe72 100644 --- a/maintenance/mssql/tables.sql +++ b/maintenance/mssql/tables.sql @@ -279,7 +279,7 @@ CREATE TABLE /*_*/archive ( ar_timestamp varchar(14) NOT NULL default '', ar_minor_edit BIT NOT NULL DEFAULT 0, ar_flags NVARCHAR(255) NOT NULL, - ar_rev_id INT NULL, -- NOT a FK, the row gets deleted from revision and moved here + ar_rev_id INT NOT NULL, -- NOT a FK, the row gets deleted from revision and moved here ar_text_id INT CONSTRAINT ar_text_id__old_id__fk FOREIGN KEY REFERENCES /*_*/text(old_id) ON DELETE CASCADE, ar_deleted TINYINT NOT NULL DEFAULT 0, ar_len INT, diff --git a/maintenance/oracle/archives/patch-ar_rev_id-not-null.sql b/maintenance/oracle/archives/patch-ar_rev_id-not-null.sql new file mode 100644 index 0000000000..56f159885d --- /dev/null +++ b/maintenance/oracle/archives/patch-ar_rev_id-not-null.sql @@ -0,0 +1,5 @@ +-- T182678: Make ar_rev_id not nullable + +define mw_prefix='{$wgDBprefix}'; + +ALTER TABLE &mw_prefix.archive MODIFY ar_rev_id NUMBER NOT NULL; diff --git a/maintenance/oracle/tables.sql b/maintenance/oracle/tables.sql index 8551fb9975..70a474339f 100644 --- a/maintenance/oracle/tables.sql +++ b/maintenance/oracle/tables.sql @@ -257,7 +257,7 @@ CREATE TABLE &mw_prefix.archive ( ar_timestamp TIMESTAMP(6) WITH TIME ZONE NOT NULL, ar_minor_edit CHAR(1) DEFAULT '0' NOT NULL, ar_flags VARCHAR2(255), - ar_rev_id NUMBER, + ar_rev_id NUMBER NOT NULL, ar_text_id NUMBER, ar_deleted CHAR(1) DEFAULT '0' NOT NULL, ar_len NUMBER, diff --git a/maintenance/populateArchiveRevId.php b/maintenance/populateArchiveRevId.php new file mode 100644 index 0000000000..b8b9e688e3 --- /dev/null +++ b/maintenance/populateArchiveRevId.php @@ -0,0 +1,177 @@ +addDescription( 'Populate ar_rev_id in pre-1.5 rows' ); + $this->setBatchSize( 100 ); + } + + protected function getUpdateKey() { + return __CLASS__; + } + + protected function doDBUpdates() { + $this->output( "Populating ar_rev_id...\n" ); + $dbw = $this->getDB( DB_MASTER ); + + // Quick exit if there are no rows needing updates. + $any = $dbw->selectField( + 'archive', + 'ar_id', + [ 'ar_rev_id' => null ], + __METHOD__ + ); + if ( !$any ) { + $this->output( "Completed ar_rev_id population, 0 rows updated.\n" ); + return true; + } + + $rev = $this->makeDummyRevisionRow( $dbw ); + $count = 0; + while ( true ) { + wfWaitForSlaves(); + + $arIds = $dbw->selectFieldValues( + 'archive', + 'ar_id', + [ 'ar_rev_id' => null ], + __METHOD__, + [ 'LIMIT' => $this->getBatchSize(), 'ORDER BY' => [ 'ar_id' ] ] + ); + if ( !$arIds ) { + $this->output( "Completed ar_rev_id population, $count rows updated.\n" ); + return true; + } + + try { + $updates = $dbw->doAtomicSection( __METHOD__, function ( $dbw, $fname ) use ( $arIds, $rev ) { + // Create new rev_ids by inserting dummy rows into revision and then deleting them. + $dbw->insert( 'revision', array_fill( 0, count( $arIds ), $rev ), $fname ); + $revIds = $dbw->selectFieldValues( + 'revision', + 'rev_id', + [ 'rev_timestamp' => $rev['rev_timestamp'] ], + $fname + ); + if ( !is_array( $revIds ) ) { + throw new UnexpectedValueException( 'Failed to insert dummy revisions' ); + } + if ( count( $revIds ) !== count( $arIds ) ) { + throw new UnexpectedValueException( + 'Tried to insert ' . count( $arIds ) . ' dummy revisions, but found ' + . count( $revIds ) . ' matching rows.' + ); + } + $dbw->delete( 'revision', [ 'rev_id' => $revIds ], $fname ); + + return array_combine( $arIds, $revIds ); + } ); + } catch ( UnexpectedValueException $ex ) { + $this->fatalError( $ex->getMessage() ); + } + + foreach ( $updates as $arId => $revId ) { + $dbw->update( + 'archive', + [ 'ar_rev_id' => $revId ], + [ 'ar_id' => $arId, 'ar_rev_id' => null ], + __METHOD__ + ); + $count += $dbw->affectedRows(); + } + + $min = min( array_keys( $updates ) ); + $max = max( array_keys( $updates ) ); + $this->output( " ... $min-$max\n" ); + } + } + + /** + * Construct a dummy revision table row to use for reserving IDs + * + * The row will have a wildly unlikely timestamp, and possibly a generic + * user and comment, but will otherwise be derived from a revision on the + * wiki's main page. + * + * @param IDatabase $dbw + * @return array + */ + private function makeDummyRevisionRow( IDatabase $dbw ) { + $ts = $dbw->timestamp( '11111111111111' ); + $mainPage = Title::newMainPage(); + if ( !$mainPage ) { + $this->fatalError( 'Main page does not exist' ); + } + $pageId = $mainPage->getArticleId(); + if ( !$pageId ) { + $this->fatalError( $mainPage->getPrefixedText() . ' has no ID' ); + } + $rev = $dbw->selectRow( + 'revision', + '*', + [ 'rev_page' => $pageId ], + __METHOD__, + [ 'ORDER BY' => 'rev_timestamp ASC' ] + ); + if ( !$rev ) { + $this->fatalError( $mainPage->getPrefixedText() . ' has no revisions' ); + } + unset( $rev->rev_id ); + $rev = (array)$rev; + $rev['rev_timestamp'] = $ts; + if ( isset( $rev['rev_user'] ) ) { + $rev['rev_user'] = 0; + $rev['rev_user_text'] = '0.0.0.0'; + } + if ( isset( $rev['rev_comment'] ) ) { + $rev['rev_comment'] = 'Dummy row'; + } + + $any = $dbw->selectField( + 'revision', + 'rev_id', + [ 'rev_timestamp' => $ts ], + __METHOD__ + ); + if ( $any ) { + $this->fatalError( "... Why does your database contain a revision dated $ts?" ); + } + + return $rev; + } +} + +$maintClass = "PopulateArchiveRevId"; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/postgres/archives/patch-ar_rev_id-not-null.sql b/maintenance/postgres/archives/patch-ar_rev_id-not-null.sql new file mode 100644 index 0000000000..1a090e9c87 --- /dev/null +++ b/maintenance/postgres/archives/patch-ar_rev_id-not-null.sql @@ -0,0 +1,3 @@ +-- T182678: Make ar_rev_id not nullable +ALTER TABLE archive + ALTER COLUMN ar_rev_id SET NOT NULL; diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql index 9a5420a14b..bf89aede3e 100644 --- a/maintenance/postgres/tables.sql +++ b/maintenance/postgres/tables.sql @@ -248,7 +248,7 @@ CREATE TABLE archive ( ar_timestamp TIMESTAMPTZ NOT NULL, ar_minor_edit SMALLINT NOT NULL DEFAULT 0, ar_flags TEXT, - ar_rev_id INTEGER, + ar_rev_id INTEGER NOT NULL, ar_text_id INTEGER, ar_deleted SMALLINT NOT NULL DEFAULT 0, ar_len INTEGER NULL, diff --git a/maintenance/sqlite/archives/patch-ar_rev_id-not-null.sql b/maintenance/sqlite/archives/patch-ar_rev_id-not-null.sql new file mode 100644 index 0000000000..3df205feab --- /dev/null +++ b/maintenance/sqlite/archives/patch-ar_rev_id-not-null.sql @@ -0,0 +1,49 @@ +-- T182678: Make ar_rev_id not nullable + +BEGIN; + +DROP TABLE IF EXISTS /*_*/archive_tmp; +CREATE TABLE /*_*/archive_tmp ( + ar_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, + ar_namespace int NOT NULL default 0, + ar_title varchar(255) binary NOT NULL default '', + ar_text mediumblob NOT NULL, + ar_comment varbinary(767) NOT NULL default '', + ar_comment_id bigint unsigned NOT NULL DEFAULT 0, + ar_user int unsigned NOT NULL default 0, + ar_user_text varchar(255) binary NOT NULL DEFAULT '', + ar_actor bigint unsigned NOT NULL DEFAULT 0, + ar_timestamp binary(14) NOT NULL default '', + ar_minor_edit tinyint NOT NULL default 0, + ar_flags tinyblob NOT NULL, + ar_rev_id int unsigned NOT NULL, + ar_text_id int unsigned, + ar_deleted tinyint unsigned NOT NULL default 0, + ar_len int unsigned, + ar_page_id int unsigned, + ar_parent_id int unsigned default NULL, + ar_sha1 varbinary(32) NOT NULL default '', + ar_content_model varbinary(32) DEFAULT NULL, + ar_content_format varbinary(64) DEFAULT NULL +) /*$wgDBTableOptions*/; + +INSERT OR IGNORE INTO /*_*/archive_tmp ( + ar_id, ar_namespace, ar_title, ar_text, ar_comment, ar_comment_id, ar_user, + ar_user_text, ar_actor, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id, + ar_text_id, ar_deleted, ar_len, ar_page_id, ar_parent_id, ar_sha1, + ar_content_model, ar_content_format) + SELECT + ar_id, ar_namespace, ar_title, ar_text, ar_comment, ar_comment_id, ar_user, + ar_user_text, ar_actor, ar_timestamp, ar_minor_edit, ar_flags, ar_rev_id, + ar_text_id, ar_deleted, ar_len, ar_page_id, ar_parent_id, ar_sha1, + ar_content_model, ar_content_format + FROM /*_*/archive; + +DROP TABLE /*_*/archive; +ALTER TABLE /*_*/archive_tmp RENAME TO /*_*/archive; +CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp); +CREATE INDEX /*i*/ar_usertext_timestamp ON /*_*/archive (ar_user_text,ar_timestamp); +CREATE INDEX /*i*/ar_actor_timestamp ON /*_*/archive (ar_actor,ar_timestamp); +CREATE INDEX /*i*/ar_revid ON /*_*/archive (ar_rev_id); + +COMMIT; diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 0c73f298ec..bd1b6288bf 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -618,7 +618,7 @@ CREATE TABLE /*_*/archive ( -- -- @since 1.5 Entries from 1.4 will be NULL here. When restoring -- archive rows from before 1.5, a new rev_id is created. - ar_rev_id int unsigned, + ar_rev_id int unsigned NOT NULL, -- Copied from rev_text_id, references text.old_id. -- To avoid breaking the block-compression scheme and otherwise making -- 2.20.1