From 295b055ea33d97df9fe6a5323cb0f42ea59b3533 Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Tue, 28 Nov 2017 16:13:09 -0500 Subject: [PATCH] Migrate ar_text to modern storage This really should have been done a very long time ago. This adds a maintenance script to migrate rows with ar_text_id null to modern storage, either the text table or ExternalStore. Bug: T36925 Change-Id: I5608c6b6d3ecad516b785e13d668427c1b762e41 --- autoload.php | 1 + includes/Storage/SqlBlobStore.php | 5 + includes/installer/DatabaseUpdater.php | 11 ++ includes/installer/MssqlUpdater.php | 1 + includes/installer/MysqlUpdater.php | 1 + includes/installer/OracleUpdater.php | 1 + includes/installer/PostgresUpdater.php | 1 + includes/installer/SqliteUpdater.php | 3 +- maintenance/migrateArchiveText.php | 159 +++++++++++++++++++++++++ 9 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 maintenance/migrateArchiveText.php diff --git a/autoload.php b/autoload.php index 4a5089411e..5d6104cc04 100644 --- a/autoload.php +++ b/autoload.php @@ -1011,6 +1011,7 @@ $wgAutoloadLocalClasses = [ 'MessageContent' => __DIR__ . '/includes/content/MessageContent.php', 'MessageLocalizer' => __DIR__ . '/languages/MessageLocalizer.php', 'MessageSpecifier' => __DIR__ . '/includes/libs/MessageSpecifier.php', + 'MigrateArchiveText' => __DIR__ . '/maintenance/migrateArchiveText.php', 'MigrateComments' => __DIR__ . '/maintenance/migrateComments.php', 'MigrateFileRepoLayout' => __DIR__ . '/maintenance/migrateFileRepoLayout.php', 'MigrateUserGroup' => __DIR__ . 
'/maintenance/migrateUserGroup.php', diff --git a/includes/Storage/SqlBlobStore.php b/includes/Storage/SqlBlobStore.php index 69e1539ad1..5ddbd34ccb 100644 --- a/includes/Storage/SqlBlobStore.php +++ b/includes/Storage/SqlBlobStore.php @@ -466,6 +466,11 @@ class SqlBlobStore implements IDBAccessObject, BlobStore { return false; } + if ( in_array( 'error', $blobFlags ) ) { + // Error row, return false + return false; + } + if ( in_array( 'gzip', $blobFlags ) ) { # Deal with optional compression of archived pages. # This can be done periodically via maintenance/compressOld.php, and diff --git a/includes/installer/DatabaseUpdater.php b/includes/installer/DatabaseUpdater.php index 242f148818..176d0af7f5 100644 --- a/includes/installer/DatabaseUpdater.php +++ b/includes/installer/DatabaseUpdater.php @@ -1230,4 +1230,15 @@ abstract class DatabaseUpdater { } } + /** + * Migrate ar_text to modern storage + * @since 1.31 + */ + protected function migrateArchiveText() { + $this->output( "Migrating archive ar_text to modern storage.\n" ); + $task = $this->maintenance->runChild( 'MigrateArchiveText', 'migrateArchiveText.php' ); + $task->execute(); + $this->output( "done.\n" ); + } + } diff --git a/includes/installer/MssqlUpdater.php b/includes/installer/MssqlUpdater.php index cb7a6ba895..b4b34de106 100644 --- a/includes/installer/MssqlUpdater.php +++ b/includes/installer/MssqlUpdater.php @@ -111,6 +111,7 @@ class MssqlUpdater extends DatabaseUpdater { [ 'addTable', 'content', 'patch-content.sql' ], [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ], [ 'addTable', 'content_models', 'patch-content_models.sql' ], + [ 'migrateArchiveText' ], ]; } diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index bc7725e93f..a3caa07db3 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -335,6 +335,7 @@ class MysqlUpdater extends DatabaseUpdater { [ 'addTable', 'content', 'patch-content.sql' ], [ 'addTable', 
'slot_roles', 'patch-slot_roles.sql' ], [ 'addTable', 'content_models', 'patch-content_models.sql' ], + [ 'migrateArchiveText' ], ]; } diff --git a/includes/installer/OracleUpdater.php b/includes/installer/OracleUpdater.php index 67150ee7b1..ea684128c3 100644 --- a/includes/installer/OracleUpdater.php +++ b/includes/installer/OracleUpdater.php @@ -132,6 +132,7 @@ class OracleUpdater extends DatabaseUpdater { [ 'addTable', 'content', 'patch-content.sql' ], [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ], [ 'addTable', 'content_models', 'patch-content_models.sql' ], + [ 'migrateArchiveText' ], // KEEP THIS AT THE BOTTOM!! [ 'doRebuildDuplicateFunction' ], diff --git a/includes/installer/PostgresUpdater.php b/includes/installer/PostgresUpdater.php index fe8a1b1dbb..367d431dec 100644 --- a/includes/installer/PostgresUpdater.php +++ b/includes/installer/PostgresUpdater.php @@ -489,6 +489,7 @@ class PostgresUpdater extends DatabaseUpdater { [ 'addTable', 'content', 'patch-content-table.sql' ], [ 'addTable', 'content_models', 'patch-content_models-table.sql' ], [ 'addTable', 'slot_roles', 'patch-slot_roles-table.sql' ], + [ 'migrateArchiveText' ], ]; } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index 88dfa6cf4e..afb8b224b9 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -198,7 +198,8 @@ class SqliteUpdater extends DatabaseUpdater { [ 'addTable', 'content', 'patch-content.sql' ], [ 'addTable', 'content_models', 'patch-content_models.sql' ], [ 'addTable', 'slots', 'patch-slots.sql' ], - [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ] + [ 'addTable', 'slot_roles', 'patch-slot_roles.sql' ], + [ 'migrateArchiveText' ], ]; } diff --git a/maintenance/migrateArchiveText.php b/maintenance/migrateArchiveText.php new file mode 100644 index 0000000000..dd78a7dec8 --- /dev/null +++ b/maintenance/migrateArchiveText.php @@ -0,0 +1,159 @@ +addDescription( + 'Migrates content from 
pre-1.5 ar_text and ar_flags columns to modern storage' + ); + $this->addOption( + 'replace-missing', + "For rows with missing or unloadable data, throw away whatever is there and\n" + . "mark them as \"error\" in the database." + ); + } + + /** + * Sets whether a run of this maintenance script has the force parameter set + * @param bool $forced + */ + public function setForce( $forced = true ) { + $this->mOptions['force'] = $forced; + } + + protected function getUpdateKey() { + return __CLASS__; + } + + protected function doDBUpdates() { + global $wgDefaultExternalStore; + + $replaceMissing = $this->hasOption( 'replace-missing' ); + $batchSize = $this->getBatchSize(); + + $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] ); + $dbw = $this->getDB( DB_MASTER ); + if ( !$dbr->fieldExists( 'archive', 'ar_text', __METHOD__ ) || + !$dbw->fieldExists( 'archive', 'ar_text', __METHOD__ ) + ) { + $this->output( "No ar_text field, so nothing to migrate.\n" ); + return true; + } + + $this->output( "Migrating ar_text to modern storage...\n" ); + $last = 0; + $count = 0; + $errors = 0; + while ( true ) { + $res = $dbr->select( + 'archive', + [ 'ar_id', 'ar_text', 'ar_flags' ], + [ + 'ar_text_id' => null, + "ar_id > $last", + ], + __METHOD__, + [ 'LIMIT' => $batchSize, 'ORDER BY' => [ 'ar_id' ] ] + ); + $numRows = $res->numRows(); + + foreach ( $res as $row ) { + $last = $row->ar_id; + + // Recompress the text (and store in external storage, if + // applicable) if it's not already in external storage. 
+ if ( !in_array( 'external', explode( ',', $row->ar_flags ), true ) ) { + $data = Revision::getRevisionText( $row, 'ar_' ); + if ( $data !== false ) { + $flags = Revision::compressRevisionText( $data ); + + if ( $wgDefaultExternalStore ) { + $data = ExternalStore::insertToDefault( $data ); + if ( !$data ) { + throw new MWException( "Unable to store text to external storage" ); + } + if ( $flags ) { + $flags .= ','; + } + $flags .= 'external'; + } + } elseif ( $replaceMissing ) { + $this->error( "Replacing missing data for row ar_id=$row->ar_id" ); + $data = 'Missing data in migrateArchiveText.php on ' . date( 'c' ); + $flags = 'error'; + } else { + $this->error( "No data for row ar_id=$row->ar_id" ); + $errors++; + continue; + } + } else { + $flags = $row->ar_flags; + $data = $row->ar_text; + } + + $this->beginTransaction( $dbw, __METHOD__ ); + $dbw->insert( + 'text', + [ 'old_text' => $data, 'old_flags' => $flags ], + __METHOD__ + ); + $id = $dbw->insertId(); + $dbw->update( + 'archive', + [ 'ar_text_id' => $id, 'ar_text' => '', 'ar_flags' => '' ], + [ 'ar_id' => $row->ar_id, 'ar_text_id' => null ], + __METHOD__ + ); + $count += $dbw->affectedRows(); + $this->commitTransaction( $dbw, __METHOD__ ); + } + + if ( $numRows < $batchSize ) { + // We must have reached the end + break; + } + + $this->output( "... $last\n" ); + // $this->commitTransaction() already waited for slaves, no need to re-wait here. + } + + $this->output( "Completed ar_text migration, $count rows updated, $errors missing data.\n" ); + if ( $errors ) { + $this->output( "Run with --replace-missing to overwrite missing data with an error message.\n" ); + } + + return $errors === 0; + } +} + +$maintClass = "MigrateArchiveText"; +require_once RUN_MAINTENANCE_IF_MAIN; -- 2.20.1