From: Brian Wolff Date: Sun, 21 Feb 2016 21:59:14 +0000 (-0500) Subject: Add new index to make updateCollation.php painless X-Git-Tag: 1.31.0-rc.0~7544^2 X-Git-Url: http://git.cyclocoop.org/%22%20%20.%20generer_url_ecrire%28%22mots_tous%22%29%20.%20%22?a=commitdiff_plain;h=eec016ece6d2b30addcdf3d3efcc2ba59b10e858;p=lhc%2Fweb%2Fwiklou.git Add new index to make updateCollation.php painless We want to update categories in order, to minimize disruption to users. Previous indexes required a filesort to do this, which exploded things on large wikis. See bug for details Bug: T58041 Change-Id: Iee6cd997ff87a313a46fda19d8ab063d0fed8ce8 --- diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index 9a39b77d73..d414d9071f 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -182,7 +182,6 @@ class MysqlUpdater extends DatabaseUpdater { [ 'dropIndex', 'iwlinks', 'iwl_prefix', 'patch-kill-iwl_prefix.sql' ], [ 'addField', 'categorylinks', 'cl_collation', 'patch-categorylinks-better-collation.sql' ], [ 'doClFieldsUpdate' ], - [ 'doCollationUpdate' ], [ 'addTable', 'module_deps', 'patch-module_deps.sql' ], [ 'dropIndex', 'archive', 'ar_page_revid', 'patch-archive_kill_ar_page_revid.sql' ], [ 'addIndex', 'archive', 'ar_revid', 'patch-archive_ar_revid.sql' ], @@ -280,6 +279,10 @@ class MysqlUpdater extends DatabaseUpdater { [ 'dropTable', 'msg_resource' ], [ 'addTable', 'bot_passwords', 'patch-bot_passwords.sql' ], [ 'addField', 'watchlist', 'wl_id', 'patch-watchlist-wl_id.sql' ], + [ 'dropIndex', 'categorylinks', 'cl_collation', 'patch-kill-cl_collation_index.sql' ], + [ 'addIndex', 'categorylinks', 'cl_collation_ext', + 'patch-add-cl_collation_ext_index.sql' ], + [ 'doCollationUpdate' ], ]; } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index 99ab4e506c..86dccd74bd 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -65,7 +65,6 @@ class SqliteUpdater extends DatabaseUpdater { [ 'addField', 'interwiki', 'iw_api', 'patch-iw_api_and_wikiid.sql' ], [ 'dropIndex', 'iwlinks', 'iwl_prefix', 'patch-kill-iwl_prefix.sql' ], [ 'addField', 'categorylinks', 'cl_collation', 'patch-categorylinks-better-collation.sql' ], - [ 'doCollationUpdate' ], [ 'addTable', 'module_deps', 'patch-module_deps.sql' ], [ 'dropIndex', 'archive', 'ar_page_revid', 'patch-archive_kill_ar_page_revid.sql' ], [ 'addIndex', 'archive', 'ar_revid', 'patch-archive_ar_revid.sql' ], @@ -149,6 +148,10 @@ class SqliteUpdater extends DatabaseUpdater { [ 'dropTable', 'msg_resource' ], [ 'addTable', 'bot_passwords', 'patch-bot_passwords.sql' ], [ 'addField', 'watchlist', 'wl_id', 'patch-watchlist-wl_id.sql' ], + [ 'dropIndex', 'categorylinks', 'cl_collation', 'patch-kill-cl_collation_index.sql' ], + [ 'addIndex', 'categorylinks', 'cl_collation_ext', + 'patch-add-cl_collation_ext_index.sql' ], + [ 'doCollationUpdate' ], ]; } diff --git a/maintenance/archives/patch-add-cl_collation_ext_index.sql b/maintenance/archives/patch-add-cl_collation_ext_index.sql new file mode 100644 index 0000000000..8137dc6413 --- /dev/null +++ b/maintenance/archives/patch-add-cl_collation_ext_index.sql @@ -0,0 +1,2 @@ +-- @since 1.27 +CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from); diff --git a/maintenance/archives/patch-categorylinks-better-collation.sql b/maintenance/archives/patch-categorylinks-better-collation.sql index c1499c151c..f5ff1f1d1a 100644 --- a/maintenance/archives/patch-categorylinks-better-collation.sql +++ b/maintenance/archives/patch-categorylinks-better-collation.sql @@ -13,7 +13,7 @@ ALTER TABLE /*$wgDBprefix*/categorylinks ADD COLUMN cl_sortkey_prefix varchar(255) binary NOT NULL default '', ADD COLUMN cl_collation varbinary(32) NOT NULL default '', ADD COLUMN cl_type ENUM('page', 'subcat', 'file') NOT NULL default 'page', - ADD INDEX (cl_collation), +-- rm'd in 1.27 ADD INDEX (cl_collation), DROP INDEX cl_sortkey, ADD INDEX cl_sortkey (cl_to, cl_type, cl_sortkey, cl_from); INSERT IGNORE INTO /*$wgDBprefix*/updatelog (ul_key) VALUES ('cl_fields_update'); diff --git a/maintenance/archives/patch-kill-cl_collation_index.sql b/maintenance/archives/patch-kill-cl_collation_index.sql new file mode 100644 index 0000000000..7f75a62329 --- /dev/null +++ b/maintenance/archives/patch-kill-cl_collation_index.sql @@ -0,0 +1,7 @@ +-- +-- Kill cl_collation index. +-- @since 1.27 +-- + +DROP INDEX /*i*/cl_collation ON /*_*/categorylinks; + diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 7942687579..89aeb9c91d 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -619,8 +619,8 @@ CREATE INDEX /*i*/cl_sortkey ON /*_*/categorylinks (cl_to,cl_type,cl_sortkey,cl_ -- Used by the API (and some extensions) CREATE INDEX /*i*/cl_timestamp ON /*_*/categorylinks (cl_to,cl_timestamp); --- FIXME: Not used, delete this -CREATE INDEX /*i*/cl_collation ON /*_*/categorylinks (cl_collation); +-- Used when updating collation (e.g. updateCollation.php) +CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from); -- -- Track all existing categories. Something is a category if 1) it has an en- diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index af666b06cf..186feb222f 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -33,7 +33,7 @@ require_once __DIR__ . '/Maintenance.php'; * @ingroup Maintenance */ class UpdateCollation extends Maintenance { - const BATCH_SIZE = 10000; // Number of rows to process in one batch + const BATCH_SIZE = 100; // Number of rows to process in one batch const SYNC_INTERVAL = 20; // Wait for slaves after this many batches public $sizeHistogram = []; @@ -85,10 +85,18 @@ TEXT // but this will raise an exception, breaking all category pages $collation->getFirstLetter( 'MediaWiki' ); + // Locally at least, (my local is a rather old version of mysql) + // mysql seems to filesort if there is both an equality + // (but not for an inequality) condition on cl_collation in the + // WHERE and it is also the first item in the ORDER BY. + if ( $this->hasOption( 'previous-collation' ) ) { + $orderBy = 'cl_to, cl_type, cl_from'; + } else { + $orderBy = 'cl_collation, cl_to, cl_type, cl_from'; + } $options = [ 'LIMIT' => self::BATCH_SIZE, - 'ORDER BY' => 'cl_from, cl_to', - 'STRAIGHT_JOIN', + 'ORDER BY' => $orderBy, ]; if ( $force || $dryRun ) { @@ -124,16 +132,24 @@ TEXT } $this->output( "Fixing collation for $count rows.\n" ); } - $count = 0; $batchCount = 0; $batchConds = []; do { $this->output( "Selecting next " . self::BATCH_SIZE . " rows..." ); + + // cl_type must be selected as a number for proper paging because + // enums suck. + if ( $dbw->getType() === 'mysql' ) { + $clType = 'cl_type+0 AS "cl_type_numeric"'; + } else { + $clType = 'cl_type'; + } $res = $dbw->select( [ 'categorylinks', 'page' ], [ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', - 'cl_sortkey', 'page_namespace', 'page_title' + 'cl_sortkey', $clType, + 'page_namespace', 'page_title' ], array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ), __METHOD__, @@ -217,18 +233,28 @@ TEXT /** * Return an SQL expression selecting rows which sort above the given row, - * assuming an ordering of cl_from, cl_to + * assuming an ordering of cl_collation, cl_to, cl_type, cl_from * @param stdClass $row * @param DatabaseBase $dbw * @return string */ function getBatchCondition( $row, $dbw ) { - $fields = [ 'cl_from', 'cl_to' ]; + if ( $this->hasOption( 'previous-collation' ) ) { + $fields = [ 'cl_to', 'cl_type', 'cl_from' ]; + } else { + $fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ]; + } $first = true; $cond = false; $prefix = false; foreach ( $fields as $field ) { - $encValue = $dbw->addQuotes( $row->$field ); + if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) { + // Range conditions with enums are weird in mysql + // This must be a numeric literal, or it won't work. + $encValue = intval( $row->cl_type_numeric ); + } else { + $encValue = $dbw->addQuotes( $row->$field ); + } $inequality = "$field > $encValue"; $equality = "$field = $encValue"; if ( $first ) {