From dcd5d260d40547e111c9d678d7d146182d80dd1a Mon Sep 17 00:00:00 2001 From: Aryeh Gregor Date: Fri, 3 Sep 2010 20:52:08 +0000 Subject: [PATCH] Further categorylinks schema changes Per review by Tim, I made two changes: 1) Fix cl_sortkey to be varbinary(255). 2) Expand cl_collation to varbinary(32), and change $wgCollationVersion to $wgCategoryCollation, to account for the variety of collations we might have. tinyint is too small. I could have gone with int, but that's annoyingly inscrutable in practice, as we all know from namespace fields. To make the upgrade easier for non-trunk users, I updated the old patch file to incorporate the new changes, using the updatelog table so that people upgrading from 1.16 won't have to do two alters on categorylinks. I didn't test the upgrade-from-1.16 code path yet, so if anyone tests that and it seems not to break, commenting to that effect would be appreciated. Also removed wfDeprecated() from archive(). Do *not* add this to functions that are still actively used in core. If you think this function is so terrible that it really mustn't be used, remove callers yourself, don't pester every single developer with messages in the hope that someone else will do it for you. --- includes/DefaultSettings.php | 7 +++++-- includes/LinksUpdate.php | 4 ++-- includes/installer/MysqlUpdater.php | 1 + .../patch-categorylinks-better-collation.sql | 8 ++++++-- .../patch-categorylinks-better-collation2.sql | 12 ++++++++++++ maintenance/tables.sql | 15 +++++---------- maintenance/updateCollation.php | 12 ++++++------ maintenance/updaters.inc | 16 +++++++++++++--- 8 files changed, 50 insertions(+), 25 deletions(-) create mode 100644 maintenance/archives/patch-categorylinks-better-collation2.sql diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 333b3a5bd5..aa38e90cda 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -4449,10 +4449,13 @@ $wgCategoryPagingLimit = 200; /** * A version indicator for collations that will be stored in cl_collation for * all new rows. Used when the collation algorithm changes: a script checks - * for all rows where cl_collation != $wgCollationVersion and regenerates + * for all rows where cl_collation != $wgCategoryCollation and regenerates * cl_sortkey based on the page name and cl_sortkey_prefix. + * + * Currently only supports 'uppercase', which just uppercases the string. This + * is a dummy collation, to be replaced later by real ones. */ -$wgCollationVersion = 1; +$wgCategoryCollation = 'uppercase'; /** @} */ # End categories } diff --git a/includes/LinksUpdate.php b/includes/LinksUpdate.php index 7092d3dc4d..4892ff02bb 100644 --- a/includes/LinksUpdate.php +++ b/includes/LinksUpdate.php @@ -426,7 +426,7 @@ class LinksUpdate { * @private */ function getCategoryInsertions( $existing = array() ) { - global $wgContLang, $wgCollationVersion; + global $wgContLang, $wgCategoryCollation; $diffs = array_diff_assoc( $this->mCategories, $existing ); $arr = array(); foreach ( $diffs as $name => $sortkey ) { @@ -465,7 +465,7 @@ class LinksUpdate { 'cl_sortkey' => $sortkey, 'cl_timestamp' => $this->mDb->timestamp(), 'cl_sortkey_prefix' => $prefix, - 'cl_collation' => $wgCollationVersion, + 'cl_collation' => $wgCategoryCollation, 'cl_type' => $type, ); } diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index 7d76860d71..ecd1242bab 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -165,6 +165,7 @@ class MysqlUpdater extends DatabaseUpdater { array( 'drop_index_if_exists', 'iwlinks', 'iwl_prefix', 'patch-kill-iwl_prefix.sql' ), array( 'drop_index_if_exists', 'iwlinks', 'iwl_prefix_from_title', 'patch-kill-iwl_pft.sql' ), array( 'addField', 'categorylinks', 'cl_collation', 'patch-categorylinks-better-collation.sql' ), + array( 'do_cl_fields_update' ), array( 'do_collation_update' ), ); } diff --git a/maintenance/archives/patch-categorylinks-better-collation.sql b/maintenance/archives/patch-categorylinks-better-collation.sql index 844ad9c2ac..7c711a3d79 100644 --- a/maintenance/archives/patch-categorylinks-better-collation.sql +++ b/maintenance/archives/patch-categorylinks-better-collation.sql @@ -1,11 +1,15 @@ -- -- patch-categorylinks-better-collation.sql -- --- Bugs 164, 1211, 23682. +-- Bugs 164, 1211, 23682. This is the second version of this patch; the +-- changes are also incorporated into patch-categorylinks-better-collation2.sql, +-- for the benefit of trunk users who applied the original. ALTER TABLE /*$wgDBprefix*/categorylinks + CHANGE COLUMN cl_sortkey cl_sortkey varbinary(255) NOT NULL default '', ADD COLUMN cl_sortkey_prefix varchar(255) binary NOT NULL default '', - ADD COLUMN cl_collation tinyint NOT NULL default 0, + ADD COLUMN cl_collation varbinary(32) NOT NULL default '', ADD COLUMN cl_type ENUM('page', 'subcat', 'file') NOT NULL default 'page', ADD INDEX (cl_collation), DROP INDEX cl_sortkey, ADD INDEX cl_sortkey (cl_to, cl_type, cl_sortkey, cl_from); +INSERT IGNORE INTO /*$wgDBprefix*/updatelog (ul_key) VALUES ('cl_fields_update'); diff --git a/maintenance/archives/patch-categorylinks-better-collation2.sql b/maintenance/archives/patch-categorylinks-better-collation2.sql new file mode 100644 index 0000000000..7515c5615f --- /dev/null +++ b/maintenance/archives/patch-categorylinks-better-collation2.sql @@ -0,0 +1,12 @@ +-- +-- patch-categorylinks-better-collation2.sql +-- +-- Bugs 164, 1211, 23682. This patch exists for trunk users who already +-- applied the first patch in its original version. The first patch was +-- updated to incorporate the changes as well, so as not to do two alters on a +-- large table unnecessarily for people upgrading from 1.16, so this will be +-- skipped if unneeded. +ALTER TABLE /*$wgDBprefix*/categorylinks + CHANGE COLUMN cl_sortkey cl_sortkey varbinary(255) NOT NULL default '', + CHANGE COLUMN cl_collation cl_collation varbinary(32) NOT NULL default ''; +INSERT IGNORE INTO /*$wgDBprefix*/updatelog (ul_key) VALUES ('cl_fields_update'); diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 6f303d3ad0..dcb776d0d6 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -493,12 +493,7 @@ CREATE TABLE /*_*/categorylinks ( -- A binary string obtained by applying a sortkey generation algorithm -- (Language::convertToSortkey()) to page_title, or cl_sortkey_prefix . "\0" -- . page_title if cl_sortkey_prefix is nonempty. - -- - -- Truncate so that the cl_sortkey key fits in 1000 bytes (MyISAM 5 with - -- server_character_set=utf8). FIXME: this truncation probably makes no - -- sense anymore; we should be using varbinary for this, utf8 will break - -- everything. - cl_sortkey varchar(70) binary NOT NULL default '', + cl_sortkey varbinary(255) NOT NULL default '', -- A prefix for the raw sortkey manually specified by the user, either via -- [[Category:Foo|prefix]] or {{defaultsort:prefix}}. If nonempty, it's @@ -511,12 +506,12 @@ CREATE TABLE /*_*/categorylinks ( -- sorting method by approximate addition time. cl_timestamp timestamp NOT NULL, - -- Stores $wgCollationVersion at the time cl_sortkey was generated. This can - -- be used to install new collation versions, tracking which rows are not yet - -- updated. 0 means no collation, this is a legacy row that needs to be + -- Stores $wgCategoryCollation at the time cl_sortkey was generated. This + -- can be used to install new collation versions, tracking which rows are not + -- yet updated. '' means no collation, this is a legacy row that needs to be -- updated by updateCollation.php. In the future, it might be possible to -- specify different collations per category. - cl_collation tinyint NOT NULL default 0, + cl_collation varbinary(32) NOT NULL default '', -- Stores whether cl_from is a category, file, or other page, so we can -- paginate the three categories separately. This never has to be updated diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index 52b31be701..7b91660d27 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -15,10 +15,10 @@ class UpdateCollation extends Maintenance { public function __construct() { parent::__construct(); - global $wgCollationVersion; + global $wgCategoryCollation; $this->mDescription = <<selectField( 'categorylinks', 'COUNT(*)', - 'cl_collation != ' . $dbw->addQuotes( $wgCollationVersion ), + 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation ), __METHOD__ ); @@ -51,7 +51,7 @@ TEXT; 'cl_sortkey', 'page_namespace', 'page_title' ), array( - 'cl_collation != ' . $dbw->addQuotes( $wgCollationVersion ), + 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation ), 'cl_from = page_id' ), __METHOD__, @@ -89,7 +89,7 @@ TEXT; 'cl_sortkey' => $wgContLang->convertToSortkey( $title->getCategorySortkey( $prefix ) ), 'cl_sortkey_prefix' => $prefix, - 'cl_collation' => $wgCollationVersion, + 'cl_collation' => $wgCategoryCollation, 'cl_type' => $type, 'cl_timestamp = cl_timestamp', ), diff --git a/maintenance/updaters.inc b/maintenance/updaters.inc index 856c3e7bb1..ab990403d4 100644 --- a/maintenance/updaters.inc +++ b/maintenance/updaters.inc @@ -103,7 +103,6 @@ function do_all_updates( $shared = false, $purge = true ) { } function archive( $name ) { - wfDeprecated( __FUNCTION__ ); return DatabaseBase::patchPath( $name ); } @@ -833,12 +832,23 @@ function do_populate_rev_len() { $task->execute(); } +function do_cl_fields_update() { + if ( update_row_exists( 'cl_fields_update' ) ) { + wfOut( "...categorylinks up-to-date.\n" ); + return; + } + wfOut( 'Updating categorylinks (again)...' ); + global $wgDatabase; + $wgDatabase->sourceFile( archive( 'patch-categorylinks-better-collation2.sql' ) ); + wfOut( "done.\n" ); +} + function do_collation_update() { - global $wgDatabase, $wgCollationVersion; + global $wgDatabase, $wgCategoryCollation; if ( $wgDatabase->selectField( 'categorylinks', 'COUNT(*)', - 'cl_collation != ' . $wgDatabase->addQuotes( $wgCollationVersion ), + 'cl_collation != ' . $wgDatabase->addQuotes( $wgCategoryCollation ), __FUNCTION__ ) == 0 ) { wfOut( "...collations up-to-date.\n" ); -- 2.20.1