From 2ffa5e4876a731fb7fd85cbfac430df31057761c Mon Sep 17 00:00:00 2001 From: Aryeh Gregor Date: Mon, 26 Jul 2010 22:04:19 +0000 Subject: [PATCH] Fix bug in prefixing scheme As Bawolff pointed out at [[mw:User talk:Simetrical/Collation]], the prefixing scheme I was using meant that the page "Z" with sort key of "F" would sort after a page named "A" with a sort key of "FF", since the first one's raw sort key would compute to "FZ", and the second's would compute to "FFA". I've fixed this by separating the prefix from the unprefixed part by a null byte (cl_sortkey is eventually going to be totally binary anyway, may as well start now). --- includes/CategoryPage.php | 2 +- includes/LinksUpdate.php | 2 +- includes/Title.php | 23 ++++++++++++++++------- languages/Language.php | 11 +++++++---- maintenance/updateCollation.php | 6 +++--- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/includes/CategoryPage.php b/includes/CategoryPage.php index 0897fbfb5c..41f4969870 100644 --- a/includes/CategoryPage.php +++ b/includes/CategoryPage.php @@ -312,7 +312,7 @@ class CategoryViewer { $count = 0; foreach ( $res as $row ) { $title = Title::newFromRow( $row ); - $rawSortkey = $row->cl_sortkey_prefix . $title->getCategorySortkey(); + $rawSortkey = $title->getCategorySortkey( $row->cl_sortkey_prefix ); if ( ++$count > $this->limit ) { # We've reached the one extra which shows that there diff --git a/includes/LinksUpdate.php b/includes/LinksUpdate.php index fd62cff649..b7ad29cd95 100644 --- a/includes/LinksUpdate.php +++ b/includes/LinksUpdate.php @@ -457,7 +457,7 @@ class LinksUpdate { # order or such. $prefix = $sortkey; $sortkey = $wgContLang->convertToSortkey( - $prefix . 
$this->mTitle->getCategorySortkey() ); + $this->mTitle->getCategorySortkey( $prefix ) ); } $arr[] = array( diff --git a/includes/Title.php b/includes/Title.php index aa0ec5f3d3..b3b7bce6ff 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -4139,20 +4139,29 @@ class Title { } /** - * Returns what the default sort key for categories would be, if - * {{defaultsort:}} isn't used. This is the same as getText() for - * categories, and for everything if $wgCategoryPrefixedDefaultSortkey is - * false; otherwise it's the same as getPrefixedText(). + * Returns the raw sort key to be used for categories, with the specified + * prefix. This will be fed to Language::convertToSortkey() to get a + * binary sortkey that can be used for actual sorting. * + * @param $prefix string The prefix to be used, specified using + * {{defaultsort:}} or like [[Category:Foo|prefix]]. Empty for no + * prefix. * @return string */ - public function getCategorySortkey() { + public function getCategorySortkey( $prefix = '' ) { global $wgCategoryPrefixedDefaultSortkey; if ( $this->getNamespace() == NS_CATEGORY || !$wgCategoryPrefixedDefaultSortkey ) { - return $this->getText(); + $unprefixed = $this->getText(); } else { - return $this->getPrefixedText(); + $unprefixed = $this->getPrefixedText(); + } + if ( $prefix !== '' ) { + # Separate with a null byte, so the unprefixed part is only used as + # a tiebreaker when two pages have the exact same prefix -- null + # sorts before everything else (hopefully). + return "$prefix\0$unprefixed"; } + return $unprefixed; } } diff --git a/languages/Language.php b/languages/Language.php index 4b6a72f54f..89e5230fa0 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -2938,10 +2938,10 @@ class Language { /** * Given a string, convert it to a (hopefully short) key that can be used * for efficient sorting. A binary sort according to the sortkeys - * corresponds to a logical sort of the corresponding strings. 
Applying - * this to cl_sortkey_prefix concatenated with the page title (possibly - * with namespace prefix, depending on $wgCategoryPrefixedDefaultSortkey) - * gives you cl_sortkey. + * corresponds to a logical sort of the corresponding strings. Current + * code expects that a null character should sort before all others, but + * has no other particular expectations (and that one can be changed if + * necessary). * * @param string $string UTF-8 string * @return string Binary sortkey @@ -2988,6 +2988,9 @@ class Language { * @return string UTF-8 string corresponding to the first letter of input */ public function firstLetterForLists( $string ) { + if ( isset( $string[0] ) && $string[0] === "\0" ) { + $string = substr( $string, 1 ); + } return strtoupper( mb_substr( $string, 0, 1 ) ); } } diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index f842537153..60578ce084 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -57,11 +57,10 @@ TEXT; $dbw->begin(); foreach ( $res as $row ) { $title = Title::newFromRow( $row ); - $rawSortkey = $title->getCategorySortkey(); if ( $row->cl_collation == 0 ) { # This is an old-style row, so the sortkey needs to be # converted. - if ( $row->cl_sortkey == $rawSortkey ) { + if ( $row->cl_sortkey === $title->getCategorySortkey() ) { $prefix = ''; } else { # Custom sortkey, use it as a prefix @@ -82,7 +81,8 @@ TEXT; $dbw->update( 'categorylinks', array( - 'cl_sortkey' => $wgContLang->convertToSortkey( $prefix . $rawSortkey ), + 'cl_sortkey' => $wgContLang->convertToSortkey( + $title->getCategorySortkey( $prefix ) ), 'cl_sortkey_prefix' => $prefix, 'cl_collation' => $wgCollationVersion, 'cl_type' => $type, -- 2.20.1