From 3783aa2a3c32e949ecd7198f21d821478f4621b4 Mon Sep 17 00:00:00 2001 From: Aryeh Gregor Date: Fri, 23 Jul 2010 20:58:11 +0000 Subject: [PATCH] Add non-identity collation, with migration script It seemed to work correctly, with the newly-created page "bob" sorting as "BOB", but then I nuked all my cl_sortkey by running the migration script before refreshLinks.php had finished running, so I'll have to wait a while to see if it works properly with a non-messed-up database. It's possible there's something wrong with the display of section letters in the categories, but otherwise I think this is working right. --- includes/CategoryPage.php | 29 +++++++++---- includes/DefaultSettings.php | 2 +- languages/Language.php | 6 +-- maintenance/updateCollation.php | 74 +++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 maintenance/updateCollation.php diff --git a/includes/CategoryPage.php b/includes/CategoryPage.php index cf58034880..5762331143 100644 --- a/includes/CategoryPage.php +++ b/includes/CategoryPage.php @@ -172,12 +172,18 @@ class CategoryViewer { * else use sortkey... */ function getSubcategorySortChar( $title, $sortkey ) { - global $wgContLang; + global $wgContLang, $wgExperimentalCategorySort; if ( $title->getPrefixedText() == $sortkey ) { - $firstChar = $wgContLang->firstChar( $title->getDBkey() ); + $word = $title->getDBkey(); } else { - $firstChar = $wgContLang->firstChar( $sortkey ); + $word = $sortkey; + } + + if ( $wgExperimentalCategorySort ) { + $firstChar = $wgContLang->firstLetterForLists( $word ); + } else { + $firstChar = $wgContLang->firstChar( $word ); } return $wgContLang->convert( $firstChar ); @@ -202,7 +208,7 @@ class CategoryViewer { * Add a miscellaneous page */ function addPage( $title, $sortkey, $pageLength, $isRedirect = false ) { - global $wgContLang; + global $wgContLang, $wgExperimentalCategorySort; $this->articles[] = $isRedirect ? '' . $this->getSkin()->link( @@ -213,7 +219,12 @@ class CategoryViewer { array( 'known', 'noclasses' ) ) . '' : $this->getSkin()->makeSizeLinkObj( $pageLength, $title ); - $this->articles_start_char[] = $wgContLang->convert( $wgContLang->firstChar( $sortkey ) ); + + if ( $wgExperimentalCategorySort ) { + $this->articles_start_char[] = $wgContLang->convert( $wgContLang->firstLetterForLists( $sortkey ) ); + } else { + $this->articles_start_char[] = $wgContLang->convert( $wgContLang->firstChar( $sortkey ) ); + } } function finaliseCategoryState() { @@ -259,7 +270,7 @@ class CategoryViewer { foreach ( array( 'page', 'subcat', 'file' ) as $type ) { $res = $dbr->select( $tables, - $fields, + array_merge( $fields, array( 'cl_raw_sortkey' ) ), $conds + array( 'cl_type' => $type ) + ( $type == 'page' ? array( $pageCondition ) : array() ), __METHOD__, $opts + ( $type == 'page' ? array( 'LIMIT' => $this->limit + 1 ) : array() ), @@ -278,11 +289,11 @@ class CategoryViewer { if ( $title->getNamespace() == NS_CATEGORY ) { $cat = Category::newFromRow( $row, $title ); - $this->addSubcategoryObject( $cat, $row->cl_sortkey, $row->page_len ); + $this->addSubcategoryObject( $cat, $row->cl_raw_sortkey, $row->page_len ); } elseif ( $this->showGallery && $title->getNamespace() == NS_FILE ) { - $this->addImage( $title, $row->cl_sortkey, $row->page_len, $row->page_is_redirect ); + $this->addImage( $title, $row->cl_raw_sortkey, $row->page_len, $row->page_is_redirect ); } else { - $this->addPage( $title, $row->cl_sortkey, $row->page_len, $row->page_is_redirect ); + $this->addPage( $title, $row->cl_raw_sortkey, $row->page_len, $row->page_is_redirect ); } } } diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 42dc3df957..5db1691681 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -4474,7 +4474,7 @@ $wgExperimentalCategorySort = false; * for all rows where cl_collation < $wgCollationVersion and regenerates * cl_sortkey based on cl_raw_sortkey. */ -$wgCollationVersion = 0; +$wgCollationVersion = 1; /** @} */ # End categories } diff --git a/languages/Language.php b/languages/Language.php index 95d1426e17..29c1cee0e2 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -2945,8 +2945,8 @@ class Language { * @return string Binary sortkey */ public function convertToSortkey( $string ) { - # Stub function for now - return $string; + # Fake function for now + return strtoupper( $string ); } /** @@ -2986,6 +2986,6 @@ class Language { * @return string UTF-8 string corresponding to the first letter of input */ public function firstLetterForLists( $string ) { - return mb_substr( $string, 0, 1 ); + return strtoupper( mb_substr( $string, 0, 1 ) ); } } diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php new file mode 100644 index 0000000000..93c845db16 --- /dev/null +++ b/maintenance/updateCollation.php @@ -0,0 +1,74 @@ +mDescription = <<addOption( 'force', 'Run on all rows, even if the collation is supposed to be up-to-date.' ); + } + + public function execute() { + global $wgCollationVersion, $wgContLang; + + $dbw = wfGetDB( DB_MASTER ); + $count = $dbw->estimateRowCount( + 'categorylinks', + array( 'cl_from', 'cl_to', 'cl_raw_sortkey' ), + 'cl_collation < ' . $dbw->addQuotes( $wgCollationVersion ), + __METHOD__ + ); + + $this->output( "Fixing around $count rows (estimate might be wrong).\n" ); + + $count = 0; + do { + $res = $dbw->select( + 'categorylinks', + array( 'cl_from', 'cl_to', 'cl_raw_sortkey' ), + 'cl_collation < ' . $dbw->addQuotes( $wgCollationVersion ), + __METHOD__, + array( 'LIMIT' => self::BATCH_SIZE ) + ); + + $dbw->begin(); + foreach ( $res as $row ) { + # TODO: Handle the case where cl_raw_sortkey is null. + $dbw->update( + 'categorylinks', + array( + 'cl_sortkey' => $wgContLang->convertToSortkey( $row->cl_raw_sortkey ), + 'cl_collation' => $wgCollationVersion + ), + array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ), + __METHOD__ + ); + } + $dbw->commit(); + + $count += self::BATCH_SIZE; + $this->output( "$count done.\n" ); + } while ( $res->numRows() >= self::BATCH_SIZE ); + } +} + +$maintClass = "UpdateCollation"; +require_once( DO_MAINTENANCE ); -- 2.20.1