From 52c1b00dc0d7ad6a89d030b63b41fa020317c8d2 Mon Sep 17 00:00:00 2001 From: Kaldari Date: Thu, 14 Jul 2016 20:47:52 -0700 Subject: [PATCH] Adding support for numeric collation when using UCA collations To use, add '-u-kn' to the end of a collation name and set it as the value for $wgCategoryCollation. Bug: T8948 Change-Id: Ica7908daf80624fa2648127114d01665e96234c0 --- includes/collation/Collation.php | 2 ++ includes/collation/IcuCollation.php | 30 +++++++++++++++++++++++++++-- languages/i18n/en.json | 1 + languages/i18n/qqq.json | 1 + 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/includes/collation/Collation.php b/includes/collation/Collation.php index 9fb06604f8..fe254afdc5 100644 --- a/includes/collation/Collation.php +++ b/includes/collation/Collation.php @@ -53,6 +53,8 @@ abstract class Collation { return new IdentityCollation; case 'uca-default': return new IcuCollation( 'root' ); + case 'uca-default-u-kn': + return new IcuCollation( 'root-u-kn' ); case 'xx-uca-ckb': return new CollationCkb; case 'xx-uca-et': diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php index c2e8b2412d..f5d3c3357d 100644 --- a/includes/collation/IcuCollation.php +++ b/includes/collation/IcuCollation.php @@ -36,6 +36,9 @@ class IcuCollation extends Collation { /** @var Language */ protected $digitTransformLanguage; + /** @var boolean */ + private $useNumericCollation = false; + /** @var array */ private $firstLetterData; @@ -197,6 +200,15 @@ class IcuCollation extends Collation { $this->primaryCollator = Collator::create( $locale ); $this->primaryCollator->setStrength( Collator::PRIMARY ); + + // If the special suffix for numeric collation is present, turn on numeric collation. + if ( substr( $locale, -5, 5 ) === '-u-kn' ) { + $this->useNumericCollation = true; + // Strip off the special suffix so it doesn't trip up fetchFirstLetterData(). 
+ $this->locale = substr( $this->locale, 0, -5 ); + $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON ); + $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON ); + } } public function getSortKey( $string ) { @@ -213,8 +225,9 @@ class IcuCollation extends Collation { return ''; } - // Check for CJK $firstChar = mb_substr( $string, 0, 1, 'UTF-8' ); + + // If the first character is a CJK character, just return that character. if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) { return $firstChar; } @@ -232,7 +245,19 @@ class IcuCollation extends Collation { // Before the first letter return ''; } - return $this->getLetterByIndex( $min ); + + $sortLetter = $this->getLetterByIndex( $min ); + + if ( $this->useNumericCollation ) { + // If the sort letter is a number, return '0–9' (or localized equivalent). + // ASCII value of 0 is 48. ASCII value of 9 is 57. + // Note that this also applies to non-Arabic numerals since they are + // mapped to Arabic numeral sort letters. For example, ২ sorts as 2. 
+ if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) { + $sortLetter = wfMessage( 'category-header-numerals' )->numParams( 0, 9 )->text(); + } + } + return $sortLetter; } /** @@ -408,6 +433,7 @@ class IcuCollation extends Collation { } /** + * Test if a code point is a CJK (Chinese, Japanese, Korean) character * @since 1.16.3 */ public static function isCjk( $codepoint ) { diff --git a/languages/i18n/en.json b/languages/i18n/en.json index 6d1e58d4c5..7c755f7db9 100644 --- a/languages/i18n/en.json +++ b/languages/i18n/en.json @@ -137,6 +137,7 @@ "noindex-category": "Noindexed pages", "broken-file-category": "Pages with broken file links", "categoryviewer-pagedlinks": "($1) ($2)", + "category-header-numerals": "$1–$2", "about": "About", "article": "Content page", "newwindow": "(opens in new window)", diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json index 64ba4a6403..4129718bd3 100644 --- a/languages/i18n/qqq.json +++ b/languages/i18n/qqq.json @@ -318,6 +318,7 @@ "noindex-category": "Name of the [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages with the __NOINDEX__ behavior switch are listed.\n\nFor description of this behavior switch see [[mw:Special:MyLanguage/Help:Magic_words#Behavior_switches|MediaWiki]].\n\nSee also:\n* {{msg-mw|Noindex-category-desc}}", "broken-file-category": "Name of [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages that embed files that do not exist (\"broken images\") are listed.\n\nSee also:\n* {{msg-mw|Broken-file-category-desc}}", "categoryviewer-pagedlinks": "{{Optional}}\nThe pagination links in category viewer. Parameters:\n* $1 - the previous link, uses {{msg-mw|Prevn}}\n* $2 - the next link, uses {{msg-mw|Nextn}}", + "category-header-numerals": "{{Optional}}\nA header for all pages whose titles start with a number. This is used on category pages. 
This should only be translated if your language uses a different method to indicate a range of numbers (other than a dash).\n* $1 - 0 (or localized equivalent)\n* $2 - 9 (or localized equivalent)", "about": "{{Identical|About}}", "article": "A 'content page' is a page that forms part of the purpose of the wiki. It includes the main page and pages in the main namespace and any other namespaces that are included when the wiki is customised. For example on Wikimedia Commons 'content pages' include pages in the file and category namespaces. On Wikinews 'content pages' include pages in the Portal namespace. For technical definition of 'content namespaces' see [[mw:Manual:Using_custom_namespaces#Content_namespaces|MediaWiki]].\n\nPossible alternatives to the word 'content' are 'subject matter' or 'wiki subject' or 'wiki purpose'.\n\n{{Identical|Content page}}", "newwindow": "Below the edit form, next to \"{{msg-mw|Edithelp}}\".", -- 2.20.1