From: Bartosz Dziewoński Date: Wed, 28 Dec 2016 15:10:24 +0000 (+0100) Subject: CollationFa: Third time's the charm X-Git-Tag: 1.31.0-rc.0~4377^2 X-Git-Url: http://git.cyclocoop.org/%24self?a=commitdiff_plain;h=afc6e7cd157f33ec592ae7292d20569707af4397;p=lhc%2Fweb%2Fwiklou.git CollationFa: Third time's the charm We have to use a tertiary sortkey for everything with the primary sortkey of 2627. Otherwise, the "Remove duplicate prefixes" logic in IcuCollation would remove them. The following characters will now be considered separate letters in the 'xx-uca-fa' collation for the purpose of displaying the headings on category pages: ء ئ ا و ٲ ٳ Bug: T139110 Change-Id: Ibbea5d76348e4cdc38b74cba44286910b2ed592f --- diff --git a/includes/collation/CollationFa.php b/includes/collation/CollationFa.php index b7e45cc1d6..9cce087d3a 100644 --- a/includes/collation/CollationFa.php +++ b/includes/collation/CollationFa.php @@ -19,9 +19,12 @@ */ /** - * Temporary workaround for incorrect collation of Persian language ('fa') in ICU (bug T139110). + * Temporary workaround for incorrect collation of Persian language ('fa') in ICU 52 (bug T139110). * - * 'ا' and 'و' should not be considered the same letter for the purposes of collation in Persian. + * All of the following will be considered separate letters for category headings in Persian: + * - Characters 'و' 'ا' (often appear at the beginning of words) + * - Characters 'ٲ' 'ٳ' (may appear at the beginning of words in loanwords) + * - Characters 'ء' 'ئ' (don't appear at the beginning of words, but it's easier to implement) * * @since 1.29 */ @@ -34,11 +37,14 @@ class CollationFa extends IcuCollation { } public function getPrimarySortKey( $string ) { - $firstLetter = mb_substr( $string, 0, 1 ); - if ( $firstLetter === 'و' || $firstLetter === 'ا' ) { + $primary = parent::getPrimarySortKey( $string ); + // We have to use a tertiary sortkey for everything with the primary sortkey of 2627. 
+ // Otherwise, the "Remove duplicate prefixes" logic in IcuCollation would remove them. + // This matches sortkeys for the following characters: ء ئ ا و ٲ ٳ + if ( substr( $primary, 0, 2 ) === "\x26\x27" ) { + wfDebug( "Using tertiary sortkey for '$string'\n" ); return $this->tertiaryCollator->getSortKey( $string ); } - - return parent::getPrimarySortKey( $string ); + return $primary; } }