From 390ff7fca179e26ac177810145d27d98fe2fff43 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bartosz=20Dziewo=C5=84ski?= Date: Tue, 8 May 2018 13:43:10 +0200 Subject: [PATCH] IcuCollation: Use codepoint as tiebreaker when getting first-letters This prevents unexpected cuneiform digits from acting as headings for 2 and 3 on category pages. Bug: T187645 Change-Id: I0424a24769899cb23b28704f97e1002fa44999fd --- includes/collation/IcuCollation.php | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php index 36efdb379b..9ac81ae01e 100644 --- a/includes/collation/IcuCollation.php +++ b/includes/collation/IcuCollation.php @@ -384,9 +384,17 @@ class IcuCollation extends Collation { foreach ( $letters as $letter ) { $key = $this->getPrimarySortKey( $letter ); if ( isset( $letterMap[$key] ) ) { - // Primary collision - // Keep whichever one sorts first in the main collator - if ( $this->mainCollator->compare( $letter, $letterMap[$key] ) < 0 ) { + // Primary collision (two characters with the same sort position). + // Keep whichever one sorts first in the main collator. + $comp = $this->mainCollator->compare( $letter, $letterMap[$key] ); + wfDebug( "Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)\n" ); + // If that also has a collision, use codepoint as a tiebreaker. + if ( $comp === 0 ) { + // TODO Use <=> operator when PHP 7 is allowed. + $comp = UtfNormal\Utils::utf8ToCodepoint( $letter ) - + UtfNormal\Utils::utf8ToCodepoint( $letterMap[$key] ); + } + if ( $comp < 0 ) { $letterMap[$key] = $letter; } } else { -- 2.20.1