From ae38b340dc409f1b9dd66b648eceba09074a2dfd Mon Sep 17 00:00:00 2001
From: MatmaRex
Date: Mon, 11 Mar 2013 22:24:09 +0100
Subject: [PATCH] IcuCollation::$tailoringFirstLetters: implement letter removal
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This is necessary for Swedish, where 'Þ' ("thorn") - considered a
separate letter by default in the first-letters-root.ser file - is
sorted as 'th', causing unexpected output on category pages - words
starting with 'th'..'u' were placed under a heading with the thorn.

There were three obvious ways to do this:
* somehow include information that this letter is to be removed in the
  string itself, as in 'sv' => array( "Å", "Ä", "Ö", "-Þ" )
  - could potentially clash with valid uses
* create a separate array other than $tailoringFirstLetters to store
  this information
  - would cause the data to be fragmented all over the file
* include information about letters to be removed in a separate key
  "linked" to the regular one, as in '-sv' => array( "Þ" )
  - I see no obvious downsides, so this is what I ended up doing

Bug: 45446
Change-Id: I57e07a2027c391c5baa767a68f4409b9de7b4618
---
 includes/Collation.php | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/includes/Collation.php b/includes/Collation.php
index 8a78084ab0..2734871f3d 100644
--- a/includes/Collation.php
+++ b/includes/Collation.php
@@ -181,7 +181,10 @@ class IcuCollation extends Collation {
 
 	/**
 	 * Additional characters (or character groups) to be considered separate
-	 * letters for given languages, compared to the data stored in the
+	 * letters for given languages, or to be removed from the list of such
+	 * letters (denoted by keys starting with '-').
+	 *
+	 * These are additions to (or subtractions from) the data stored in the
 	 * first-letters-root.ser file (which among others includes full basic latin,
 	 * cyrillic and greek alphabets).
 	 *
@@ -262,6 +265,7 @@ class IcuCollation extends Collation {
 		'sq' => array( "Ç", "Dh", "Ë", "Gj", "Ll", "Nj", "Rr", "Sh", "Th", "Xh", "Zh" ),
 		'sr' => array(),
 		'sv' => array( "Å", "Ä", "Ö" ),
+		'-sv' => array( "Þ" ), // sorted as "th" in Swedish, causing unexpected output - bug 45446
 		'tk' => array( "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ),
 		'tl' => array( "Ñ", "Ng" ),
 		'tr' => array( "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ),
@@ -352,7 +356,12 @@ class IcuCollation extends Collation {
 
 		if ( isset ( self::$tailoringFirstLetters[$this->locale] ) ) {
 			$letters = wfGetPrecompiledData( "first-letters-root.ser" );
+			// Append additional characters
 			$letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
+			// Remove unnecessary ones, if any
+			if ( isset( self::$tailoringFirstLetters[ '-' . $this->locale ] ) ) {
+				$letters = array_diff( $letters, self::$tailoringFirstLetters[ '-' . $this->locale ] );
+			}
 		} else {
 			$letters = wfGetPrecompiledData( "first-letters-{$this->locale}.ser" );
 			if ( $letters === false ) {
-- 
2.20.1