From 95c299e67f7b2e8a3e8f232ba935051dd54ba156 Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Sat, 29 Oct 2016 11:55:27 +0000 Subject: [PATCH] Add firstLetter data for ~50 additional languages Based on CLDR 29 data files. This covers the relatively easy languages in CLDR 29 (which is most of them). I skipped languages with complicated tailoring files. Change-Id: I8367604f7d3a1cdef9cb4e15813893c8cbfff1ff --- includes/collation/IcuCollation.php | 88 ++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 15 deletions(-) diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php index 9c0b96e370..4110472d8e 100644 --- a/includes/collation/IcuCollation.php +++ b/includes/collation/IcuCollation.php @@ -133,48 +133,106 @@ class IcuCollation extends Collation { 'vi' => [ "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ], // Not verified, but likely correct 'af' => [], - 'ast' => [ "Ch", "Ll", "Ñ" ], + 'am' => [], + 'ar' => [], + 'as' => [ "\xe0\xa6\x82", "\xe0\xa6\x81", "\xe0\xa6\x83", "\xe0\xa7\x8e", "ক্ষ " ], + 'ast' => [ "Ch", "Ll", "Ñ" ], // Not in libicu? 'az' => [ "Ç", "Ə", "Ğ", "İ", "Ö", "Ş", "Ü" ], 'bg' => [], + 'bo' => [], 'br' => [ "Ch", "C'h" ], + 'bs-Cyrl' => [], 'ca' => [], - 'co' => [], + 'chr' => [], + 'co' => [], // Not in libicu? 'da' => [ "Æ", "Ø", "Å" ], 'de' => [], + 'de-AT@collation=phonebook' => [ 'ä', 'ö', 'ü', 'ß' ], 'dsb' => [ "Č", "Ć", "Dź", "Ě", "Ch", "Ł", "Ń", "Ŕ", "Š", "Ś", "Ž", "Ź" ], + 'ee' => [ "Dz", "Ɖ", "Ɛ", "Ƒ", "Gb", "Ɣ", "Kp", "Ny", "Ŋ", "Ɔ", "Ts", "Ʋ" ], 'el' => [], 'eo' => [ "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ], 'es' => [ "Ñ" ], 'et' => [ "Š", "Ž", "Õ", "Ä", "Ö", "Ü", "W" ], // added W for CollationEt (xx-uca-et) - 'eu' => [ "Ñ" ], + 'eu' => [ "Ñ" ], // Not in libicu? + 'fil' => [ "Ñ", "Ng" ], 'fo' => [ "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ], - 'fur' => [ "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ], - 'fy' => [], + 'fr-CA' => [], // fr-CA sorts accents slightly differently from fr. 
+ 'fur' => [ "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ], // not in libicu + 'fy' => [], // not in libicu 'ga' => [], - 'gd' => [], + 'gd' => [], // not in libicu 'gl' => [ "Ch", "Ll", "Ñ" ], + 'gu' => [ "\xe0\xaa\x82", "\xe0\xaa\x83", "\xe0\xaa\x81", "\xe0\xaa\xb3" ], + 'ha' => [ 'Ɓ', 'Ɗ', 'Ƙ', 'Sh', 'Ts', 'Ƴ' ], + 'haw' => [ 'ʻ' ], + 'he' => [], + 'hi' => [ "\xe0\xa4\x82", "\xe0\xa4\x83" ], + 'hy' => [ "և" ], + 'id' => [], + 'ig' => [ "Ch", "Gb", "Gh", "Gw", "Ị", "Kp", "Kw", "Ṅ", "Nw", "Ny", "Ọ", "Sh", "Ụ" ], + 'ka' => [], + 'km' => [ + "រ", "ឫ", "ឬ", "ល", "ឭ", "ឮ", "\xe1\x9e\xbb\xe1\x9f\x86", + "\xe1\x9f\x86", "\xe1\x9e\xb6\xe1\x9f\x86", "\xe1\x9f\x87", + "\xe1\x9e\xb7\xe1\x9f\x87", "\xe1\x9e\xbb\xe1\x9f\x87", + "\xe1\x9f\x81\xe1\x9f\x87", "\xe1\x9f\x84\xe1\x9f\x87", + ], + 'kn' => [ "\xe0\xb2\x81", "\xe0\xb2\x83", "\xe0\xb3\xb1", "\xe0\xb3\xb2" ], + 'kok' => [ "\xe0\xa4\x82", "\xe0\xa4\x83", "ळ", "क्ष" ], 'kk' => [ "Ү", "І" ], 'kl' => [ "Æ", "Ø", "Å" ], - 'ku' => [ "Ç", "Ê", "Î", "Ş", "Û" ], + 'ku' => [ "Ç", "Ê", "Î", "Ş", "Û" ], // ku is not in libicu 'ky' => [ "Ё" ], - 'la' => [], + 'la' => [], // la is not in libicu 'lb' => [], - 'mo' => [ "Ă", "Â", "Î", "Ş", "Ţ" ], + 'lkt' => [ 'Č', 'Ǧ', 'Ȟ', 'Š', 'Ž' ], + 'ln' => [ 'Ɛ' ], + 'lo' => [], + 'ml' => [], + 'mn' => [], + 'mr' => [ "\xe0\xa4\x82", "\xe0\xa4\x83", "ळ", "क्ष", "ज्ञ" ], + 'mo' => [ "Ă", "Â", "Î", "Ş", "Ţ" ], // 'mo' is not in libicu + 'ms' => [], 'mt' => [ "Ċ", "Ġ", "Għ", "Ħ", "Ż" ], + 'nb' => [ "Æ", "Ø", "Å" ], + 'ne' => [], + 'nn' => [ "Æ", "Ø", "Å" ], + // 'no' is not in the libicu list. You should probably use 'nb' or 'nn' instead. 
'no' => [ "Æ", "Ø", "Å" ], - 'oc' => [], - 'rm' => [], + 'oc' => [], // not in libicu + 'om' => [ 'Ch', 'Dh', 'Kh', 'Ny', 'Ph', 'Sh' ], + 'or' => [ "\xe0\xac\x81", "\xe0\xac\x82", "\xe0\xac\x83", "କ୍ଷ" ], + 'pa' => [ "\xe0\xa9\x8d" ], + 'rm' => [], // not in libicu 'ro' => [ "Ă", "Â", "Î", "Ş", "Ţ" ], - 'rup' => [ "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ], + 'rup' => [ "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ], // not in libicu 'sco' => [], + 'se' => [ + 'Á', 'Č', 'Ʒ', 'Ǯ', 'Đ', 'Ǧ', 'Ǥ', 'Ǩ', 'Ŋ', + 'Š', 'Ŧ', 'Ž', 'Ø', 'Æ', 'Ȧ', 'Ä', 'Ö' + ], + 'si' => [ "\xe0\xb6\x82", "\xe0\xb6\x83", "\xe0\xb6\xa4" ], 'sl' => [ "Č", "Š", "Ž" ], 'smn' => [ "Á", "Č", "Đ", "Ŋ", "Š", "Ŧ", "Ž", "Æ", "Ø", "Å", "Ä", "Ö" ], 'sq' => [ "Ç", "Dh", "Ë", "Gj", "Ll", "Nj", "Rr", "Sh", "Th", "Xh", "Zh" ], + 'sr-Latn' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ], + 'sw' => [], + 'te' => [ "\xe0\xb0\x81", "\xe0\xb0\x82", "\xe0\xb0\x83" ], + 'th' => [ "ฯ", "\xe0\xb9\x86", "\xe0\xb9\x8d", "\xe0\xb8\xba" ], 'tk' => [ "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ], - 'tl' => [ "Ñ", "Ng" ], + 'tl' => [ "Ñ", "Ng" ], // not in libicu + 'to' => [ "Ng", "ʻ" ], 'tr' => [ "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ], - 'tt' => [ "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ], - 'uz' => [ "Ch", "G'", "Ng", "O'", "Sh" ], + 'tt' => [ "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ], // not in libicu + 'uz' => [ "Ch", "G'", "Ng", "O'", "Sh" ], // not in libicu + 'vo' => [ "Ä", "Ö", "Ü" ], + 'yi' => [ + "\xd7\x91\xd6\xbf", "\xd7\x9b\xd6\xbc", "\xd7\xa4\xd6\xbc", + "\xd7\xa9\xd7\x82", "\xd7\xaa\xd6\xbc" + ], + 'yo' => [ "Ẹ", "Gb", "Ọ", "Ṣ" ], + 'zu' => [], ]; /** -- 2.20.1