From: Timo Tijhof Date: Sat, 19 May 2018 18:08:17 +0000 (+0200) Subject: languages: Use static array files for normalizer data X-Git-Tag: 1.34.0-rc.0~5296^2 X-Git-Url: http://git.cyclocoop.org/%24action?a=commitdiff_plain;h=4f22361759c4df6e185637f5bb829235374d8fb8;p=lhc%2Fweb%2Fwiklou.git languages: Use static array files for normalizer data This reduces the number of '.ser' files to 1 (we still have first-letters-root.ser). Change-Id: Ib0ee0d826da34b1825fd5bb74563c6bbadeec75c --- diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 659ac9d268..1915b9b4b6 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -2739,6 +2739,28 @@ function wfGetPrecompiledData( $name ) { return false; } +/** + * @since 1.32 + * @param string[] $data Array with string keys/values to export + * @param string $header + * @return string PHP code + */ +function wfMakeStaticArrayFile( array $data, $header = 'Automatically generated' ) { + $format = "\t%s => %s,\n"; + $code = " $value ) { + $code .= sprintf( + $format, + var_export( $key, true ), + var_export( $value, true ) + ); + } + $code .= "];\n"; + return $code; +} + /** * Make a cache key for the local wiki. * diff --git a/languages/Language.php b/languages/Language.php index db302d7004..7224c33fba 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -2990,8 +2990,8 @@ class Language { global $wgAllUnicodeFixes; $s = UtfNormal\Validator::cleanUp( $s ); if ( $wgAllUnicodeFixes ) { - $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s ); - $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s ); + $s = $this->transformUsingPairFile( 'normalize-ar.php', $s ); + $s = $this->transformUsingPairFile( 'normalize-ml.php', $s ); } return $s; @@ -3011,12 +3011,10 @@ class Language { * @throws MWException * @return string */ - function transformUsingPairFile( $file, $string ) { + protected function transformUsingPairFile( $file, $string ) { if ( !isset( $this->transformData[$file] ) ) { - $data = wfGetPrecompiledData( $file ); - if ( $data === false ) { - throw new MWException( __METHOD__ . ": The transformation file $file is missing" ); - } + global $IP; + $data = require "$IP/languages/data/{$file}"; $this->transformData[$file] = new ReplacementArray( $data ); } return $this->transformData[$file]->replace( $string ); diff --git a/languages/classes/LanguageAr.php b/languages/classes/LanguageAr.php index 90e37516e8..efdf5a2713 100644 --- a/languages/classes/LanguageAr.php +++ b/languages/classes/LanguageAr.php @@ -44,7 +44,7 @@ class LanguageAr extends Language { global $wgFixArabicUnicode; $s = parent::normalize( $s ); if ( $wgFixArabicUnicode ) { - $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s ); + $s = $this->transformUsingPairFile( 'normalize-ar.php', $s ); } return $s; } diff --git a/languages/classes/LanguageMl.php b/languages/classes/LanguageMl.php index df894a14a7..cf45762ea8 100644 --- a/languages/classes/LanguageMl.php +++ b/languages/classes/LanguageMl.php @@ -45,7 +45,7 @@ class LanguageMl extends Language { global $wgFixMalayalamUnicode; $s = parent::normalize( $s ); if ( $wgFixMalayalamUnicode ) { - $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s ); + $s = $this->transformUsingPairFile( 'normalize-ml.php', $s ); } return $s; } diff --git a/languages/data/normalize-ar.php b/languages/data/normalize-ar.php new file mode 100644 index 0000000000..c1daf20ca1 --- /dev/null +++ b/languages/data/normalize-ar.php @@ -0,0 +1,735 @@ + 'ٱ', + 'ﭑ' => 'ٱ', + 'ﭒ' => 'ٻ', + 'ﭓ' => 'ٻ', + 'ﭔ' => 'ٻ', + 'ﭕ' => 'ٻ', + 'ﭖ' => 'پ', + 'ﭗ' => 'پ', + 'ﭘ' => 'پ', + 'ﭙ' => 'پ', + 'ﭚ' => 'ڀ', + 'ﭛ' => 'ڀ', + 'ﭜ' => 'ڀ', + 'ﭝ' => 'ڀ', + 'ﭞ' => 'ٺ', + 'ﭟ' => 'ٺ', + 'ﭠ' => 'ٺ', + 'ﭡ' => 'ٺ', + 'ﭢ' => 'ٿ', + 'ﭣ' => 'ٿ', + 'ﭤ' => 'ٿ', + 'ﭥ' => 'ٿ', + 'ﭦ' => 'ٹ', + 'ﭧ' => 'ٹ', + 'ﭨ' => 'ٹ', + 'ﭩ' => 'ٹ', + 'ﭪ' => 'ڤ', + 'ﭫ' => 'ڤ', + 'ﭬ' => 'ڤ', + 'ﭭ' => 'ڤ', + 'ﭮ' => 'ڦ', + 'ﭯ' => 'ڦ', + 'ﭰ' => 'ڦ', + 'ﭱ' => 'ڦ', + 'ﭲ' => 'ڄ', + 'ﭳ' => 'ڄ', + 'ﭴ' => 'ڄ', + 'ﭵ' => 'ڄ', + 'ﭶ' => 'ڃ', + 'ﭷ' => 'ڃ', + 'ﭸ' => 'ڃ', + 'ﭹ' => 'ڃ', + 'ﭺ' => 'چ', + 'ﭻ' => 'چ', + 'ﭼ' => 'چ', + 'ﭽ' => 'چ', + 'ﭾ' => 'ڇ', + 'ﭿ' => 'ڇ', + 'ﮀ' => 'ڇ', + 'ﮁ' => 'ڇ', + 'ﮂ' => 'ڍ', + 'ﮃ' => 'ڍ', + 'ﮄ' => 'ڌ', + 'ﮅ' => 'ڌ', + 'ﮆ' => 'ڎ', + 'ﮇ' => 'ڎ', + 'ﮈ' => 'ڈ', + 'ﮉ' => 'ڈ', + 'ﮊ' => 'ژ', + 'ﮋ' => 'ژ', + 'ﮌ' => 'ڑ', + 'ﮍ' => 'ڑ', + 'ﮎ' => 'ک', + 'ﮏ' => 'ک', + 'ﮐ' => 'ک', + 'ﮑ' => 'ک', + 'ﮒ' => 'گ', + 'ﮓ' => 'گ', + 'ﮔ' => 'گ', + 'ﮕ' => 'گ', + 'ﮖ' => 'ڳ', + 'ﮗ' => 'ڳ', + 'ﮘ' => 'ڳ', + 'ﮙ' => 'ڳ', + 'ﮚ' => 'ڱ', + 'ﮛ' => 'ڱ', + 'ﮜ' => 'ڱ', + 'ﮝ' => 'ڱ', + 'ﮞ' => 'ں', + 'ﮟ' => 'ں', + 'ﮠ' => 'ڻ', + 'ﮡ' => 'ڻ', + 'ﮢ' => 'ڻ', + 'ﮣ' => 'ڻ', + 'ﮤ' => 'ۀ', + 'ﮥ' => 'ۀ', + 'ﮦ' => 'ہ', + 'ﮧ' => 'ہ', + 'ﮨ' => 'ہ', + 'ﮩ' => 'ہ', + 'ﮪ' => 'ھ', + 'ﮫ' => 'ھ', + 'ﮬ' => 'ھ', + 'ﮭ' => 'ھ', + 'ﮮ' => 'ے', + 'ﮯ' => 'ے', + 'ﮰ' => 'ۓ', + 'ﮱ' => 'ۓ', + 'ﯓ' => 'ڭ', + 'ﯔ' => 'ڭ', + 'ﯕ' => 'ڭ', + 'ﯖ' => 'ڭ', + 'ﯗ' => 'ۇ', + 'ﯘ' => 'ۇ', + 'ﯙ' => 'ۆ', + 'ﯚ' => 'ۆ', + 'ﯛ' => 'ۈ', + 'ﯜ' => 'ۈ', + 'ﯝ' => 'ٷ', + 'ﯞ' => 'ۋ', + 'ﯟ' => 'ۋ', + 'ﯠ' => 'ۅ', + 'ﯡ' => 'ۅ', + 'ﯢ' => 'ۉ', + 'ﯣ' => 'ۉ', + 'ﯤ' => 'ې', + 'ﯥ' => 'ې', + 'ﯦ' => 'ې', + 'ﯧ' => 'ې', + 'ﯨ' => 'ى', + 'ﯩ' => 'ى', + 'ﯪ' => 'ئا', + 'ﯫ' => 'ئا', + 'ﯬ' => 'ئە', + 'ﯭ' => 'ئە', + 'ﯮ' => 'ئو', + 'ﯯ' => 'ئو', + 'ﯰ' => 'ئۇ', + 'ﯱ' => 'ئۇ', + 'ﯲ' => 'ئۆ', + 'ﯳ' => 'ئۆ', + 'ﯴ' => 'ئۈ', + 'ﯵ' => 'ئۈ', + 'ﯶ' => 'ئې', + 'ﯷ' => 'ئې', + 'ﯸ' => 'ئې', + 'ﯹ' => 'ئى', + 'ﯺ' => 'ئى', + 'ﯻ' => 'ئى', + 'ﯼ' => 'ی', + 'ﯽ' => 'ی', + 'ﯾ' => 'ی', + 'ﯿ' => 'ی', + 'ﰀ' => 'ئج', + 'ﰁ' => 'ئح', + 'ﰂ' => 'ئم', + 'ﰃ' => 'ئى', + 'ﰄ' => 'ئي', + 'ﰅ' => 'بج', + 'ﰆ' => 'بح', + 'ﰇ' => 'بخ', + 'ﰈ' => 'بم', + 'ﰉ' => 'بى', + 'ﰊ' => 'بي', + 'ﰋ' => 'تج', + 'ﰌ' => 'تح', + 'ﰍ' => 'تخ', + 'ﰎ' => 'تم', + 'ﰏ' => 'تى', + 'ﰐ' => 'تي', + 'ﰑ' => 'ثج', + 'ﰒ' => 'ثم', + 'ﰓ' => 'ثى', + 'ﰔ' => 'ثي', + 'ﰕ' => 'جح', + 'ﰖ' => 'جم', + 'ﰗ' => 'حج', + 'ﰘ' => 'حم', + 'ﰙ' => 'خج', + 'ﰚ' => 'خح', + 'ﰛ' => 'خم', + 'ﰜ' => 'سج', + 'ﰝ' => 'سح', + 'ﰞ' => 'سخ', + 'ﰟ' => 'سم', + 'ﰠ' => 'صح', + 'ﰡ' => 'صم', + 'ﰢ' => 'ضج', + 'ﰣ' => 'ضح', + 'ﰤ' => 'ضخ', + 'ﰥ' => 'ضم', + 'ﰦ' => 'طح', + 'ﰧ' => 'طم', + 'ﰨ' => 'ظم', + 'ﰩ' => 'عج', + 'ﰪ' => 'عم', + 'ﰫ' => 'غج', + 'ﰬ' => 'غم', + 'ﰭ' => 'فج', + 'ﰮ' => 'فح', + 'ﰯ' => 'فخ', + 'ﰰ' => 'فم', + 'ﰱ' => 'فى', + 'ﰲ' => 'في', + 'ﰳ' => 'قح', + 'ﰴ' => 'قم', + 'ﰵ' => 'قى', + 'ﰶ' => 'قي', + 'ﰷ' => 'كا', + 'ﰸ' => 'كج', + 'ﰹ' => 'كح', + 'ﰺ' => 'كخ', + 'ﰻ' => 'كل', + 'ﰼ' => 'كم', + 'ﰽ' => 'كى', + 'ﰾ' => 'كي', + 'ﰿ' => 'لج', + 'ﱀ' => 'لح', + 'ﱁ' => 'لخ', + 'ﱂ' => 'لم', + 'ﱃ' => 'لى', + 'ﱄ' => 'لي', + 'ﱅ' => 'مج', + 'ﱆ' => 'مح', + 'ﱇ' => 'مخ', + 'ﱈ' => 'مم', + 'ﱉ' => 'مى', + 'ﱊ' => 'مي', + 'ﱋ' => 'نج', + 'ﱌ' => 'نح', + 'ﱍ' => 'نخ', + 'ﱎ' => 'نم', + 'ﱏ' => 'نى', + 'ﱐ' => 'ني', + 'ﱑ' => 'هج', + 'ﱒ' => 'هم', + 'ﱓ' => 'هى', + 'ﱔ' => 'هي', + 'ﱕ' => 'يج', + 'ﱖ' => 'يح', + 'ﱗ' => 'يخ', + 'ﱘ' => 'يم', + 'ﱙ' => 'يى', + 'ﱚ' => 'يي', + 'ﱛ' => 'ذٰ', + 'ﱜ' => 'رٰ', + 'ﱝ' => 'ىٰ', + 'ﱞ' => ' ٌّ', + 'ﱟ' => ' ٍّ', + 'ﱠ' => ' َّ', + 'ﱡ' => ' ُّ', + 'ﱢ' => ' ِّ', + 'ﱣ' => ' ّٰ', + 'ﱤ' => 'ئر', + 'ﱥ' => 'ئز', + 'ﱦ' => 'ئم', + 'ﱧ' => 'ئن', + 'ﱨ' => 'ئى', + 'ﱩ' => 'ئي', + 'ﱪ' => 'بر', + 'ﱫ' => 'بز', + 'ﱬ' => 'بم', + 'ﱭ' => 'بن', + 'ﱮ' => 'بى', + 'ﱯ' => 'بي', + 'ﱰ' => 'تر', + 'ﱱ' => 'تز', + 'ﱲ' => 'تم', + 'ﱳ' => 'تن', + 'ﱴ' => 'تى', + 'ﱵ' => 'تي', + 'ﱶ' => 'ثر', + 'ﱷ' => 'ثز', + 'ﱸ' => 'ثم', + 'ﱹ' => 'ثن', + 'ﱺ' => 'ثى', + 'ﱻ' => 'ثي', + 'ﱼ' => 'فى', + 'ﱽ' => 'في', + 'ﱾ' => 'قى', + 'ﱿ' => 'قي', + 'ﲀ' => 'كا', + 'ﲁ' => 'كل', + 'ﲂ' => 'كم', + 'ﲃ' => 'كى', + 'ﲄ' => 'كي', + 'ﲅ' => 'لم', + 'ﲆ' => 'لى', + 'ﲇ' => 'لي', + 'ﲈ' => 'ما', + 'ﲉ' => 'مم', + 'ﲊ' => 'نر', + 'ﲋ' => 'نز', + 'ﲌ' => 'نم', + 'ﲍ' => 'نن', + 'ﲎ' => 'نى', + 'ﲏ' => 'ني', + 'ﲐ' => 'ىٰ', + 'ﲑ' => 'ير', + 'ﲒ' => 'يز', + 'ﲓ' => 'يم', + 'ﲔ' => 'ين', + 'ﲕ' => 'يى', + 'ﲖ' => 'يي', + 'ﲗ' => 'ئج', + 'ﲘ' => 'ئح', + 'ﲙ' => 'ئخ', + 'ﲚ' => 'ئم', + 'ﲛ' => 'ئه', + 'ﲜ' => 'بج', + 'ﲝ' => 'بح', + 'ﲞ' => 'بخ', + 'ﲟ' => 'بم', + 'ﲠ' => 'به', + 'ﲡ' => 'تج', + 'ﲢ' => 'تح', + 'ﲣ' => 'تخ', + 'ﲤ' => 'تم', + 'ﲥ' => 'ته', + 'ﲦ' => 'ثم', + 'ﲧ' => 'جح', + 'ﲨ' => 'جم', + 'ﲩ' => 'حج', + 'ﲪ' => 'حم', + 'ﲫ' => 'خج', + 'ﲬ' => 'خم', + 'ﲭ' => 'سج', + 'ﲮ' => 'سح', + 'ﲯ' => 'سخ', + 'ﲰ' => 'سم', + 'ﲱ' => 'صح', + 'ﲲ' => 'صخ', + 'ﲳ' => 'صم', + 'ﲴ' => 'ضج', + 'ﲵ' => 'ضح', + 'ﲶ' => 'ضخ', + 'ﲷ' => 'ضم', + 'ﲸ' => 'طح', + 'ﲹ' => 'ظم', + 'ﲺ' => 'عج', + 'ﲻ' => 'عم', + 'ﲼ' => 'غج', + 'ﲽ' => 'غم', + 'ﲾ' => 'فج', + 'ﲿ' => 'فح', + 'ﳀ' => 'فخ', + 'ﳁ' => 'فم', + 'ﳂ' => 'قح', + 'ﳃ' => 'قم', + 'ﳄ' => 'كج', + 'ﳅ' => 'كح', + 'ﳆ' => 'كخ', + 'ﳇ' => 'كل', + 'ﳈ' => 'كم', + 'ﳉ' => 'لج', + 'ﳊ' => 'لح', + 'ﳋ' => 'لخ', + 'ﳌ' => 'لم', + 'ﳍ' => 'له', + 'ﳎ' => 'مج', + 'ﳏ' => 'مح', + 'ﳐ' => 'مخ', + 'ﳑ' => 'مم', + 'ﳒ' => 'نج', + 'ﳓ' => 'نح', + 'ﳔ' => 'نخ', + 'ﳕ' => 'نم', + 'ﳖ' => 'نه', + 'ﳗ' => 'هج', + 'ﳘ' => 'هم', + 'ﳙ' => 'هٰ', + 'ﳚ' => 'يج', + 'ﳛ' => 'يح', + 'ﳜ' => 'يخ', + 'ﳝ' => 'يم', + 'ﳞ' => 'يه', + 'ﳟ' => 'ئم', + 'ﳠ' => 'ئه', + 'ﳡ' => 'بم', + 'ﳢ' => 'به', + 'ﳣ' => 'تم', + 'ﳤ' => 'ته', + 'ﳥ' => 'ثم', + 'ﳦ' => 'ثه', + 'ﳧ' => 'سم', + 'ﳨ' => 'سه', + 'ﳩ' => 'شم', + 'ﳪ' => 'شه', + 'ﳫ' => 'كل', + 'ﳬ' => 'كم', + 'ﳭ' => 'لم', + 'ﳮ' => 'نم', + 'ﳯ' => 'نه', + 'ﳰ' => 'يم', + 'ﳱ' => 'يه', + 'ﳲ' => 'ـَّ', + 'ﳳ' => 'ـُّ', + 'ﳴ' => 'ـِّ', + 'ﳵ' => 'طى', + 'ﳶ' => 'طي', + 'ﳷ' => 'عى', + 'ﳸ' => 'عي', + 'ﳹ' => 'غى', + 'ﳺ' => 'غي', + 'ﳻ' => 'سى', + 'ﳼ' => 'سي', + 'ﳽ' => 'شى', + 'ﳾ' => 'شي', + 'ﳿ' => 'حى', + 'ﴀ' => 'حي', + 'ﴁ' => 'جى', + 'ﴂ' => 'جي', + 'ﴃ' => 'خى', + 'ﴄ' => 'خي', + 'ﴅ' => 'صى', + 'ﴆ' => 'صي', + 'ﴇ' => 'ضى', + 'ﴈ' => 'ضي', + 'ﴉ' => 'شج', + 'ﴊ' => 'شح', + 'ﴋ' => 'شخ', + 'ﴌ' => 'شم', + 'ﴍ' => 'شر', + 'ﴎ' => 'سر', + 'ﴏ' => 'صر', + 'ﴐ' => 'ضر', + 'ﴑ' => 'طى', + 'ﴒ' => 'طي', + 'ﴓ' => 'عى', + 'ﴔ' => 'عي', + 'ﴕ' => 'غى', + 'ﴖ' => 'غي', + 'ﴗ' => 'سى', + 'ﴘ' => 'سي', + 'ﴙ' => 'شى', + 'ﴚ' => 'شي', + 'ﴛ' => 'حى', + 'ﴜ' => 'حي', + 'ﴝ' => 'جى', + 'ﴞ' => 'جي', + 'ﴟ' => 'خى', + 'ﴠ' => 'خي', + 'ﴡ' => 'صى', + 'ﴢ' => 'صي', + 'ﴣ' => 'ضى', + 'ﴤ' => 'ضي', + 'ﴥ' => 'شج', + 'ﴦ' => 'شح', + 'ﴧ' => 'شخ', + 'ﴨ' => 'شم', + 'ﴩ' => 'شر', + 'ﴪ' => 'سر', + 'ﴫ' => 'صر', + 'ﴬ' => 'ضر', + 'ﴭ' => 'شج', + 'ﴮ' => 'شح', + 'ﴯ' => 'شخ', + 'ﴰ' => 'شم', + 'ﴱ' => 'سه', + 'ﴲ' => 'شه', + 'ﴳ' => 'طم', + 'ﴴ' => 'سج', + 'ﴵ' => 'سح', + 'ﴶ' => 'سخ', + 'ﴷ' => 'شج', + 'ﴸ' => 'شح', + 'ﴹ' => 'شخ', + 'ﴺ' => 'طم', + 'ﴻ' => 'ظم', + 'ﴼ' => 'اً', + 'ﴽ' => 'اً', + 'ﵐ' => 'تجم', + 'ﵑ' => 'تحج', + 'ﵒ' => 'تحج', + 'ﵓ' => 'تحم', + 'ﵔ' => 'تخم', + 'ﵕ' => 'تمج', + 'ﵖ' => 'تمح', + 'ﵗ' => 'تمخ', + 'ﵘ' => 'جمح', + 'ﵙ' => 'جمح', + 'ﵚ' => 'حمي', + 'ﵛ' => 'حمى', + 'ﵜ' => 'سحج', + 'ﵝ' => 'سجح', + 'ﵞ' => 'سجى', + 'ﵟ' => 'سمح', + 'ﵠ' => 'سمح', + 'ﵡ' => 'سمج', + 'ﵢ' => 'سمم', + 'ﵣ' => 'سمم', + 'ﵤ' => 'صحح', + 'ﵥ' => 'صحح', + 'ﵦ' => 'صمم', + 'ﵧ' => 'شحم', + 'ﵨ' => 'شحم', + 'ﵩ' => 'شجي', + 'ﵪ' => 'شمخ', + 'ﵫ' => 'شمخ', + 'ﵬ' => 'شمم', + 'ﵭ' => 'شمم', + 'ﵮ' => 'ضحى', + 'ﵯ' => 'ضخم', + 'ﵰ' => 'ضخم', + 'ﵱ' => 'طمح', + 'ﵲ' => 'طمح', + 'ﵳ' => 'طمم', + 'ﵴ' => 'طمي', + 'ﵵ' => 'عجم', + 'ﵶ' => 'عمم', + 'ﵷ' => 'عمم', + 'ﵸ' => 'عمى', + 'ﵹ' => 'غمم', + 'ﵺ' => 'غمي', + 'ﵻ' => 'غمى', + 'ﵼ' => 'فخم', + 'ﵽ' => 'فخم', + 'ﵾ' => 'قمح', + 'ﵿ' => 'قمم', + 'ﶀ' => 'لحم', + 'ﶁ' => 'لحي', + 'ﶂ' => 'لحى', + 'ﶃ' => 'لجج', + 'ﶄ' => 'لجج', + 'ﶅ' => 'لخم', + 'ﶆ' => 'لخم', + 'ﶇ' => 'لمح', + 'ﶈ' => 'لمح', + 'ﶉ' => 'محج', + 'ﶊ' => 'محم', + 'ﶋ' => 'محي', + 'ﶌ' => 'مجح', + 'ﶍ' => 'مجم', + 'ﶎ' => 'مخج', + 'ﶏ' => 'مخم', + 'ﶒ' => 'مجخ', + 'ﶓ' => 'همج', + 'ﶔ' => 'همم', + 'ﶕ' => 'نحم', + 'ﶖ' => 'نحى', + 'ﶗ' => 'نجم', + 'ﶘ' => 'نجم', + 'ﶙ' => 'نجى', + 'ﶚ' => 'نمي', + 'ﶛ' => 'نمى', + 'ﶜ' => 'يمم', + 'ﶝ' => 'يمم', + 'ﶞ' => 'بخي', + 'ﶟ' => 'تجي', + 'ﶠ' => 'تجى', + 'ﶡ' => 'تخي', + 'ﶢ' => 'تخى', + 'ﶣ' => 'تمي', + 'ﶤ' => 'تمى', + 'ﶥ' => 'جمي', + 'ﶦ' => 'جحى', + 'ﶧ' => 'جمى', + 'ﶨ' => 'سخى', + 'ﶩ' => 'صحي', + 'ﶪ' => 'شحي', + 'ﶫ' => 'ضحي', + 'ﶬ' => 'لجي', + 'ﶭ' => 'لمي', + 'ﶮ' => 'يحي', + 'ﶯ' => 'يجي', + 'ﶰ' => 'يمي', + 'ﶱ' => 'ممي', + 'ﶲ' => 'قمي', + 'ﶳ' => 'نحي', + 'ﶴ' => 'قمح', + 'ﶵ' => 'لحم', + 'ﶶ' => 'عمي', + 'ﶷ' => 'كمي', + 'ﶸ' => 'نجح', + 'ﶹ' => 'مخي', + 'ﶺ' => 'لجم', + 'ﶻ' => 'كمم', + 'ﶼ' => 'لجم', + 'ﶽ' => 'نجح', + 'ﶾ' => 'جحي', + 'ﶿ' => 'حجي', + 'ﷀ' => 'مجي', + 'ﷁ' => 'فمي', + 'ﷂ' => 'بحي', + 'ﷃ' => 'كمم', + 'ﷄ' => 'عجم', + 'ﷅ' => 'صمم', + 'ﷆ' => 'سخي', + 'ﷇ' => 'نجي', + 'ﷰ' => 'صلے', + 'ﷱ' => 'قلے', + 'ﷲ' => 'الله', + 'ﷳ' => 'اكبر', + 'ﷴ' => 'محمد', + 'ﷵ' => 'صلعم', + 'ﷶ' => 'رسول', + 'ﷷ' => 'عليه', + 'ﷸ' => 'وسلم', + 'ﷹ' => 'صلى', + 'ﷺ' => 'صلى الله عليه وسلم', + 'ﷻ' => 'جل جلاله', + '﷼' => 'ریال', + 'ﹰ' => ' ً', + 'ﹱ' => 'ـً', + 'ﹲ' => ' ٌ', + 'ﹴ' => ' ٍ', + 'ﹶ' => ' َ', + 'ﹷ' => 'ـَ', + 'ﹸ' => ' ُ', + 'ﹹ' => 'ـُ', + 'ﹺ' => ' ِ', + 'ﹻ' => 'ـِ', + 'ﹼ' => ' ّ', + 'ﹽ' => 'ـّ', + 'ﹾ' => ' ْ', + 'ﹿ' => 'ـْ', + 'ﺀ' => 'ء', + 'ﺁ' => 'آ', + 'ﺂ' => 'آ', + 'ﺃ' => 'أ', + 'ﺄ' => 'أ', + 'ﺅ' => 'ؤ', + 'ﺆ' => 'ؤ', + 'ﺇ' => 'إ', + 'ﺈ' => 'إ', + 'ﺉ' => 'ئ', + 'ﺊ' => 'ئ', + 'ﺋ' => 'ئ', + 'ﺌ' => 'ئ', + 'ﺍ' => 'ا', + 'ﺎ' => 'ا', + 'ﺏ' => 'ب', + 'ﺐ' => 'ب', + 'ﺑ' => 'ب', + 'ﺒ' => 'ب', + 'ﺓ' => 'ة', + 'ﺔ' => 'ة', + 'ﺕ' => 'ت', + 'ﺖ' => 'ت', + 'ﺗ' => 'ت', + 'ﺘ' => 'ت', + 'ﺙ' => 'ث', + 'ﺚ' => 'ث', + 'ﺛ' => 'ث', + 'ﺜ' => 'ث', + 'ﺝ' => 'ج', + 'ﺞ' => 'ج', + 'ﺟ' => 'ج', + 'ﺠ' => 'ج', + 'ﺡ' => 'ح', + 'ﺢ' => 'ح', + 'ﺣ' => 'ح', + 'ﺤ' => 'ح', + 'ﺥ' => 'خ', + 'ﺦ' => 'خ', + 'ﺧ' => 'خ', + 'ﺨ' => 'خ', + 'ﺩ' => 'د', + 'ﺪ' => 'د', + 'ﺫ' => 'ذ', + 'ﺬ' => 'ذ', + 'ﺭ' => 'ر', + 'ﺮ' => 'ر', + 'ﺯ' => 'ز', + 'ﺰ' => 'ز', + 'ﺱ' => 'س', + 'ﺲ' => 'س', + 'ﺳ' => 'س', + 'ﺴ' => 'س', + 'ﺵ' => 'ش', + 'ﺶ' => 'ش', + 'ﺷ' => 'ش', + 'ﺸ' => 'ش', + 'ﺹ' => 'ص', + 'ﺺ' => 'ص', + 'ﺻ' => 'ص', + 'ﺼ' => 'ص', + 'ﺽ' => 'ض', + 'ﺾ' => 'ض', + 'ﺿ' => 'ض', + 'ﻀ' => 'ض', + 'ﻁ' => 'ط', + 'ﻂ' => 'ط', + 'ﻃ' => 'ط', + 'ﻄ' => 'ط', + 'ﻅ' => 'ظ', + 'ﻆ' => 'ظ', + 'ﻇ' => 'ظ', + 'ﻈ' => 'ظ', + 'ﻉ' => 'ع', + 'ﻊ' => 'ع', + 'ﻋ' => 'ع', + 'ﻌ' => 'ع', + 'ﻍ' => 'غ', + 'ﻎ' => 'غ', + 'ﻏ' => 'غ', + 'ﻐ' => 'غ', + 'ﻑ' => 'ف', + 'ﻒ' => 'ف', + 'ﻓ' => 'ف', + 'ﻔ' => 'ف', + 'ﻕ' => 'ق', + 'ﻖ' => 'ق', + 'ﻗ' => 'ق', + 'ﻘ' => 'ق', + 'ﻙ' => 'ك', + 'ﻚ' => 'ك', + 'ﻛ' => 'ك', + 'ﻜ' => 'ك', + 'ﻝ' => 'ل', + 'ﻞ' => 'ل', + 'ﻟ' => 'ل', + 'ﻠ' => 'ل', + 'ﻡ' => 'م', + 'ﻢ' => 'م', + 'ﻣ' => 'م', + 'ﻤ' => 'م', + 'ﻥ' => 'ن', + 'ﻦ' => 'ن', + 'ﻧ' => 'ن', + 'ﻨ' => 'ن', + 'ﻩ' => 'ه', + 'ﻪ' => 'ه', + 'ﻫ' => 'ه', + 'ﻬ' => 'ه', + 'ﻭ' => 'و', + 'ﻮ' => 'و', + 'ﻯ' => 'ى', + 'ﻰ' => 'ى', + 'ﻱ' => 'ي', + 'ﻲ' => 'ي', + 'ﻳ' => 'ي', + 'ﻴ' => 'ي', + 'ﻵ' => 'لآ', + 'ﻶ' => 'لآ', + 'ﻷ' => 'لأ', + 'ﻸ' => 'لأ', + 'ﻹ' => 'لإ', + 'ﻺ' => 'لإ', + 'ﻻ' => 'لا', + 'ﻼ' => 'لا', +]; diff --git a/languages/data/normalize-ml.php b/languages/data/normalize-ml.php new file mode 100644 index 0000000000..ca89a5aef3 --- /dev/null +++ b/languages/data/normalize-ml.php @@ -0,0 +1,10 @@ + 'ൺ', + 'ന്‍' => 'ൻ', + 'ര്‍' => 'ർ', + 'ല്‍' => 'ൽ', + 'ള്‍' => 'ൾ', + 'ക്‍' => 'ൿ', +]; diff --git a/maintenance/language/generateNormalizerDataAr.php b/maintenance/language/generateNormalizerDataAr.php index b8b2b8e8d8..af47d65c89 100644 --- a/maintenance/language/generateNormalizerDataAr.php +++ b/maintenance/language/generateNormalizerDataAr.php @@ -35,7 +35,7 @@ class GenerateNormalizerDataAr extends Maintenance { parent::__construct(); $this->addDescription( 'Generate the normalizer data file for Arabic' ); $this->addOption( 'unicode-data-file', 'The local location of the data file ' . - 'from https://unicode.org/Public/UNIDATA/UnicodeData.txt', false, true ); + 'from https://unicode.org/Public/6.0.0/ucd/UnicodeData.txt', false, true ); } public function getDbType() { @@ -122,7 +122,11 @@ class GenerateNormalizerDataAr extends Maintenance { } global $IP; - file_put_contents( "$IP/serialized/normalize-ar.ser", serialize( $pairs ) ); + file_put_contents( "$IP/languages/data/normalize-ar.php", wfMakeStaticArrayFile( + $pairs, + 'File created by generateNormalizerDataAr.php' + ) ); + echo "ar: " . count( $pairs ) . " pairs written.\n"; } } diff --git a/maintenance/language/generateNormalizerDataMl.php b/maintenance/language/generateNormalizerDataMl.php index 0bb491f267..5b75d8b43d 100644 --- a/maintenance/language/generateNormalizerDataMl.php +++ b/maintenance/language/generateNormalizerDataMl.php @@ -61,7 +61,11 @@ class GenerateNormalizerDataMl extends Maintenance { } global $IP; - file_put_contents( "$IP/serialized/normalize-ml.ser", serialize( $pairs ) ); + file_put_contents( "$IP/languages/data/normalize-ml.php", wfMakeStaticArrayFile( + $pairs, + 'File created by generateNormalizerDataMl.php' + ) ); + echo "ml: " . count( $pairs ) . " pairs written.\n"; } } diff --git a/serialized/.gitignore b/serialized/.gitignore index d9d58dd989..33b9f02a97 100644 --- a/serialized/.gitignore +++ b/serialized/.gitignore @@ -1,4 +1,3 @@ -/UnicodeData.txt /allkeys.txt /ucd.all.grouped.xml /ucd.all.grouped.zip diff --git a/serialized/Makefile b/serialized/Makefile index 0a04d85127..09369ef5e1 100644 --- a/serialized/Makefile +++ b/serialized/Makefile @@ -1,4 +1,4 @@ -SPECIAL_TARGETS=normalize-ar.ser normalize-ml.ser first-letters-root.ser +SPECIAL_TARGETS=first-letters-root.ser ALL_TARGETS=$(SPECIAL_TARGETS) DIST_TARGETS=$(SPECIAL_TARGETS) UNICODE_VERSION=6.0.0 @@ -13,18 +13,9 @@ dist: $(DIST_TARGETS) clean: rm -f $(ALL_TARGETS) -normalize-ar.ser: UnicodeData.txt - php ../maintenance/language/generateNormalizerDataAr.php - -normalize-ml.ser: - php ../maintenance/language/generateNormalizerDataMl.php - first-letters-root.ser: allkeys.txt ucd.all.grouped.xml php ../maintenance/language/generateCollationData.php -UnicodeData.txt: - wget https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt - allkeys.txt: wget https://www.unicode.org/Public/UCA/$(UNICODE_VERSION)/allkeys.txt diff --git a/serialized/normalize-ar.ser b/serialized/normalize-ar.ser deleted file mode 100644 index 2a7f122255..0000000000 --- a/serialized/normalize-ar.ser +++ /dev/null @@ -1 +0,0 @@ -a:731:{s:3:"ﭐ";s:2:"ٱ";s:3:"ﭑ";s:2:"ٱ";s:3:"ﭒ";s:2:"ٻ";s:3:"ﭓ";s:2:"ٻ";s:3:"ﭔ";s:2:"ٻ";s:3:"ﭕ";s:2:"ٻ";s:3:"ﭖ";s:2:"پ";s:3:"ﭗ";s:2:"پ";s:3:"ﭘ";s:2:"پ";s:3:"ﭙ";s:2:"پ";s:3:"ﭚ";s:2:"ڀ";s:3:"ﭛ";s:2:"ڀ";s:3:"ﭜ";s:2:"ڀ";s:3:"ﭝ";s:2:"ڀ";s:3:"ﭞ";s:2:"ٺ";s:3:"ﭟ";s:2:"ٺ";s:3:"ﭠ";s:2:"ٺ";s:3:"ﭡ";s:2:"ٺ";s:3:"ﭢ";s:2:"ٿ";s:3:"ﭣ";s:2:"ٿ";s:3:"ﭤ";s:2:"ٿ";s:3:"ﭥ";s:2:"ٿ";s:3:"ﭦ";s:2:"ٹ";s:3:"ﭧ";s:2:"ٹ";s:3:"ﭨ";s:2:"ٹ";s:3:"ﭩ";s:2:"ٹ";s:3:"ﭪ";s:2:"ڤ";s:3:"ﭫ";s:2:"ڤ";s:3:"ﭬ";s:2:"ڤ";s:3:"ﭭ";s:2:"ڤ";s:3:"ﭮ";s:2:"ڦ";s:3:"ﭯ";s:2:"ڦ";s:3:"ﭰ";s:2:"ڦ";s:3:"ﭱ";s:2:"ڦ";s:3:"ﭲ";s:2:"ڄ";s:3:"ﭳ";s:2:"ڄ";s:3:"ﭴ";s:2:"ڄ";s:3:"ﭵ";s:2:"ڄ";s:3:"ﭶ";s:2:"ڃ";s:3:"ﭷ";s:2:"ڃ";s:3:"ﭸ";s:2:"ڃ";s:3:"ﭹ";s:2:"ڃ";s:3:"ﭺ";s:2:"چ";s:3:"ﭻ";s:2:"چ";s:3:"ﭼ";s:2:"چ";s:3:"ﭽ";s:2:"چ";s:3:"ﭾ";s:2:"ڇ";s:3:"ﭿ";s:2:"ڇ";s:3:"ﮀ";s:2:"ڇ";s:3:"ﮁ";s:2:"ڇ";s:3:"ﮂ";s:2:"ڍ";s:3:"ﮃ";s:2:"ڍ";s:3:"ﮄ";s:2:"ڌ";s:3:"ﮅ";s:2:"ڌ";s:3:"ﮆ";s:2:"ڎ";s:3:"ﮇ";s:2:"ڎ";s:3:"ﮈ";s:2:"ڈ";s:3:"ﮉ";s:2:"ڈ";s:3:"ﮊ";s:2:"ژ";s:3:"ﮋ";s:2:"ژ";s:3:"ﮌ";s:2:"ڑ";s:3:"ﮍ";s:2:"ڑ";s:3:"ﮎ";s:2:"ک";s:3:"ﮏ";s:2:"ک";s:3:"ﮐ";s:2:"ک";s:3:"ﮑ";s:2:"ک";s:3:"ﮒ";s:2:"گ";s:3:"ﮓ";s:2:"گ";s:3:"ﮔ";s:2:"گ";s:3:"ﮕ";s:2:"گ";s:3:"ﮖ";s:2:"ڳ";s:3:"ﮗ";s:2:"ڳ";s:3:"ﮘ";s:2:"ڳ";s:3:"ﮙ";s:2:"ڳ";s:3:"ﮚ";s:2:"ڱ";s:3:"ﮛ";s:2:"ڱ";s:3:"ﮜ";s:2:"ڱ";s:3:"ﮝ";s:2:"ڱ";s:3:"ﮞ";s:2:"ں";s:3:"ﮟ";s:2:"ں";s:3:"ﮠ";s:2:"ڻ";s:3:"ﮡ";s:2:"ڻ";s:3:"ﮢ";s:2:"ڻ";s:3:"ﮣ";s:2:"ڻ";s:3:"ﮤ";s:2:"ۀ";s:3:"ﮥ";s:2:"ۀ";s:3:"ﮦ";s:2:"ہ";s:3:"ﮧ";s:2:"ہ";s:3:"ﮨ";s:2:"ہ";s:3:"ﮩ";s:2:"ہ";s:3:"ﮪ";s:2:"ھ";s:3:"ﮫ";s:2:"ھ";s:3:"ﮬ";s:2:"ھ";s:3:"ﮭ";s:2:"ھ";s:3:"ﮮ";s:2:"ے";s:3:"ﮯ";s:2:"ے";s:3:"ﮰ";s:2:"ۓ";s:3:"ﮱ";s:2:"ۓ";s:3:"ﯓ";s:2:"ڭ";s:3:"ﯔ";s:2:"ڭ";s:3:"ﯕ";s:2:"ڭ";s:3:"ﯖ";s:2:"ڭ";s:3:"ﯗ";s:2:"ۇ";s:3:"ﯘ";s:2:"ۇ";s:3:"ﯙ";s:2:"ۆ";s:3:"ﯚ";s:2:"ۆ";s:3:"ﯛ";s:2:"ۈ";s:3:"ﯜ";s:2:"ۈ";s:3:"ﯝ";s:2:"ٷ";s:3:"ﯞ";s:2:"ۋ";s:3:"ﯟ";s:2:"ۋ";s:3:"ﯠ";s:2:"ۅ";s:3:"ﯡ";s:2:"ۅ";s:3:"ﯢ";s:2:"ۉ";s:3:"ﯣ";s:2:"ۉ";s:3:"ﯤ";s:2:"ې";s:3:"ﯥ";s:2:"ې";s:3:"ﯦ";s:2:"ې";s:3:"ﯧ";s:2:"ې";s:3:"ﯨ";s:2:"ى";s:3:"ﯩ";s:2:"ى";s:3:"ﯪ";s:4:"ئا";s:3:"ﯫ";s:4:"ئا";s:3:"ﯬ";s:4:"ئە";s:3:"ﯭ";s:4:"ئە";s:3:"ﯮ";s:4:"ئو";s:3:"ﯯ";s:4:"ئو";s:3:"ﯰ";s:4:"ئۇ";s:3:"ﯱ";s:4:"ئۇ";s:3:"ﯲ";s:4:"ئۆ";s:3:"ﯳ";s:4:"ئۆ";s:3:"ﯴ";s:4:"ئۈ";s:3:"ﯵ";s:4:"ئۈ";s:3:"ﯶ";s:4:"ئې";s:3:"ﯷ";s:4:"ئې";s:3:"ﯸ";s:4:"ئې";s:3:"ﯹ";s:4:"ئى";s:3:"ﯺ";s:4:"ئى";s:3:"ﯻ";s:4:"ئى";s:3:"ﯼ";s:2:"ی";s:3:"ﯽ";s:2:"ی";s:3:"ﯾ";s:2:"ی";s:3:"ﯿ";s:2:"ی";s:3:"ﰀ";s:4:"ئج";s:3:"ﰁ";s:4:"ئح";s:3:"ﰂ";s:4:"ئم";s:3:"ﰃ";s:4:"ئى";s:3:"ﰄ";s:4:"ئي";s:3:"ﰅ";s:4:"بج";s:3:"ﰆ";s:4:"بح";s:3:"ﰇ";s:4:"بخ";s:3:"ﰈ";s:4:"بم";s:3:"ﰉ";s:4:"بى";s:3:"ﰊ";s:4:"بي";s:3:"ﰋ";s:4:"تج";s:3:"ﰌ";s:4:"تح";s:3:"ﰍ";s:4:"تخ";s:3:"ﰎ";s:4:"تم";s:3:"ﰏ";s:4:"تى";s:3:"ﰐ";s:4:"تي";s:3:"ﰑ";s:4:"ثج";s:3:"ﰒ";s:4:"ثم";s:3:"ﰓ";s:4:"ثى";s:3:"ﰔ";s:4:"ثي";s:3:"ﰕ";s:4:"جح";s:3:"ﰖ";s:4:"جم";s:3:"ﰗ";s:4:"حج";s:3:"ﰘ";s:4:"حم";s:3:"ﰙ";s:4:"خج";s:3:"ﰚ";s:4:"خح";s:3:"ﰛ";s:4:"خم";s:3:"ﰜ";s:4:"سج";s:3:"ﰝ";s:4:"سح";s:3:"ﰞ";s:4:"سخ";s:3:"ﰟ";s:4:"سم";s:3:"ﰠ";s:4:"صح";s:3:"ﰡ";s:4:"صم";s:3:"ﰢ";s:4:"ضج";s:3:"ﰣ";s:4:"ضح";s:3:"ﰤ";s:4:"ضخ";s:3:"ﰥ";s:4:"ضم";s:3:"ﰦ";s:4:"طح";s:3:"ﰧ";s:4:"طم";s:3:"ﰨ";s:4:"ظم";s:3:"ﰩ";s:4:"عج";s:3:"ﰪ";s:4:"عم";s:3:"ﰫ";s:4:"غج";s:3:"ﰬ";s:4:"غم";s:3:"ﰭ";s:4:"فج";s:3:"ﰮ";s:4:"فح";s:3:"ﰯ";s:4:"فخ";s:3:"ﰰ";s:4:"فم";s:3:"ﰱ";s:4:"فى";s:3:"ﰲ";s:4:"في";s:3:"ﰳ";s:4:"قح";s:3:"ﰴ";s:4:"قم";s:3:"ﰵ";s:4:"قى";s:3:"ﰶ";s:4:"قي";s:3:"ﰷ";s:4:"كا";s:3:"ﰸ";s:4:"كج";s:3:"ﰹ";s:4:"كح";s:3:"ﰺ";s:4:"كخ";s:3:"ﰻ";s:4:"كل";s:3:"ﰼ";s:4:"كم";s:3:"ﰽ";s:4:"كى";s:3:"ﰾ";s:4:"كي";s:3:"ﰿ";s:4:"لج";s:3:"ﱀ";s:4:"لح";s:3:"ﱁ";s:4:"لخ";s:3:"ﱂ";s:4:"لم";s:3:"ﱃ";s:4:"لى";s:3:"ﱄ";s:4:"لي";s:3:"ﱅ";s:4:"مج";s:3:"ﱆ";s:4:"مح";s:3:"ﱇ";s:4:"مخ";s:3:"ﱈ";s:4:"مم";s:3:"ﱉ";s:4:"مى";s:3:"ﱊ";s:4:"مي";s:3:"ﱋ";s:4:"نج";s:3:"ﱌ";s:4:"نح";s:3:"ﱍ";s:4:"نخ";s:3:"ﱎ";s:4:"نم";s:3:"ﱏ";s:4:"نى";s:3:"ﱐ";s:4:"ني";s:3:"ﱑ";s:4:"هج";s:3:"ﱒ";s:4:"هم";s:3:"ﱓ";s:4:"هى";s:3:"ﱔ";s:4:"هي";s:3:"ﱕ";s:4:"يج";s:3:"ﱖ";s:4:"يح";s:3:"ﱗ";s:4:"يخ";s:3:"ﱘ";s:4:"يم";s:3:"ﱙ";s:4:"يى";s:3:"ﱚ";s:4:"يي";s:3:"ﱛ";s:4:"ذٰ";s:3:"ﱜ";s:4:"رٰ";s:3:"ﱝ";s:4:"ىٰ";s:3:"ﱞ";s:5:" ٌّ";s:3:"ﱟ";s:5:" ٍّ";s:3:"ﱠ";s:5:" َّ";s:3:"ﱡ";s:5:" ُّ";s:3:"ﱢ";s:5:" ِّ";s:3:"ﱣ";s:5:" ّٰ";s:3:"ﱤ";s:4:"ئر";s:3:"ﱥ";s:4:"ئز";s:3:"ﱦ";s:4:"ئم";s:3:"ﱧ";s:4:"ئن";s:3:"ﱨ";s:4:"ئى";s:3:"ﱩ";s:4:"ئي";s:3:"ﱪ";s:4:"بر";s:3:"ﱫ";s:4:"بز";s:3:"ﱬ";s:4:"بم";s:3:"ﱭ";s:4:"بن";s:3:"ﱮ";s:4:"بى";s:3:"ﱯ";s:4:"بي";s:3:"ﱰ";s:4:"تر";s:3:"ﱱ";s:4:"تز";s:3:"ﱲ";s:4:"تم";s:3:"ﱳ";s:4:"تن";s:3:"ﱴ";s:4:"تى";s:3:"ﱵ";s:4:"تي";s:3:"ﱶ";s:4:"ثر";s:3:"ﱷ";s:4:"ثز";s:3:"ﱸ";s:4:"ثم";s:3:"ﱹ";s:4:"ثن";s:3:"ﱺ";s:4:"ثى";s:3:"ﱻ";s:4:"ثي";s:3:"ﱼ";s:4:"فى";s:3:"ﱽ";s:4:"في";s:3:"ﱾ";s:4:"قى";s:3:"ﱿ";s:4:"قي";s:3:"ﲀ";s:4:"كا";s:3:"ﲁ";s:4:"كل";s:3:"ﲂ";s:4:"كم";s:3:"ﲃ";s:4:"كى";s:3:"ﲄ";s:4:"كي";s:3:"ﲅ";s:4:"لم";s:3:"ﲆ";s:4:"لى";s:3:"ﲇ";s:4:"لي";s:3:"ﲈ";s:4:"ما";s:3:"ﲉ";s:4:"مم";s:3:"ﲊ";s:4:"نر";s:3:"ﲋ";s:4:"نز";s:3:"ﲌ";s:4:"نم";s:3:"ﲍ";s:4:"نن";s:3:"ﲎ";s:4:"نى";s:3:"ﲏ";s:4:"ني";s:3:"ﲐ";s:4:"ىٰ";s:3:"ﲑ";s:4:"ير";s:3:"ﲒ";s:4:"يز";s:3:"ﲓ";s:4:"يم";s:3:"ﲔ";s:4:"ين";s:3:"ﲕ";s:4:"يى";s:3:"ﲖ";s:4:"يي";s:3:"ﲗ";s:4:"ئج";s:3:"ﲘ";s:4:"ئح";s:3:"ﲙ";s:4:"ئخ";s:3:"ﲚ";s:4:"ئم";s:3:"ﲛ";s:4:"ئه";s:3:"ﲜ";s:4:"بج";s:3:"ﲝ";s:4:"بح";s:3:"ﲞ";s:4:"بخ";s:3:"ﲟ";s:4:"بم";s:3:"ﲠ";s:4:"به";s:3:"ﲡ";s:4:"تج";s:3:"ﲢ";s:4:"تح";s:3:"ﲣ";s:4:"تخ";s:3:"ﲤ";s:4:"تم";s:3:"ﲥ";s:4:"ته";s:3:"ﲦ";s:4:"ثم";s:3:"ﲧ";s:4:"جح";s:3:"ﲨ";s:4:"جم";s:3:"ﲩ";s:4:"حج";s:3:"ﲪ";s:4:"حم";s:3:"ﲫ";s:4:"خج";s:3:"ﲬ";s:4:"خم";s:3:"ﲭ";s:4:"سج";s:3:"ﲮ";s:4:"سح";s:3:"ﲯ";s:4:"سخ";s:3:"ﲰ";s:4:"سم";s:3:"ﲱ";s:4:"صح";s:3:"ﲲ";s:4:"صخ";s:3:"ﲳ";s:4:"صم";s:3:"ﲴ";s:4:"ضج";s:3:"ﲵ";s:4:"ضح";s:3:"ﲶ";s:4:"ضخ";s:3:"ﲷ";s:4:"ضم";s:3:"ﲸ";s:4:"طح";s:3:"ﲹ";s:4:"ظم";s:3:"ﲺ";s:4:"عج";s:3:"ﲻ";s:4:"عم";s:3:"ﲼ";s:4:"غج";s:3:"ﲽ";s:4:"غم";s:3:"ﲾ";s:4:"فج";s:3:"ﲿ";s:4:"فح";s:3:"ﳀ";s:4:"فخ";s:3:"ﳁ";s:4:"فم";s:3:"ﳂ";s:4:"قح";s:3:"ﳃ";s:4:"قم";s:3:"ﳄ";s:4:"كج";s:3:"ﳅ";s:4:"كح";s:3:"ﳆ";s:4:"كخ";s:3:"ﳇ";s:4:"كل";s:3:"ﳈ";s:4:"كم";s:3:"ﳉ";s:4:"لج";s:3:"ﳊ";s:4:"لح";s:3:"ﳋ";s:4:"لخ";s:3:"ﳌ";s:4:"لم";s:3:"ﳍ";s:4:"له";s:3:"ﳎ";s:4:"مج";s:3:"ﳏ";s:4:"مح";s:3:"ﳐ";s:4:"مخ";s:3:"ﳑ";s:4:"مم";s:3:"ﳒ";s:4:"نج";s:3:"ﳓ";s:4:"نح";s:3:"ﳔ";s:4:"نخ";s:3:"ﳕ";s:4:"نم";s:3:"ﳖ";s:4:"نه";s:3:"ﳗ";s:4:"هج";s:3:"ﳘ";s:4:"هم";s:3:"ﳙ";s:4:"هٰ";s:3:"ﳚ";s:4:"يج";s:3:"ﳛ";s:4:"يح";s:3:"ﳜ";s:4:"يخ";s:3:"ﳝ";s:4:"يم";s:3:"ﳞ";s:4:"يه";s:3:"ﳟ";s:4:"ئم";s:3:"ﳠ";s:4:"ئه";s:3:"ﳡ";s:4:"بم";s:3:"ﳢ";s:4:"به";s:3:"ﳣ";s:4:"تم";s:3:"ﳤ";s:4:"ته";s:3:"ﳥ";s:4:"ثم";s:3:"ﳦ";s:4:"ثه";s:3:"ﳧ";s:4:"سم";s:3:"ﳨ";s:4:"سه";s:3:"ﳩ";s:4:"شم";s:3:"ﳪ";s:4:"شه";s:3:"ﳫ";s:4:"كل";s:3:"ﳬ";s:4:"كم";s:3:"ﳭ";s:4:"لم";s:3:"ﳮ";s:4:"نم";s:3:"ﳯ";s:4:"نه";s:3:"ﳰ";s:4:"يم";s:3:"ﳱ";s:4:"يه";s:3:"ﳲ";s:6:"ـَّ";s:3:"ﳳ";s:6:"ـُّ";s:3:"ﳴ";s:6:"ـِّ";s:3:"ﳵ";s:4:"طى";s:3:"ﳶ";s:4:"طي";s:3:"ﳷ";s:4:"عى";s:3:"ﳸ";s:4:"عي";s:3:"ﳹ";s:4:"غى";s:3:"ﳺ";s:4:"غي";s:3:"ﳻ";s:4:"سى";s:3:"ﳼ";s:4:"سي";s:3:"ﳽ";s:4:"شى";s:3:"ﳾ";s:4:"شي";s:3:"ﳿ";s:4:"حى";s:3:"ﴀ";s:4:"حي";s:3:"ﴁ";s:4:"جى";s:3:"ﴂ";s:4:"جي";s:3:"ﴃ";s:4:"خى";s:3:"ﴄ";s:4:"خي";s:3:"ﴅ";s:4:"صى";s:3:"ﴆ";s:4:"صي";s:3:"ﴇ";s:4:"ضى";s:3:"ﴈ";s:4:"ضي";s:3:"ﴉ";s:4:"شج";s:3:"ﴊ";s:4:"شح";s:3:"ﴋ";s:4:"شخ";s:3:"ﴌ";s:4:"شم";s:3:"ﴍ";s:4:"شر";s:3:"ﴎ";s:4:"سر";s:3:"ﴏ";s:4:"صر";s:3:"ﴐ";s:4:"ضر";s:3:"ﴑ";s:4:"طى";s:3:"ﴒ";s:4:"طي";s:3:"ﴓ";s:4:"عى";s:3:"ﴔ";s:4:"عي";s:3:"ﴕ";s:4:"غى";s:3:"ﴖ";s:4:"غي";s:3:"ﴗ";s:4:"سى";s:3:"ﴘ";s:4:"سي";s:3:"ﴙ";s:4:"شى";s:3:"ﴚ";s:4:"شي";s:3:"ﴛ";s:4:"حى";s:3:"ﴜ";s:4:"حي";s:3:"ﴝ";s:4:"جى";s:3:"ﴞ";s:4:"جي";s:3:"ﴟ";s:4:"خى";s:3:"ﴠ";s:4:"خي";s:3:"ﴡ";s:4:"صى";s:3:"ﴢ";s:4:"صي";s:3:"ﴣ";s:4:"ضى";s:3:"ﴤ";s:4:"ضي";s:3:"ﴥ";s:4:"شج";s:3:"ﴦ";s:4:"شح";s:3:"ﴧ";s:4:"شخ";s:3:"ﴨ";s:4:"شم";s:3:"ﴩ";s:4:"شر";s:3:"ﴪ";s:4:"سر";s:3:"ﴫ";s:4:"صر";s:3:"ﴬ";s:4:"ضر";s:3:"ﴭ";s:4:"شج";s:3:"ﴮ";s:4:"شح";s:3:"ﴯ";s:4:"شخ";s:3:"ﴰ";s:4:"شم";s:3:"ﴱ";s:4:"سه";s:3:"ﴲ";s:4:"شه";s:3:"ﴳ";s:4:"طم";s:3:"ﴴ";s:4:"سج";s:3:"ﴵ";s:4:"سح";s:3:"ﴶ";s:4:"سخ";s:3:"ﴷ";s:4:"شج";s:3:"ﴸ";s:4:"شح";s:3:"ﴹ";s:4:"شخ";s:3:"ﴺ";s:4:"طم";s:3:"ﴻ";s:4:"ظم";s:3:"ﴼ";s:4:"اً";s:3:"ﴽ";s:4:"اً";s:3:"ﵐ";s:6:"تجم";s:3:"ﵑ";s:6:"تحج";s:3:"ﵒ";s:6:"تحج";s:3:"ﵓ";s:6:"تحم";s:3:"ﵔ";s:6:"تخم";s:3:"ﵕ";s:6:"تمج";s:3:"ﵖ";s:6:"تمح";s:3:"ﵗ";s:6:"تمخ";s:3:"ﵘ";s:6:"جمح";s:3:"ﵙ";s:6:"جمح";s:3:"ﵚ";s:6:"حمي";s:3:"ﵛ";s:6:"حمى";s:3:"ﵜ";s:6:"سحج";s:3:"ﵝ";s:6:"سجح";s:3:"ﵞ";s:6:"سجى";s:3:"ﵟ";s:6:"سمح";s:3:"ﵠ";s:6:"سمح";s:3:"ﵡ";s:6:"سمج";s:3:"ﵢ";s:6:"سمم";s:3:"ﵣ";s:6:"سمم";s:3:"ﵤ";s:6:"صحح";s:3:"ﵥ";s:6:"صحح";s:3:"ﵦ";s:6:"صمم";s:3:"ﵧ";s:6:"شحم";s:3:"ﵨ";s:6:"شحم";s:3:"ﵩ";s:6:"شجي";s:3:"ﵪ";s:6:"شمخ";s:3:"ﵫ";s:6:"شمخ";s:3:"ﵬ";s:6:"شمم";s:3:"ﵭ";s:6:"شمم";s:3:"ﵮ";s:6:"ضحى";s:3:"ﵯ";s:6:"ضخم";s:3:"ﵰ";s:6:"ضخم";s:3:"ﵱ";s:6:"طمح";s:3:"ﵲ";s:6:"طمح";s:3:"ﵳ";s:6:"طمم";s:3:"ﵴ";s:6:"طمي";s:3:"ﵵ";s:6:"عجم";s:3:"ﵶ";s:6:"عمم";s:3:"ﵷ";s:6:"عمم";s:3:"ﵸ";s:6:"عمى";s:3:"ﵹ";s:6:"غمم";s:3:"ﵺ";s:6:"غمي";s:3:"ﵻ";s:6:"غمى";s:3:"ﵼ";s:6:"فخم";s:3:"ﵽ";s:6:"فخم";s:3:"ﵾ";s:6:"قمح";s:3:"ﵿ";s:6:"قمم";s:3:"ﶀ";s:6:"لحم";s:3:"ﶁ";s:6:"لحي";s:3:"ﶂ";s:6:"لحى";s:3:"ﶃ";s:6:"لجج";s:3:"ﶄ";s:6:"لجج";s:3:"ﶅ";s:6:"لخم";s:3:"ﶆ";s:6:"لخم";s:3:"ﶇ";s:6:"لمح";s:3:"ﶈ";s:6:"لمح";s:3:"ﶉ";s:6:"محج";s:3:"ﶊ";s:6:"محم";s:3:"ﶋ";s:6:"محي";s:3:"ﶌ";s:6:"مجح";s:3:"ﶍ";s:6:"مجم";s:3:"ﶎ";s:6:"مخج";s:3:"ﶏ";s:6:"مخم";s:3:"ﶒ";s:6:"مجخ";s:3:"ﶓ";s:6:"همج";s:3:"ﶔ";s:6:"همم";s:3:"ﶕ";s:6:"نحم";s:3:"ﶖ";s:6:"نحى";s:3:"ﶗ";s:6:"نجم";s:3:"ﶘ";s:6:"نجم";s:3:"ﶙ";s:6:"نجى";s:3:"ﶚ";s:6:"نمي";s:3:"ﶛ";s:6:"نمى";s:3:"ﶜ";s:6:"يمم";s:3:"ﶝ";s:6:"يمم";s:3:"ﶞ";s:6:"بخي";s:3:"ﶟ";s:6:"تجي";s:3:"ﶠ";s:6:"تجى";s:3:"ﶡ";s:6:"تخي";s:3:"ﶢ";s:6:"تخى";s:3:"ﶣ";s:6:"تمي";s:3:"ﶤ";s:6:"تمى";s:3:"ﶥ";s:6:"جمي";s:3:"ﶦ";s:6:"جحى";s:3:"ﶧ";s:6:"جمى";s:3:"ﶨ";s:6:"سخى";s:3:"ﶩ";s:6:"صحي";s:3:"ﶪ";s:6:"شحي";s:3:"ﶫ";s:6:"ضحي";s:3:"ﶬ";s:6:"لجي";s:3:"ﶭ";s:6:"لمي";s:3:"ﶮ";s:6:"يحي";s:3:"ﶯ";s:6:"يجي";s:3:"ﶰ";s:6:"يمي";s:3:"ﶱ";s:6:"ممي";s:3:"ﶲ";s:6:"قمي";s:3:"ﶳ";s:6:"نحي";s:3:"ﶴ";s:6:"قمح";s:3:"ﶵ";s:6:"لحم";s:3:"ﶶ";s:6:"عمي";s:3:"ﶷ";s:6:"كمي";s:3:"ﶸ";s:6:"نجح";s:3:"ﶹ";s:6:"مخي";s:3:"ﶺ";s:6:"لجم";s:3:"ﶻ";s:6:"كمم";s:3:"ﶼ";s:6:"لجم";s:3:"ﶽ";s:6:"نجح";s:3:"ﶾ";s:6:"جحي";s:3:"ﶿ";s:6:"حجي";s:3:"ﷀ";s:6:"مجي";s:3:"ﷁ";s:6:"فمي";s:3:"ﷂ";s:6:"بحي";s:3:"ﷃ";s:6:"كمم";s:3:"ﷄ";s:6:"عجم";s:3:"ﷅ";s:6:"صمم";s:3:"ﷆ";s:6:"سخي";s:3:"ﷇ";s:6:"نجي";s:3:"ﷰ";s:6:"صلے";s:3:"ﷱ";s:6:"قلے";s:3:"ﷲ";s:8:"الله";s:3:"ﷳ";s:8:"اكبر";s:3:"ﷴ";s:8:"محمد";s:3:"ﷵ";s:8:"صلعم";s:3:"ﷶ";s:8:"رسول";s:3:"ﷷ";s:8:"عليه";s:3:"ﷸ";s:8:"وسلم";s:3:"ﷹ";s:6:"صلى";s:3:"ﷺ";s:33:"صلى الله عليه وسلم";s:3:"ﷻ";s:15:"جل جلاله";s:3:"﷼";s:8:"ریال";s:3:"ﹰ";s:3:" ً";s:3:"ﹱ";s:4:"ـً";s:3:"ﹲ";s:3:" ٌ";s:3:"ﹴ";s:3:" ٍ";s:3:"ﹶ";s:3:" َ";s:3:"ﹷ";s:4:"ـَ";s:3:"ﹸ";s:3:" ُ";s:3:"ﹹ";s:4:"ـُ";s:3:"ﹺ";s:3:" ِ";s:3:"ﹻ";s:4:"ـِ";s:3:"ﹼ";s:3:" ّ";s:3:"ﹽ";s:4:"ـّ";s:3:"ﹾ";s:3:" ْ";s:3:"ﹿ";s:4:"ـْ";s:3:"ﺀ";s:2:"ء";s:3:"ﺁ";s:2:"آ";s:3:"ﺂ";s:2:"آ";s:3:"ﺃ";s:2:"أ";s:3:"ﺄ";s:2:"أ";s:3:"ﺅ";s:2:"ؤ";s:3:"ﺆ";s:2:"ؤ";s:3:"ﺇ";s:2:"إ";s:3:"ﺈ";s:2:"إ";s:3:"ﺉ";s:2:"ئ";s:3:"ﺊ";s:2:"ئ";s:3:"ﺋ";s:2:"ئ";s:3:"ﺌ";s:2:"ئ";s:3:"ﺍ";s:2:"ا";s:3:"ﺎ";s:2:"ا";s:3:"ﺏ";s:2:"ب";s:3:"ﺐ";s:2:"ب";s:3:"ﺑ";s:2:"ب";s:3:"ﺒ";s:2:"ب";s:3:"ﺓ";s:2:"ة";s:3:"ﺔ";s:2:"ة";s:3:"ﺕ";s:2:"ت";s:3:"ﺖ";s:2:"ت";s:3:"ﺗ";s:2:"ت";s:3:"ﺘ";s:2:"ت";s:3:"ﺙ";s:2:"ث";s:3:"ﺚ";s:2:"ث";s:3:"ﺛ";s:2:"ث";s:3:"ﺜ";s:2:"ث";s:3:"ﺝ";s:2:"ج";s:3:"ﺞ";s:2:"ج";s:3:"ﺟ";s:2:"ج";s:3:"ﺠ";s:2:"ج";s:3:"ﺡ";s:2:"ح";s:3:"ﺢ";s:2:"ح";s:3:"ﺣ";s:2:"ح";s:3:"ﺤ";s:2:"ح";s:3:"ﺥ";s:2:"خ";s:3:"ﺦ";s:2:"خ";s:3:"ﺧ";s:2:"خ";s:3:"ﺨ";s:2:"خ";s:3:"ﺩ";s:2:"د";s:3:"ﺪ";s:2:"د";s:3:"ﺫ";s:2:"ذ";s:3:"ﺬ";s:2:"ذ";s:3:"ﺭ";s:2:"ر";s:3:"ﺮ";s:2:"ر";s:3:"ﺯ";s:2:"ز";s:3:"ﺰ";s:2:"ز";s:3:"ﺱ";s:2:"س";s:3:"ﺲ";s:2:"س";s:3:"ﺳ";s:2:"س";s:3:"ﺴ";s:2:"س";s:3:"ﺵ";s:2:"ش";s:3:"ﺶ";s:2:"ش";s:3:"ﺷ";s:2:"ش";s:3:"ﺸ";s:2:"ش";s:3:"ﺹ";s:2:"ص";s:3:"ﺺ";s:2:"ص";s:3:"ﺻ";s:2:"ص";s:3:"ﺼ";s:2:"ص";s:3:"ﺽ";s:2:"ض";s:3:"ﺾ";s:2:"ض";s:3:"ﺿ";s:2:"ض";s:3:"ﻀ";s:2:"ض";s:3:"ﻁ";s:2:"ط";s:3:"ﻂ";s:2:"ط";s:3:"ﻃ";s:2:"ط";s:3:"ﻄ";s:2:"ط";s:3:"ﻅ";s:2:"ظ";s:3:"ﻆ";s:2:"ظ";s:3:"ﻇ";s:2:"ظ";s:3:"ﻈ";s:2:"ظ";s:3:"ﻉ";s:2:"ع";s:3:"ﻊ";s:2:"ع";s:3:"ﻋ";s:2:"ع";s:3:"ﻌ";s:2:"ع";s:3:"ﻍ";s:2:"غ";s:3:"ﻎ";s:2:"غ";s:3:"ﻏ";s:2:"غ";s:3:"ﻐ";s:2:"غ";s:3:"ﻑ";s:2:"ف";s:3:"ﻒ";s:2:"ف";s:3:"ﻓ";s:2:"ف";s:3:"ﻔ";s:2:"ف";s:3:"ﻕ";s:2:"ق";s:3:"ﻖ";s:2:"ق";s:3:"ﻗ";s:2:"ق";s:3:"ﻘ";s:2:"ق";s:3:"ﻙ";s:2:"ك";s:3:"ﻚ";s:2:"ك";s:3:"ﻛ";s:2:"ك";s:3:"ﻜ";s:2:"ك";s:3:"ﻝ";s:2:"ل";s:3:"ﻞ";s:2:"ل";s:3:"ﻟ";s:2:"ل";s:3:"ﻠ";s:2:"ل";s:3:"ﻡ";s:2:"م";s:3:"ﻢ";s:2:"م";s:3:"ﻣ";s:2:"م";s:3:"ﻤ";s:2:"م";s:3:"ﻥ";s:2:"ن";s:3:"ﻦ";s:2:"ن";s:3:"ﻧ";s:2:"ن";s:3:"ﻨ";s:2:"ن";s:3:"ﻩ";s:2:"ه";s:3:"ﻪ";s:2:"ه";s:3:"ﻫ";s:2:"ه";s:3:"ﻬ";s:2:"ه";s:3:"ﻭ";s:2:"و";s:3:"ﻮ";s:2:"و";s:3:"ﻯ";s:2:"ى";s:3:"ﻰ";s:2:"ى";s:3:"ﻱ";s:2:"ي";s:3:"ﻲ";s:2:"ي";s:3:"ﻳ";s:2:"ي";s:3:"ﻴ";s:2:"ي";s:3:"ﻵ";s:4:"لآ";s:3:"ﻶ";s:4:"لآ";s:3:"ﻷ";s:4:"لأ";s:3:"ﻸ";s:4:"لأ";s:3:"ﻹ";s:4:"لإ";s:3:"ﻺ";s:4:"لإ";s:3:"ﻻ";s:4:"لا";s:3:"ﻼ";s:4:"لا";} \ No newline at end of file diff --git a/serialized/normalize-ml.ser b/serialized/normalize-ml.ser deleted file mode 100644 index b27a217c3f..0000000000 --- a/serialized/normalize-ml.ser +++ /dev/null @@ -1 +0,0 @@ -a:6:{s:9:"ണ്‍";s:3:"ൺ";s:9:"ന്‍";s:3:"ൻ";s:9:"ര്‍";s:3:"ർ";s:9:"ല്‍";s:3:"ൽ";s:9:"ള്‍";s:3:"ൾ";s:9:"ക്‍";s:3:"ൿ";} \ No newline at end of file