From bbbcf089dbbb84dcd92e2332d2cf7222d7140647 Mon Sep 17 00:00:00 2001 From: Santhosh Thottingal Date: Mon, 18 Jun 2012 13:58:44 +0530 Subject: [PATCH] CLDR Plural rules based plural form calculation * Use the plurals.xml of CLDR for the plural rules of languages * Use plurals-mediawiki.xml to override or extend the rules inside MW * Remove the convertPlural method in each LanguageXX.php * Parse and load the xml files in LocalisationCache * Use the CLDRPluralRuleEvaluator.php for parsing the cldr plural rules (This is taken from Translate extension and might require a replacement parser without using eval) * Add getPluralRules() to make the CLDR plural rules available to JS. PS3: More method documentation, cleanup Change-Id: I58a9cdfe60c7b9027bf031c91370472054f04ae2 --- includes/AutoLoader.php | 1 + includes/LocalisationCache.php | 89 ++++++++++---- .../ResourceLoaderLanguageDataModule.php | 23 +++- languages/Language.php | 37 ++++-- languages/classes/LanguageAm.php | 44 ------- languages/classes/LanguageAr.php | 25 ---- languages/classes/LanguageBe.php | 62 ---------- languages/classes/LanguageBh.php | 44 ------- languages/classes/LanguageBs.php | 23 ---- languages/classes/LanguageCs.php | 56 --------- languages/classes/LanguageCu.php | 2 +- languages/classes/LanguageCy.php | 50 -------- languages/classes/LanguageDsb.php | 17 --- languages/classes/LanguageFr.php | 44 ------- languages/classes/LanguageGa.php | 20 --- languages/classes/LanguageGd.php | 68 ----------- languages/classes/LanguageHe.php | 19 --- languages/data/plurals-mediawiki.xml | 35 ++++++ languages/data/plurals.xml | 114 ++++++++++++++++++ languages/utils/CLDRPluralRuleEvaluator.php | 72 +++++++++++ tests/phpunit/languages/LanguageHeTest.php | 14 +-- 21 files changed, 341 insertions(+), 518 deletions(-) delete mode 100644 languages/classes/LanguageAm.php delete mode 100644 languages/classes/LanguageBe.php delete mode 100644 languages/classes/LanguageBh.php delete mode 100644 languages/classes/LanguageCs.php delete mode 100644 languages/classes/LanguageCy.php delete mode 100644 languages/classes/LanguageFr.php delete mode 100644 languages/classes/LanguageGd.php create mode 100644 languages/data/plurals-mediawiki.xml create mode 100644 languages/data/plurals.xml create mode 100644 languages/utils/CLDRPluralRuleEvaluator.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 752f09c93a..8244bdf373 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -1006,6 +1006,7 @@ $wgAutoloadLocalClasses = array( 'FakeConverter' => 'languages/Language.php', 'Language' => 'languages/Language.php', 'LanguageConverter' => 'languages/LanguageConverter.php', + 'CLDRPluralRuleEvaluator' => 'languages/utils/CLDRPluralRuleEvaluator.php', # maintenance 'ConvertLinks' => 'maintenance/convertLinks.php', diff --git a/includes/LocalisationCache.php b/includes/LocalisationCache.php index 9ce26d000b..c9dd69754d 100644 --- a/includes/LocalisationCache.php +++ b/includes/LocalisationCache.php @@ -110,7 +110,7 @@ class LocalisationCache { 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap', 'defaultDateFormat', 'extraUserToggles', 'specialPageAliases', 'imageFiles', 'preloadedMessages', 'namespaceGenderAliases', - 'digitGroupingPattern' + 'digitGroupingPattern', 'pluralRules' ); /** @@ -118,7 +118,7 @@ class LocalisationCache { * by a fallback sequence. */ static public $mergeableMapKeys = array( 'messages', 'namespaceNames', - 'dateFormats', 'imageFiles', 'preloadedMessages', + 'dateFormats', 'imageFiles', 'preloadedMessages', 'pluralRules' ); /** @@ -154,6 +154,11 @@ class LocalisationCache { */ static public $preloadedKeys = array( 'dateFormats', 'namespaceNames' ); + /* + * Associative array containing plural rules. + */ + var $pluralRules = array(); + var $mergeableKeys = null; /** @@ -202,6 +207,7 @@ class LocalisationCache { $this->$var = $conf[$var]; } } + $this->readPluralRules(); } /** @@ -234,9 +240,9 @@ class LocalisationCache { */ public function getItem( $code, $key ) { if ( !isset( $this->loadedItems[$code][$key] ) ) { - wfProfileIn( __METHOD__.'-load' ); + wfProfileIn( __METHOD__ . '-load' ); $this->loadItem( $code, $key ); - wfProfileOut( __METHOD__.'-load' ); + wfProfileOut( __METHOD__ . '-load' ); } if ( $key === 'fallback' && isset( $this->shallowFallbacks[$code] ) ) { @@ -256,9 +262,9 @@ class LocalisationCache { public function getSubitem( $code, $key, $subkey ) { if ( !isset( $this->loadedSubitems[$code][$key][$subkey] ) && !isset( $this->loadedItems[$code][$key] ) ) { - wfProfileIn( __METHOD__.'-load' ); + wfProfileIn( __METHOD__ . '-load' ); $this->loadSubitem( $code, $key, $subkey ); - wfProfileOut( __METHOD__.'-load' ); + wfProfileOut( __METHOD__ . '-load' ); } if ( isset( $this->data[$code][$key][$subkey] ) ) { @@ -367,7 +373,7 @@ class LocalisationCache { */ public function isExpired( $code ) { if ( $this->forceRecache && !isset( $this->recachedLangs[$code] ) ) { - wfDebug( __METHOD__."($code): forced reload\n" ); + wfDebug( __METHOD__ . "($code): forced reload\n" ); return true; } @@ -376,7 +382,7 @@ class LocalisationCache { $preload = $this->store->get( $code, 'preload' ); // Different keys may expire separately, at least in LCStore_Accel if ( $deps === null || $keys === null || $preload === null ) { - wfDebug( __METHOD__."($code): cache missing, need to make one\n" ); + wfDebug( __METHOD__ . "($code): cache missing, need to make one\n" ); return true; } @@ -386,7 +392,7 @@ class LocalisationCache { // anymore (e.g. uninstalled extensions) // When this happens, always expire the cache if ( !$dep instanceof CacheDependency || $dep->isExpired() ) { - wfDebug( __METHOD__."($code): cache for $code expired due to " . + wfDebug( __METHOD__ . "($code): cache for $code expired due to " . get_class( $dep ) . "\n" ); return true; } @@ -481,11 +487,43 @@ class LocalisationCache { } elseif ( $_fileType == 'aliases' ) { $data = compact( 'aliases' ); } else { - throw new MWException( __METHOD__.": Invalid file type: $_fileType" ); + throw new MWException( __METHOD__ . ": Invalid file type: $_fileType" ); } - return $data; } + /** + * Read the plural rule xml files. + * First the CLDR xml will be read and it will be extended with + * mediawiki specific tailoring. + * @since 1.20 + */ + protected function readPluralRules() { + $CLDRPlural = __DIR__ . "/../languages/data/plurals.xml"; + $MWPlural = __DIR__ . "/../languages/data/plurals-mediawiki.xml"; + # Load CLDR plural rules + $this->parsePluralXML( $CLDRPlural ); + if ( file_exists( $MWPlural ) ) { + // override or extend. + $this->parsePluralXML( $MWPlural ); + } + } + + private function parsePluralXML( $xmlFile ) { + $pluraldoc = new DOMDocument(); + $pluraldoc->load( $xmlFile ); + $rulesets = $pluraldoc->getElementsByTagName( "pluralRules" ); + foreach ( $rulesets as $ruleset ) { + $codes = $ruleset->getAttribute( 'locales' ); + $parsedRules = array(); + $rules = $ruleset->getElementsByTagName( "pluralRule" ); + foreach ( $rules as $rule ) { + $parsedRules[$rule->getAttribute( 'count' )] = $rule->nodeValue; + } + foreach ( explode( ' ', $codes ) as $code ) { + $this->pluralRules[$code] = $parsedRules; + } + } + } /** * Merge two localisation values, a primary and a fallback, overwriting the @@ -587,12 +625,12 @@ class LocalisationCache { # Load the primary localisation from the source file $fileName = Language::getMessagesFileName( $code ); if ( !file_exists( $fileName ) ) { - wfDebug( __METHOD__.": no localisation file for $code, using fallback to en\n" ); + wfDebug( __METHOD__ . ": no localisation file for $code, using fallback to en\n" ); $coreData['fallback'] = 'en'; } else { $deps[] = new FileDependency( $fileName ); $data = $this->readPHPFile( $fileName, 'core' ); - wfDebug( __METHOD__.": got localisation for $code from source\n" ); + wfDebug( __METHOD__ . ": got localisation for $code from source\n" ); # Merge primary localisation foreach ( $data as $key => $value ) { @@ -605,7 +643,6 @@ class LocalisationCache { if ( is_null( $coreData['fallback'] ) ) { $coreData['fallback'] = $code === 'en' ? false : 'en'; } - if ( $coreData['fallback'] === false ) { $coreData['fallbackSequence'] = array(); } else { @@ -654,7 +691,7 @@ class LocalisationCache { $used = false; foreach ( $data as $key => $item ) { - if( $this->mergeExtensionItem( $codeSequence, $key, $allData[$key], $item ) ) { + if ( $this->mergeExtensionItem( $codeSequence, $key, $allData[$key], $item ) ) { $used = true; } } @@ -684,19 +721,22 @@ class LocalisationCache { $page = str_replace( ' ', '_', $page ); } # Decouple the reference to prevent accidental damage - unset($page); + unset( $page ); # Set the list keys $allData['list'] = array(); foreach ( self::$splitKeys as $key ) { $allData['list'][$key] = array_keys( $allData[$key] ); } - + # Load CLDR plural rules + if ( isset( $this->pluralRules[$code] ) ) { + $allData['pluralRules'] = $this->pluralRules[$code]; + } # Run hooks wfRunHooks( 'LocalisationCacheRecache', array( $this, $code, &$allData ) ); if ( is_null( $allData['namespaceNames'] ) ) { - throw new MWException( __METHOD__.': Localisation data failed sanity check! ' . + throw new MWException( __METHOD__ . ': Localisation data failed sanity check! ' . 'Check that your languages/messages/MessagesEn.php file is intact.' ); } @@ -924,7 +964,7 @@ class LCStore_DB implements LCStore { } if ( !$code ) { - throw new MWException( __METHOD__.": Invalid language \"$code\"" ); + throw new MWException( __METHOD__ . ": Invalid language \"$code\"" ); } $this->dbw = wfGetDB( DB_MASTER ); @@ -968,7 +1008,7 @@ class LCStore_DB implements LCStore { } if ( is_null( $this->currentLang ) ) { - throw new MWException( __CLASS__.': must call startWrite() before calling set()' ); + throw new MWException( __CLASS__ . ': must call startWrite() before calling set()' ); } $this->batch[] = array( @@ -1040,7 +1080,7 @@ class LCStore_CDB implements LCStore { } // Close reader to stop permission errors on write - if( !empty($this->readers[$code]) ) { + if ( !empty( $this->readers[$code] ) ) { $this->readers[$code]->close(); } @@ -1058,14 +1098,14 @@ class LCStore_CDB implements LCStore { public function set( $key, $value ) { if ( is_null( $this->writer ) ) { - throw new MWException( __CLASS__.': must call startWrite() before calling set()' ); + throw new MWException( __CLASS__ . ': must call startWrite() before calling set()' ); } $this->writer->set( $key, serialize( $value ) ); } protected function getFileName( $code ) { if ( !$code || strpos( $code, '/' ) !== false ) { - throw new MWException( __METHOD__.": Invalid language \"$code\"" ); + throw new MWException( __METHOD__ . ": Invalid language \"$code\"" ); } return "{$this->directory}/l10n_cache-$code.cdb"; } @@ -1181,8 +1221,9 @@ class LocalisationCache_BulkLoad extends LocalisationCache { while ( count( $this->data ) > $this->maxLoadedLangs && count( $this->mruLangs ) ) { reset( $this->mruLangs ); $code = key( $this->mruLangs ); - wfDebug( __METHOD__.": unloading $code\n" ); + wfDebug( __METHOD__ . ": unloading $code\n" ); $this->unload( $code ); } } + } diff --git a/includes/resourceloader/ResourceLoaderLanguageDataModule.php b/includes/resourceloader/ResourceLoaderLanguageDataModule.php index a36aaec49b..c916c4a59d 100644 --- a/includes/resourceloader/ResourceLoaderLanguageDataModule.php +++ b/includes/resourceloader/ResourceLoaderLanguageDataModule.php @@ -29,7 +29,7 @@ class ResourceLoaderLanguageDataModule extends ResourceLoaderModule { protected $language; /** - * Get the grammer forms for the site content language. + * Get the grammar forms for the site content language. * * @return array */ @@ -37,6 +37,15 @@ class ResourceLoaderLanguageDataModule extends ResourceLoaderModule { return $this->language->getGrammarForms(); } + /** + * Get the plural forms for the site content language. + * + * @return array + */ + protected function getPluralRules() { + return $this->language->getPluralRules(); + } + /** * Get the digit transform table for the content language * Seperator transform table also required here to convert @@ -61,17 +70,19 @@ class ResourceLoaderLanguageDataModule extends ResourceLoaderModule { * @return array */ protected function getData() { - return array( 'grammarForms' => $this->getSiteLangGrammarForms(), - 'digitTransformTable' => $this->getDigitTransformTable() - ); + return array( + 'digitTransformTable' => $this->getDigitTransformTable(), + 'grammarForms' => $this->getSiteLangGrammarForms(), + 'pluralRules' => $this->getPluralRules(), + ); } /** * @param $context ResourceLoaderContext - * @return string: Javascript code + * @return string: JavaScript code */ public function getScript( ResourceLoaderContext $context ) { - $this->language = Language::factory( $context ->getLanguage() ); + $this->language = Language::factory( $context->getLanguage() ); return Xml::encodeJsCall( 'mw.language.setData', array( $this->language->getCode(), $this->getData() diff --git a/languages/Language.php b/languages/Language.php index 320cdf2622..e67c086518 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -266,9 +266,9 @@ class Language { */ public static function isValidBuiltInCode( $code ) { - if( !is_string($code) ) { + if ( !is_string( $code ) ) { $type = gettype( $code ); - if( $type === 'object' ) { + if ( $type === 'object' ) { $addmsg = " of class " . get_class( $code ); } else { $addmsg = ''; @@ -742,7 +742,7 @@ class Language { $names = array(); - if( $inLanguage ) { + if ( $inLanguage ) { # TODO: also include when $inLanguage is null, when this code is more efficient wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $inLanguage ) ); } @@ -762,11 +762,11 @@ class Language { $returnMw = array(); $coreCodes = array_keys( $mwNames ); - foreach( $coreCodes as $coreCode ) { + foreach ( $coreCodes as $coreCode ) { $returnMw[$coreCode] = $names[$coreCode]; } - if( $include === 'mwfile' ) { + if ( $include === 'mwfile' ) { $namesMwFile = array(); # We do this using a foreach over the codes instead of a directory # loop so that messages files in extensions will work correctly. @@ -3418,9 +3418,9 @@ class Language { if ( !count( $forms ) ) { return ''; } - $forms = $this->preConvertPlural( $forms, 2 ); - - return ( $count == 1 ) ? $forms[0] : $forms[1]; + $pluralForm = $this->getPluralForm( $count ); + $pluralForm = min( $pluralForm, count( $forms ) - 1 ); + return $forms[$pluralForm]; } /** @@ -4189,4 +4189,25 @@ class Language { public function getConvRuleTitle() { return $this->mConverter->getConvRuleTitle(); } + + /** + * Get the plural rules for the language + * @since 1.20 + * @return array Associative array with plural form, and plural rule as key-value pairs + */ + public function getPluralRules() { + return self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRules' ); + } + + /** + * Find the plural form matching to the given number + * It return the form index. + * @return int The index of the plural form + */ + private function getPluralForm( $number ) { + $pluralRules = $this->getPluralRules(); + $form = CLDRPluralRuleEvaluator::evaluate( $number, $pluralRules ); + return $form; + } + } diff --git a/languages/classes/LanguageAm.php b/languages/classes/LanguageAm.php deleted file mode 100644 index 4c39c26f78..0000000000 --- a/languages/classes/LanguageAm.php +++ /dev/null @@ -1,44 +0,0 @@ -preConvertPlural( $forms, 2 ); - - return ( $count <= 1 ) ? $forms[0] : $forms[1]; - } -} diff --git a/languages/classes/LanguageAr.php b/languages/classes/LanguageAr.php index cc6b85c3f7..553ff077cd 100644 --- a/languages/classes/LanguageAr.php +++ b/languages/classes/LanguageAr.php @@ -29,31 +29,6 @@ */ class LanguageAr extends Language { - /** - * @param $count int - * @param $forms array - * @return string - */ - function convertPlural( $count, $forms ) { - if ( !count( $forms ) ) { return ''; } - $forms = $this->preConvertPlural( $forms, 6 ); - - if ( $count == 0 ) { - $index = 0; - } elseif ( $count == 1 ) { - $index = 1; - } elseif ( $count == 2 ) { - $index = 2; - } elseif ( $count % 100 >= 3 && $count % 100 <= 10 ) { - $index = 3; - } elseif ( $count % 100 >= 11 && $count % 100 <= 99 ) { - $index = 4; - } else { - $index = 5; - } - return $forms[$index]; - } - /** * Temporary hack for bug 9413: replace Arabic presentation forms with their * standard equivalents. diff --git a/languages/classes/LanguageBe.php b/languages/classes/LanguageBe.php deleted file mode 100644 index b5b5966556..0000000000 --- a/languages/classes/LanguageBe.php +++ /dev/null @@ -1,62 +0,0 @@ - - * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License - * @license http://www.gnu.org/copyleft/fdl.html GNU Free Documentation License - * @ingroup Language - */ - -/** - * Belarusian normative (Беларуская мова) - * - * This is still the version from Be-x-old, only duplicated for consistency of - * plural and grammar functions. If there are errors please send a patch. - * - * @ingroup Language - * @see http://be.wikipedia.org/wiki/Talk:LanguageBe.php - */ -class LanguageBe extends Language { - - /** - * @param $count int - * @param $forms array - * - * @return string - */ - function convertPlural( $count, $forms ) { - if ( !count( $forms ) ) { return ''; } - // @todo FIXME: CLDR defines 4 plural forms instead of 3 - // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html - $forms = $this->preConvertPlural( $forms, 3 ); - - if ( $count > 10 && floor( ( $count % 100 ) / 10 ) == 1 ) { - return $forms[2]; - } else { - switch ( $count % 10 ) { - case 1: return $forms[0]; - case 2: - case 3: - case 4: return $forms[1]; - default: return $forms[2]; - } - } - } -} diff --git a/languages/classes/LanguageBh.php b/languages/classes/LanguageBh.php deleted file mode 100644 index 0eaf2ff5c5..0000000000 --- a/languages/classes/LanguageBh.php +++ /dev/null @@ -1,44 +0,0 @@ -preConvertPlural( $forms, 2 ); - - return ( $count <= 1 ) ? $forms[0] : $forms[1]; - } -} diff --git a/languages/classes/LanguageBs.php b/languages/classes/LanguageBs.php index 0929641c8b..3da7711e8f 100644 --- a/languages/classes/LanguageBs.php +++ b/languages/classes/LanguageBs.php @@ -28,29 +28,6 @@ */ class LanguageBs extends Language { - /** - * @param $count int - * @param $forms array - * @return string - */ - function convertPlural( $count, $forms ) { - if ( !count( $forms ) ) { return ''; } - $forms = $this->preConvertPlural( $forms, 3 ); - - // @todo FIXME: CLDR defines 4 plural forms instead of 3. Plural for decimals is missing. - // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html - if ( $count > 10 && floor( ( $count % 100 ) / 10 ) == 1 ) { - return $forms[2]; - } else { - switch ( $count % 10 ) { - case 1: return $forms[0]; - case 2: - case 3: - case 4: return $forms[1]; - default: return $forms[2]; - } - } - } /** * Convert from the nominative form of a noun to some other case diff --git a/languages/classes/LanguageCs.php b/languages/classes/LanguageCs.php deleted file mode 100644 index 49c4756c57..0000000000 --- a/languages/classes/LanguageCs.php +++ /dev/null @@ -1,56 +0,0 @@ -preConvertPlural( $forms, 3 ); - - switch ( $count ) { - case 1: - return $forms[0]; - case 2: - case 3: - case 4: - return $forms[1]; - default: - return $forms[2]; - } - } -} diff --git a/languages/classes/LanguageCu.php b/languages/classes/LanguageCu.php index bfa95cfb57..2016a43cce 100644 --- a/languages/classes/LanguageCu.php +++ b/languages/classes/LanguageCu.php @@ -20,7 +20,7 @@ * @file * @ingroup Language */ - + /** * Old Church Slavonic (Ѩзыкъ словѣньскъ) * diff --git a/languages/classes/LanguageCy.php b/languages/classes/LanguageCy.php deleted file mode 100644 index 9c28279de8..0000000000 --- a/languages/classes/LanguageCy.php +++ /dev/null @@ -1,50 +0,0 @@ -preConvertPlural( $forms, 6 ); - $count = abs( $count ); - if ( $count >= 0 && $count <= 3 ) { - return $forms[$count]; - } elseif ( $count == 6 ) { - return $forms[4]; - } else { - return $forms[5]; - } - } -} diff --git a/languages/classes/LanguageDsb.php b/languages/classes/LanguageDsb.php index b8ed7fcb32..975157f41e 100644 --- a/languages/classes/LanguageDsb.php +++ b/languages/classes/LanguageDsb.php @@ -54,21 +54,4 @@ class LanguageDsb extends Language { return $word; # this will return the original value for 'nominatiw' (nominativ) and all undefined case values } - /** - * @param $count int - * @param $forms array - * @return string - */ - function convertPlural( $count, $forms ) { - if ( !count( $forms ) ) { return ''; } - $forms = $this->preConvertPlural( $forms, 4 ); - - switch ( abs( $count ) % 100 ) { - case 1: return $forms[0]; // singular - case 2: return $forms[1]; // dual - case 3: - case 4: return $forms[2]; // plural - default: return $forms[3]; // pluralgen - } - } } diff --git a/languages/classes/LanguageFr.php b/languages/classes/LanguageFr.php deleted file mode 100644 index edbe1fbc47..0000000000 --- a/languages/classes/LanguageFr.php +++ /dev/null @@ -1,44 +0,0 @@ -preConvertPlural( $forms, 2 ); - - return ( $count <= 1 ) ? $forms[0] : $forms[1]; - } -} diff --git a/languages/classes/LanguageGa.php b/languages/classes/LanguageGa.php index cb9fa046d0..2f58384a6a 100644 --- a/languages/classes/LanguageGa.php +++ b/languages/classes/LanguageGa.php @@ -64,24 +64,4 @@ class LanguageGa extends Language { return $word; } - /** - * @param $count int - * @param $forms array - * @return string - */ - function convertPlural( $count, $forms ) { - if ( !count( $forms ) ) { return ''; } - - // plural forms per http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ga - $forms = $this->preConvertPlural( $forms, 3 ); - - if ( $count == 1 ) { - $index = 0; - } elseif ( $count == 2 ) { - $index = 1; - } else { - $index = 2; - } - return $forms[$index]; - } } diff --git a/languages/classes/LanguageGd.php b/languages/classes/LanguageGd.php deleted file mode 100644 index f042b02772..0000000000 --- a/languages/classes/LanguageGd.php +++ /dev/null @@ -1,68 +0,0 @@ -preConvertPlural( $forms, 6 ); - - $count = abs( $count ); - if ( $count == 1 ) { - return $forms[0]; - } elseif ( $count == 2 ) { - return $forms[1]; - } elseif ( $count == 11 ) { - return $forms[2]; - } elseif ( $count == 12 ) { - return $forms[3]; - } elseif ( ($count >= 3 && $count <= 10) || ($count >= 13 && $count <= 19) ) { - return $forms[4]; - } else { - return $forms[5]; - } - } -} diff --git a/languages/classes/LanguageHe.php b/languages/classes/LanguageHe.php index 22be1dec25..48c0c05712 100644 --- a/languages/classes/LanguageHe.php +++ b/languages/classes/LanguageHe.php @@ -68,23 +68,4 @@ class LanguageHe extends Language { return $word; } - /** - * Gets a number and uses the suited form of the word. - * - * @param $count Integer: the number of items - * @param $forms Array with 3 items: the three plural forms - * @return String: the suited form of word - */ - function convertPlural( $count, $forms ) { - if ( !count( $forms ) ) { return ''; } - $forms = $this->preConvertPlural( $forms, 3 ); - - if ( $count == 1 ) { - return $forms[0]; // Singular - } elseif ( $count == 2 ) { - return $forms[2]; // Dual or plural if dual is not provided (filled in preConvertPlural) - } else { - return $forms[1]; // Plural - } - } } diff --git a/languages/data/plurals-mediawiki.xml b/languages/data/plurals-mediawiki.xml new file mode 100644 index 0000000000..fe9e03186c --- /dev/null +++ b/languages/data/plurals-mediawiki.xml @@ -0,0 +1,35 @@ + + + + + + n is 1 + n is 2 + + + + n mod 100 is 1 + n mod 100 is 2 + n mod 100 in 3..4 + + + n mod 10 is 1 + n mod 10 is 2 + n mod 10 in 3..4 + + + + n is 1 + n is 2 + n is 11 + n is 12 + n in 3..10 or n in 13..19 + + diff --git a/languages/data/plurals.xml b/languages/data/plurals.xml new file mode 100644 index 0000000000..8432df44f3 --- /dev/null +++ b/languages/data/plurals.xml @@ -0,0 +1,114 @@ + + + + + + + + + + n is 0 + n is 1 + n is 2 + n mod 100 in 3..10 + n mod 100 in 11..99 + + + n is 1 + + + n in 0..1 + + + n within 0..2 and n is not 2 + + + n is 0 + n mod 10 is 1 and n mod 100 is not 11 + + + n is 1 + n is 2 + + + n is 1 + n is 2 + n in 3..6 + n in 7..10 + + + n is 1 + n is 0 OR n is not 1 AND n mod 100 in 1..19 + + + n mod 10 is 1 and n mod 100 not in 11..19 + n mod 10 in 2..9 and n mod 100 not in 11..19 + + + n mod 10 is 1 and n mod 100 is not 11 + n mod 10 in 2..4 and n mod 100 not in 12..14 + n mod 10 is 0 or n mod 10 in 5..9 or n mod 100 in 11..14 + + + + n is 1 + n in 2..4 + + + n is 1 + n mod 10 in 2..4 and n mod 100 not in 12..14 + n is not 1 and n mod 10 in 0..1 or n mod 10 in 5..9 or n mod 100 in 12..14 + + + + + n mod 100 is 1 + n mod 100 is 2 + n mod 100 in 3..4 + + + n is 1 + n is 0 or n mod 100 in 2..10 + n mod 100 in 11..19 + + + n mod 10 is 1 and n is not 11 + + + n is 0 + n is 1 + n is 2 + n is 3 + n is 6 + + + n is 0 + n within 0..2 and n is not 0 and n is not 2 + + + n within 0..1 + n in 2..10 + + + n mod 10 is 1 and n mod 100 not in 11,71,91 + n mod 10 is 2 and n mod 100 not in 12,72,92 + n mod 10 in 3..4,9 and n mod 100 not in 10..19,70..79,90..99 + n mod 1000000 is 0 and n is not 0 + + + n is 0 + n is 1 + + + n in 0..1 or n in 11..99 + + + n mod 10 in 1..2 or n mod 20 is 0 + + + n in 1,11 + n in 2,12 + n in 3..10,13..19 + + + diff --git a/languages/utils/CLDRPluralRuleEvaluator.php b/languages/utils/CLDRPluralRuleEvaluator.php new file mode 100644 index 0000000000..f420e41b58 --- /dev/null +++ b/languages/utils/CLDRPluralRuleEvaluator.php @@ -0,0 +1,72 @@ + rule format. + * @return int The index of the plural form which passed the evaluation + */ + public static function evaluate( $number, $rules ) { + $formIndex = 0; + if ( !$rules ) { + return 0; + } + foreach ( $rules as $form => $rule ) { + $parsedRule = self::parseCLDRRule( $rule, $number ); + // FIXME eval is bad. + if ( eval( "return $parsedRule;" ) ) { + return $formIndex; + } + $formIndex++; + } + return $formIndex; + } + private static function parseCLDRRule( $rule ) { + $rule = preg_replace( '/\bn\b/', '$number', $rule ); + $rule = preg_replace( '/([^ ]+) mod (\d+)/', 'self::mod(\1,\2)', $rule ); + $rule = preg_replace( '/([^ ]+) is not (\d+)/' , '\1!=\2', $rule ); + $rule = preg_replace( '/([^ ]+) is (\d+)/', '\1==\2', $rule ); + $rule = preg_replace( '/([^ ]+) not in (\d+)\.\.(\d+)/', '!self::in(\1,\2,\3)', $rule ); + $rule = preg_replace( '/([^ ]+) not within (\d+)\.\.(\d+)/', '!self::within(\1,\2,\3)', $rule ); + $rule = preg_replace( '/([^ ]+) in (\d+)\.\.(\d+)/', 'self::in(\1,\2,\3)', $rule ); + $rule = preg_replace( '/([^ ]+) within (\d+)\.\.(\d+)/', 'self::within(\1,\2,\3)', $rule ); + // AND takes precedence over OR + $andrule = '/([^ ]+) and ([^ ]+)/i'; + while ( preg_match( $andrule, $rule ) ) { + $rule = preg_replace( $andrule, '(\1&&\2)', $rule ); + } + $orrule = '/([^ ]+) or ([^ ]+)/i'; + while ( preg_match( $orrule, $rule ) ) { + $rule = preg_replace( $orrule, '(\1||\2)', $rule ); + } + + return $rule; + } + + private static function in( $num, $low, $high ) { + return is_int( $num ) && $num >= $low && $num <= $high; + } + + private static function within( $num, $low, $high ) { + return $num >= $low && $num <= $high; + } + + private static function mod( $num, $mod ) { + if ( is_int( $num ) ) { + return (int) fmod( $num, $mod ); + } + return fmod( $num, $mod ); + } +} diff --git a/tests/phpunit/languages/LanguageHeTest.php b/tests/phpunit/languages/LanguageHeTest.php index 9ac0f952d9..7833da7149 100644 --- a/tests/phpunit/languages/LanguageHeTest.php +++ b/tests/phpunit/languages/LanguageHeTest.php @@ -18,31 +18,31 @@ class LanguageHeTest extends MediaWikiTestCase { /** @dataProvider providerPluralDual */ function testPluralDual( $result, $value ) { - $forms = array( 'one', 'many', 'two' ); + $forms = array( 'one', 'two', 'other' ); $this->assertEquals( $result, $this->lang->convertPlural( $value, $forms ) ); } function providerPluralDual() { return array ( - array( 'many', 0 ), // Zero -> plural + array( 'other', 0 ), // Zero -> plural array( 'one', 1 ), // Singular array( 'two', 2 ), // Dual - array( 'many', 3 ), // Plural + array( 'other', 3 ), // Plural ); } /** @dataProvider providerPlural */ function testPlural( $result, $value ) { - $forms = array( 'one', 'many' ); + $forms = array( 'one', 'other' ); $this->assertEquals( $result, $this->lang->convertPlural( $value, $forms ) ); } function providerPlural() { return array ( - array( 'many', 0 ), // Zero -> plural + array( 'other', 0 ), // Zero -> plural array( 'one', 1 ), // Singular - array( 'many', 2 ), // Plural, no dual provided - array( 'many', 3 ), // Plural + array( 'other', 2 ), // Plural, no dual provided + array( 'other', 3 ), // Plural ); } } -- 2.20.1