The JavaScript code of the parser was written by Santhosh.
The original project is hosted at GitHub:
https://github.com/santhoshtr/CLDRPluralRuleParser
Introduces two ResourceLoader modules: mediawiki.cldr and
mediawiki.libs.pluralruleparser.
hi.js and ar.js are removed since they contain only a convertPlural method. More
[lang].js files need their convertPlural removed, but that is not done in this commit.
The actual rules will be taken straight from CLDR; they are not
integrated in this commit yet.
Change-Id: I1aa3b081f4dad68515fd6cd46e4ab2dbdb3291eb
'scripts' => 'resources/mediawiki.language/mediawiki.language.js',
'languageScripts' => array(
'am' => 'resources/mediawiki.language/languages/am.js',
- 'ar' => 'resources/mediawiki.language/languages/ar.js',
'bat-smg' => 'resources/mediawiki.language/languages/bat-smg.js',
'be' => 'resources/mediawiki.language/languages/be.js',
'be-tarask' => 'resources/mediawiki.language/languages/be-tarask.js',
'gd' => 'resources/mediawiki.language/languages/gd.js',
'gv' => 'resources/mediawiki.language/languages/gv.js',
'he' => 'resources/mediawiki.language/languages/he.js',
- 'hi' => 'resources/mediawiki.language/languages/hi.js',
'hr' => 'resources/mediawiki.language/languages/hr.js',
'hsb' => 'resources/mediawiki.language/languages/hsb.js',
'hu' => 'resources/mediawiki.language/languages/hu.js',
'uk' => 'resources/mediawiki.language/languages/uk.js',
'wa' => 'resources/mediawiki.language/languages/wa.js',
),
- 'dependencies' => array( 'mediawiki.language.data' ),
+ 'dependencies' => array(
+ 'mediawiki.language.data',
+ 'mediawiki.cldr'
+ ),
+ ),
+
+ 'mediawiki.cldr' => array(
+ 'scripts' => 'resources/mediawiki.language/mediawiki.cldr.js',
+ 'dependencies' => array(
+ 'mediawiki.libs.pluralruleparser',
+ ),
+ ),
+
+ 'mediawiki.libs.pluralruleparser' => array(
+ 'scripts' => 'resources/mediawiki.libs/CLDRPluralRuleParser.js',
),
'mediawiki.language.init' => array(
+++ /dev/null
-/**
- * Arabic (العربية) language functions
- */
-
-mediaWiki.language.convertPlural = function( count, forms ) {
- forms = mediaWiki.language.preConvertPlural( forms, 6 );
- if ( count == 0 ) {
- return forms[0];
- }
- if ( count == 1 ) {
- return forms[1];
- }
- if ( count == 2 ) {
- return forms[2];
- }
- if ( count % 100 >= 3 && count % 100 <= 10 ) {
- return forms[3];
- }
- if ( count % 100 >= 11 && count % 100 <= 99 ) {
- return forms[4];
- }
- return forms[5];
-};
-
* Hebrew (עברית) language functions
*/
-mediaWiki.language.convertPlural = function( count, forms ) {
- forms = mediaWiki.language.preConvertPlural( forms, 3 );
- if ( count == 1 ) {
- return forms[0];
- }
- if ( count == 2 && forms[2] ) {
- return forms[2];
- }
- return forms[1];
-};
-
mediaWiki.language.convertGrammar = function( word, form ) {
var grammarForms = mw.language.getData( 'he', 'grammarForms' );
if ( grammarForms && grammarForms[form] ) {
+++ /dev/null
-/**
- * Hindi (हिन्दी) language functions
- */
-
-mediaWiki.language.convertPlural = function( count, forms ) {
- forms = mediaWiki.language.preConvertPlural( forms, 2 );
- return ( count <= 1 ) ? forms[0] : forms[1];
-};
--- /dev/null
+/**
+ * CLDR related utility methods
+ */
+( function( mw ) {
+	"use strict";
+
+	/**
+	 * Find the index of the first plural rule that holds for a number.
+	 *
+	 * Rules are tried in order with mw.libs.pluralRuleParser. When no rule
+	 * matches, the returned index equals pluralRules.length, which callers
+	 * treat as the "other" form.
+	 *
+	 * @param number Value to classify
+	 * @param pluralRules Array of CLDR plural rule strings
+	 * @return plural form index
+	 */
+	function getPluralForm( number, pluralRules ) {
+		var index = 0;
+		while (
+			index < pluralRules.length &&
+			!mw.libs.pluralRuleParser( pluralRules[index], number )
+		) {
+			index++;
+		}
+		return index;
+	}
+
+	mw.cldr = {
+		getPluralForm: getPluralForm
+	};
+} )( mediaWiki );
* @param forms array List of plural forms
* @return string Correct form for quantifier in this language
*/
- convertPlural: function ( count, forms ){
+ convertPlural: function( count, forms ) {
+ var pluralRules,
+ pluralFormIndex;
if ( !forms || forms.length === 0 ) {
return '';
}
- return ( parseInt( count, 10 ) === 1 ) ? forms[0] : forms[1];
+ pluralRules = mw.language.getData( mw.config.get( 'wgUserLanguage' ), 'pluralRules' );
+ if ( pluralRules ) {
+ // Index of the first CLDR rule that matches; pluralRules.length means "other".
+ pluralFormIndex = mw.cldr.getPluralForm( count, pluralRules );
+ } else {
+ // Default fallback when no rules are registered for the user language:
+ // singular for 1, plural otherwise. parseInt keeps numeric strings such
+ // as "1" selecting the singular form, as the replaced implementation did.
+ pluralFormIndex = ( parseInt( count, 10 ) === 1 ) ? 0 : 1;
+ }
+ // Never index past the forms that were actually supplied; this applies to
+ // the fallback as well, so a single-form list no longer yields undefined.
+ pluralFormIndex = Math.min( pluralFormIndex, forms.length - 1 );
+ return forms[pluralFormIndex];
},
/**
--- /dev/null
+/* This is cldrpluralparser 1.0, ported to MediaWiki ResourceLoader */
+
+/**
+* cldrpluralparser.js
+* A parser engine for CLDR plural rules.
+*
+* Copyright 2012 GPLV3+, Santhosh Thottingal
+*
+* @version 0.1.0-alpha
+* @source https://github.com/santhoshtr/CLDRPluralRuleParser
+* @author Santhosh Thottingal <santhosh.thottingal@gmail.com>
+* @author Timo Tijhof
+* @author Amir Aharoni
+*/
+
+/**
+ * Evaluates a plural rule in CLDR syntax for a number
+ * @param rule
+ * @param number
+ * @return true|false|null
+ */
+( function( mw ) {
+
+/**
+ * Evaluate one CLDR plural rule for a number.
+ *
+ * @param {string} rule Plural rule in the syntax described below,
+ *  e.g. "n mod 100 in 3..10"
+ * @param {number|string} number Value substituted for "n". It is passed
+ *  through parseInt( number, 10 ), so a fractional part is dropped.
+ * @return {boolean|null} Whether the rule holds for the number, or null
+ *  when the beginning of the rule does not match the grammar
+ */
+function pluralRuleParser(rule, number) {
+	/*
+	Syntax: see http://unicode.org/reports/tr35/#Language_Plural_Rules
+	-----------------------------------------------------------------
+
+	condition       = and_condition ('or' and_condition)*
+	and_condition   = relation ('and' relation)*
+	relation        = is_relation | in_relation | within_relation | 'n' <EOL>
+	is_relation     = expr 'is' ('not')? value
+	in_relation     = expr ('not')? 'in' range_list
+	within_relation = expr ('not')? 'within' range_list
+	expr            = 'n' ('mod' value)?
+	range_list      = (range | value) (',' range_list)*
+	value           = digit+
+	digit           = 0|1|2|3|4|5|6|7|8|9
+	range           = value'..'value
+
+	*/
+	// Indicates current position in the rule as we parse through it.
+	// Shared among all parsing functions below.
+	var pos = 0;
+
+	var whitespace = makeRegexParser(/^\s+/);
+	var digits = makeRegexParser(/^\d+/);
+
+	var _n_ = makeStringParser('n');
+	var _is_ = makeStringParser('is');
+	var _mod_ = makeStringParser('mod');
+	var _not_ = makeStringParser('not');
+	var _in_ = makeStringParser('in');
+	var _within_ = makeStringParser('within');
+	var _range_ = makeStringParser('..');
+	var _comma_ = makeStringParser(',');
+	var _or_ = makeStringParser('or');
+	var _and_ = makeStringParser('and');
+
+	// expr = 'n' ('mod' value)?  -- the longer alternative is tried first.
+	var expression = choice([mod, n]);
+
+	// relation = is_relation | in_relation | within_relation
+	var relation = choice([is, _in, within]);
+
+	// Try parsers until one works, if none work return null.
+	function choice(parserSyntax) {
+		return function () {
+			var i, result;
+			for (i = 0; i < parserSyntax.length; i++) {
+				result = parserSyntax[i]();
+				if (result !== null) {
+					return result;
+				}
+			}
+			return null;
+		};
+	}
+
+	// Try several parsers in a row. All must succeed; otherwise the
+	// position is rewound and null is returned.
+	// This is the only eager combinator: it runs as soon as it is called.
+	function sequence(parserSyntax) {
+		var originalPos = pos;
+		var result = [];
+		var i, res;
+		for (i = 0; i < parserSyntax.length; i++) {
+			res = parserSyntax[i]();
+			if (res === null) {
+				pos = originalPos;
+				return null;
+			}
+			result.push(res);
+		}
+		return result;
+	}
+
+	// Run the same parser over and over until it fails.
+	// Must succeed a minimum of `min` times; otherwise, return null.
+	function nOrMore(min, p) {
+		return function () {
+			var originalPos = pos;
+			var result = [];
+			var parsed = p();
+			while (parsed !== null) {
+				result.push(parsed);
+				parsed = p();
+			}
+			if (result.length < min) {
+				pos = originalPos;
+				return null;
+			}
+			return result;
+		};
+	}
+
+	// Helpers -- build parsers out of simpler JS builtin types.
+
+	// Parser that consumes the literal string s at the current position.
+	function makeStringParser(s) {
+		var len = s.length;
+		return function () {
+			if (rule.substr(pos, len) !== s) {
+				return null;
+			}
+			pos += len;
+			return s;
+		};
+	}
+
+	// Parser that consumes whatever the (anchored) regex matches at the
+	// current position.
+	function makeRegexParser(regex) {
+		return function () {
+			var matches = rule.substr(pos).match(regex);
+			if (matches === null) {
+				return null;
+			}
+			pos += matches[0].length;
+			return matches[0];
+		};
+	}
+
+	// 'n' -- yields the number under test, truncated by parseInt.
+	function n() {
+		if (_n_() === null) {
+			return null;
+		}
+		return parseInt(number, 10);
+	}
+
+	// 'n' 'mod' value -- yields the remainder.
+	function mod() {
+		var result = sequence([n, whitespace, _mod_, whitespace, digits]);
+		if (result === null) {
+			return null;
+		}
+		return result[0] % parseInt(result[4], 10);
+	}
+
+	// Optional negation: whitespace followed by 'not'.
+	function not() {
+		var result = sequence([whitespace, _not_]);
+		if (result === null) {
+			return null;
+		}
+		return result[1];
+	}
+
+	// is_relation = expr 'is' ('not')? value
+	function is() {
+		var result = sequence([expression, whitespace, _is_, nOrMore(0, not), whitespace, digits]);
+		var same;
+		if (result === null) {
+			return null;
+		}
+		same = result[0] === parseInt(result[5], 10);
+		return (result[3][0] === 'not') ? !same : same;
+	}
+
+	// A single value, represented as the degenerate range [v, v].
+	function value() {
+		var result = digits();
+		var v;
+		if (result === null) {
+			return null;
+		}
+		v = parseInt(result, 10);
+		return [v, v];
+	}
+
+	// range = value'..'value, represented as [low, high].
+	// Keeping the bounds (instead of expanding every member) is what lets
+	// `within` compare against the real upper bound.
+	function range() {
+		var result = sequence([digits, _range_, digits]);
+		if (result === null) {
+			return null;
+		}
+		return [parseInt(result[0], 10), parseInt(result[2], 10)];
+	}
+
+	// range_list = (range | value) (',' range_list)*
+	// Yields a flat array of [low, high] pairs.
+	function rangeList() {
+		var result = sequence([choice([range, value]), nOrMore(0, rangeTail)]);
+		var list, i;
+		if (result === null) {
+			return null;
+		}
+		list = [result[0]];
+		for (i = 0; i < result[1].length; i++) {
+			list = list.concat(result[1][i]);
+		}
+		return list;
+	}
+
+	// ',' range_list
+	function rangeTail() {
+		var result = sequence([_comma_, rangeList]);
+		if (result === null) {
+			return null;
+		}
+		return result[1];
+	}
+
+	// Whether candidate lies inside at least one [low, high] pair.
+	function inRanges(candidate, ranges) {
+		var i;
+		for (i = 0; i < ranges.length; i++) {
+			if (ranges[i][0] <= candidate && candidate <= ranges[i][1]) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	// in_relation = expr ('not')? 'in' range_list
+	// `in` matches whole numbers only; NaN fails the integer check too.
+	function _in() {
+		var result = sequence([expression, nOrMore(0, not), whitespace, _in_, whitespace, rangeList]);
+		var matched;
+		if (result === null) {
+			return null;
+		}
+		matched = (result[0] % 1 === 0) && inRanges(result[0], result[5]);
+		return (result[1][0] === 'not') ? !matched : matched;
+	}
+
+	// within_relation = expr ('not')? 'within' range_list
+	function within() {
+		var result = sequence([expression, nOrMore(0, not), whitespace, _within_, whitespace, rangeList]);
+		var matched;
+		if (result === null) {
+			return null;
+		}
+		matched = inRanges(result[0], result[5]);
+		return (result[1][0] === 'not') ? !matched : matched;
+	}
+
+	// 'and' relation
+	function andTail() {
+		var result = sequence([whitespace, _and_, whitespace, relation]);
+		if (result === null) {
+			return null;
+		}
+		return result[3];
+	}
+
+	// and_condition = relation ('and' relation)*
+	function andCondition() {
+		var result = sequence([relation, nOrMore(0, andTail)]);
+		var outcome, i;
+		if (result === null) {
+			return null;
+		}
+		outcome = result[0];
+		for (i = 0; i < result[1].length; i++) {
+			outcome = outcome && result[1][i];
+		}
+		return outcome;
+	}
+
+	// 'or' and_condition
+	function orTail() {
+		var result = sequence([whitespace, _or_, whitespace, andCondition]);
+		if (result === null) {
+			return null;
+		}
+		return result[3];
+	}
+
+	// condition = and_condition ('or' and_condition)*
+	// 'and' therefore binds tighter than 'or', as the grammar requires.
+	function condition() {
+		var result = sequence([andCondition, nOrMore(0, orTail)]);
+		var outcome, i;
+		if (result === null) {
+			return null;
+		}
+		outcome = result[0];
+		for (i = 0; i < result[1].length; i++) {
+			outcome = outcome || result[1][i];
+		}
+		return outcome;
+	}
+
+	/*
+	 * n.b. This is part of language infrastructure, so no error is thrown.
+	 * A rule whose beginning cannot be parsed yields null. Trailing input
+	 * that the grammar could not consume (pos !== rule.length) is not
+	 * reported; the result of the parsed prefix is returned.
+	 */
+	return condition();
+}
+
+/* For module loaders, e.g. NodeJS, NPM:
+ * when a CommonJS-style `module.exports` object is present, expose the
+ * parser function through it.
+ * NOTE(review): the enclosing wrapper is invoked with the global
+ * `mediaWiki`, so this branch is only reached where that global exists -
+ * confirm whether standalone NodeJS use is still intended for this port.
+ */
+if (typeof module !== 'undefined' && module.exports) {
+ module.exports = pluralRuleParser;
+}
+
+/* pluralRuleParser ends here.
+ * Publish the parser as mw.libs.pluralRuleParser, the name that
+ * mw.cldr.getPluralForm calls.
+ */
+mw.libs.pluralRuleParser = pluralRuleParser;
+
+} )( mediaWiki );
'tests/qunit/suites/resources/mediawiki.api/mediawiki.api.parse.test.js',
'tests/qunit/suites/resources/mediawiki.special/mediawiki.special.recentchanges.test.js',
'tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js',
+ 'tests/qunit/suites/resources/mediawiki/mediawiki.cldr.test.js',
),
'dependencies' => array(
'jquery.autoEllipsis',
'mediawiki.util',
'mediawiki.special.recentchanges',
'mediawiki.language',
+ 'mediawiki.cldr',
),
'position' => 'top',
)
--- /dev/null
+QUnit.module( 'mediawiki.cldr' );
+
+// Sanity check that the module under test was loaded at all. Uses the
+// QUnit.* entry points and the `assert` argument, like pluralTest in
+// this file, instead of the bare globals.
+QUnit.test( '-- Initial check', function( assert ) {
+	QUnit.expect( 1 );
+	assert.ok( mw.cldr, 'mw.cldr defined' );
+} );
+
+// All six Arabic plural forms, in the order the test cases expect them.
+var arabicForms = [ "zero", "one", "two", "few", "many", "other" ];
+
+var pluralTestcases = {
+	/*
+	 * Sample:
+	 *"languagecode" : [
+	 *   [ number, [ "form1", "form2", ... ],  "expected", "description" ],
+	 * ]
+	 */
+	"en": [
+		[ 0, [ "one", "other" ], "other", "English plural test- 0 is other" ],
+		[ 1, [ "one", "other" ], "one", "English plural test- 1 is one" ]
+	],
+	"hi": [
+		[ 0, [ "one", "other" ], "one", "Hindi plural test- 0 is one" ],
+		[ 1, [ "one", "other" ], "one", "Hindi plural test- 1 is one" ],
+		[ 2, [ "one", "other" ], "other", "Hindi plural test- 2 is other" ]
+	],
+	"he": [
+		[ 0, [ "one", "other" ], "other", "Hebrew plural test- 0 is other" ],
+		[ 1, [ "one", "other" ], "one", "Hebrew plural test- 1 is one" ],
+		[ 2, [ "one", "other" ], "other", "Hebrew plural test- 2 is other with 2 forms" ],
+		[ 2, [ "one", "dual", "other" ], "dual", "Hebrew plural test- 2 is dual with 3 forms" ]
+	],
+	"ar": [
+		[ 0, arabicForms, "zero", "Arabic plural test - 0 is zero" ],
+		[ 1, arabicForms, "one", "Arabic plural test - 1 is one" ],
+		[ 2, arabicForms, "two", "Arabic plural test - 2 is two" ],
+		[ 3, arabicForms, "few", "Arabic plural test - 3 is few" ],
+		[ 9, arabicForms, "few", "Arabic plural test - 9 is few" ],
+		// Same value as above, passed as a string; the description is kept
+		// distinct so a failure can be attributed to the right case.
+		[ "9", arabicForms, "few", "Arabic plural test - \"9\" (string) is few" ],
+		[ 110, arabicForms, "few", "Arabic plural test - 110 is few" ],
+		[ 11, arabicForms, "many", "Arabic plural test - 11 is many" ],
+		[ 15, arabicForms, "many", "Arabic plural test - 15 is many" ],
+		[ 99, arabicForms, "many", "Arabic plural test - 99 is many" ],
+		[ 9999, arabicForms, "many", "Arabic plural test - 9999 is many" ],
+		[ 100, arabicForms, "other", "Arabic plural test - 100 is other" ],
+		[ 102, arabicForms, "other", "Arabic plural test - 102 is other" ],
+		[ 1000, arabicForms, "other", "Arabic plural test - 1000 is other" ]
+		// FIXME plural rules for decimal numbers does not work
+		// [ 1.7, arabicForms, "other", "Arabic plural test - 1.7 is other" ],
+	]
+};
+
+/**
+ * Register one QUnit test that runs every plural case of a language
+ * through mw.language.convertPlural.
+ *
+ * @param langCode Language code, used only in the test name
+ * @param tests Array of [ number, forms, expected, description ] entries
+ */
+function pluralTest( langCode, tests ) {
+	QUnit.test('-- Plural Test for ' + langCode, function( assert ) {
+		// One assertion per case.
+		QUnit.expect( tests.length );
+		$.each( tests, function( index, testCase ) {
+			var actual = mw.language.convertPlural( testCase[0], testCase[1] );
+			assert.equal( actual, testCase[2], testCase[3] );
+		} );
+	} );
+}
+
+// Only the cases written for the current user language are registered,
+// because convertPlural always evaluates against wgUserLanguage.
+( function() {
+	var userLanguage = mw.config.get( 'wgUserLanguage' );
+	if ( pluralTestcases.hasOwnProperty( userLanguage ) ) {
+		pluralTest( userLanguage, pluralTestcases[userLanguage] );
+	}
+}() );