From: Niklas Laxström Date: Wed, 22 Aug 2012 13:15:44 +0000 (+0000) Subject: Adding JavaScript CLDR plural parser. X-Git-Tag: 1.31.0-rc.0~22618^2 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/exercices/journal.php?a=commitdiff_plain;h=405518b8d9868231c5fbc866623a4ae2847b5e55;p=lhc%2Fweb%2Fwiklou.git Adding JavaScript CLDR plural parser. The JavaScript code of the parser was written by Santhosh. The original project is hosted at GitHub: https://github.com/santhoshtr/CLDRPluralRuleParser Introduces resourceloader modules: mediawiki.cldr and mediawiki.libs.pluralruleparser. hi.js and ar.js removed since it has only convertPlural method. More [lang].js needs to remove convertPlural, but not done in this commit. The actual rules will be taken straight from CLDR and they are not integrated in this commit yet. Change-Id: I1aa3b081f4dad68515fd6cd46e4ab2dbdb3291eb --- diff --git a/resources/Resources.php b/resources/Resources.php index 380a09979c..5a24355b0d 100644 --- a/resources/Resources.php +++ b/resources/Resources.php @@ -671,7 +671,6 @@ return array( 'scripts' => 'resources/mediawiki.language/mediawiki.language.js', 'languageScripts' => array( 'am' => 'resources/mediawiki.language/languages/am.js', - 'ar' => 'resources/mediawiki.language/languages/ar.js', 'bat-smg' => 'resources/mediawiki.language/languages/bat-smg.js', 'be' => 'resources/mediawiki.language/languages/be.js', 'be-tarask' => 'resources/mediawiki.language/languages/be-tarask.js', @@ -687,7 +686,6 @@ return array( 'gd' => 'resources/mediawiki.language/languages/gd.js', 'gv' => 'resources/mediawiki.language/languages/gv.js', 'he' => 'resources/mediawiki.language/languages/he.js', - 'hi' => 'resources/mediawiki.language/languages/hi.js', 'hr' => 'resources/mediawiki.language/languages/hr.js', 'hsb' => 'resources/mediawiki.language/languages/hsb.js', 'hu' => 'resources/mediawiki.language/languages/hu.js', @@ -719,7 +717,21 @@ return array( 'uk' => 'resources/mediawiki.language/languages/uk.js', 'wa' => 'resources/mediawiki.language/languages/wa.js', ), - 'dependencies' => array( 'mediawiki.language.data' ), + 'dependencies' => array( + 'mediawiki.language.data', + 'mediawiki.cldr' + ), + ), + + 'mediawiki.cldr' => array( + 'scripts' => 'resources/mediawiki.language/mediawiki.cldr.js', + 'dependencies' => array( + 'mediawiki.libs.pluralruleparser', + ), + ), + + 'mediawiki.libs.pluralruleparser' => array( + 'scripts' => 'resources/mediawiki.libs/CLDRPluralRuleParser.js', ), 'mediawiki.language.init' => array( diff --git a/resources/mediawiki.language/languages/ar.js b/resources/mediawiki.language/languages/ar.js deleted file mode 100644 index d21df7e597..0000000000 --- a/resources/mediawiki.language/languages/ar.js +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Arabic (العربية) language functions - */ - -mediaWiki.language.convertPlural = function( count, forms ) { - forms = mediaWiki.language.preConvertPlural( forms, 6 ); - if ( count == 0 ) { - return forms[0]; - } - if ( count == 1 ) { - return forms[1]; - } - if ( count == 2 ) { - return forms[2]; - } - if ( count % 100 >= 3 && count % 100 <= 10 ) { - return forms[3]; - } - if ( count % 100 >= 11 && count % 100 <= 99 ) { - return forms[4]; - } - return forms[5]; -}; - diff --git a/resources/mediawiki.language/languages/he.js b/resources/mediawiki.language/languages/he.js index e737a7c0f2..d35f77ed89 100644 --- a/resources/mediawiki.language/languages/he.js +++ b/resources/mediawiki.language/languages/he.js @@ -2,17 +2,6 @@ * Hebrew (עברית) language functions */ -mediaWiki.language.convertPlural = function( count, forms ) { - forms = mediaWiki.language.preConvertPlural( forms, 3 ); - if ( count == 1 ) { - return forms[0]; - } - if ( count == 2 && forms[2] ) { - return forms[2]; - } - return forms[1]; -}; - mediaWiki.language.convertGrammar = function( word, form ) { var grammarForms = mw.language.getData( 'he', 'grammarForms' ); if ( grammarForms && grammarForms[form] ) { diff --git a/resources/mediawiki.language/languages/hi.js b/resources/mediawiki.language/languages/hi.js deleted file mode 100644 index a22a0e17f1..0000000000 --- a/resources/mediawiki.language/languages/hi.js +++ /dev/null @@ -1,8 +0,0 @@ -/** - * Hindi (हिन्दी) language functions - */ - -mediaWiki.language.convertPlural = function( count, forms ) { - forms = mediaWiki.language.preConvertPlural( forms, 2 ); - return ( count <= 1 ) ? forms[0] : forms[1]; -}; diff --git a/resources/mediawiki.language/mediawiki.cldr.js b/resources/mediawiki.language/mediawiki.cldr.js new file mode 100644 index 0000000000..6660eca4f2 --- /dev/null +++ b/resources/mediawiki.language/mediawiki.cldr.js @@ -0,0 +1,28 @@ +/** + * CLDR related utility methods + */ +( function( mw ) { + "use strict"; + + var cldr = { + /** + * For the number, get the plural for index + * In case none of the rules passed, we return pluralRules.length + * That means it is the "other" form. + * @param number + * @param pluralRules + * @return plural form index + */ + getPluralForm: function( number, pluralRules ) { + var pluralFormIndex = 0; + for ( pluralFormIndex = 0; pluralFormIndex < pluralRules.length; pluralFormIndex++ ) { + if ( mw.libs.pluralRuleParser( pluralRules[pluralFormIndex], number ) ) { + break; + } + } + return pluralFormIndex; + } + }; + + mw.cldr = cldr; +} )( mediaWiki ); diff --git a/resources/mediawiki.language/mediawiki.language.js b/resources/mediawiki.language/mediawiki.language.js index 1234637afe..935d4ff6e6 100644 --- a/resources/mediawiki.language/mediawiki.language.js +++ b/resources/mediawiki.language/mediawiki.language.js @@ -43,11 +43,19 @@ var language = { * @param forms array List of plural forms * @return string Correct form for quantifier in this language */ - convertPlural: function ( count, forms ){ + convertPlural: function( count, forms ) { + var pluralFormIndex = 0; if ( !forms || forms.length === 0 ) { return ''; } - return ( parseInt( count, 10 ) === 1 ) ? forms[0] : forms[1]; + var pluralRules = mw.language.getData( mw.config.get( 'wgUserLanguage' ), 'pluralRules' ); + if ( !pluralRules ) { + // default fallback. + return ( count === 1 ) ? forms[0] : forms[1]; + } + pluralFormIndex = mw.cldr.getPluralForm( count, pluralRules ); + pluralFormIndex = Math.min( pluralFormIndex, forms.length - 1 ); + return forms[pluralFormIndex]; }, /** diff --git a/resources/mediawiki.libs/CLDRPluralRuleParser.js b/resources/mediawiki.libs/CLDRPluralRuleParser.js new file mode 100644 index 0000000000..bb1491d304 --- /dev/null +++ b/resources/mediawiki.libs/CLDRPluralRuleParser.js @@ -0,0 +1,306 @@ +/* This is cldrpluralparser 1.0, ported to MediaWiki ResourceLoader */ + +/** +* cldrpluralparser.js +* A parser engine for CLDR plural rules. +* +* Copyright 2012 GPLV3+, Santhosh Thottingal +* +* @version 0.1.0-alpha +* @source https://github.com/santhoshtr/CLDRPluralRuleParser +* @author Santhosh Thottingal +* @author Timo Tijhof +* @author Amir Aharoni +*/ + +/** + * Evaluates a plural rule in CLDR syntax for a number + * @param rule + * @param number + * @return true|false|null + */ +( function( mw ) { + +function pluralRuleParser(rule, number) { + /* + Syntax: see http://unicode.org/reports/tr35/#Language_Plural_Rules + ----------------------------------------------------------------- + + condition = and_condition ('or' and_condition)* + and_condition = relation ('and' relation)* + relation = is_relation | in_relation | within_relation | 'n' + is_relation = expr 'is' ('not')? value + in_relation = expr ('not')? 'in' range_list + within_relation = expr ('not')? 'within' range_list + expr = 'n' ('mod' value)? + range_list = (range | value) (',' range_list)* + value = digit+ + digit = 0|1|2|3|4|5|6|7|8|9 + range = value'..'value + + */ + // Indicates current position in the rule as we parse through it. + // Shared among all parsing functions below. + var pos = 0; + + var whitespace = makeRegexParser(/^\s+/); + var digits = makeRegexParser(/^\d+/); + + var _n_ = makeStringParser('n'); + var _is_ = makeStringParser('is'); + var _mod_ = makeStringParser('mod'); + var _not_ = makeStringParser('not'); + var _in_ = makeStringParser('in'); + var _within_ = makeStringParser('within'); + var _range_ = makeStringParser('..'); + var _comma_ = makeStringParser(','); + var _or_ = makeStringParser('or'); + var _and_ = makeStringParser('and'); + + function debug() { + /* console.log.apply(console, arguments);*/ + } + + debug('pluralRuleParser', rule, number); + + // Try parsers until one works, if none work return null + function choice(parserSyntax) { + return function () { + for (var i = 0; i < parserSyntax.length; i++) { + var result = parserSyntax[i](); + if (result !== null) { + return result; + } + } + return null; + }; + } + + // Try several parserSyntax-es in a row. + // All must succeed; otherwise, return null. + // This is the only eager one. + function sequence(parserSyntax) { + var originalPos = pos; + var result = []; + for (var i = 0; i < parserSyntax.length; i++) { + var res = parserSyntax[i](); + if (res === null) { + pos = originalPos; + return null; + } + result.push(res); + } + return result; + } + + // Run the same parser over and over until it fails. + // Must succeed a minimum of n times; otherwise, return null. + function nOrMore(n, p) { + return function () { + var originalPos = pos; + var result = []; + var parsed = p(); + while (parsed !== null) { + result.push(parsed); + parsed = p(); + } + if (result.length < n) { + pos = originalPos; + return null; + } + return result; + }; + } + + // Helpers -- just make parserSyntax out of simpler JS builtin types + + function makeStringParser(s) { + var len = s.length; + return function () { + var result = null; + if (rule.substr(pos, len) === s) { + result = s; + pos += len; + } + return result; + }; + } + + function makeRegexParser(regex) { + return function () { + var matches = rule.substr(pos).match(regex); + if (matches === null) { + return null; + } + pos += matches[0].length; + return matches[0]; + }; + } + + function n() { + var result = _n_(); + if (result === null) { + debug(" -- failed n"); + return result; + } + result = parseInt(number, 10); + debug(" -- passed n ", result); + return result; + } + + var expression = choice([mod, n]); + + function mod() { + var result = sequence([n, whitespace, _mod_, whitespace, digits]); + if (result === null) { + debug(" -- failed mod"); + return null; + } + debug(" -- passed mod"); + return parseInt(result[0], 10) % parseInt(result[4], 10); + } + + function not() { + var result = sequence([whitespace, _not_]); + if (result === null) { + debug(" -- failed not"); + return null; + } else { + return result[1]; + } + } + + function is() { + var result = sequence([expression, whitespace, _is_, nOrMore(0, not), whitespace, digits]); + if (result !== null) { + debug(" -- passed is"); + if (result[3][0] === 'not') { + return result[0] !== parseInt(result[5], 10); + } else { + return result[0] === parseInt(result[5], 10); + } + } + debug(" -- failed is"); + return null; + } + + function rangeList() { + // range_list = (range | value) (',' range_list)* + var result = sequence([choice([range, digits]), nOrMore(0, rangeTail)]); + var resultList = []; + if (result !== null) { + resultList = resultList.concat(result[0], result[1][0]); + return resultList; + } + debug(" -- failed rangeList"); + return null; + } + + function rangeTail() { + // ',' range_list + var result = sequence([_comma_, rangeList]); + if (result !== null) { + return result[1]; + } + debug(" -- failed rangeTail"); + return null; + } + + function range() { + var result = sequence([digits, _range_, digits]); + if (result !== null) { + debug(" -- passed range"); + var array = []; + var left = parseInt(result[0], 10); + var right = parseInt(result[2], 10); + for ( i = left; i <= right; i++) { + array.push(i); + } + return array; + } + debug(" -- failed range"); + return null; + } + + function _in() { + // in_relation = expr ('not')? 'in' range_list + var result = sequence([expression, nOrMore(0, not), whitespace, _in_, whitespace, rangeList]); + if (result !== null) { + debug(" -- passed _in"); + var range_list = result[5]; + for (var i = 0; i < range_list.length; i++) { + if (parseInt(range_list[i], 10) === result[0]) { + return (result[1][0] !== 'not'); + } + } + return (result[1][0] === 'not'); + } + debug(" -- failed _in "); + return null; + } + + function within() { + var result = sequence([expression, whitespace, _within_, whitespace, rangeList]); + if (result !== null) { + debug(" -- passed within "); + var range_list = result[4]; + return (parseInt( range_list[0],10 )<= result[0] && result[0] <= parseInt( range_list[1], 10)); + } + debug(" -- failed within "); + return null; + } + + + var relation = choice([is, _in, within]); + + function and() { + var result = sequence([relation, whitespace, _and_, whitespace, condition]); + if (result) { + debug(" -- passed and"); + return result[0] && result[4]; + } + debug(" -- failed and"); + return null; + } + + function or() { + var result = sequence([relation, whitespace, _or_, whitespace, condition]); + if (result) { + debug(" -- passed or"); + return result[0] || result[4]; + } + debug(" -- failed or"); + return null; + } + + var condition = choice([and, or, relation]); + + function start() { + var result = condition(); + return result; + } + + + var result = start(); + + /* + * For success, the pos must have gotten to the end of the rule + * and returned a non-null. + * n.b. This is part of language infrastructure, so we do not throw an internationalizable message. + */ + if (result === null || pos !== rule.length) { + // throw new Error("Parse error at position " + pos.toString() + " in input: " + rule + " result is " + result); + } + + return result; +} + +/* For module loaders, e.g. NodeJS, NPM */ +if (typeof module !== 'undefined' && module.exports) { + module.exports = pluralRuleParser; +} + +/* pluralRuleParser ends here */ +mw.libs.pluralRuleParser = pluralRuleParser; + +} )( mediaWiki ); diff --git a/tests/qunit/QUnitTestResources.php b/tests/qunit/QUnitTestResources.php index 1cd085ffee..59ae73cd1a 100644 --- a/tests/qunit/QUnitTestResources.php +++ b/tests/qunit/QUnitTestResources.php @@ -30,6 +30,7 @@ return array( 'tests/qunit/suites/resources/mediawiki.api/mediawiki.api.parse.test.js', 'tests/qunit/suites/resources/mediawiki.special/mediawiki.special.recentchanges.test.js', 'tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js', + 'tests/qunit/suites/resources/mediawiki/mediawiki.cldr.test.js', ), 'dependencies' => array( 'jquery.autoEllipsis', @@ -55,6 +56,7 @@ return array( 'mediawiki.util', 'mediawiki.special.recentchanges', 'mediawiki.language', + 'mediawiki.cldr', ), 'position' => 'top', ) diff --git a/tests/qunit/suites/resources/mediawiki/mediawiki.cldr.test.js b/tests/qunit/suites/resources/mediawiki/mediawiki.cldr.test.js new file mode 100644 index 0000000000..09a11b0e51 --- /dev/null +++ b/tests/qunit/suites/resources/mediawiki/mediawiki.cldr.test.js @@ -0,0 +1,67 @@ +module( 'mediawiki.cldr' ); + +test( '-- Initial check', function() { + expect( 1 ); + ok( mw.cldr, 'mw.cldr defined' ); +} ); + +var pluralTestcases = { + /* + * Sample: + *"languagecode" : [ + * [ number, [ "form1", "form2", ... ], "expected", "description" ], + * ] + */ + "en": [ + [ 0, [ "one", "other" ], "other", "English plural test- 0 is other" ], + [ 1, [ "one", "other" ], "one", "English plural test- 1 is one" ] + ], + "hi": [ + [ 0, [ "one", "other" ], "one", "Hindi plural test- 0 is one" ], + [ 1, [ "one", "other" ], "one", "Hindi plural test- 1 is one" ], + [ 2, [ "one", "other" ], "other", "Hindi plural test- 2 is other" ] + ], + "he": [ + [ 0, [ "one", "other" ], "other", "Hebrew plural test- 0 is other" ], + [ 1, [ "one", "other" ], "one", "Hebrew plural test- 1 is one" ], + [ 2, [ "one", "other" ], "other", "Hebrew plural test- 2 is other with 2 forms" ], + [ 2, [ "one", "dual", "other" ], "dual", "Hebrew plural test- 2 is dual with 3 forms" ] + ], + "ar": [ + [ 0, [ "zero", "one", "two", "few", "many", "other" ], "zero", "Arabic plural test - 0 is zero" ], + [ 1, [ "zero", "one", "two", "few", "many", "other" ], "one", "Arabic plural test - 1 is one" ], + [ 2, [ "zero", "one", "two", "few", "many", "other" ], "two", "Arabic plural test - 2 is two" ], + [ 3, [ "zero", "one", "two", "few", "many", "other" ], "few", "Arabic plural test - 3 is few" ], + [ 9, [ "zero", "one", "two", "few", "many", "other" ], "few", "Arabic plural test - 9 is few" ], + [ "9", [ "zero", "one", "two", "few", "many", "other" ], "few", "Arabic plural test - 9 is few" ], + [ 110, [ "zero", "one", "two", "few", "many", "other" ], "few", "Arabic plural test - 110 is few" ], + [ 11, [ "zero", "one", "two", "few", "many", "other" ], "many", "Arabic plural test - 11 is many" ], + [ 15, [ "zero", "one", "two", "few", "many", "other" ], "many", "Arabic plural test - 15 is many" ], + [ 99, [ "zero", "one", "two", "few", "many", "other" ], "many", "Arabic plural test - 99 is many" ], + [ 9999, [ "zero", "one", "two", "few", "many", "other" ], "many", "Arabic plural test - 9999 is many" ], + [ 100, [ "zero", "one", "two", "few", "many", "other" ], "other", "Arabic plural test - 100 is other" ], + [ 102, [ "zero", "one", "two", "few", "many", "other" ], "other", "Arabic plural test - 102 is other" ], + [ 1000, [ "zero", "one", "two", "few", "many", "other" ], "other", "Arabic plural test - 1000 is other" ] + // FIXME plural rules for decimal numbers does not work + // [ 1.7, [ "zero", "one", "two", "few", "many", "other" ], "other", "Arabic plural test - 1.7 is other" ], + ] +}; + +function pluralTest( langCode, tests ) { + QUnit.test('-- Plural Test for ' + langCode, function( assert ) { + QUnit.expect( tests.length ); + for ( var i = 0; i < tests.length; i++ ) { + assert.equal( + mw.language.convertPlural( tests[i][0], tests[i][1] ), + tests[i][2], // Expected plural form + tests[i][3] // Description + ); + } + } ); +} + +$.each( pluralTestcases, function( langCode, tests ) { + if ( langCode === mw.config.get( 'wgUserLanguage' ) ) { + pluralTest( langCode, tests ); + } +} );