'digitTransformTable' => $language->digitTransformTable(),
'separatorTransformTable' => $language->separatorTransformTable(),
'grammarForms' => $language->getGrammarForms(),
+ 'grammarTransformations' => $language->getGrammarTransformations(),
'pluralRules' => $language->getPluralRules(),
'digitGroupingPattern' => $language->digitGroupingPattern(),
'fallbackLanguages' => $language->getFallbackLanguages(),
*/
static private $fallbackLanguageCache = [];
+ /**
+ * Cache for grammar rules data
+ * @var MapCacheLRU|null
+ */
+ static private $grammarTransformations;
+
/**
* Cache for language names
* @var HashBagOStuff|null
return $word;
}
+
/**
* Get the grammar forms for the content language
* @return array Array of grammar forms
return [];
}
+
+ /**
+ * Get the grammar transformations data for the language.
+ * Used like grammar forms, with {{GRAMMAR}} and cases,
+ * but uses pairs of regexes and replacements instead of code.
+ *
+ * The data is read from data/grammarTransformations/<code>.json next to
+ * this file, where <code> is the value of getCode(). Successfully decoded
+ * data is stored in a static MapCacheLRU keyed by language code. When no
+ * readable data file exists, an empty array is returned and nothing is
+ * cached.
+ *
+ * @return array[] Array of grammar transformations.
+ * @throws MWException If FormatJson::decode() returns null for the data file.
+ * @since 1.28
+ */
+ public function getGrammarTransformations() {
+ $languageCode = $this->getCode();
+
+ // Create the shared cache on first use.
+ if ( self::$grammarTransformations === null ) {
+ self::$grammarTransformations = new MapCacheLRU( 10 );
+ }
+
+ if ( self::$grammarTransformations->has( $languageCode ) ) {
+ return self::$grammarTransformations->get( $languageCode );
+ }
+
+ $data = [];
+
+ $grammarDataFile = __DIR__ . "/data/grammarTransformations/$languageCode.json";
+ if ( is_readable( $grammarDataFile ) ) {
+ $data = FormatJson::decode(
+ file_get_contents( $grammarDataFile ),
+ true
+ );
+
+ // Throwing ends the method, so no fallback value is assigned afterwards.
+ if ( $data === null ) {
+ throw new MWException( "Invalid grammar data for \"$languageCode\"." );
+ }
+
+ self::$grammarTransformations->set( $languageCode, $data );
+ }
+
+ return $data;
+ }
+
/**
* Provides an alternative text depending on specified gender.
* Usage {{gender:username|masculine|feminine|unknown}}.
* @ingroup Language
*/
class LanguageRu extends Language {
-
/**
* Convert from the nominative form of a noun to some other case
* Invoked with {{grammar:case|word}}
return $wgGrammarForms['ru'][$case][$word];
}
- $grammarDataFile = __DIR__ . '/data/grammar.ru.json';
- $grammarData = FormatJson::decode( file_get_contents( $grammarDataFile ), true );
+ $grammarTransformations = $this->getGrammarTransformations();
+
+ if ( isset( $grammarTransformations[$case] ) ) {
+ foreach ( array_values( $grammarTransformations[$case] ) as $rule ) {
+ $form = $rule[0];
- if ( array_key_exists( $case, $grammarData ) ) {
- foreach ( array_keys( $grammarData[$case] ) as $form ) {
if ( $form === '@metadata' ) {
continue;
}
+ $replacement = $rule[1];
+
$regex = "/$form/";
if ( preg_match( $regex, $word ) ) {
- $word = preg_replace( $regex, $grammarData[$case][$form], $word );
+ $word = preg_replace( $regex, $replacement, $word );
break;
}
+++ /dev/null
-{
- "@metadata": {
- "authors": [
- "Alexander Sigachov (alexander.sigachov at Googgle Mail)",
- "Amir E. Aharoni (amir.aharoni@mail.huji.ac.il)"
- ],
- "comment": "These rules don't cover the whole grammar of the language, and are intended only for names of languages and Wikimedia projects."
- },
- "genitive": {
- "(.+)ь$": "$1я",
- "(.+)ия$": "$1ии",
- "(.+)ка$": "$1ки",
- "(.+)ти$": "$1тей",
- "(.+)ды$": "$1дов",
- "(.+)д$": "$1да",
- "(.+)ник$": "$1ника",
- "(.+)ные$": "$1ных"
- },
- "prepositional": {
- "(.+)ь$": "$1е",
- "(.+)ия$": "$1ии",
- "(.+)ка$": "$1ке",
- "(.+)ти$": "$1тях",
- "(.+)ды$": "$1дах",
- "(.+)д$": "$1де",
- "(.+)ник$": "$1нике",
- "(.+)ные$": "$1ных"
- },
- "languagegen": {
- "@metadata": "язык в родительном падеже: '(с) русского'",
- "(.+)кий$": "$1кого",
- "иврит$": "иврита",
- "идиш$": "идиша",
- "(.+)$": "$1"
- },
- "languageprep": {
- "@metadata": "язык в предложном падеже: '(на) русском'",
- "(.+)кий$": "$1ком",
- "иврит$": "иврите",
- "идиш$": "идише",
- "(.+)$": "$1"
- },
- "languageadverb": {
- "@metadata": "наречие с названием языка: 'по-русски'",
- "(.+)кий$": "по-$1ки",
- "иврит$": "на иврите",
- "идиш$": "на идише",
- "(идо|урду|хинди|эсперанто)$": "на $1",
- "(.+)$": "на языке $1"
- }
-}
--- /dev/null
+{
+ "@metadata": {
+ "authors": [
+ "Alexander Sigachov (alexander.sigachov at Googgle Mail)",
+ "Amir E. Aharoni (amir.aharoni@mail.huji.ac.il)"
+ ],
+ "comment": "These rules don't cover the whole grammar of the language, and are intended only for names of languages and Wikimedia projects."
+ },
+ "genitive": [
+ [ "(.+)ь$", "$1я" ],
+ [ "(.+)ия$", "$1ии" ],
+ [ "(.+)ка$", "$1ки" ],
+ [ "(.+)ти$", "$1тей" ],
+ [ "(.+)ды$", "$1дов" ],
+ [ "(.+)д$", "$1да" ],
+ [ "(.+)ник$", "$1ника" ],
+ [ "(.+)ные$", "$1ных" ]
+ ],
+ "prepositional": [
+ [ "(.+)ь$", "$1е" ],
+ [ "(.+)ия$", "$1ии" ],
+ [ "(.+)ка$", "$1ке" ],
+ [ "(.+)ти$", "$1тях" ],
+ [ "(.+)ды$", "$1дах" ],
+ [ "(.+)д$", "$1де" ],
+ [ "(.+)ник$", "$1нике" ],
+ [ "(.+)ные$", "$1ных" ]
+ ],
+ "languagegen": [
+ [ "@metadata", [
+ "comment", "язык в родительном падеже: '(с) русского'"
+ ] ],
+ [ "(.+)кий$", "$1кого" ],
+ [ "иврит$", "иврита" ],
+ [ "идиш$", "идиша" ],
+ [ "(.+)$", "$1" ]
+ ],
+ "languageprep": [
+ [ "@metadata", [
+ "comment", "язык в предложном падеже: '(на) русском'"
+ ] ],
+ [ "(.+)кий$", "$1ком" ],
+ [ "иврит$", "иврите" ],
+ [ "идиш$", "идише" ],
+ [ "(.+)$", "$1" ]
+ ],
+ "languageadverb": [
+ [ "@metadata", [
+ "comment", "наречие с названием языка: 'по-русски'"
+ ] ],
+ [ "(.+)кий$", "по-$1ки" ],
+ [ "иврит$", "на иврите" ],
+ [ "идиш$", "на идише" ],
+ [ "(идо|урду|хинди|эсперанто)$", "на $1" ],
+ [ "(.+)$", "на языке $1" ]
+ ]
+}
* Russian (Русский) language functions
*/
-// These tests were originally made for names of Wikimedia
-// websites, so they don't currently cover all the possible
-// cases.
-
mediaWiki.language.convertGrammar = function ( word, form ) {
- /*global $ */
'use strict';
- var grammarForms = mediaWiki.language.getData( 'ru', 'grammarForms' );
- if ( grammarForms && grammarForms[ form ] ) {
- return grammarForms[ form ][ word ];
+ var forms, transformations, i, rule, sourcePattern, regexp, replacement;
+
+ forms = mediaWiki.language.getData( 'ru', 'grammarForms' );
+ if ( forms && forms[ form ] ) {
+ return forms[ form ][ word ];
+ }
+
+ transformations = mediaWiki.language.getData( 'ru', 'grammarTransformations' );
+
+ if ( !transformations[ form ] ) {
+ return word;
}
- switch ( form ) {
- case 'genitive': // родительный падеж
- if ( word.slice( -1 ) === 'ь' ) {
- word = word.slice( 0, -1 ) + 'я';
- } else if ( word.slice( -2 ) === 'ия' ) {
- word = word.slice( 0, -2 ) + 'ии';
- } else if ( word.slice( -2 ) === 'ка' ) {
- word = word.slice( 0, -2 ) + 'ки';
- } else if ( word.slice( -2 ) === 'ти' ) {
- word = word.slice( 0, -2 ) + 'тей';
- } else if ( word.slice( -2 ) === 'ды' ) {
- word = word.slice( 0, -2 ) + 'дов';
- } else if ( word.slice( -1 ) === 'д' ) {
- word = word.slice( 0, -1 ) + 'да';
- } else if ( word.slice( -3 ) === 'ные' ) {
- word = word.slice( 0, -3 ) + 'ных';
- } else if ( word.slice( -3 ) === 'ник' ) {
- word = word.slice( 0, -3 ) + 'ника';
- }
- break;
- case 'prepositional': // предложный падеж
- if ( word.slice( -1 ) === 'ь' ) {
- word = word.slice( 0, -1 ) + 'е';
- } else if ( word.slice( -2 ) === 'ия' ) {
- word = word.slice( 0, -2 ) + 'ии';
- } else if ( word.slice( -2 ) === 'ка' ) {
- word = word.slice( 0, -2 ) + 'ке';
- } else if ( word.slice( -2 ) === 'ти' ) {
- word = word.slice( 0, -2 ) + 'тях';
- } else if ( word.slice( -2 ) === 'ды' ) {
- word = word.slice( 0, -2 ) + 'дах';
- } else if ( word.slice( -1 ) === 'д' ) {
- word = word.slice( 0, -1 ) + 'де';
- } else if ( word.slice( -3 ) === 'ные' ) {
- word = word.slice( 0, -3 ) + 'ных';
- } else if ( word.slice( -3 ) === 'ник' ) {
- word = word.slice( 0, -3 ) + 'нике';
- }
- break;
- case 'languagegen': // язык в родительном падеже ("(с) русского")
- if ( word.slice( -3 ) === 'кий' ) {
- word = word.slice( 0, -2 ) + 'ого';
- } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) {
- word = word + 'а';
- }
- break;
- case 'languageprep': // язык в предложном падеже ("(на) русском")
- if ( word.slice( -3 ) === 'кий' ) {
- word = word.slice( 0, -2 ) + 'ом';
- } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) {
- word = word + 'е';
- }
- break;
- case 'languageadverb': // наречие с названием языка ("по-русски")
- if ( word.slice( -3 ) === 'кий' ) {
- word = 'по-' + word.slice( 0, -1 );
- } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) {
- word = 'на ' + word + 'е';
- } else if ( $.inArray( word, [ 'идо', 'урду', 'хинди', 'эсперанто' ] ) > -1 ) {
- word = 'на ' + word;
- } else {
- word = 'на языке ' + word;
- }
- break;
+
+ for ( i = 0; i < transformations[ form ].length; i++ ) {
+ rule = transformations[ form ][ i ];
+ sourcePattern = rule[ 0 ];
+
+ if ( sourcePattern === '@metadata' ) {
+ continue;
+ }
+
+ regexp = new RegExp( sourcePattern );
+ replacement = rule[ 1 ];
+
+ if ( word.match( regexp ) ) {
+ return word.replace( regexp, replacement );
+ }
}
+
return word;
};