'LanguageFi' => __DIR__ . '/languages/classes/LanguageFi.php',
'LanguageGa' => __DIR__ . '/languages/classes/LanguageGa.php',
'LanguageGan' => __DIR__ . '/languages/classes/LanguageGan.php',
- 'LanguageHe' => __DIR__ . '/languages/classes/LanguageHe.php',
'LanguageHsb' => __DIR__ . '/languages/classes/LanguageHsb.php',
'LanguageHu' => __DIR__ . '/languages/classes/LanguageHu.php',
'LanguageHy' => __DIR__ . '/languages/classes/LanguageHy.php',
return $wgGrammarForms[$this->getCode()][$case][$word];
}
+ // Rule sets are looked up by case name. This point is reached only when the
+ // override lookup above did not return.
+ $grammarTransformations = $this->getGrammarTransformations();
+
+ if ( isset( $grammarTransformations[$case] ) ) {
+ $forms = $grammarTransformations[$case];
+
+ // Some names of grammar rules are aliases for other rules.
+ // In such cases the value is a string rather than object,
+ // so load the actual rules.
+ if ( is_string( $forms ) ) {
+ // An alias that points at a missing rule set resolves to null
+ // instead of raising an undefined-index notice.
+ $forms = isset( $grammarTransformations[$forms] )
+ ? $grammarTransformations[$forms]
+ : null;
+ }
+
+ // Without a list of rules there is nothing to apply (dangling or
+ // chained alias, malformed data), so the word is returned unchanged.
+ if ( !is_array( $forms ) ) {
+ return $word;
+ }
+
+ foreach ( array_values( $forms ) as $rule ) {
+ $form = $rule[0];
+
+ // Metadata entries share the rule list but are not patterns.
+ if ( $form === '@metadata' ) {
+ continue;
+ }
+
+ $replacement = $rule[1];
+
+ // Escape the delimiter so a "/" inside the pattern cannot end the
+ // regex early; the "u" modifier makes PCRE treat pattern and
+ // subject as UTF-8.
+ $regex = '/' . addcslashes( $form, '/' ) . '/u';
+ $patternMatches = preg_match( $regex, $word );
+
+ if ( $patternMatches === false ) {
+ wfLogWarning(
+ 'An error occurred while processing grammar. ' .
+ "Word: '$word'. Regex: /$form/."
+ );
+ } elseif ( $patternMatches === 1 ) {
+ $word = preg_replace( $regex, $replacement, $word );
+
+ // Only the first matching rule is applied.
+ break;
+ }
+ }
+ }
+
return $word;
}
+++ /dev/null
-<?php
-/**
- * Hebrew (עברית) specific code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @author Rotem Liss
- * @ingroup Language
- */
-
-/**
- * Hebrew (עברית)
- *
- * @ingroup Language
- */
-class LanguageHe extends Language {
-
- /**
- * Convert grammar forms of words.
- *
- * Available cases:
- * "prefixed" (or "תחילית") - when the word has a prefix
- *
- * @param string $word The word to convert
- * @param string $case The case
- *
- * @return string
- */
- public function convertGrammar( $word, $case ) {
- global $wgGrammarForms;
- if ( isset( $wgGrammarForms['he'][$case][$word] ) ) {
- return $wgGrammarForms['he'][$case][$word];
- }
-
- switch ( $case ) {
- case 'prefixed':
- case 'תחילית':
- # Duplicate the "Waw" if prefixed, but not if it is already double.
- if ( substr( $word, 0, 2 ) === "ו" && substr( $word, 0, 4 ) !== "וו" ) {
- $word = "ו" . $word;
- }
-
- # Remove the "He" article if prefixed.
- if ( substr( $word, 0, 2 ) === "ה" ) {
- $word = substr( $word, 2 );
- }
-
- # Add a hyphen (maqaf) before non-Hebrew letters.
- if ( substr( $word, 0, 2 ) < "א" || substr( $word, 0, 2 ) > "ת" ) {
- $word = "־" . $word;
- }
- }
-
- return $word;
- }
-}
* @ingroup Language
*/
class LanguageRu extends Language {
- /**
- * Convert from the nominative form of a noun to some other case
- * Invoked with {{grammar:case|word}}
- *
- * @param string $word
- * @param string $case
- * @return string
- */
- function convertGrammar( $word, $case ) {
- global $wgGrammarForms;
- if ( isset( $wgGrammarForms['ru'][$case][$word] ) ) {
- return $wgGrammarForms['ru'][$case][$word];
- }
-
- $grammarTransformations = $this->getGrammarTransformations();
-
- if ( isset( $grammarTransformations[$case] ) ) {
- foreach ( array_values( $grammarTransformations[$case] ) as $rule ) {
- $form = $rule[0];
-
- if ( $form === '@metadata' ) {
- continue;
- }
-
- $replacement = $rule[1];
-
- $regex = "/$form/";
-
- if ( preg_match( $regex, $word ) ) {
- $word = preg_replace( $regex, $replacement, $word );
-
- break;
- }
- }
- }
-
- return $word;
- }
-
/**
* Four-digit number should be without group commas (spaces)
* See manual of style at https://ru.wikipedia.org/wiki/Википедия:Оформление_статей
--- /dev/null
+{
+ "@metadata": {
+ "authors": [
+ "Rotem Liss",
+ "Amir E. Aharoni (amir.aharoni@mail.huji.ac.il)"
+ ]
+ },
+ "prefixed": "תחילית",
+ "תחילית": [
+ [ "@metadata", [
+ "comment", "הכפלת ו, מחיקת ה הידיעה, הוספת מקף"
+ ] ],
+ [
+ "^(ו[^ו].+)$",
+ "ו$1"
+ ],
+ [
+ "^ה(.+)$",
+ "$1"
+ ],
+ [
+ "^([^א-ת].+)$",
+ "־$1"
+ ]
+ ]
+}
'dsb' => 'resources/src/mediawiki.language/languages/dsb.js',
'fi' => 'resources/src/mediawiki.language/languages/fi.js',
'ga' => 'resources/src/mediawiki.language/languages/ga.js',
- 'he' => 'resources/src/mediawiki.language/languages/he.js',
'hsb' => 'resources/src/mediawiki.language/languages/hsb.js',
'hu' => 'resources/src/mediawiki.language/languages/hu.js',
'hy' => 'resources/src/mediawiki.language/languages/hy.js',
'la' => 'resources/src/mediawiki.language/languages/la.js',
'os' => 'resources/src/mediawiki.language/languages/os.js',
- 'ru' => 'resources/src/mediawiki.language/languages/ru.js',
'sl' => 'resources/src/mediawiki.language/languages/sl.js',
'uk' => 'resources/src/mediawiki.language/languages/uk.js',
],
+++ /dev/null
-/*!
- * Hebrew (עברית) language functions
- */
-
-mediaWiki.language.convertGrammar = function ( word, form ) {
- var grammarForms = mediaWiki.language.getData( 'he', 'grammarForms' );
- if ( grammarForms && grammarForms[ form ] ) {
- return grammarForms[ form ][ word ];
- }
- switch ( form ) {
- case 'prefixed':
- case 'תחילית': // the same word in Hebrew
- // Duplicate prefixed "Waw", but only if it's not already double
- if ( word.slice( 0, 1 ) === 'ו' && word.slice( 0, 2 ) !== 'וו' ) {
- word = 'ו' + word;
- }
-
- // Remove the "He" if prefixed
- if ( word.slice( 0, 1 ) === 'ה' ) {
- word = word.slice( 1 );
- }
-
- // Add a hyphen (maqaf) before numbers and non-Hebrew letters
- if ( word.slice( 0, 1 ) < 'א' || word.slice( 0, 1 ) > 'ת' ) {
- word = '־' + word;
- }
- }
- return word;
-};
+++ /dev/null
-/*!
- * Russian (Русский) language functions
- */
-
-mediaWiki.language.convertGrammar = function ( word, form ) {
- 'use strict';
-
- var forms, transformations, i, rule, sourcePattern, regexp, replacement;
-
- forms = mediaWiki.language.getData( 'ru', 'grammarForms' );
- if ( forms && forms[ form ] ) {
- return forms[ form ][ word ];
- }
-
- transformations = mediaWiki.language.getData( 'ru', 'grammarTransformations' );
-
- if ( !transformations[ form ] ) {
- return word;
- }
-
- for ( i = 0; i < transformations[ form ].length; i++ ) {
- rule = transformations[ form ][ i ];
- sourcePattern = rule[ 0 ];
-
- if ( sourcePattern === '@metadata' ) {
- continue;
- }
-
- regexp = new RegExp( sourcePattern );
- replacement = rule[ 1 ];
-
- if ( word.match( regexp ) ) {
- return word.replace( regexp, replacement );
- }
- }
-
- return word;
-};
/**
* Grammatical transformations, needed for inflected languages.
- * Invoked by putting `{{grammar:form|word}}` in a message.
+ * Invoked by putting `{{grammar:case|word}}` in a message.
*
* The rules can be defined in $wgGrammarForms global or computed
* dynamically by overriding this method per language.
* @return {string}
*/
convertGrammar: function ( word, form ) {
- var grammarForms = mw.language.getData( mw.config.get( 'wgUserLanguage' ), 'grammarForms' );
- if ( grammarForms && grammarForms[ form ] ) {
- return grammarForms[ form ][ word ] || word;
+ var userLanguage, forms, transformations,
+ patterns, i, rule, sourcePattern, regexp, replacement;
+
+ userLanguage = mw.config.get( 'wgUserLanguage' );
+
+ // An explicit per-word override wins. A word without one continues to
+ // the pattern rules below instead of being returned as undefined.
+ // The own-property check keeps inherited names such as "constructor"
+ // from being mistaken for overrides.
+ forms = mw.language.getData( userLanguage, 'grammarForms' );
+ if ( forms && forms[ form ] &&
+ Object.prototype.hasOwnProperty.call( forms[ form ], word )
+ ) {
+ return forms[ form ][ word ];
+ }
+
+ transformations = mw.language.getData( userLanguage, 'grammarTransformations' );
+
+ if ( !( transformations && transformations[ form ] ) ) {
+ return word;
+ }
+
+ patterns = transformations[ form ];
+
+ // Some names of grammar rules are aliases for other rules.
+ // In such cases the value is a string rather than object,
+ // so load the actual rules.
+ if ( typeof patterns === 'string' ) {
+ patterns = transformations[ patterns ];
}
+
+ // An alias that points at a missing rule set leaves nothing to apply.
+ if ( !patterns ) {
+ return word;
+ }
+
+ for ( i = 0; i < patterns.length; i++ ) {
+ rule = patterns[ i ];
+ sourcePattern = rule[ 0 ];
+
+ // Metadata entries share the rule list but are not patterns.
+ if ( sourcePattern === '@metadata' ) {
+ continue;
+ }
+
+ regexp = new RegExp( sourcePattern );
+ replacement = rule[ 1 ];
+
+ // Only the first matching rule is applied.
+ if ( word.match( regexp ) ) {
+ return word.replace( regexp, replacement );
+ }
+ }
+
return word;
},
* @file
*/
-/** Tests for MediaWiki languages/classes/LanguageHe.php */
+/** Tests for MediaWiki Hebrew grammar transformation handling */
class LanguageHeTest extends LanguageClassesTestCase {
/**
* The most common usage for the plural forms is two forms,