From 21ead7a98d1a103b77f1e3ba29a85493782d398b Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Tue, 26 Jun 2018 16:39:57 -0400 Subject: [PATCH] Ensure LanguageCode::bcp47() returns a valid BCP 47 language code MediaWiki uses a number of nonstandard codes which do not validate according to the IANA language subtag registry. Some of them have the wrong semantics entirely: MediaWiki's `sr-ec` variant maps to BCP 47 `sr-EC` which is "Serbian as used in Ecuador" (!). Extend LanguageCode::bcp47() to map our nonstandard codes to valid BCP 47 language codes. Export the mapping so that it can be used in JavaScript's corresponding mw.language.bcp47() implementation as well, and return the standard BCP 47 codes in the siteinfo API. Thanks to TheDJ (I10b4473c7e53f027812bbccf26bb47aec15fddfd) and Fomafix (I93efc190714ba76247d30ba49fc21ae872fc3555) for previous attempts at this! Also removed a fixme for the name of 'Twi', dating back to 2004 (f59c3be23b209e178a917cb14ebedcc243c847b6) -- checking tw.wikipedia.org it certainly appears that the autonym of 'Twi' is correctly 'Twi'. Tracking bugs for invalid language codes are T125073 and T145535. Discussion of zh-XX => zh-HanX-XX mapping is at T198419. This is a replay of an earlier merged patch, 8380f0173e79b66f0e2afd6c49cd88afb9f4f6f3, which had to be reverted because it caused regressions in the Babel extension (T199941). 
Bug: T34483 Bug: T106367 Bug: T120847 Depends-On: I27a5b8e45b34c6b57c1b612b11548001c88cd483 Change-Id: Iebbc604af21d7f2af9c1f1ab2574cb5f309bf6ed --- RELEASE-NOTES-1.32 | 3 + includes/api/ApiQuerySiteinfo.php | 5 +- .../ResourceLoaderLanguageDataModule.php | 1 + languages/LanguageCode.php | 108 ++++++++++++++++-- languages/data/Names.php | 16 +-- languages/i18n/qqq.json | 4 +- .../mediawiki.language.init.js | 2 + .../mediawiki.language/mediawiki.language.js | 17 ++- .../includes/api/ApiQuerySiteinfoTest.php | 1 + tests/phpunit/languages/LanguageCodeTest.php | 43 ++++++- .../mediawiki/mediawiki.language.test.js | 40 ++++++- 11 files changed, 213 insertions(+), 27 deletions(-) diff --git a/RELEASE-NOTES-1.32 b/RELEASE-NOTES-1.32 index efc3a2e338..45b06ee18d 100644 --- a/RELEASE-NOTES-1.32 +++ b/RELEASE-NOTES-1.32 @@ -314,6 +314,9 @@ because of Phabricator reports. * 'uppercase-se' (NorthernSamiUppercaseCollation) - use 'uca-se' instead * 'xx-uca-et' (CollationEt) - use 'uca-et' instead * 'xx-uca-fa' (CollationFa) - use 'uca-fa' instead +* LanguageCode::bcp47() now always returns a valid BCP 47 code. This means + that some MediaWiki-specific language codes, such as `simple`, are mapped + into valid BCP 47 codes (eg `en-simple`). * The hooks 'SpecialRecentChangesFilters' & 'SpecialWatchlistFilters' deprecated in 1.23 were removed. Instead, use 'ChangesListSpecialPageStructuredFilters'. 
The ChangesListSpecialPage code for these legacy hooks, and their use in diff --git a/includes/api/ApiQuerySiteinfo.php b/includes/api/ApiQuerySiteinfo.php index 697eab69ba..d134edae78 100644 --- a/includes/api/ApiQuerySiteinfo.php +++ b/includes/api/ApiQuerySiteinfo.php @@ -701,7 +701,10 @@ class ApiQuerySiteinfo extends ApiQueryBase { $data = []; foreach ( $langNames as $code => $name ) { - $lang = [ 'code' => $code ]; + $lang = [ + 'code' => $code, + 'bcp47' => LanguageCode::bcp47( $code ), + ]; ApiResult::setContentValue( $lang, 'name', $name ); $data[] = $lang; } diff --git a/includes/resourceloader/ResourceLoaderLanguageDataModule.php b/includes/resourceloader/ResourceLoaderLanguageDataModule.php index 4b24081109..f718e5feb3 100644 --- a/includes/resourceloader/ResourceLoaderLanguageDataModule.php +++ b/includes/resourceloader/ResourceLoaderLanguageDataModule.php @@ -46,6 +46,7 @@ class ResourceLoaderLanguageDataModule extends ResourceLoaderFileModule { 'pluralRules' => $language->getPluralRules(), 'digitGroupingPattern' => $language->digitGroupingPattern(), 'fallbackLanguages' => $language->getFallbackLanguages(), + 'bcp47Map' => LanguageCode::getNonstandardLanguageCodeMapping(), ]; } diff --git a/languages/LanguageCode.php b/languages/LanguageCode.php index f50c55fe76..b0baec1341 100644 --- a/languages/LanguageCode.php +++ b/languages/LanguageCode.php @@ -30,22 +30,85 @@ class LanguageCode { /** * Mapping of deprecated language codes that were used in previous * versions of MediaWiki to up-to-date, current language codes. + * These may or may not be valid BCP 47 codes; they are included here + * because MediaWiki remapped these particular codes at some point. 
* * @var array Mapping from language code to language code * * @since 1.30 + * @see https://meta.wikimedia.org/wiki/Special_language_codes */ private static $deprecatedLanguageCodeMapping = [ // Note that als is actually a valid ISO 639 code (Tosk Albanian), but it // was previously used in MediaWiki for Alsatian, which comes under gsw - 'als' => 'gsw', - 'bat-smg' => 'sgs', - 'be-x-old' => 'be-tarask', - 'fiu-vro' => 'vro', - 'roa-rup' => 'rup', - 'zh-classical' => 'lzh', - 'zh-min-nan' => 'nan', - 'zh-yue' => 'yue', + 'als' => 'gsw', // T25215 + 'bat-smg' => 'sgs', // T27522 + 'be-x-old' => 'be-tarask', // T11823 + 'fiu-vro' => 'vro', // T31186 + 'roa-rup' => 'rup', // T17988 + 'zh-classical' => 'lzh', // T30443 + 'zh-min-nan' => 'nan', // T30442 + 'zh-yue' => 'yue', // T30441 + ]; + + /** + * Mapping of non-standard language codes used in MediaWiki to + * standardized BCP 47 codes. These are not deprecated (yet?): + * IANA may eventually recognize the subtag, in which case the `-x-` + * infix could be removed, or else we could rename the code in + * MediaWiki, in which case they'd move up to the above mapping + * of deprecated codes. + * + * As a rule, we preserve all distinctions made by MediaWiki + * internally. For example, `de-formal` becomes `de-x-formal` + * instead of just `de` because MediaWiki distinguishes `de-formal` + * from `de` (for example, for interface translations). Similarly, + * BCP 47 indicates that `kk-Cyrl` SHOULD not be used because it + * "typically does not add information", but in our case MediaWiki + * LanguageConverter distinguishes `kk` (render content in a mix of + * Kazakh variants) from `kk-Cyrl` (convert content to be uniformly + * Cyrillic). As the BCP 47 requirement is a SHOULD not a MUST, + * `kk-Cyrl` is a valid code, although some validators may emit + * a warning note. 
+ * + * @var array Mapping from nonstandard codes to BCP 47 codes + * + * @since 1.32 + * @see https://meta.wikimedia.org/wiki/Special_language_codes + * @see https://phabricator.wikimedia.org/T125073 + */ + private static $nonstandardLanguageCodeMapping = [ + // All codes returned by Language::fetchLanguageNames() validated + // against IANA registry at + // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry + // with help of validator at + // http://schneegans.de/lv/ + 'cbk-zam' => 'cbk', // T124657 + 'de-formal' => 'de-x-formal', + 'eml' => 'egl', // T36217 + 'en-rtl' => 'en-x-rtl', + 'es-formal' => 'es-x-formal', + 'hu-formal' => 'hu-x-formal', + 'map-bms' => 'jv-x-bms', // [[en:Banyumasan_dialect]] T125073 + 'mo' => 'ro-Cyrl-MD', // T125073 + 'nrm' => 'nrf', // [[en:Norman_language]] T25216 + 'nl-informal' => 'nl-x-informal', + 'roa-tara' => 'nap-x-tara', // [[en:Tarantino_dialect]] + 'simple' => 'en-simple', + 'sr-ec' => 'sr-Cyrl', // T117845 + 'sr-el' => 'sr-Latn', // T117845 + + // Although these next codes aren't *wrong* per se, including + // both the script and the country code helps compatibility with + // other BCP 47 users. Note that MW also uses `zh-Hans`/`zh-Hant`, + // without a country code, and those should be left alone. + // (See $variantfallbacks in LanguageZh.php for Hans/Hant id.) + 'zh-cn' => 'zh-Hans-CN', + 'zh-sg' => 'zh-Hans-SG', + 'zh-my' => 'zh-Hans-MY', + 'zh-tw' => 'zh-Hant-TW', + 'zh-hk' => 'zh-Hant-HK', + 'zh-mo' => 'zh-Hant-MO', ]; /** @@ -64,6 +127,29 @@ class LanguageCode { return self::$deprecatedLanguageCodeMapping; } + /** + * Returns a mapping of non-standard language codes used by + * (current and previous version of) MediaWiki, mapped to standard + * BCP 47 names. + * + * This array is exported to JavaScript to ensure + * mediawiki.language.bcp47 stays in sync with LanguageCode::bcp47(). 
+ * + * @return string[] + * + * @since 1.32 + */ + public static function getNonstandardLanguageCodeMapping() { + $result = []; + foreach ( self::$deprecatedLanguageCodeMapping as $code => $ignore ) { + $result[$code] = self::bcp47( $code ); + } + foreach ( self::$nonstandardLanguageCodeMapping as $code => $ignore ) { + $result[$code] = self::bcp47( $code ); + } + return $result; + } + /** * Replace deprecated language codes that were used in previous * versions of MediaWiki to up-to-date, current language codes. @@ -87,11 +173,15 @@ class LanguageCode { * See mediawiki.language.bcp47 for the JavaScript implementation. * * @param string $code The language code. - * @return string The language code which complying with BCP 47 standards. + * @return string A language code complying with BCP 47 standards. * * @since 1.31 */ public static function bcp47( $code ) { + $code = self::replaceDeprecatedCodes( strtolower( $code ) ); + if ( isset( self::$nonstandardLanguageCodeMapping[$code] ) ) { + $code = self::$nonstandardLanguageCodeMapping[$code]; + } $codeSegment = explode( '-', $code ); $codeBCP = []; foreach ( $codeSegment as $segNo => $seg ) { diff --git a/languages/data/Names.php b/languages/data/Names.php index b038f08b1e..ec7c96e2c8 100644 --- a/languages/data/Names.php +++ b/languages/data/Names.php @@ -82,7 +82,7 @@ class Names { 'ba' => 'башҡортса', # Bashkir 'ban' => 'Basa Bali', # Balinese 'bar' => 'Boarisch', # Bavarian (Austro-Bavarian and South Tyrolean) - 'bat-smg' => 'žemaitėška', # Samogitian (deprecated code, 'sgs' in ISO 693-3 since 2010-06-30 ) + 'bat-smg' => 'žemaitėška', # Samogitian (deprecated code, 'sgs' in ISO 639-3 since 2010-06-30 ) 'bbc' => 'Batak Toba', # Batak Toba (falls back to bbc-latn) 'bbc-latn' => 'Batak Toba', # Batak Toba 'bcc' => 'جهلسری بلوچی', # Southern Balochi @@ -288,7 +288,7 @@ class Names { 'lzh' => '文言', # Literary Chinese, T10217 'lzz' => 'Lazuri', # Laz 'mai' => 'मैथिली', # Maithili - 'map-bms' => 'Basa Banyumasan', # 
Banyumasan + 'map-bms' => 'Basa Banyumasan', # Banyumasan ('jv-x-bms') 'mdf' => 'мокшень', # Moksha 'mg' => 'Malagasy', # Malagasy 'mh' => 'Ebon', # Marshallese @@ -300,7 +300,7 @@ class Names { 'mn' => 'монгол', # Halh Mongolian (Cyrillic) (ISO 639-3: khk) 'mni' => 'মেইতেই লোন্', # Manipuri/Meitei 'mnw' => 'ဘာသာ မန်', # Mon, T201583 - 'mo' => 'молдовеняскэ', # Moldovan, deprecated + 'mo' => 'молдовеняскэ', # Moldovan, deprecated (ISO 639-2: ro-Cyrl-MD) 'mr' => 'मराठी', # Marathi 'mrj' => 'кырык мары', # Hill Mari 'ms' => 'Bahasa Melayu', # Malay @@ -311,7 +311,7 @@ class Names { 'myv' => 'эрзянь', # Erzya 'mzn' => 'مازِرونی', # Mazanderani 'na' => 'Dorerin Naoero', # Nauruan - 'nah' => 'Nāhuatl', # Nahuatl (not in ISO 639-3) + 'nah' => 'Nāhuatl', # Nahuatl (added to ISO 639-3 on 2006-10-31) 'nan' => 'Bân-lâm-gú', # Min-nan, T10217 'nap' => 'Napulitano', # Neapolitan, T45793 'nb' => 'norsk bokmål', # Norwegian (Bokmal) @@ -326,7 +326,7 @@ class Names { 'nn' => 'norsk nynorsk', # Norwegian (Nynorsk) 'no' => 'norsk', # Norwegian macro language (falls back to nb). 
'nov' => 'Novial', # Novial - 'nrm' => 'Nouormand', # Norman + 'nrm' => 'Nouormand', # Norman (invalid code; 'nrf' in ISO 639 since 2014) 'nso' => 'Sesotho sa Leboa', # Northern Sotho 'nv' => 'Diné bizaad', # Navajo 'ny' => 'Chi-Chewa', # Chichewa @@ -362,8 +362,8 @@ class Names { 'rmy' => 'Romani', # Vlax Romany 'rn' => 'Kirundi', # Rundi/Kirundi/Urundi 'ro' => 'română', # Romanian - 'roa-rup' => 'armãneashti', # Aromanian (deprecated code, 'rup' exists in ISO 693-3) - 'roa-tara' => 'tarandíne', # Tarantino + 'roa-rup' => 'armãneashti', # Aromanian (deprecated code, 'rup' exists in ISO 639-3) + 'roa-tara' => 'tarandíne', # Tarantino ('nap-x-tara') 'ru' => 'русский', # Russian 'rue' => 'русиньскый', # Rusyn 'rup' => 'armãneashti', # Aromanian @@ -439,7 +439,7 @@ class Names { 'tt-cyrl' => 'татарча', # Tatar (Cyrillic script) (default) 'tt-latn' => 'tatarça', # Tatar (Latin script) 'tum' => 'chiTumbuka', # Tumbuka - 'tw' => 'Twi', # Twi, (FIXME!) + 'tw' => 'Twi', # Twi 'ty' => 'reo tahiti', # Tahitian 'tyv' => 'тыва дыл', # Tyvan 'tzm' => 'ⵜⴰⵎⴰⵣⵉⵖⵜ', # Tamazight diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json index 2ca765fece..a17cfca049 100644 --- a/languages/i18n/qqq.json +++ b/languages/i18n/qqq.json @@ -3417,8 +3417,8 @@ "variantname-gan-hans": "{{Optional}}\n\nVariant option for wikis with variants conversion enabled.", "variantname-gan-hant": "{{Optional}}\n\nVariant option for wikis with variants conversion enabled.", "variantname-gan": "{{Optional}}\n\nVariant option for wikis with variants conversion enabled.", - "variantname-sr-ec": "{{optional}}\nVariant Option for wikis with variants conversion enabled.\n\nNote that sr-ec is not a conforming BCP47 language tag. 
Wikis should be migrated by:\n* allowing it only as a legacy alias of the preferred tag sr-cyrl (possibly insert a tracking category in templates as long as they must support the legacy tag),\n* making the new tag the default to look first, before looking for the old tag,\n* moving the translations to the new code by renaming them,\n* checking links in source pages still using the legacy tag to change it to the new tag,\n* possibly cleanup the redirect pages.", - "variantname-sr-el": "{{optional}}\nVariant Option for wikis with variants conversion enabled.\n\nNote that sr-el is not a conforming BCP47 language tag. Wikis should be migrated by:\n* allowing it only as a legacy alias of the preferred tag sr-latn (possibly insert a tracking category in templates as long as they must support the legacy tag),\n* making the new tag the default to look first, before looking for the old tag,\n* moving the translations to the new code by renaming them,\n* checking links in source pages still using the legacy tag to change it to the new tag,\n* possibly cleanup the redirect pages.", + "variantname-sr-ec": "{{optional}}\nVariant Option for wikis with variants conversion enabled.\n\nNote that sr-ec is not a conforming BCP 47 language tag. Wikis should be migrated by:\n* allowing it only as a legacy alias of the preferred tag sr-cyrl (possibly insert a tracking category in templates as long as they must support the legacy tag),\n* making the new tag the default to look first, before looking for the old tag,\n* moving the translations to the new code by renaming them,\n* checking links in source pages still using the legacy tag to change it to the new tag,\n* possibly cleanup the redirect pages.", + "variantname-sr-el": "{{optional}}\nVariant Option for wikis with variants conversion enabled.\n\nNote that sr-el is not a conforming BCP 47 language tag. 
Wikis should be migrated by:\n* allowing it only as a legacy alias of the preferred tag sr-latn (possibly insert a tracking category in templates as long as they must support the legacy tag),\n* making the new tag the default to look first, before looking for the old tag,\n* moving the translations to the new code by renaming them,\n* checking links in source pages still using the legacy tag to change it to the new tag,\n* possibly cleanup the redirect pages.", "variantname-sr": "{{optional}}\nVariant Option for wikis with variants conversion enabled.", "variantname-kk-kz": "{{optional}}\nVariant Option for wikis with variants conversion enabled.", "variantname-kk-tr": "{{optional}}\nVariant Option for wikis with variants conversion enabled.", diff --git a/resources/src/mediawiki.language/mediawiki.language.init.js b/resources/src/mediawiki.language/mediawiki.language.init.js index 33f8fd7d93..dbd7cb92c4 100644 --- a/resources/src/mediawiki.language/mediawiki.language.init.js +++ b/resources/src/mediawiki.language/mediawiki.language.init.js @@ -37,6 +37,8 @@ * - `pluralRules` * - `digitGroupingPattern` * - `fallbackLanguages` + * - `bcp47Map` + * - `languageNames` * * @property */ diff --git a/resources/src/mediawiki.language/mediawiki.language.js b/resources/src/mediawiki.language/mediawiki.language.js index dfb7112870..8fed6954f5 100644 --- a/resources/src/mediawiki.language/mediawiki.language.js +++ b/resources/src/mediawiki.language/mediawiki.language.js @@ -163,18 +163,27 @@ }, /** - * Formats language tags according the BCP47 standard. + * Formats language tags according the BCP 47 standard. * See LanguageCode::bcp47 for the PHP implementation. 
* * @param {string} languageTag Well-formed language tag * @return {string} */ bcp47: function ( languageTag ) { - var formatted, + var bcp47Map, + formatted, + segments, isFirstSegment = true, - isPrivate = false, - segments = languageTag.split( '-' ); + isPrivate = false; + languageTag = languageTag.toLowerCase(); + + bcp47Map = mw.language.getData( mw.config.get( 'wgUserLanguage' ), 'bcp47Map' ); + if ( bcp47Map && Object.prototype.hasOwnProperty.call( bcp47Map, languageTag ) ) { + languageTag = bcp47Map[ languageTag ]; + } + + segments = languageTag.split( '-' ); formatted = segments.map( function ( segment ) { var newSegment; diff --git a/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php b/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php index 9587a763f1..225c19537b 100644 --- a/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php +++ b/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php @@ -489,6 +489,7 @@ class ApiQuerySiteinfoTest extends ApiTestCase { function ( $code, $name ) { return [ 'code' => $code, + 'bcp47' => LanguageCode::bcp47( $code ), 'name' => $name ]; }, diff --git a/tests/phpunit/languages/LanguageCodeTest.php b/tests/phpunit/languages/LanguageCodeTest.php index 544a063566..d8251bc6f6 100644 --- a/tests/phpunit/languages/LanguageCodeTest.php +++ b/tests/phpunit/languages/LanguageCodeTest.php @@ -54,14 +54,18 @@ class LanguageCodeTest extends PHPUnit\Framework\TestCase { * @dataProvider provideLanguageCodes() */ public function testBcp47( $code, $expected ) { + $this->assertEquals( $expected, LanguageCode::bcp47( $code ), + "Applying BCP 47 standard to '$code'" + ); + $code = strtolower( $code ); $this->assertEquals( $expected, LanguageCode::bcp47( $code ), - "Applying BCP47 standard to lower case '$code'" + "Applying BCP 47 standard to lower case '$code'" ); $code = strtoupper( $code ); $this->assertEquals( $expected, LanguageCode::bcp47( $code ), - "Applying BCP47 standard to upper case '$code'" + "Applying BCP 47 standard to upper case 
'$code'" ); } @@ -155,6 +159,41 @@ class LanguageCodeTest extends PHPUnit\Framework\TestCase { // de-419-DE // a-DE // ar-a-aaa-b-bbb-a-ccc + + # Non-standard and deprecated language codes used by MediaWiki + [ 'als', 'gsw' ], + [ 'bat-smg', 'sgs' ], + [ 'be-x-old', 'be-tarask' ], + [ 'fiu-vro', 'vro' ], + [ 'roa-rup', 'rup' ], + [ 'zh-classical', 'lzh' ], + [ 'zh-min-nan', 'nan' ], + [ 'zh-yue', 'yue' ], + [ 'cbk-zam', 'cbk' ], + [ 'de-formal', 'de-x-formal' ], + [ 'eml', 'egl' ], + [ 'en-rtl', 'en-x-rtl' ], + [ 'es-formal', 'es-x-formal' ], + [ 'hu-formal', 'hu-x-formal' ], + [ 'kk-Arab', 'kk-Arab' ], + [ 'kk-Cyrl', 'kk-Cyrl' ], + [ 'kk-Latn', 'kk-Latn' ], + [ 'map-bms', 'jv-x-bms' ], + [ 'mo', 'ro-Cyrl-MD' ], + [ 'nrm', 'nrf' ], + [ 'nl-informal', 'nl-x-informal' ], + [ 'roa-tara', 'nap-x-tara' ], + [ 'simple', 'en-simple' ], + [ 'sr-ec', 'sr-Cyrl' ], + [ 'sr-el', 'sr-Latn' ], + [ 'zh-cn', 'zh-Hans-CN' ], + [ 'zh-sg', 'zh-Hans-SG' ], + [ 'zh-my', 'zh-Hans-MY' ], + [ 'zh-tw', 'zh-Hant-TW' ], + [ 'zh-hk', 'zh-Hant-HK' ], + [ 'zh-mo', 'zh-Hant-MO' ], + [ 'zh-hans', 'zh-Hans' ], + [ 'zh-hant', 'zh-Hant' ], ]; } diff --git a/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js b/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js index 3040b85817..2208ab9b1b 100644 --- a/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js +++ b/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js @@ -692,19 +692,57 @@ // # Tags that use extensions [ 'en-us-u-islamcal', 'en-US-u-islamcal' ], [ 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ], - [ 'en-a-myext-b-another', 'en-a-myext-b-another' ] + [ 'en-a-myext-b-another', 'en-a-myext-b-another' ], // # Invalid: // de-419-DE // a-DE // ar-a-aaa-b-bbb-a-ccc + + // Non-standard and deprecated language codes used by MediaWiki + [ 'als', 'gsw' ], + [ 'bat-smg', 'sgs' ], + [ 'be-x-old', 'be-tarask' ], + [ 'fiu-vro', 'vro' ], + [ 'roa-rup', 'rup' ], + [ 'zh-classical', 'lzh' ], + [ 
'zh-min-nan', 'nan' ], + [ 'zh-yue', 'yue' ], + [ 'cbk-zam', 'cbk' ], + [ 'de-formal', 'de-x-formal' ], + [ 'eml', 'egl' ], + [ 'en-rtl', 'en-x-rtl' ], + [ 'es-formal', 'es-x-formal' ], + [ 'hu-formal', 'hu-x-formal' ], + [ 'kk-Arab', 'kk-Arab' ], + [ 'kk-Cyrl', 'kk-Cyrl' ], + [ 'kk-Latn', 'kk-Latn' ], + [ 'map-bms', 'jv-x-bms' ], + [ 'mo', 'ro-Cyrl-MD' ], + [ 'nrm', 'nrf' ], + [ 'nl-informal', 'nl-x-informal' ], + [ 'roa-tara', 'nap-x-tara' ], + [ 'simple', 'en-simple' ], + [ 'sr-ec', 'sr-Cyrl' ], + [ 'sr-el', 'sr-Latn' ], + [ 'zh-cn', 'zh-Hans-CN' ], + [ 'zh-sg', 'zh-Hans-SG' ], + [ 'zh-my', 'zh-Hans-MY' ], + [ 'zh-tw', 'zh-Hant-TW' ], + [ 'zh-hk', 'zh-Hant-HK' ], + [ 'zh-mo', 'zh-Hant-MO' ], + [ 'zh-hans', 'zh-Hans' ], + [ 'zh-hant', 'zh-Hant' ] ]; QUnit.test( 'mw.language.bcp47', function ( assert ) { + mw.language.data = this.liveLangData; bcp47Tests.forEach( function ( data ) { var input = data[ 0 ], expected = data[ 1 ]; assert.strictEqual( mw.language.bcp47( input ), expected ); + assert.strictEqual( mw.language.bcp47( input.toLowerCase() ), expected ); + assert.strictEqual( mw.language.bcp47( input.toUpperCase() ), expected ); } ); } ); }() ); -- 2.20.1