From ea0bd74a94444fc0bbf7f6c8637e84d4d70b68d6 Mon Sep 17 00:00:00 2001 From: Fomafix Date: Thu, 6 Apr 2017 17:17:19 +0200 Subject: [PATCH] Refactor global function wfBCP47 to static function LanguageCode::bcp47 Deprecate global function wfBCP47. Change-Id: Ie6bb061b5d6ca67289bb18bc468a87421f38fc94 --- RELEASE-NOTES-1.31 | 2 + includes/Feed.php | 2 +- includes/GlobalFunctions.php | 22 +--- includes/OutputPage.php | 4 +- includes/Preferences.php | 4 +- includes/installer/WebInstallerOutput.php | 2 +- includes/page/ImagePage.php | 6 +- includes/parser/CoreParserFunctions.php | 2 +- includes/skins/SkinTemplate.php | 6 +- languages/Language.php | 2 +- languages/LanguageCode.php | 32 +++++ .../mediawiki.language/mediawiki.language.js | 2 +- .../includes/GlobalFunctions/wfBCP47Test.php | 121 ------------------ tests/phpunit/languages/LanguageCodeTest.php | 115 +++++++++++++++++ 14 files changed, 167 insertions(+), 155 deletions(-) delete mode 100644 tests/phpunit/includes/GlobalFunctions/wfBCP47Test.php diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31 index 60a49b7652..3fd1fc8ac9 100644 --- a/RELEASE-NOTES-1.31 +++ b/RELEASE-NOTES-1.31 @@ -40,6 +40,8 @@ changes to languages because of Phabricator reports. === Other changes in 1.31 === * MessageBlobStore::insertMessageBlob() (deprecated in 1.27) was removed. +* The global function wfBCP47 was renamed to LanguageCode::bcp47. +* The global function wfBCP47 is now deprecated. == Compatibility == MediaWiki 1.31 requires PHP 5.5.9 or later. 
There is experimental support for diff --git a/includes/Feed.php b/includes/Feed.php index f76a634d3f..bc7747fe72 100644 --- a/includes/Feed.php +++ b/includes/Feed.php @@ -139,7 +139,7 @@ class FeedItem { */ public function getLanguage() { global $wgLanguageCode; - return wfBCP47( $wgLanguageCode ); + return LanguageCode::bcp47( $wgLanguageCode ); } /** diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 484dfe8d4d..be8c40fffe 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -3166,29 +3166,13 @@ function wfShorthandToInteger( $string = '', $default = -1 ) { * See unit test for examples. * See mediawiki.language.bcp47 for the JavaScript implementation. * + * @deprecated since 1.31, use LanguageCode::bcp47() directly. + * * @param string $code The language code. * @return string The language code which complying with BCP 47 standards. */ function wfBCP47( $code ) { - $codeSegment = explode( '-', $code ); - $codeBCP = []; - foreach ( $codeSegment as $segNo => $seg ) { - // when previous segment is x, it is a private segment and should be lc - if ( $segNo > 0 && strtolower( $codeSegment[( $segNo - 1 )] ) == 'x' ) { - $codeBCP[$segNo] = strtolower( $seg ); - // ISO 3166 country code - } elseif ( ( strlen( $seg ) == 2 ) && ( $segNo > 0 ) ) { - $codeBCP[$segNo] = strtoupper( $seg ); - // ISO 15924 script code - } elseif ( ( strlen( $seg ) == 4 ) && ( $segNo > 0 ) ) { - $codeBCP[$segNo] = ucfirst( strtolower( $seg ) ); - // Use lowercase for other cases - } else { - $codeBCP[$segNo] = strtolower( $seg ); - } - } - $langCode = implode( '-', $codeBCP ); - return $langCode; + return LanguageCode::bcp47( $code ); } /** diff --git a/includes/OutputPage.php b/includes/OutputPage.php index 785641d1a6..8a489b8641 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -2200,7 +2200,7 @@ class OutputPage extends ContextSource { // IE and some other browsers use BCP 47 standards in // their Accept-Language 
header, like "zh-CN" or "zh-Hant". // We should handle these too. - $variantBCP47 = wfBCP47( $variant ); + $variantBCP47 = LanguageCode::bcp47( $variant ); if ( $variantBCP47 !== $variant ) { $aloption[] = 'substr=' . $variantBCP47; } @@ -3437,7 +3437,7 @@ class OutputPage extends ContextSource { foreach ( $variants as $variant ) { $tags["variant-$variant"] = Html::element( 'link', [ 'rel' => 'alternate', - 'hreflang' => wfBCP47( $variant ), + 'hreflang' => LanguageCode::bcp47( $variant ), 'href' => $this->getTitle()->getLocalURL( [ 'variant' => $variant ] ) ] diff --git a/includes/Preferences.php b/includes/Preferences.php index 96b002b93a..ba90121a83 100644 --- a/includes/Preferences.php +++ b/includes/Preferences.php @@ -352,7 +352,7 @@ class Preferences { $options = []; foreach ( $languages as $code => $name ) { - $display = wfBCP47( $code ) . ' - ' . $name; + $display = LanguageCode::bcp47( $code ) . ' - ' . $name; $options[$display] = $code; } $defaultPreferences['language'] = [ @@ -394,7 +394,7 @@ class Preferences { $options = []; foreach ( $variantArray as $code => $name ) { - $display = wfBCP47( $code ) . ' - ' . $name; + $display = LanguageCode::bcp47( $code ) . ' - ' . 
$name; $options[$display] = $code; } diff --git a/includes/installer/WebInstallerOutput.php b/includes/installer/WebInstallerOutput.php index e4eb255bbd..6a55d69690 100644 --- a/includes/installer/WebInstallerOutput.php +++ b/includes/installer/WebInstallerOutput.php @@ -233,7 +233,7 @@ class WebInstallerOutput { public function getHeadAttribs() { return [ 'dir' => $this->getDir(), - 'lang' => wfBCP47( $this->getLanguageCode() ), + 'lang' => LanguageCode::bcp47( $this->getLanguageCode() ), ]; } diff --git a/includes/page/ImagePage.php b/includes/page/ImagePage.php index 0e3eaa5bcc..62f5d00c33 100644 --- a/includes/page/ImagePage.php +++ b/includes/page/ImagePage.php @@ -1054,8 +1054,8 @@ EOT protected function doRenderLangOpt( array $langChoices, $curLang, $defaultLang ) { global $wgScript; sort( $langChoices ); - $curLang = wfBCP47( $curLang ); - $defaultLang = wfBCP47( $defaultLang ); + $curLang = LanguageCode::bcp47( $curLang ); + $defaultLang = LanguageCode::bcp47( $defaultLang ); $opts = ''; $haveCurrentLang = false; $haveDefaultLang = false; @@ -1067,7 +1067,7 @@ EOT // include a choice for that. Last of all, if we're viewing // the file in a language not on the list, add it as a choice. foreach ( $langChoices as $lang ) { - $code = wfBCP47( $lang ); + $code = LanguageCode::bcp47( $lang ); $name = Language::fetchLanguageName( $code, $this->getContext()->getLanguage()->getCode() ); if ( $name !== '' ) { $display = $this->getContext()->msg( 'img-lang-opt', $code, $name )->text(); diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index bebf3f8a0b..6211196cf8 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -875,7 +875,7 @@ class CoreParserFunctions { $code = strtolower( $code ); $inLanguage = strtolower( $inLanguage ); $lang = Language::fetchLanguageName( $code, $inLanguage ); - return $lang !== '' ? $lang : wfBCP47( $code ); + return $lang !== '' ? 
$lang : LanguageCode::bcp47( $code ); } /** diff --git a/includes/skins/SkinTemplate.php b/includes/skins/SkinTemplate.php index 0690f035c0..532ee518a5 100644 --- a/includes/skins/SkinTemplate.php +++ b/includes/skins/SkinTemplate.php @@ -174,7 +174,7 @@ class SkinTemplate extends Skin { )->text(); } - $ilInterwikiCodeBCP47 = wfBCP47( $ilInterwikiCode ); + $ilInterwikiCodeBCP47 = LanguageCode::bcp47( $ilInterwikiCode ); $languageLink = [ 'href' => $languageLinkTitle->getFullURL(), 'text' => $ilLangName, @@ -1125,8 +1125,8 @@ class SkinTemplate extends Skin { 'class' => ( $code == $preferred ) ? 'selected' : false, 'text' => $varname, 'href' => $title->getLocalURL( [ 'variant' => $code ] + $params ), - 'lang' => wfBCP47( $code ), - 'hreflang' => wfBCP47( $code ), + 'lang' => LanguageCode::bcp47( $code ), + 'hreflang' => LanguageCode::bcp47( $code ), ]; } } diff --git a/languages/Language.php b/languages/Language.php index c514cdc595..435f058367 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -4309,7 +4309,7 @@ class Language { */ public function getHtmlCode() { if ( is_null( $this->mHtmlCode ) ) { - $this->mHtmlCode = wfBCP47( $this->getCode() ); + $this->mHtmlCode = LanguageCode::bcp47( $this->getCode() ); } return $this->mHtmlCode; } diff --git a/languages/LanguageCode.php b/languages/LanguageCode.php index 3fa3dc16e5..f50c55fe76 100644 --- a/languages/LanguageCode.php +++ b/languages/LanguageCode.php @@ -80,4 +80,36 @@ class LanguageCode { } return $code; } + + /** + * Get the normalised IETF language tag + * See unit test for examples. + * See mediawiki.language.bcp47 for the JavaScript implementation. + * + * @param string $code The language code. + * @return string The language code complying with the BCP 47 standard. 
+ * + * @since 1.31 + */ + public static function bcp47( $code ) { + $codeSegment = explode( '-', $code ); + $codeBCP = []; + foreach ( $codeSegment as $segNo => $seg ) { + // when previous segment is x, it is a private segment and should be lc + if ( $segNo > 0 && strtolower( $codeSegment[( $segNo - 1 )] ) == 'x' ) { + $codeBCP[$segNo] = strtolower( $seg ); + // ISO 3166 country code + } elseif ( ( strlen( $seg ) == 2 ) && ( $segNo > 0 ) ) { + $codeBCP[$segNo] = strtoupper( $seg ); + // ISO 15924 script code + } elseif ( ( strlen( $seg ) == 4 ) && ( $segNo > 0 ) ) { + $codeBCP[$segNo] = ucfirst( strtolower( $seg ) ); + // Use lowercase for other cases + } else { + $codeBCP[$segNo] = strtolower( $seg ); + } + } + $langCode = implode( '-', $codeBCP ); + return $langCode; + } } diff --git a/resources/src/mediawiki.language/mediawiki.language.js b/resources/src/mediawiki.language/mediawiki.language.js index 6a5243403c..45863a3e33 100644 --- a/resources/src/mediawiki.language/mediawiki.language.js +++ b/resources/src/mediawiki.language/mediawiki.language.js @@ -192,7 +192,7 @@ /** * Formats language tags according the BCP47 standard. - * See wfBCP47 for the PHP implementation. + * See LanguageCode::bcp47 for the PHP implementation. 
* * @param {string} languageTag Well-formed language tag * @return {string} diff --git a/tests/phpunit/includes/GlobalFunctions/wfBCP47Test.php b/tests/phpunit/includes/GlobalFunctions/wfBCP47Test.php deleted file mode 100644 index 8fbca6cfe3..0000000000 --- a/tests/phpunit/includes/GlobalFunctions/wfBCP47Test.php +++ /dev/null @@ -1,121 +0,0 @@ -assertEquals( $expected, wfBCP47( $code ), - "Applying BCP47 standard to lower case '$code'" - ); - - $code = strtoupper( $code ); - $this->assertEquals( $expected, wfBCP47( $code ), - "Applying BCP47 standard to upper case '$code'" - ); - } - - /** - * Array format is ($code, $expected) - */ - public static function provideLanguageCodes() { - return [ - // Extracted from BCP 47 (list not exhaustive) - # 2.1.1 - [ 'en-ca-x-ca', 'en-CA-x-ca' ], - [ 'sgn-be-fr', 'sgn-BE-FR' ], - [ 'az-latn-x-latn', 'az-Latn-x-latn' ], - # 2.2 - [ 'sr-Latn-RS', 'sr-Latn-RS' ], - [ 'az-arab-ir', 'az-Arab-IR' ], - - # 2.2.5 - [ 'sl-nedis', 'sl-nedis' ], - [ 'de-ch-1996', 'de-CH-1996' ], - - # 2.2.6 - [ - 'en-latn-gb-boont-r-extended-sequence-x-private', - 'en-Latn-GB-boont-r-extended-sequence-x-private' - ], - - // Examples from BCP 47 Appendix A - # Simple language subtag: - [ 'DE', 'de' ], - [ 'fR', 'fr' ], - [ 'ja', 'ja' ], - - # Language subtag plus script subtag: - [ 'zh-hans', 'zh-Hans' ], - [ 'sr-cyrl', 'sr-Cyrl' ], - [ 'sr-latn', 'sr-Latn' ], - - # Extended language subtags and their primary language subtag - # counterparts: - [ 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ], - [ 'cmn-hans-cn', 'cmn-Hans-CN' ], - [ 'zh-yue-hk', 'zh-yue-HK' ], - [ 'yue-hk', 'yue-HK' ], - - # Language-Script-Region: - [ 'zh-hans-cn', 'zh-Hans-CN' ], - [ 'sr-latn-RS', 'sr-Latn-RS' ], - - # Language-Variant: - [ 'sl-rozaj', 'sl-rozaj' ], - [ 'sl-rozaj-biske', 'sl-rozaj-biske' ], - [ 'sl-nedis', 'sl-nedis' ], - - # Language-Region-Variant: - [ 'de-ch-1901', 'de-CH-1901' ], - [ 'sl-it-nedis', 'sl-IT-nedis' ], - - # Language-Script-Region-Variant: - [ 
'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ], - - # Language-Region: - [ 'de-de', 'de-DE' ], - [ 'en-us', 'en-US' ], - [ 'es-419', 'es-419' ], - - # Private use subtags: - [ 'de-ch-x-phonebk', 'de-CH-x-phonebk' ], - [ 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ], - /** - * Previous test does not reflect the BCP 47 which states: - * az-Arab-x-AZE-derbend - * AZE being private, it should be lower case, hence the test above - * should probably be: - * [ 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ], - */ - - # Private use registry values: - [ 'x-whatever', 'x-whatever' ], - [ 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ], - [ 'de-qaaa', 'de-Qaaa' ], - [ 'sr-latn-qm', 'sr-Latn-QM' ], - [ 'sr-qaaa-rs', 'sr-Qaaa-RS' ], - - # Tags that use extensions - [ 'en-us-u-islamcal', 'en-US-u-islamcal' ], - [ 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ], - [ 'en-a-myext-b-another', 'en-a-myext-b-another' ], - - # Invalid: - // de-419-DE - // a-DE - // ar-a-aaa-b-bbb-a-ccc - ]; - } -} diff --git a/tests/phpunit/languages/LanguageCodeTest.php b/tests/phpunit/languages/LanguageCodeTest.php index 7689ef1dda..0da03dfecc 100644 --- a/tests/phpunit/languages/LanguageCodeTest.php +++ b/tests/phpunit/languages/LanguageCodeTest.php @@ -43,4 +43,119 @@ class LanguageCodeTest extends PHPUnit_Framework_TestCase { $this->assertEquals( null, LanguageCode::replaceDeprecatedCodes( null ) ); } + /** + * test @see LanguageCode::bcp47(). + * Please note that BCP 47 explicitly states that language codes are case + * insensitive, but there are some exceptions to the rule :) + * This test is used to verify our formatting against all-lowercase and + * all-uppercase language codes. 
+ * + * @see https://tools.ietf.org/html/bcp47 + * @dataProvider provideLanguageCodes() + */ + public function testBcp47( $code, $expected ) { + $code = strtolower( $code ); + $this->assertEquals( $expected, LanguageCode::bcp47( $code ), + "Applying BCP47 standard to lower case '$code'" + ); + + $code = strtoupper( $code ); + $this->assertEquals( $expected, LanguageCode::bcp47( $code ), + "Applying BCP47 standard to upper case '$code'" + ); + } + + /** + * Array format is ($code, $expected) + */ + public static function provideLanguageCodes() { + return [ + // Extracted from BCP 47 (list not exhaustive) + # 2.1.1 + [ 'en-ca-x-ca', 'en-CA-x-ca' ], + [ 'sgn-be-fr', 'sgn-BE-FR' ], + [ 'az-latn-x-latn', 'az-Latn-x-latn' ], + # 2.2 + [ 'sr-Latn-RS', 'sr-Latn-RS' ], + [ 'az-arab-ir', 'az-Arab-IR' ], + + # 2.2.5 + [ 'sl-nedis', 'sl-nedis' ], + [ 'de-ch-1996', 'de-CH-1996' ], + + # 2.2.6 + [ + 'en-latn-gb-boont-r-extended-sequence-x-private', + 'en-Latn-GB-boont-r-extended-sequence-x-private' + ], + + // Examples from BCP 47 Appendix A + # Simple language subtag: + [ 'DE', 'de' ], + [ 'fR', 'fr' ], + [ 'ja', 'ja' ], + + # Language subtag plus script subtag: + [ 'zh-hans', 'zh-Hans' ], + [ 'sr-cyrl', 'sr-Cyrl' ], + [ 'sr-latn', 'sr-Latn' ], + + # Extended language subtags and their primary language subtag + # counterparts: + [ 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ], + [ 'cmn-hans-cn', 'cmn-Hans-CN' ], + [ 'zh-yue-hk', 'zh-yue-HK' ], + [ 'yue-hk', 'yue-HK' ], + + # Language-Script-Region: + [ 'zh-hans-cn', 'zh-Hans-CN' ], + [ 'sr-latn-RS', 'sr-Latn-RS' ], + + # Language-Variant: + [ 'sl-rozaj', 'sl-rozaj' ], + [ 'sl-rozaj-biske', 'sl-rozaj-biske' ], + [ 'sl-nedis', 'sl-nedis' ], + + # Language-Region-Variant: + [ 'de-ch-1901', 'de-CH-1901' ], + [ 'sl-it-nedis', 'sl-IT-nedis' ], + + # Language-Script-Region-Variant: + [ 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ], + + # Language-Region: + [ 'de-de', 'de-DE' ], + [ 'en-us', 'en-US' ], + [ 'es-419', 'es-419' ], + + # Private use 
subtags: + [ 'de-ch-x-phonebk', 'de-CH-x-phonebk' ], + [ 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ], + /** + * Previous test does not reflect the BCP 47 which states: + * az-Arab-x-AZE-derbend + * AZE being private, it should be lower case, hence the test above + * should probably be: + * [ 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ], + */ + + # Private use registry values: + [ 'x-whatever', 'x-whatever' ], + [ 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ], + [ 'de-qaaa', 'de-Qaaa' ], + [ 'sr-latn-qm', 'sr-Latn-QM' ], + [ 'sr-qaaa-rs', 'sr-Qaaa-RS' ], + + # Tags that use extensions + [ 'en-us-u-islamcal', 'en-US-u-islamcal' ], + [ 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ], + [ 'en-a-myext-b-another', 'en-a-myext-b-another' ], + + # Invalid: + // de-419-DE + // a-DE + // ar-a-aaa-b-bbb-a-ccc + ]; + } + } -- 2.20.1