From 2e52f48c2ed8dcf480843e2186f685a86810e2ac Mon Sep 17 00:00:00 2001 From: Aryeh Gregor Date: Thu, 2 May 2019 17:23:42 +0300 Subject: [PATCH] Split some Language methods to LanguageNameUtils These are static methods that have to do with processing language names and codes. I didn't include fallback behavior, because that would mean a circular dependency with LocalisationCache. In the new class, I renamed AS_AUTONYMS to AUTONYMS, and added a class constant DEFINED for 'mw' to match the existing SUPPORTED and ALL. I also renamed fetchLanguageName(s) to getLanguageName(s). There is 100% test coverage for the code in the new class. Change-Id: I245ae94bfc1f62b6af75ea57525139adf2539fe6 --- RELEASE-NOTES-1.34 | 5 + autoload.php | 1 + includes/MediaWikiServices.php | 9 + includes/ServiceWiring.php | 11 +- .../cache/localisation/LocalisationCache.php | 15 +- includes/language/LanguageCode.php | 1 - includes/language/LanguageNameUtils.php | 319 ++++++++++ languages/Language.php | 211 ++----- languages/data/Names.php | 2 +- maintenance/rebuildLocalisationCache.php | 3 +- tests/common/TestsAutoLoader.php | 3 + .../includes/api/ApiQuerySiteinfoTest.php | 1 + .../includes/cache/LocalisationCacheTest.php | 36 +- tests/phpunit/languages/LanguageTest.php | 196 +++---- .../language/LanguageNameUtilsTest.php | 66 +++ .../language/LanguageNameUtilsTestTrait.php | 555 ++++++++++++++++++ 16 files changed, 1165 insertions(+), 269 deletions(-) create mode 100644 includes/language/LanguageNameUtils.php create mode 100644 tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php create mode 100644 tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php diff --git a/RELEASE-NOTES-1.34 b/RELEASE-NOTES-1.34 index 3f4287bc1d..ff0be676cb 100644 --- a/RELEASE-NOTES-1.34 +++ b/RELEASE-NOTES-1.34 @@ -462,6 +462,11 @@ because of Phabricator reports. * TempFSFile::factory() has been deprecated. Use TempFSFileFactory instead. * wfIsBadImage() is deprecated. 
Use the BadFileLookup service instead. * Language::getLocalisationCache() is deprecated. Use MediaWikiServices. +* The following Language methods are deprecated: isSupportedLanguage, + isValidCode, isValidBuiltInCode, isKnownLanguageTag, fetchLanguageNames, + fetchLanguageName, getFileName, getMessagesFileName, getJsonMessagesFileName. + Use the new LanguageNameUtils class instead. (Note that fetchLanguageName(s) + are called getLanguageName(s) in the new class.) === Other changes in 1.34 === * … diff --git a/autoload.php b/autoload.php index c593b0c508..9b35c1be77 100644 --- a/autoload.php +++ b/autoload.php @@ -892,6 +892,7 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Languages\\Data\\CrhExceptions' => __DIR__ . '/languages/data/CrhExceptions.php', 'MediaWiki\\Languages\\Data\\Names' => __DIR__ . '/languages/data/Names.php', 'MediaWiki\\Languages\\Data\\ZhConversion' => __DIR__ . '/languages/data/ZhConversion.php', + 'MediaWiki\\Languages\\LanguageNameUtils' => __DIR__ . '/includes/language/LanguageNameUtils.php', 'MediaWiki\\Logger\\ConsoleLogger' => __DIR__ . '/includes/debug/logger/ConsoleLogger.php', 'MediaWiki\\Logger\\ConsoleSpi' => __DIR__ . '/includes/debug/logger/ConsoleSpi.php', 'MediaWiki\\Logger\\LegacyLogger' => __DIR__ . 
'/includes/debug/logger/LegacyLogger.php', diff --git a/includes/MediaWikiServices.php b/includes/MediaWikiServices.php index 8445842b9e..40559ef5cd 100644 --- a/includes/MediaWikiServices.php +++ b/includes/MediaWikiServices.php @@ -19,6 +19,7 @@ use MediaWiki\Block\BlockManager; use MediaWiki\Block\BlockRestrictionStore; use MediaWiki\FileBackend\FSFile\TempFSFileFactory; use MediaWiki\Http\HttpRequestFactory; +use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\Page\MovePageFactory; use MediaWiki\Permissions\PermissionManager; use MediaWiki\Preferences\PreferencesFactory; @@ -623,6 +624,14 @@ class MediaWikiServices extends ServiceContainer { return $this->getService( 'InterwikiLookup' ); } + /** + * @since 1.34 + * @return LanguageNameUtils + */ + public function getLanguageNameUtils() { + return $this->getService( 'LanguageNameUtils' ); + } + /** * @since 1.28 * @return LinkCache diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php index 7bdd7c8af1..2d2a7dc8ed 100644 --- a/includes/ServiceWiring.php +++ b/includes/ServiceWiring.php @@ -48,6 +48,7 @@ use MediaWiki\FileBackend\FSFile\TempFSFileFactory; use MediaWiki\Http\HttpRequestFactory; use MediaWiki\Interwiki\ClassicInterwikiLookup; use MediaWiki\Interwiki\InterwikiLookup; +use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\Linker\LinkRenderer; use MediaWiki\Linker\LinkRendererFactory; use MediaWiki\Logger\LoggerFactory; @@ -255,6 +256,13 @@ return [ ); }, + 'LanguageNameUtils' => function ( MediaWikiServices $services ) : LanguageNameUtils { + return new LanguageNameUtils( new ServiceOptions( + LanguageNameUtils::$constructorOptions, + $services->getMainConfig() + ) ); + }, + 'LinkCache' => function ( MediaWikiServices $services ) : LinkCache { return new LinkCache( $services->getTitleFormatter(), @@ -326,7 +334,8 @@ return [ $logger, [ function () use ( $services ) { $services->getResourceLoader()->getMessageBlobStore()->clear(); - } ] + } ], + $services->getLanguageNameUtils() 
); }, diff --git a/includes/cache/localisation/LocalisationCache.php b/includes/cache/localisation/LocalisationCache.php index 103124559b..fb4675eb30 100644 --- a/includes/cache/localisation/LocalisationCache.php +++ b/includes/cache/localisation/LocalisationCache.php @@ -23,6 +23,7 @@ use CLDRPluralRuleParser\Evaluator; use CLDRPluralRuleParser\Error as CLDRPluralRuleError; use MediaWiki\Config\ServiceOptions; +use MediaWiki\Languages\LanguageNameUtils; use Psr\Log\LoggerInterface; /** @@ -73,6 +74,9 @@ class LocalisationCache { /** @var callable[] See comment for parameter in constructor */ private $clearStoreCallbacks; + /** @var LanguageNameUtils */ + private $langNameUtils; + /** * A 2-d associative array, code/key, where presence indicates that the item * is loaded. Value arbitrary. @@ -212,13 +216,15 @@ class LocalisationCache { * @param callable[] $clearStoreCallbacks To be called whenever the cache is cleared. Can be * used to clear other caches that depend on this one, such as ResourceLoader's * MessageBlobStore. 
+ * @param LanguageNameUtils $langNameUtils * @throws MWException */ function __construct( ServiceOptions $options, LCStore $store, LoggerInterface $logger, - array $clearStoreCallbacks = [] + array $clearStoreCallbacks, + LanguageNameUtils $langNameUtils ) { $options->assertRequiredOptions( self::$constructorOptions ); @@ -226,6 +232,7 @@ class LocalisationCache { $this->store = $store; $this->logger = $logger; $this->clearStoreCallbacks = $clearStoreCallbacks; + $this->langNameUtils = $langNameUtils; // Keep this separate from $this->options so it can be mutable $this->manualRecache = $options->get( 'manualRecache' ); @@ -438,7 +445,7 @@ class LocalisationCache { $this->initialisedLangs[$code] = true; # If the code is of the wrong form for a Messages*.php file, do a shallow fallback - if ( !Language::isValidBuiltInCode( $code ) ) { + if ( !$this->langNameUtils->isValidBuiltInCode( $code ) ) { $this->initShallowFallback( $code, 'en' ); return; @@ -446,7 +453,7 @@ class LocalisationCache { # Recache the data if necessary if ( !$this->manualRecache && $this->isExpired( $code ) ) { - if ( Language::isSupportedLanguage( $code ) ) { + if ( $this->langNameUtils->isSupportedLanguage( $code ) ) { $this->recache( $code ); } elseif ( $code === 'en' ) { throw new MWException( 'MessagesEn.php is missing.' ); @@ -684,7 +691,7 @@ class LocalisationCache { global $IP; // This reads in the PHP i18n file with non-messages l10n data - $fileName = Language::getMessagesFileName( $code ); + $fileName = $this->langNameUtils->getMessagesFileName( $code ); if ( !file_exists( $fileName ) ) { $data = []; } else { diff --git a/includes/language/LanguageCode.php b/includes/language/LanguageCode.php index 7d954d3803..1d2f0b4e83 100644 --- a/includes/language/LanguageCode.php +++ b/includes/language/LanguageCode.php @@ -21,7 +21,6 @@ /** * Methods for dealing with language codes. 
- * @todo Move some of the code-related static methods out of Language into this class * * @since 1.29 * @ingroup Language diff --git a/includes/language/LanguageNameUtils.php b/includes/language/LanguageNameUtils.php new file mode 100644 index 0000000000..08d9ab3e0d --- /dev/null +++ b/includes/language/LanguageNameUtils.php @@ -0,0 +1,319 @@ +assertRequiredOptions( self::$constructorOptions ); + $this->options = $options; + } + + /** + * Checks whether any localisation is available for that language tag in MediaWiki + * (MessagesXx.php or xx.json exists). + * + * @param string $code Language tag (in lower case) + * @return bool Whether language is supported + */ + public function isSupportedLanguage( $code ) { + if ( !$this->isValidBuiltInCode( $code ) ) { + return false; + } + + if ( $code === 'qqq' ) { + // Special code for internal use, not supported even though there is a qqq.json + return false; + } + + return is_readable( $this->getMessagesFileName( $code ) ) || + is_readable( $this->getJsonMessagesFileName( $code ) ); + } + + /** + * Returns true if a language code string is of a valid form, whether or not it exists. This + * includes codes which are used solely for customisation via the MediaWiki namespace. + * + * @param string $code + * + * @return bool + */ + public function isValidCode( $code ) { + Assert::parameterType( 'string', $code, '$code' ); + if ( !isset( $this->validCodeCache[$code] ) ) { + // People think language codes are HTML-safe, so enforce it. Ideally we should only + // allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks. See bugs + // T39564, T39587, T38938. 
+ $this->validCodeCache[$code] = + // Protect against path traversal + strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) && + !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code ); + } + return $this->validCodeCache[$code]; + } + + /** + * Returns true if a language code is of a valid form for the purposes of internal customisation + * of MediaWiki, via Messages*.php or *.json. + * + * @param string $code + * @return bool + */ + public function isValidBuiltInCode( $code ) { + Assert::parameterType( 'string', $code, '$code' ); + + return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code ); + } + + /** + * Returns true if a language code is an IETF tag known to MediaWiki. + * + * @param string $tag + * + * @return bool + */ + public function isKnownLanguageTag( $tag ) { + // Quick escape for invalid input to avoid exceptions down the line when code tries to + // process tags which are not valid at all. + if ( !$this->isValidBuiltInCode( $tag ) ) { + return false; + } + + if ( isset( Data\Names::$names[$tag] ) || $this->getLanguageName( $tag, $tag ) !== '' ) { + return true; + } + + return false; + } + + /** + * Get an array of language names, indexed by code. + * @param null|string $inLanguage Code of language in which to return the names + * Use self::AUTONYMS for autonyms (native names) + * @param string $include One of: + * self::ALL all available languages + * self::DEFINED only if the language is defined in MediaWiki or wgExtraLanguageNames + * (default) + * self::SUPPORTED only if the language is in self::DEFINED *and* has a message file + * @return array Language code => language name (sorted by key) + */ + public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) { + $cacheKey = $inLanguage === self::AUTONYMS ? 
'null' : $inLanguage; + $cacheKey .= ":$include"; + if ( !$this->languageNameCache ) { + $this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] ); + } + + $ret = $this->languageNameCache->get( $cacheKey ); + if ( !$ret ) { + $ret = $this->getLanguageNamesUncached( $inLanguage, $include ); + $this->languageNameCache->set( $cacheKey, $ret ); + } + return $ret; + } + + /** + * Uncached helper for getLanguageNames + * @param null|string $inLanguage As getLanguageNames + * @param string $include As getLanguageNames + * @return array Language code => language name (sorted by key) + */ + private function getLanguageNamesUncached( $inLanguage, $include ) { + // If passed an invalid language code to use, fallback to en + if ( $inLanguage !== self::AUTONYMS && !$this->isValidCode( $inLanguage ) ) { + $inLanguage = 'en'; + } + + $names = []; + + if ( $inLanguage !== self::AUTONYMS ) { + # TODO: also include for self::AUTONYMS, when this code is more efficient + Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] ); + } + + $mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names; + if ( $this->options->get( 'UsePigLatinVariant' ) ) { + // Pig Latin (for variant development) + $mwNames['en-x-piglatin'] = 'Igpay Atinlay'; + } + + foreach ( $mwNames as $mwCode => $mwName ) { + # - Prefer own MediaWiki native name when not using the hook + # - For other names just add if not added through the hook + if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) { + $names[$mwCode] = $mwName; + } + } + + if ( $include === self::ALL ) { + ksort( $names ); + return $names; + } + + $returnMw = []; + $coreCodes = array_keys( $mwNames ); + foreach ( $coreCodes as $coreCode ) { + $returnMw[$coreCode] = $names[$coreCode]; + } + + if ( $include === self::SUPPORTED ) { + $namesMwFile = []; + # We do this using a foreach over the codes instead of a directory loop so that messages + # files in extensions will work correctly. 
+ foreach ( $returnMw as $code => $value ) { + if ( is_readable( $this->getMessagesFileName( $code ) ) || + is_readable( $this->getJsonMessagesFileName( $code ) ) + ) { + $namesMwFile[$code] = $names[$code]; + } + } + + ksort( $namesMwFile ); + return $namesMwFile; + } + + ksort( $returnMw ); + # self::DEFINED option; default if it's not one of the other two options + # (self::ALL/self::SUPPORTED) + return $returnMw; + } + + /** + * @param string $code The code of the language for which to get the name + * @param null|string $inLanguage Code of language in which to return the name (self::AUTONYMS + * for autonyms) + * @param string $include See getLanguageNames(), except this defaults to self::ALL instead of + * self::DEFINED + * @return string Language name or empty + * @since 1.20 + */ + public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) { + $code = strtolower( $code ); + $array = $this->getLanguageNames( $inLanguage, $include ); + return $array[$code] ?? ''; + } + + /** + * Get the name of a file for a certain language code + * @param string $prefix Prepend this to the filename + * @param string $code Language code + * @param string $suffix Append this to the filename + * @throws MWException + * @return string $prefix . $mangledCode . $suffix + */ + public function getFileName( $prefix, $code, $suffix = '.php' ) { + if ( !$this->isValidBuiltInCode( $code ) ) { + throw new MWException( "Invalid language code \"$code\"" ); + } + + return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . 
$suffix; + } + + /** + * @param string $code + * @return string + */ + public function getMessagesFileName( $code ) { + global $IP; + $file = $this->getFileName( "$IP/languages/messages/Messages", $code, '.php' ); + Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] ); + return $file; + } + + /** + * @param string $code + * @return string + * @throws MWException + */ + public function getJsonMessagesFileName( $code ) { + global $IP; + + if ( !$this->isValidBuiltInCode( $code ) ) { + throw new MWException( "Invalid language code \"$code\"" ); + } + + return "$IP/languages/i18n/$code.json"; + } +} diff --git a/languages/Language.php b/languages/Language.php index 643d0ebd2c..872614c448 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -27,8 +27,8 @@ */ use CLDRPluralRuleParser\Evaluator; +use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\MediaWikiServices; -use Wikimedia\Assert\Assert; /** * Internationalisation code @@ -38,21 +38,24 @@ class Language { /** * Return autonyms in fetchLanguageName(s). * @since 1.32 + * @deprecated since 1.34, use LanguageNameUtils::AUTONYMS */ - const AS_AUTONYMS = null; + const AS_AUTONYMS = LanguageNameUtils::AUTONYMS; /** * Return all known languages in fetchLanguageName(s). * @since 1.32 + * @deprecated since 1.34, use LanguageNameUtils::ALL */ - const ALL = 'all'; + const ALL = LanguageNameUtils::ALL; /** * Return in fetchLanguageName(s) only the languages for which we have at * least some localisation. 
* @since 1.32 + * @deprecated since 1.34, use LanguageNameUtils::SUPPORTED */ - const SUPPORTED = 'mwfile'; + const SUPPORTED = LanguageNameUtils::SUPPORTED; /** * @var LanguageConverter @@ -78,6 +81,9 @@ class Language { /** @var LocalisationCache */ private $localisationCache; + /** @var LanguageNameUtils */ + private $langNameUtils; + public static $mLangObjCache = []; /** @@ -92,6 +98,7 @@ class Language { */ const STRICT_FALLBACKS = 1; + // TODO Make these const once we drop HHVM support (T192166) public static $mWeekdayMsgs = [ 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday' @@ -176,12 +183,6 @@ class Language { */ private static $grammarTransformations; - /** - * Cache for language names - * @var HashBagOStuff|null - */ - private static $languageNameCache; - /** * Unicode directional formatting characters, for embedBidi() */ @@ -237,11 +238,12 @@ class Language { * @return Language */ protected static function newFromCode( $code, $fallback = false ) { - if ( !self::isValidCode( $code ) ) { + $langNameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils(); + if ( !$langNameUtils->isValidCode( $code ) ) { throw new MWException( "Invalid language code \"$code\"" ); } - if ( !self::isValidBuiltInCode( $code ) ) { + if ( !$langNameUtils->isValidBuiltInCode( $code ) ) { // It's not possible to customise this code with class files, so // just return a Language object. This is to support uselang= hacks. 
$lang = new Language; @@ -260,7 +262,7 @@ class Language { // Keep trying the fallback list until we find an existing class $fallbacks = self::getFallbacksFor( $code ); foreach ( $fallbacks as $fallbackCode ) { - if ( !self::isValidBuiltInCode( $fallbackCode ) ) { + if ( !$langNameUtils->isValidBuiltInCode( $fallbackCode ) ) { throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" ); } @@ -286,32 +288,25 @@ class Language { } if ( !defined( 'MEDIAWIKI_INSTALL' ) ) { MediaWikiServices::getInstance()->resetServiceForTesting( 'LocalisationCache' ); + MediaWikiServices::getInstance()->resetServiceForTesting( 'LanguageNameUtils' ); } self::$mLangObjCache = []; self::$fallbackLanguageCache = []; self::$grammarTransformations = null; - self::$languageNameCache = null; } /** * Checks whether any localisation is available for that language tag * in MediaWiki (MessagesXx.php exists). * + * @deprecated since 1.34, use LanguageNameUtils * @param string $code Language tag (in lower case) * @return bool Whether language is supported * @since 1.21 */ public static function isSupportedLanguage( $code ) { - if ( !self::isValidBuiltInCode( $code ) ) { - return false; - } - - if ( $code === 'qqq' ) { - return false; - } - - return is_readable( self::getMessagesFileName( $code ) ) || - is_readable( self::getJsonMessagesFileName( $code ) ); + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->isSupportedLanguage( $code ); } /** @@ -379,63 +374,45 @@ class Language { * not it exists. This includes codes which are used solely for * customisation via the MediaWiki namespace. * + * @deprecated since 1.34, use LanguageNameUtils + * * @param string $code * * @return bool */ public static function isValidCode( $code ) { - static $cache = []; - Assert::parameterType( 'string', $code, '$code' ); - if ( !isset( $cache[$code] ) ) { - // People think language codes are html safe, so enforce it. 
- // Ideally we should only allow a-zA-Z0-9- - // but, .+ and other chars are often used for {{int:}} hacks - // see bugs T39564, T39587, T38938 - $cache[$code] = - // Protect against path traversal - strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) - && !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code ); - } - return $cache[$code]; + return MediaWikiServices::getInstance()->getLanguageNameUtils()->isValidCode( $code ); } /** * Returns true if a language code is of a valid form for the purposes of * internal customisation of MediaWiki, via Messages*.php or *.json. * + * @deprecated since 1.34, use LanguageNameUtils + * * @param string $code * * @since 1.18 * @return bool */ public static function isValidBuiltInCode( $code ) { - Assert::parameterType( 'string', $code, '$code' ); - - return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code ); + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->isValidBuiltInCode( $code ); } /** * Returns true if a language code is an IETF tag known to MediaWiki. * + * @deprecated since 1.34, use LanguageNameUtils + * * @param string $tag * * @since 1.21 * @return bool */ public static function isKnownLanguageTag( $tag ) { - // Quick escape for invalid input to avoid exceptions down the line - // when code tries to process tags which are not valid at all. 
- if ( !self::isValidBuiltInCode( $tag ) ) { - return false; - } - - if ( isset( MediaWiki\Languages\Data\Names::$names[$tag] ) - || self::fetchLanguageName( $tag, $tag ) !== '' - ) { - return true; - } - - return false; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->isKnownLanguageTag( $tag ); } /** @@ -456,7 +433,9 @@ class Language { } else { $this->mCode = str_replace( '_', '-', strtolower( substr( static::class, 8 ) ) ); } - $this->localisationCache = MediaWikiServices::getInstance()->getLocalisationCache(); + $services = MediaWikiServices::getInstance(); + $this->localisationCache = $services->getLocalisationCache(); + $this->langNameUtils = $services->getLanguageNameUtils(); } /** @@ -761,7 +740,7 @@ class Language { if ( $usemsg && wfMessage( $msg )->exists() ) { return $this->getMessageFromDB( $msg ); } - $name = self::fetchLanguageName( $code ); + $name = $this->langNameUtils->getLanguageName( $code ); if ( $name ) { return $name; # if it's defined as a language name, show that } else { @@ -822,6 +801,8 @@ class Language { /** * Get an array of language names, indexed by code. + * + * @deprecated since 1.34, use LanguageNameUtils::getLanguageNames * @param null|string $inLanguage Code of language in which to return the names * Use self::AS_AUTONYMS for autonyms (native names) * @param string $include One of: @@ -832,95 +813,12 @@ class Language { * @since 1.20 */ public static function fetchLanguageNames( $inLanguage = self::AS_AUTONYMS, $include = 'mw' ) { - $cacheKey = $inLanguage === self::AS_AUTONYMS ? 
'null' : $inLanguage; - $cacheKey .= ":$include"; - if ( self::$languageNameCache === null ) { - self::$languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] ); - } - - $ret = self::$languageNameCache->get( $cacheKey ); - if ( !$ret ) { - $ret = self::fetchLanguageNamesUncached( $inLanguage, $include ); - self::$languageNameCache->set( $cacheKey, $ret ); - } - return $ret; - } - - /** - * Uncached helper for fetchLanguageNames - * @param null|string $inLanguage Code of language in which to return the names - * Use self::AS_AUTONYMS for autonyms (native names) - * @param string $include One of: - * self::ALL all available languages - * 'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default) - * self::SUPPORTED only if the language is in 'mw' *and* has a message file - * @return array Language code => language name (sorted by key) - */ - private static function fetchLanguageNamesUncached( - $inLanguage = self::AS_AUTONYMS, - $include = 'mw' - ) { - global $wgExtraLanguageNames, $wgUsePigLatinVariant; - - // If passed an invalid language code to use, fallback to en - if ( $inLanguage !== self::AS_AUTONYMS && !self::isValidCode( $inLanguage ) ) { - $inLanguage = 'en'; - } - - $names = []; - - if ( $inLanguage ) { - # TODO: also include when $inLanguage is null, when this code is more efficient - Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] ); - } - - $mwNames = $wgExtraLanguageNames + MediaWiki\Languages\Data\Names::$names; - if ( $wgUsePigLatinVariant ) { - // Pig Latin (for variant development) - $mwNames['en-x-piglatin'] = 'Igpay Atinlay'; - } - - foreach ( $mwNames as $mwCode => $mwName ) { - # - Prefer own MediaWiki native name when not using the hook - # - For other names just add if not added through the hook - if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) { - $names[$mwCode] = $mwName; - } - } - - if ( $include === self::ALL ) { - ksort( $names ); - return $names; - } - - $returnMw = 
[]; - $coreCodes = array_keys( $mwNames ); - foreach ( $coreCodes as $coreCode ) { - $returnMw[$coreCode] = $names[$coreCode]; - } - - if ( $include === self::SUPPORTED ) { - $namesMwFile = []; - # We do this using a foreach over the codes instead of a directory - # loop so that messages files in extensions will work correctly. - foreach ( $returnMw as $code => $value ) { - if ( is_readable( self::getMessagesFileName( $code ) ) - || is_readable( self::getJsonMessagesFileName( $code ) ) - ) { - $namesMwFile[$code] = $names[$code]; - } - } - - ksort( $namesMwFile ); - return $namesMwFile; - } - - ksort( $returnMw ); - # 'mw' option; default if it's not one of the other two options (all/mwfile) - return $returnMw; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getLanguageNames( $inLanguage, $include ); } /** + * @deprecated since 1.34, use LanguageNameUtils::getLanguageName * @param string $code The code of the language for which to get the name * @param null|string $inLanguage Code of language in which to return the name * (SELF::AS_AUTONYMS for autonyms) @@ -933,9 +831,8 @@ class Language { $inLanguage = self::AS_AUTONYMS, $include = self::ALL ) { - $code = strtolower( $code ); - $array = self::fetchLanguageNames( $inLanguage, $include ); - return !array_key_exists( $code, $array ) ? '' : $array[$code]; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getLanguageName( $code, $inLanguage, $include ); } /** @@ -4441,6 +4338,8 @@ class Language { /** * Get the name of a file for a certain language code + * + * @deprecated since 1.34, use LanguageNameUtils * @param string $prefix Prepend this to the filename * @param string $code Language code * @param string $suffix Append this to the filename @@ -4448,38 +4347,30 @@ class Language { * @return string $prefix . $mangledCode . 
$suffix */ public static function getFileName( $prefix, $code, $suffix = '.php' ) { - if ( !self::isValidBuiltInCode( $code ) ) { - throw new MWException( "Invalid language code \"$code\"" ); - } - - return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getFileName( $prefix, $code, $suffix ); } /** + * @deprecated since 1.34, use LanguageNameUtils * @param string $code * @return string */ public static function getMessagesFileName( $code ) { - global $IP; - $file = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' ); - Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] ); - return $file; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getMessagesFileName( $code ); } /** + * @deprecated since 1.34, use LanguageNameUtils * @param string $code * @return string * @throws MWException * @since 1.23 */ public static function getJsonMessagesFileName( $code ) { - global $IP; - - if ( !self::isValidBuiltInCode( $code ) ) { - throw new MWException( "Invalid language code \"$code\"" ); - } - - return "$IP/languages/i18n/$code.json"; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getJsonMessagesFileName( $code ); } /** diff --git a/languages/data/Names.php b/languages/data/Names.php index 00f35b2564..1d80f6b257 100644 --- a/languages/data/Names.php +++ b/languages/data/Names.php @@ -39,7 +39,7 @@ namespace MediaWiki\Languages\Data; * If you are adding support for such a language, add it also to * the relevant section in shared.css. * - * Do not use this class directly. Use Language::fetchLanguageNames(), which + * Do not use this class directly. Use LanguageNameUtils::getLanguageNames(), which * includes support for the CLDR extension. 
* * @ingroup Language diff --git a/maintenance/rebuildLocalisationCache.php b/maintenance/rebuildLocalisationCache.php index 23a25054e2..a239fa0e28 100644 --- a/maintenance/rebuildLocalisationCache.php +++ b/maintenance/rebuildLocalisationCache.php @@ -97,7 +97,8 @@ class RebuildLocalisationCache extends Maintenance { [ function () { MediaWikiServices::getInstance()->getResourceLoader() ->getMessageBlobStore()->clear(); - } ] + } ], + MediaWikiServices::getInstance()->getLanguageNameUtils() ); $allCodes = array_keys( Language::fetchLanguageNames( null, 'mwfile' ) ); diff --git a/tests/common/TestsAutoLoader.php b/tests/common/TestsAutoLoader.php index 7c8df1a6f5..99b548ed7f 100644 --- a/tests/common/TestsAutoLoader.php +++ b/tests/common/TestsAutoLoader.php @@ -221,6 +221,9 @@ $wgAutoloadClasses += [ # tests/phpunit/unit/includes 'BadFileLookupTest' => "$testDir/phpunit/unit/includes/BadFileLookupTest.php", + # tests/phpunit/unit/includes/language + 'LanguageNameUtilsTestTrait' => "$testDir/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php", + # tests/phpunit/unit/includes/libs/filebackend/fsfile 'TempFSFileTestTrait' => "$testDir/phpunit/unit/includes/libs/filebackend/fsfile/TempFSFileTestTrait.php", diff --git a/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php b/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php index 282188d264..6308b82c58 100644 --- a/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php +++ b/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php @@ -160,6 +160,7 @@ class ApiQuerySiteinfoTest extends ApiTestCase { 'wgExtraInterlanguageLinkPrefixes' => [ 'self' ], 'wgExtraLanguageNames' => [ 'self' => 'Recursion' ], ] ); + $this->resetServices(); MessageCache::singleton()->enable(); diff --git a/tests/phpunit/includes/cache/LocalisationCacheTest.php b/tests/phpunit/includes/cache/LocalisationCacheTest.php index ecdfae4614..39526fb9c7 100644 --- a/tests/phpunit/includes/cache/LocalisationCacheTest.php +++ 
b/tests/phpunit/includes/cache/LocalisationCacheTest.php @@ -1,6 +1,7 @@ createMock( LanguageNameUtils::class ); + $mockLangNameUtils->method( 'isValidBuiltInCode' )->will( $this->returnCallback( + function ( $code ) { + // Copy-paste, but it's only one line + return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code ); + } + ) ); + $mockLangNameUtils->method( 'isSupportedLanguage' )->will( $this->returnCallback( + function ( $code ) { + return in_array( $code, [ + 'ar', + 'arz', + 'ba', + 'de', + 'en', + 'ksh', + 'ru', + ] ); + } + ) ); + $mockLangNameUtils->method( 'getMessagesFileName' )->will( $this->returnCallback( + function ( $code ) { + global $IP; + $code = str_replace( '-', '_', ucfirst( $code ) ); + return "$IP/languages/messages/Messages$code.php"; + } + ) ); + $mockLangNameUtils->expects( $this->never() )->method( $this->anythingBut( + 'isValidBuiltInCode', 'isSupportedLanguage', 'getMessagesFileName' + ) ); + $lc = $this->getMockBuilder( LocalisationCache::class ) ->setConstructorArgs( [ new ServiceOptions( LocalisationCache::$constructorOptions, [ @@ -33,7 +65,9 @@ class LocalisationCacheTest extends MediaWikiTestCase { 'MessagesDirs' => [], ] ), new LCStoreDB( [] ), - new NullLogger + new NullLogger, + [], + $mockLangNameUtils ] ) ->setMethods( [ 'getMessagesDirs' ] ) ->getMock(); diff --git a/tests/phpunit/languages/LanguageTest.php b/tests/phpunit/languages/LanguageTest.php index 391e1b68e2..6f618a2cc9 100644 --- a/tests/phpunit/languages/LanguageTest.php +++ b/tests/phpunit/languages/LanguageTest.php @@ -3,6 +3,24 @@ use Wikimedia\TestingAccessWrapper; class LanguageTest extends LanguageClassesTestCase { + use LanguageNameUtilsTestTrait; + + /** @var array Copy of $wgHooks from before we unset LanguageGetTranslatedLanguageNames */ + private $origHooks; + + public function setUp() { + global $wgHooks; + + parent::setUp(); + + // Don't allow installed hooks to run, except if a test restores them via origHooks (needed + // for testIsKnownLanguageTag_cldr) 
+ $this->origHooks = $wgHooks; + $newHooks = $wgHooks; + unset( $newHooks['LanguageGetTranslatedLanguageNames'] ); + $this->setMwGlobals( 'wgHooks', $newHooks ); + } + /** * @covers Language::convertDoubleWidth * @covers Language::normalizeForSearch @@ -510,84 +528,6 @@ class LanguageTest extends LanguageClassesTestCase { ); } - /** - * Test Language::isValidBuiltInCode() - * @dataProvider provideLanguageCodes - * @covers Language::isValidBuiltInCode - */ - public function testBuiltInCodeValidation( $code, $expected, $message = '' ) { - $this->assertEquals( $expected, - (bool)Language::isValidBuiltInCode( $code ), - "validating code $code $message" - ); - } - - public static function provideLanguageCodes() { - return [ - [ 'fr', true, 'Two letters, minor case' ], - [ 'EN', false, 'Two letters, upper case' ], - [ 'tyv', true, 'Three letters' ], - [ 'be-tarask', true, 'With dash' ], - [ 'be-x-old', true, 'With extension (two dashes)' ], - [ 'be_tarask', false, 'Reject underscores' ], - ]; - } - - /** - * Test Language::isKnownLanguageTag() - * @dataProvider provideKnownLanguageTags - * @covers Language::isKnownLanguageTag - */ - public function testKnownLanguageTag( $code, $message = '' ) { - $this->assertTrue( - (bool)Language::isKnownLanguageTag( $code ), - "validating code $code - $message" - ); - } - - public static function provideKnownLanguageTags() { - return [ - [ 'fr', 'simple code' ], - [ 'bat-smg', 'an MW legacy tag' ], - [ 'sgs', 'an internal standard MW name, for which a legacy tag is used externally' ], - ]; - } - - /** - * @covers Language::isKnownLanguageTag - */ - public function testKnownCldrLanguageTag() { - if ( !class_exists( 'LanguageNames' ) ) { - $this->markTestSkipped( 'The LanguageNames class is not available. ' - . 'The CLDR extension is probably not installed.' ); - } - - $this->assertTrue( - (bool)Language::isKnownLanguageTag( 'pal' ), - 'validating code "pal" an ancient language, which probably will ' - . 
'not appear in Names.php, but appears in CLDR in English' - ); - } - - /** - * Negative tests for Language::isKnownLanguageTag() - * @dataProvider provideUnKnownLanguageTags - * @covers Language::isKnownLanguageTag - */ - public function testUnknownLanguageTag( $code, $message = '' ) { - $this->assertFalse( - (bool)Language::isKnownLanguageTag( $code ), - "checking that code $code is invalid - $message" - ); - } - - public static function provideUnknownLanguageTags() { - return [ - [ 'mw', 'non-existent two-letter code' ], - [ 'foo"getGrammarTransformations(); $this->assertNotNull( $languageClass->grammarTransformations ); - // Populate $languageNameCache - Language::fetchLanguageNames(); - $this->assertNotNull( $languageClass->languageNameCache ); - Language::clearCaches(); $this->assertCount( 0, Language::$mLangObjCache ); $this->assertCount( 0, $languageClass->fallbackLanguageCache ); $this->assertNull( $languageClass->grammarTransformations ); - $this->assertNull( $languageClass->languageNameCache ); - } - - /** - * @dataProvider provideIsSupportedLanguage - * @covers Language::isSupportedLanguage - */ - public function testIsSupportedLanguage( $code, $expected, $comment ) { - $this->assertEquals( $expected, Language::isSupportedLanguage( $code ), $comment ); - } - - public static function provideIsSupportedLanguage() { - return [ - [ 'en', true, 'is supported language' ], - [ 'fi', true, 'is supported language' ], - [ 'bunny', false, 'is not supported language' ], - [ 'FI', false, 'is not supported language, input should be in lower case' ], - ]; } /** @@ -1956,4 +1874,82 @@ class LanguageTest extends LanguageClassesTestCase { [ 'èl', 'Ll' , 'Non-ASCII is overridden', [ 'è' => 'L' ] ], ]; } + + // The following methods are for LanguageNameUtilsTestTrait + + private function isSupportedLanguage( $code ) { + return Language::isSupportedLanguage( $code ); + } + + private function isValidCode( $code ) { + return Language::isValidCode( $code ); + } + + private 
function isValidBuiltInCode( $code ) { + return Language::isValidBuiltInCode( $code ); + } + + private function isKnownLanguageTag( $code ) { + return Language::isKnownLanguageTag( $code ); + } + + /** + * Call getLanguageName() and getLanguageNames() using the Language static methods. + * + * @param array $options To set globals for testing Language + * @param string $expected + * @param string $code + * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. + */ + private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) { + if ( $options ) { + foreach ( $options as $key => $val ) { + $this->setMwGlobals( "wg$key", $val ); + } + $this->resetServices(); + } + $this->assertSame( $expected, + Language::fetchLanguageNames( ...$otherArgs )[strtolower( $code )] ?? '' ); + $this->assertSame( $expected, Language::fetchLanguageName( $code, ...$otherArgs ) ); + } + + private function getLanguageNames( ...$args ) { + return Language::fetchLanguageNames( ...$args ); + } + + private function getLanguageName( ...$args ) { + return Language::fetchLanguageName( ...$args ); + } + + private static function getFileName( ...$args ) { + return Language::getFileName( ...$args ); + } + + private static function getMessagesFileName( $code ) { + return Language::getMessagesFileName( $code ); + } + + private static function getJsonMessagesFileName( $code ) { + return Language::getJsonMessagesFileName( $code ); + } + + /** + * @todo This really belongs in the cldr extension's tests. + * + * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag + * @covers Language::isKnownLanguageTag + */ + public function testIsKnownLanguageTag_cldr() { + if ( !class_exists( 'LanguageNames' ) ) { + $this->markTestSkipped( 'The LanguageNames class is not available. ' + . 'The CLDR extension is probably not installed.' ); + } + + // We need to restore the extension's hook that we removed. 
+ $this->setMwGlobals( 'wgHooks', $this->origHooks ); + + // "pal" is an ancient language, which probably will not appear in Names.php, but appears in + // CLDR in English + $this->assertTrue( Language::isKnownLanguageTag( 'pal' ) ); + } } diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php new file mode 100644 index 0000000000..6fbd4a2863 --- /dev/null +++ b/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php @@ -0,0 +1,66 @@ + [], + 'LanguageCode' => 'en', + 'UsePigLatinVariant' => false, + ] + ) ); + } + + use LanguageNameUtilsTestTrait; + + private function isSupportedLanguage( $code ) { + return $this->newObj()->isSupportedLanguage( $code ); + } + + private function isValidCode( $code ) { + return $this->newObj()->isValidCode( $code ); + } + + private function isValidBuiltInCode( $code ) { + return $this->newObj()->isValidBuiltInCode( $code ); + } + + private function isKnownLanguageTag( $code ) { + return $this->newObj()->isKnownLanguageTag( $code ); + } + + private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) { + $this->assertSame( $expected, $this->newObj( $options ) + ->getLanguageNames( ...$otherArgs )[strtolower( $code )] ?? 
'' ); + $this->assertSame( $expected, + $this->newObj( $options )->getLanguageName( $code, ...$otherArgs ) ); + } + + private function getLanguageNames( ...$args ) { + return $this->newObj()->getLanguageNames( ...$args ); + } + + private function getLanguageName( ...$args ) { + return $this->newObj()->getLanguageName( ...$args ); + } + + private static function getFileName( ...$args ) { + return self::newObj()->getFileName( ...$args ); + } + + private static function getMessagesFileName( $code ) { + return self::newObj()->getMessagesFileName( $code ); + } + + private static function getJsonMessagesFileName( $code ) { + return self::newObj()->getJsonMessagesFileName( $code ); + } +} diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php new file mode 100644 index 0000000000..bd777e9a58 --- /dev/null +++ b/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php @@ -0,0 +1,555 @@ +assertSame( $expected, $this->isSupportedLanguage( $code ) ); + } + + public static function provideIsSupportedLanguage() { + return [ + 'en' => [ 'en', true ], + 'fi' => [ 'fi', true ], + 'bunny' => [ 'bunny', false ], + 'qqq' => [ 'qqq', false ], + 'uppercase is not considered supported' => [ 'FI', false ], + ]; + } + + abstract protected function isValidCode( $code ); + + /** + * We don't test that the result is cached, because that should only be noticeable if the + * configuration changes in between calls, and 1) that should never happen in normal operation, + * 2) if you do it you deserve whatever you get, and 3) once the static Language method is + * dropped and the invalid title regex is moved to something injected instead of a static call, + * the cache will be undetectable. + * + * @todo Should we test changes to $wgLegalTitleChars here? Does anybody actually change that? + * Is it possible to change it usefully without breaking everything? 
+ * + * @dataProvider provideIsValidCode + * @covers MediaWiki\Languages\LanguageNameUtils::isValidCode + * @covers Language::isValidCode + * + * @param string $code + * @param bool $expected + */ + public function testIsValidCode( $code, $expected ) { + $this->assertSame( $expected, $this->isValidCode( $code ) ); + } + + public static function provideIsValidCode() { + $ret = [ + 'en' => [ 'en', true ], + 'en-GB' => [ 'en-GB', true ], + 'Funny chars' => [ "%!$()*,-.;=?@^_`~\x80\xA2\xFF+", true ], + 'Percent escape not allowed' => [ 'a%aF', false ], + 'Percent with only one following char is okay' => [ '%a', true ], + 'Percent with non-hex following chars is okay' => [ '%AG', true ], + 'Named char reference "a"' => [ 'a&a', false ], + 'Named char reference "A"' => [ 'a&A', false ], + 'Named char reference "0"' => [ 'a&0', false ], + 'Named char reference non-ASCII' => [ "a&\x92", false ], + 'Numeric char reference' => [ "a&#0", false ], + 'Hex char reference 0' => [ "a&#x0", false ], + 'Hex char reference A' => [ "a&#xA", false ], + 'Lone ampersand is valid for title but not lang code' => [ '&', false ], + 'Ampersand followed by just # is valid for title but not lang code' => [ '&#', false ], + 'Ampersand followed by # and non-x/digit is valid for title but not lang code' => + [ '&#a', false ], + ]; + $disallowedChars = ":/\\\000&<>'\""; + foreach ( str_split( $disallowedChars ) as $char ) { + $ret["Disallowed character $char"] = [ "a{$char}a", false ]; + } + return $ret; + } + + abstract protected function isValidBuiltInCode( $code ); + + /** + * @dataProvider provideIsValidBuiltInCode + * @covers MediaWiki\Languages\LanguageNameUtils::isValidBuiltInCode + * @covers Language::isValidBuiltInCode + * + * @param string $code + * @param bool $expected + */ + public function testIsValidBuiltInCode( $code, $expected ) { + $this->assertSame( $expected, $this->isValidBuiltInCode( $code ) ); + } + + public static function provideIsValidBuiltInCode() { + return [ + 'Two letters, 
lowercase' => [ 'fr', true ], + 'Two letters, uppercase' => [ 'EN', false ], + 'Three letters' => [ 'tyv', true ], + 'With dash' => [ 'be-tarask', true ], + 'With extension (two dashes)' => [ 'be-x-old', true ], + 'Reject underscores' => [ 'be_tarask', false ], + 'One letter' => [ 'a', false ], + 'Only digits' => [ '00', true ], + 'Only dashes' => [ '--', true ], + 'Unreasonably long' => [ str_repeat( 'x', 100 ), true ], + 'qqq' => [ 'qqq', true ], + ]; + } + + abstract protected function isKnownLanguageTag( $code ); + + /** + * @dataProvider provideIsKnownLanguageTag + * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag + * @covers Language::isKnownLanguageTag + * + * @param string $code + * @param bool $expected + */ + public function testIsKnownLanguageTag( $code, $expected ) { + $this->assertSame( $expected, $this->isKnownLanguageTag( $code ) ); + } + + public static function provideIsKnownLanguageTag() { + $invalidBuiltInCodes = array_filter( static::provideIsValidBuiltInCode(), + function ( $arr ) { + // If isValidBuiltInCode() returns false, we want to also, but if it returns true, + // we could still return false from isKnownLanguageTag(), so skip those. + return !$arr[1]; + } + ); + return array_merge( $invalidBuiltInCodes, [ + 'Simple code' => [ 'fr', true ], + 'An MW legacy tag' => [ 'bat-smg', true ], + 'An internal standard MW name, for which a legacy tag is used externally' => + [ 'sgs', true ], + 'Non-existent two-letter code' => [ 'mw', false ], + 'Very invalid language code' => [ 'foo"assertGetLanguageNames( [], $expected, $code, ...$otherArgs ); + } + + public static function provideGetLanguageNames() { + // @todo There are probably lots of interesting tests to add here. 
+ return [ + 'Simple code' => [ 'Deutsch', 'de' ], + 'Simple code in a different language (doesn\'t work without hook)' => + [ 'Deutsch', 'de', 'fr' ], + 'Invalid code' => [ '', '&' ], + 'Pig Latin not enabled' => [ '', 'en-x-piglatin', AUTONYMS, ALL ], + 'qqq doesn\'t have a name' => [ '', 'qqq', AUTONYMS, ALL ], + 'An MW legacy tag is recognized' => [ 'žemaitėška', 'bat-smg' ], + // @todo Is the next test's result desired? + 'An MW legacy tag is not supported' => [ '', 'bat-smg', AUTONYMS, SUPPORTED ], + 'An internal standard name, for which a legacy tag is used externally, is supported' => + [ 'žemaitėška', 'sgs', AUTONYMS, SUPPORTED ], + ]; + } + + /** + * @dataProvider provideGetLanguageNames_withHook + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + * + * @param string $expected Expected return value of getLanguageName() + * @param string $code + * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. 
+ */ + public function testGetLanguageNames_withHook( $expected, $code, ...$otherArgs ) { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names, $inLanguage ) { + switch ( $inLanguage ) { + case 'de': + $names = [ + 'de' => 'Deutsch', + 'en' => 'Englisch', + 'fr' => 'Französisch', + ]; + break; + + case 'en': + $names = [ + 'de' => 'German', + 'en' => 'English', + 'fr' => 'French', + 'sqsqsqsq' => '!!?!', + 'bat-smg' => 'Samogitian', + ]; + break; + + case 'fr': + $names = [ + 'de' => 'allemand', + 'en' => 'anglais', + // Deliberate mistake (no cedilla) + 'fr' => 'francais', + ]; + break; + } + } + ); + + // Really we could dispense with assertGetLanguageNames() and just call + // testGetLanguageNames() here, but it looks weird to call a test method from another test + // method. + $this->assertGetLanguageNames( [], $expected, $code, ...$otherArgs ); + } + + public static function provideGetLanguageNames_withHook() { + return [ + 'Simple code in a different language' => [ 'allemand', 'de', 'fr' ], + 'Invalid inLanguage defaults to English' => [ 'German', 'de', '&' ], + 'If inLanguage not provided, default to autonym' => [ 'Deutsch', 'de' ], + 'Hooks ignored for explicitly-requested autonym' => [ 'français', 'fr', 'fr' ], + 'Hooks don\'t make a language supported' => [ '', 'bat-smg', 'en', SUPPORTED ], + 'Hooks don\'t make a language defined' => [ '', 'sqsqsqsq', 'en', DEFINED ], + 'Hooks do make a language name returned with ALL' => [ '!!?!', 'sqsqsqsq', 'en', ALL ], + ]; + } + + /** + * @dataProvider provideGetLanguageNames_ExtraLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + * + * @param string $expected Expected return value of getLanguageName() + * @param string $code + * 
@param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. + */ + public function testGetLanguageNames_ExtraLanguageNames( $expected, $code, ...$otherArgs ) { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names ) { + $names['de'] = 'die deutsche Sprache'; + } + ); + $this->assertGetLanguageNames( + [ 'ExtraLanguageNames' => [ 'de' => 'deutsche Sprache', 'sqsqsqsq' => '!!?!' ] ], + $expected, $code, ...$otherArgs + ); + } + + public static function provideGetLanguageNames_ExtraLanguageNames() { + return [ + 'Simple extra language name' => [ '!!?!', 'sqsqsqsq' ], + 'Extra language is defined' => [ '!!?!', 'sqsqsqsq', AUTONYMS, DEFINED ], + 'Extra language is not supported' => [ '', 'sqsqsqsq', AUTONYMS, SUPPORTED ], + 'Extra language overrides default' => [ 'deutsche Sprache', 'de' ], + 'Extra language overrides hook for explicitly requested autonym' => + [ 'deutsche Sprache', 'de', 'de' ], + 'Hook overrides extra language for non-autonym' => + [ 'die deutsche Sprache', 'de', 'fr' ], + ]; + } + + /** + * Test that getLanguageNames() defaults to DEFINED, and getLanguageName() defaults to ALL. + * + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + */ + public function testGetLanguageNames_parameterDefault() { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names ) { + $names = [ 'sqsqsqsq' => '!!?!' ]; + } + ); + + // We use 'en' here because the hook is not run if we're requesting autonyms, although in + // this case (language that isn't defined by MediaWiki itself) that behavior seems wrong. 
+ $this->assertArrayNotHasKey( 'sqsqsqsq', $this->getLanguageNames( 'en' ) ); + + $this->assertSame( '!!?!', $this->getLanguageName( 'sqsqsqsq', 'en' ) ); + } + + /** + * @dataProvider provideGetLanguageNames_sorted + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers Language::fetchLanguageNames + * + * @param mixed ...$args To pass to method + */ + public function testGetLanguageNames_sorted( ...$args ) { + $names = $this->getLanguageNames( ...$args ); + $sortedNames = $names; + ksort( $sortedNames ); + $this->assertSame( $sortedNames, $names ); + } + + public static function provideGetLanguageNames_sorted() { + return [ + [], + [ AUTONYMS ], + [ AUTONYMS, 'mw' ], + [ AUTONYMS, ALL ], + [ AUTONYMS, SUPPORTED ], + [ 'he', 'mw' ], + [ 'he', ALL ], + [ 'he', SUPPORTED ], + ]; + } + + /** + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers Language::fetchLanguageNames + */ + public function testGetLanguageNames_hookNotCalledForAutonyms() { + $count = 0; + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function () use ( &$count ) { + $count++; + } + ); + + $this->getLanguageNames(); + $this->assertSame( 0, $count, 'Hook must not be called for autonyms' ); + + // We test elsewhere that the hook works, but the following verifies that our test is + // working and $count isn't being incremented above only because we're checking autonyms. 
+ $this->getLanguageNames( 'fr' ); + $this->assertSame( 1, $count, 'Hook must be called for non-autonyms' ); + } + + /** + * @dataProvider provideGetLanguageNames_pigLatin + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + * + * @param string $expected + * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. + */ + public function testGetLanguageNames_pigLatin( $expected, ...$otherArgs ) { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names, $inLanguage ) { + switch ( $inLanguage ) { + case 'fr': + $names = [ 'en-x-piglatin' => 'latin de cochons' ]; + break; + + case 'en-x-piglatin': + // Deliberately lowercase + $names = [ 'en-x-piglatin' => 'igpay atinlay' ]; + break; + } + } + ); + + $this->assertGetLanguageNames( + [ 'UsePigLatinVariant' => true ], $expected, 'en-x-piglatin', ...$otherArgs ); + } + + public static function provideGetLanguageNames_pigLatin() { + return [ + 'Simple test' => [ 'Igpay Atinlay' ], + 'Not supported' => [ '', AUTONYMS, SUPPORTED ], + 'Foreign language' => [ 'latin de cochons', 'fr' ], + 'Hook doesn\'t override explicit autonym' => + [ 'Igpay Atinlay', 'en-x-piglatin', 'en-x-piglatin' ], + ]; + } + + /** + * Just for the sake of completeness, test that ExtraLanguageNames will not override the name + * for pig Latin. Nobody actually cares about this and if anything current behavior is probably + * wrong, but once we're testing the whole file we may as well be comprehensive. 
+ * + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + */ + public function testGetLanguageNames_pigLatinAndExtraLanguageNames() { + $this->assertGetLanguageNames( + [ + 'UsePigLatinVariant' => true, + 'ExtraLanguageNames' => [ 'en-x-piglatin' => 'igpay atinlay' ] + ], + 'Igpay Atinlay', + 'en-x-piglatin' + ); + } + + abstract protected static function getFileName( ...$args ); + + /** + * @dataProvider provideGetFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getFileName + * @covers Language::getFileName + * + * @param string $expected + * @param mixed ...$args To pass to method + */ + public function testGetFileName( $expected, ...$args ) { + $this->assertSame( $expected, $this->getFileName( ...$args ) ); + } + + public static function provideGetFileName() { + return [ + 'Simple case' => [ 'MessagesXx.php', 'Messages', 'xx' ], + 'With extension' => [ 'MessagesXx.ext', 'Messages', 'xx', '.ext' ], + 'Replacing dashes' => [ '!__?', '!', '--', '?' 
], + 'Empty prefix and extension' => [ 'Xx', '', 'xx', '' ], + 'Uppercase only first letter' => [ 'Messages_a.php', 'Messages', '-a' ], + ]; + } + + abstract protected static function getMessagesFileName( $code ); + + /** + * @dataProvider provideGetMessagesFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName + * @covers Language::getMessagesFileName + * + * @param string $code + * @param string $expected + */ + public function testGetMessagesFileName( $code, $expected ) { + $this->assertSame( $expected, $this->getMessagesFileName( $code ) ); + } + + public static function provideGetMessagesFileName() { + global $IP; + return [ + 'Simple case' => [ 'en', "$IP/languages/messages/MessagesEn.php" ], + 'Replacing dashes' => [ '--', "$IP/languages/messages/Messages__.php" ], + 'Uppercase only first letter' => [ '-a', "$IP/languages/messages/Messages_a.php" ], + ]; + } + + /** + * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName + * @covers Language::getMessagesFileName + */ + public function testGetMessagesFileName_withHook() { + $called = 0; + + $this->setTemporaryHook( 'Language::getMessagesFileName', + function ( $code, &$file ) use ( &$called ) { + global $IP; + + $called++; + + $this->assertSame( 'ab-cd', $code ); + $this->assertSame( "$IP/languages/messages/MessagesAb_cd.php", $file ); + $file = 'bye-bye'; + } + ); + + $this->assertSame( 'bye-bye', $this->getMessagesFileName( 'ab-cd' ) ); + $this->assertSame( 1, $called ); + } + + abstract protected static function getJsonMessagesFileName( $code ); + + /** + * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName + * @covers Language::getJsonMessagesFileName + */ + public function testGetJsonMessagesFileName() { + global $IP; + + // Not so much to test here, one test seems to be enough + $expected = "$IP/languages/i18n/en--123.json"; + $this->assertSame( $expected, $this->getJsonMessagesFileName( 'en--123' ) ); + } + + /** + * getFileName, getMessagesFileName, and 
getJsonMessagesFileName all throw if they get an + * invalid code. To save boilerplate, test them all in one method. + * + * @dataProvider provideExceptionFromInvalidCode + * @covers MediaWiki\Languages\LanguageNameUtils::getFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName + * @covers Language::getFileName + * @covers Language::getMessagesFileName + * @covers Language::getJsonMessagesFileName + * + * @param callable $callback Will throw when passed $code + * @param string $code + */ + public function testExceptionFromInvalidCode( $callback, $code ) { + $this->setExpectedException( MWException::class, "Invalid language code \"$code\"" ); + + $callback( $code ); + } + + public static function provideExceptionFromInvalidCode() { + $ret = []; + foreach ( static::provideIsValidBuiltInCode() as $desc => list( $code, $valid ) ) { + if ( $valid ) { + // Won't get an exception from this one + continue; + } + + // For getFileName, we define an anonymous function because of the extra first param + $ret["getFileName: $desc"] = [ + function ( $code ) { + return static::getFileName( 'Messages', $code ); + }, + $code + ]; + + $ret["getMessagesFileName: $desc"] = + [ [ static::class, 'getMessagesFileName' ], $code ]; + + $ret["getJsonMessagesFileName: $desc"] = + [ [ static::class, 'getJsonMessagesFileName' ], $code ]; + } + return $ret; + } +} -- 2.20.1