3 * Internationalisation code.
4 * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
26 * @defgroup Language Language
29 namespace MediaWiki\Languages
;
33 use MediaWiki\Config\ServiceOptions
;
34 use MediaWikiTitleCodec
;
36 use Wikimedia\Assert\Assert
;
41 * A service that provides utilities to do with language names and codes.
45 class LanguageNameUtils
{
47 * Return autonyms in getLanguageName(s).
// Selector constants accepted by getLanguageNames()/getLanguageName(), followed by the
// per-instance caches.
// NOTE(review): self::ALL and self::DEFINED are referenced by methods of this class, but their
// declarations are not visible in this excerpt (only their descriptions remain below) - confirm
// against the full file.
// AUTONYMS is null; passed as $inLanguage it requests autonyms (native names).
49 const AUTONYMS
= null;
52 * Return all known languages in getLanguageName(s).
57 * Return in getLanguageName(s) only the languages that are defined by MediaWiki.
62 * Return in getLanguageName(s) only the languages for which we have at least some localisation.
64 const SUPPORTED
= 'mwfile';
// NOTE(review): the property annotated by the following @var is not visible in this excerpt;
// presumably it is $options, which the constructor assigns - confirm.
66 /** @var ServiceOptions */
70 * Cache for language names
71 * @var HashBagOStuff|null
// Created on first use by getLanguageNames() as a HashBagOStuff configured with maxKeys = 20.
73 private $languageNameCache;
76 * Cache for validity of language codes
// Filled by isValidCode(): language code => boolean validity result.
79 private $validCodeCache = [];
81 public static $constructorOptions = [
/**
 * Build the service from its configuration options.
 *
 * The supplied options are checked against self::$constructorOptions and then
 * stored on the instance.
 *
 * @param ServiceOptions $options
 */
public function __construct( ServiceOptions $options ) {
	$requiredKeys = self::$constructorOptions;
	$options->assertRequiredOptions( $requiredKeys );

	$this->options = $options;
}
95 * Checks whether any localisation is available for that language tag in MediaWiki
96 * (MessagesXx.php or xx.json exists).
98 * @param string $code Language tag (in lower case)
99 * @return bool Whether language is supported
// Reports whether localisation exists for $code: the visible return value is true when either
// the PHP messages file or the JSON messages file for the code is readable.
// NOTE(review): the bodies of the two guard `if` blocks below are not visible in this excerpt;
// presumably each returns false - confirm against the full file.
101 public function isSupportedLanguage( $code ) {
// Guard: codes that fail isValidBuiltInCode() are dealt with before any file lookup.
102 if ( !$this->isValidBuiltInCode( $code ) ) {
// Guard: 'qqq' is special-cased ahead of the file checks.
106 if ( $code === 'qqq' ) {
107 // Special code for internal use, not supported even though there is a qqq.json
// Either kind of messages file is sufficient.
111 return is_readable( $this->getMessagesFileName( $code ) ) ||
112 is_readable( $this->getJsonMessagesFileName( $code ) );
/**
 * Check that a language code string is well-formed, whether or not the language exists.
 * Codes that are used solely for customisation via the MediaWiki namespace also qualify.
 *
 * The outcome is memoised per code in $this->validCodeCache.
 *
 * @param string $code
 * @return bool
 */
public function isValidCode( $code ) {
	Assert::parameterType( 'string', $code, '$code' );

	// Cache hit: reuse the earlier verdict.
	if ( isset( $this->validCodeCache[$code] ) ) {
		return $this->validCodeCache[$code];
	}

	// People think language codes are HTML-safe, so enforce it. Ideally we should only
	// allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks. See bugs
	// T39564, T39587, T38938.
	// Protect against path traversal
	$hasNoUnsafeChars = strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code );

	// The title regex is only consulted when the character check has already passed.
	$isValid = $hasNoUnsafeChars &&
		!preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );

	$this->validCodeCache[$code] = $isValid;

	return $isValid;
}
/**
 * Returns true if a language code is of a valid form for the purposes of internal customisation
 * of MediaWiki, via Messages*.php or *.json.
 *
 * A valid code is made up of at least two characters, each a lowercase ASCII letter, a digit
 * or a hyphen, and nothing else.
 *
 * @param string $code
 * @return bool
 */
public function isValidBuiltInCode( $code ) {
	Assert::parameterType( 'string', $code, '$code' );

	// The D modifier (PCRE_DOLLAR_ENDONLY) makes "$" match only at the very end of the subject.
	// Without it "$" also matches just before a trailing newline, so a code such as "en\n"
	// would be accepted and could end up inside a file name built from the code.
	return (bool)preg_match( '/^[a-z0-9-]{2,}$/D', $code );
}
151 * Returns true if a language code is an IETF tag known to MediaWiki.
// Decides whether $tag is known to MediaWiki. Visible checks: the tag must pass
// isValidBuiltInCode(); it is then tested for being a key of Data\Names::$names or for having
// a non-empty name when looked up via getLanguageName( $tag, $tag ).
// NOTE(review): the statements inside both `if` blocks and the final return are not visible in
// this excerpt; presumably they return false, true and false respectively - confirm against
// the full file.
157 public function isKnownLanguageTag( $tag ) {
158 // Quick escape for invalid input to avoid exceptions down the line when code tries to
159 // process tags which are not valid at all.
160 if ( !$this->isValidBuiltInCode( $tag ) ) {
// The isset() on the static name table is evaluated first; getLanguageName() is only called
// when the tag is not a key there.
164 if ( isset( Data\Names
::$names[$tag] ) ||
$this->getLanguageName( $tag, $tag ) !== '' ) {
172 * Get an array of language names, indexed by code.
173 * @param null|string $inLanguage Code of language in which to return the names
174 * Use self::AUTONYMS for autonyms (native names)
175 * @param string $include One of:
176 * self::ALL all available languages
177 * self::DEFINED only if the language is defined in MediaWiki or wgExtraLanguageNames
179 * self::SUPPORTED only if the language is in self::DEFINED *and* has a message file
180 * @return array Language code => language name (sorted by key)
// Caching front end for getLanguageNamesUncached(): results are stored per
// ( $inLanguage, $include ) combination in $this->languageNameCache.
// NOTE(review): the condition that separates the cache lookup from the uncached call, and the
// final return, are not visible in this excerpt; presumably the uncached call runs only on a
// cache miss and $ret is returned - confirm against the full file.
182 public function getLanguageNames( $inLanguage = self
::AUTONYMS
, $include = self
::DEFINED
) {
// Cache key has the form "<language>:<include>"; self::AUTONYMS (null) is spelled as the
// string 'null'.
183 $cacheKey = $inLanguage === self
::AUTONYMS ?
'null' : $inLanguage;
184 $cacheKey .= ":$include";
// Create the cache object on first use, configured with maxKeys = 20.
185 if ( !$this->languageNameCache
) {
186 $this->languageNameCache
= new HashBagOStuff( [ 'maxKeys' => 20 ] );
// Try the cache under the key built above.
189 $ret = $this->languageNameCache
->get( $cacheKey );
// Compute the list and store it under the same key.
191 $ret = $this->getLanguageNamesUncached( $inLanguage, $include );
192 $this->languageNameCache
->set( $cacheKey, $ret );
198 * Uncached helper for getLanguageNames
199 * @param null|string $inLanguage As getLanguageNames
200 * @param string $include As getLanguageNames
201 * @return array Language code => language name (sorted by key)
// Builds the language code => name map without caching, then narrows it according to $include.
// NOTE(review): several lines of this method are not visible in this excerpt: the body of the
// first `if`, the initialisation of $names, $returnMw and $namesMwFile, the closing of the
// condition before $namesMwFile is filled, and every return statement. The comments below
// describe only what is visible - confirm the rest against the full file.
203 private function getLanguageNamesUncached( $inLanguage, $include ) {
204 // If passed an invalid language code to use, fallback to en
205 if ( $inLanguage !== self
::AUTONYMS
&& !$this->isValidCode( $inLanguage ) ) {
// For a concrete target language (not autonyms), the hook receives $names by reference.
211 if ( $inLanguage !== self
::AUTONYMS
) {
212 # TODO: also include for self::AUTONYMS, when this code is more efficient
213 Hooks
::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] );
// Array union (+) keeps the left-hand value for duplicate keys, so entries from the
// 'ExtraLanguageNames' option take precedence over Data\Names::$names.
216 $mwNames = $this->options
->get( 'ExtraLanguageNames' ) + Data\Names
::$names;
217 if ( $this->options
->get( 'UsePigLatinVariant' ) ) {
218 // Pig Latin (for variant development)
219 $mwNames['en-x-piglatin'] = 'Igpay Atinlay';
// Merge MediaWiki's own names into $names: always for the entry of the target language itself,
// otherwise only where $names has no entry yet.
222 foreach ( $mwNames as $mwCode => $mwName ) {
223 # - Prefer own MediaWiki native name when not using the hook
224 # - For other names just add if not added through the hook
225 if ( $mwCode === $inLanguage ||
!isset( $names[$mwCode] ) ) {
226 $names[$mwCode] = $mwName;
// self::ALL branch; its body is not visible here (presumably sorts and returns $names).
230 if ( $include === self
::ALL
) {
// Restrict to the codes present in $mwNames, taking each name from the merged $names.
236 $coreCodes = array_keys( $mwNames );
237 foreach ( $coreCodes as $coreCode ) {
238 $returnMw[$coreCode] = $names[$coreCode];
// self::SUPPORTED: additionally keep only codes with a readable PHP or JSON messages file.
241 if ( $include === self
::SUPPORTED
) {
243 # We do this using a foreach over the codes instead of a directory loop so that messages
244 # files in extensions will work correctly.
245 foreach ( $returnMw as $code => $value ) {
246 if ( is_readable( $this->getMessagesFileName( $code ) ) ||
247 is_readable( $this->getJsonMessagesFileName( $code ) )
249 $namesMwFile[$code] = $names[$code];
// Sort the supported subset by language code.
253 ksort( $namesMwFile );
258 # self::DEFINED option; default if it's not one of the other two options
259 # (self::ALL/self::SUPPORTED)
/**
 * Look up the name of a single language.
 *
 * The code is lower-cased before it is looked up in the list from getLanguageNames().
 *
 * @param string $code The code of the language for which to get the name
 * @param null|string $inLanguage Code of language in which to return the name
 *   (self::AUTONYMS for autonyms, i.e. native names)
 * @param string $include See getLanguageNames(), except this defaults to self::ALL instead of
 *   self::DEFINED
 * @return string Language name or empty
 */
public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) {
	$lookupKey = strtolower( $code );
	$namesByCode = $this->getLanguageNames( $inLanguage, $include );

	// Unknown codes yield the empty string.
	return $namesByCode[$lookupKey] ?? '';
}
/**
 * Build a language-specific file name from a prefix, a language code and a suffix.
 *
 * The code is mangled by upper-casing its first character and replacing every '-' with '_'.
 *
 * @param string $prefix Prepend this to the filename
 * @param string $code Language code
 * @param string $suffix Append this to the filename
 * @throws MWException If $code is not a valid built-in language code
 * @return string $prefix . $mangledCode . $suffix
 */
public function getFileName( $prefix, $code, $suffix = '.php' ) {
	$isUsable = $this->isValidBuiltInCode( $code );
	if ( !$isUsable ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	$mangledCode = str_replace( '-', '_', ucfirst( $code ) );

	return $prefix . $mangledCode . $suffix;
}
295 * @param string $code
// Builds the path of the PHP messages file for $code via getFileName(), using the prefix
// "$IP/languages/messages/Messages" and the suffix '.php', then hands the code and the path
// (by reference) to the 'Language::getMessagesFileName' hook.
// NOTE(review): $IP is not declared in the visible lines and no return statement is visible;
// presumably $IP is a global and $file is returned - confirm against the full file.
298 public function getMessagesFileName( $code ) {
// getFileName() throws MWException for codes that fail isValidBuiltInCode().
300 $file = $this->getFileName( "$IP/languages/messages/Messages", $code, '.php' );
301 Hooks
::run( 'Language::getMessagesFileName', [ $code, &$file ] );
306 * @param string $code
308 * @throws MWException
// Returns the path of the JSON messages file for $code: "$IP/languages/i18n/<code>.json".
// Throws MWException when $code fails isValidBuiltInCode().
// NOTE(review): $IP is not declared in the visible lines; presumably it is a global declared
// on a line missing from this excerpt - confirm against the full file.
310 public function getJsonMessagesFileName( $code ) {
// Validate first, so that only codes accepted by isValidBuiltInCode() reach the path below.
313 if ( !$this->isValidBuiltInCode( $code ) ) {
314 throw new MWException( "Invalid language code \"$code\"" );
317 return "$IP/languages/i18n/$code.json";