Merge "JSON i18n shim: Only register LocalisationCacheRecache handler once"
[lhc/web/wiklou.git] / languages / Language.php
1 <?php
2 /**
3 * Internationalisation code.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Language
22 */
23
24 /**
25 * @defgroup Language Language
26 */
27
// Refuse to run when this file is loaded without the MEDIAWIKI constant
// having been defined by the including script.
if ( defined( 'MEDIAWIKI' ) === false ) {
	print "This file is part of MediaWiki, it is not a valid entry point.\n";
	exit( 1 );
}

// When the mbstring functions are available, make them operate on UTF-8.
if ( function_exists( 'mb_strtoupper' ) === true ) {
	mb_internal_encoding( 'UTF-8' );
}
36
37 /**
38 * a fake language converter
39 *
40 * @ingroup Language
41 */
class FakeConverter {
	/**
	 * The language object this converter stands in for.
	 *
	 * @var Language
	 */
	public $mLang;

	/**
	 * @param Language $langobj Language whose code and namespace names are reported
	 */
	public function __construct( $langobj ) {
		$this->mLang = $langobj;
	}

	/**
	 * No conversion is performed; the text is returned unchanged.
	 */
	public function autoConvert( $text, $variant = false ) {
		return $text;
	}

	/**
	 * @param string $text
	 * @return array Single entry: language code => unchanged text
	 */
	public function autoConvertToAllVariants( $text ) {
		$code = $this->mLang->getCode();
		return array( $code => $text );
	}

	/**
	 * No conversion is performed; the input is returned unchanged.
	 */
	public function convert( $t ) {
		return $t;
	}

	/**
	 * No conversion is performed; the text is returned unchanged.
	 */
	public function convertTo( $text, $variant ) {
		return $text;
	}

	/**
	 * @param Title $t
	 * @return mixed Whatever $t->getPrefixedText() returns
	 */
	public function convertTitle( $t ) {
		return $t->getPrefixedText();
	}

	/**
	 * @param int $ns Namespace index
	 * @return string Result of Language::getFormattedNsText() for $ns
	 */
	public function convertNamespace( $ns ) {
		return $this->mLang->getFormattedNsText( $ns );
	}

	/**
	 * @return array List containing only the language's own code
	 */
	public function getVariants() {
		$code = $this->mLang->getCode();
		return array( $code );
	}

	/**
	 * @param string $variant Ignored
	 * @return string The language's own code
	 */
	public function getVariantFallbacks( $variant ) {
		return $this->mLang->getCode();
	}

	/**
	 * @return string The language's own code
	 */
	public function getPreferredVariant() {
		return $this->mLang->getCode();
	}

	/**
	 * @return string The language's own code
	 */
	public function getDefaultVariant() {
		return $this->mLang->getCode();
	}

	/**
	 * @return string Always the empty string
	 */
	public function getURLVariant() {
		return '';
	}

	/**
	 * @return bool Always false
	 */
	public function getConvRuleTitle() {
		return false;
	}

	/**
	 * Intentionally does nothing; the by-reference arguments are left untouched.
	 */
	public function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
	}

	/**
	 * @return string Always the empty string
	 */
	public function getExtraHashOptions() {
		return '';
	}

	/**
	 * @return string Always the empty string
	 */
	public function getParsedTitle() {
		return '';
	}

	/**
	 * No markup is added; the text is returned unchanged.
	 */
	public function markNoConversion( $text, $noParse = false ) {
		return $text;
	}

	/**
	 * No conversion is performed; the key is returned unchanged.
	 */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/** @deprecated since 1.22 is no longer used */
	public function armourMath( $text ) {
		return $text;
	}

	/**
	 * @param string|null $variant
	 * @return string|null $variant when it is identical to the language's
	 *  own code, null otherwise
	 */
	public function validateVariant( $variant = null ) {
		if ( $variant === $this->mLang->getCode() ) {
			return $variant;
		}
		return null;
	}

	/**
	 * No translation is performed; the text is returned unchanged.
	 */
	public function translate( $text, $variant ) {
		return $text;
	}
}
136
137 /**
138 * Internationalisation code
139 * @ingroup Language
140 */
141 class Language {
142 /**
143 * @var LanguageConverter
144 */
145 public $mConverter;
146
147 public $mVariants, $mCode, $mLoaded = false;
148 public $mMagicExtensions = array(), $mMagicHookDone = false;
149 private $mHtmlCode = null, $mParentLanguage = false;
150
151 public $dateFormatStrings = array();
152 public $mExtendedSpecialPageAliases;
153
154 protected $namespaceNames, $mNamespaceIds, $namespaceAliases;
155
156 /**
157 * ReplacementArray object caches
158 */
159 public $transformData = array();
160
161 /**
162 * @var LocalisationCache
163 */
164 static public $dataCache;
165
166 static public $mLangObjCache = array();
167
168 static public $mWeekdayMsgs = array(
169 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
170 'friday', 'saturday'
171 );
172
173 static public $mWeekdayAbbrevMsgs = array(
174 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
175 );
176
177 static public $mMonthMsgs = array(
178 'january', 'february', 'march', 'april', 'may_long', 'june',
179 'july', 'august', 'september', 'october', 'november',
180 'december'
181 );
182 static public $mMonthGenMsgs = array(
183 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
184 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
185 'december-gen'
186 );
187 static public $mMonthAbbrevMsgs = array(
188 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
189 'sep', 'oct', 'nov', 'dec'
190 );
191
192 static public $mIranianCalendarMonthMsgs = array(
193 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
194 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
195 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
196 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
197 );
198
199 static public $mHebrewCalendarMonthMsgs = array(
200 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
201 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
202 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
203 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
204 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
205 );
206
207 static public $mHebrewCalendarMonthGenMsgs = array(
208 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
209 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
210 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
211 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
212 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
213 );
214
215 static public $mHijriCalendarMonthMsgs = array(
216 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
217 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
218 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
219 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
220 );
221
222 /**
223 * @since 1.20
224 * @var array
225 */
226 static public $durationIntervals = array(
227 'millennia' => 31556952000,
228 'centuries' => 3155695200,
229 'decades' => 315569520,
230 'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 )
231 'weeks' => 604800,
232 'days' => 86400,
233 'hours' => 3600,
234 'minutes' => 60,
235 'seconds' => 1,
236 );
237
238 /**
239 * Cache for language fallbacks.
240 * @see Language::getFallbacksIncludingSiteLanguage
241 * @since 1.21
242 * @var array
243 */
244 static private $fallbackLanguageCache = array();
245
246 /**
247 * Get a cached or new language object for a given language code
248 * @param string $code
249 * @return Language
250 */
static function factory( $code ) {
	global $wgDummyLanguageCodes, $wgLangObjCacheSize;

	// Map dummy codes onto the code they stand for.
	if ( isset( $wgDummyLanguageCodes[$code] ) ) {
		$code = $wgDummyLanguageCodes[$code];
	}

	// Reuse the cached object when there is one, otherwise build a new one.
	$langObj = isset( self::$mLangObjCache[$code] )
		? self::$mLangObjCache[$code]
		: self::newFromCode( $code );

	// Move the entry to the front of the cache. This deliberately uses
	// unset() plus array union instead of array_merge(): array_merge()
	// renumbers integer keys, and PHP casts numeric-looking codes such as
	// "12" to integer keys, so such an entry would lose its key and could
	// never be found again.
	unset( self::$mLangObjCache[$code] );
	self::$mLangObjCache = array( $code => $langObj ) + self::$mLangObjCache;

	// Drop the entries at the back of the cache if it has grown past its limit.
	self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true );

	return $langObj;
}
270
271 /**
272 * Create a language object for a given language code
273 * @param string $code
274 * @throws MWException
275 * @return Language
276 */
protected static function newFromCode( $code ) {
	// The code is turned into a class file name further down, so refuse
	// anything containing path-like characters.
	if ( !self::isValidCode( $code )
		|| strcspn( $code, ":/\\\000" ) !== strlen( $code )
	) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	// Codes that cannot have a class file get a plain Language object
	// carrying the requested code (supports uselang= hacks).
	if ( !self::isValidBuiltInCode( $code ) ) {
		$plain = new Language();
		$plain->setCode( $code );
		return $plain;
	}

	// Prefer a dedicated language class for this exact code.
	$ownClass = self::classFromCode( $code );
	self::preloadLanguageClass( $ownClass );
	if ( class_exists( $ownClass ) ) {
		return new $ownClass();
	}

	// Otherwise walk the fallback chain until a code with a class is found.
	foreach ( self::getFallbacksFor( $code ) as $fallbackCode ) {
		if ( !self::isValidBuiltInCode( $fallbackCode ) ) {
			throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" );
		}

		$fallbackClass = self::classFromCode( $fallbackCode );
		self::preloadLanguageClass( $fallbackClass );
		if ( !class_exists( $fallbackClass ) ) {
			continue;
		}

		// Build the fallback language but label it with the requested code.
		$lang = self::newFromCode( $fallbackCode );
		$lang->setCode( $code );
		return $lang;
	}

	throw new MWException( "Invalid fallback sequence for language '$code'" );
}
319
320 /**
321 * Checks whether any localisation is available for that language tag
322 * in MediaWiki (MessagesXx.php exists).
323 *
324 * @param string $code Language tag (in lower case)
325 * @return bool Whether language is supported
326 * @since 1.21
327 */
public static function isSupportedLanguage( $code ) {
	if ( !self::isValidBuiltInCode( $code ) ) {
		return false;
	}

	// A readable PHP messages file is enough ...
	if ( is_readable( self::getMessagesFileName( $code ) ) ) {
		return true;
	}

	// ... and so is a readable JSON messages file.
	return is_readable( self::getJsonMessagesFileName( $code ) );
}
334
335 /**
336 * Returns true if a language code string is a well-formed language tag
337 * according to RFC 5646.
338 * This function only checks well-formedness; it doesn't check that
339 * language, script or variant codes actually exist in the repositories.
340 *
341 * Based on regexes by Mark Davis of the Unicode Consortium:
342 * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
343 *
344 * @param string $code
345 * @param bool $lenient Whether to allow '_' as separator. The default is only '-'.
346 *
347 * @return bool
348 * @since 1.21
349 */
public static function isWellFormedLanguageTag( $code, $lenient = false ) {
	// Building blocks. The input is lower-cased before matching, so every
	// literal in the pattern must be lower case too.
	$alpha = '[a-z]';
	$digit = '[0-9]';
	$alphanum = '[a-z0-9]';
	$x = 'x'; # private use singleton
	$singleton = '[a-wy-z]'; # other singleton
	$s = $lenient ? '[-_]' : '-'; # subtag separator

	$language = "{$alpha}{2,8}|{$alpha}{2,3}{$s}{$alpha}{3}";
	$script = "{$alpha}{4}"; # ISO 15924
	$region = "(?:{$alpha}{2}|{$digit}{3})"; # ISO 3166-1 alpha-2 or UN M.49
	$variant = "(?:{$alphanum}{5,8}|{$digit}{$alphanum}{3})";
	$extension = "{$singleton}(?:{$s}{$alphanum}{2,8})+";
	$privateUse = "{$x}(?:{$s}{$alphanum}{1,8})+";

	# Define certain grandfathered codes, since otherwise the regex is pretty useless.
	# Since these are limited, this is safe even with later changes to the registry --
	# the only oddity is that it might change the type of the tag, and thus
	# the results from the capturing groups.
	# http://www.iana.org/assignments/language-subtag-registry
	#
	# These are spelled in lower case because the subject string is passed
	# through strtolower() and the pattern is case-sensitive; upper-case
	# literals (en-GB-oed, sgn-BE-fr, ...) could never match.
	$grandfathered = "en{$s}gb{$s}oed"
		. "|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
		. "|no{$s}(?:bok|nyn)"
		. "|sgn{$s}(?:be{$s}(?:fr|nl)|ch{$s}de)"
		. "|zh{$s}min{$s}nan";

	$variantList = "{$variant}(?:{$s}{$variant})*";
	$extensionList = "{$extension}(?:{$s}{$extension})*";

	$langtag = "(?:({$language})"
		. "(?:{$s}{$script})?"
		. "(?:{$s}{$region})?"
		. "(?:{$s}{$variantList})?"
		. "(?:{$s}{$extensionList})?"
		. "(?:{$s}{$privateUse})?)";

	# The final breakdown, with capturing groups for each of these components
	# The variants, extensions, grandfathered, and private-use may have interior '-'
	$root = "^(?:{$langtag}|{$privateUse}|{$grandfathered})$";

	// The D modifier makes "$" match only at the very end of the string,
	// so a tag followed by a newline is not reported as well-formed.
	return (bool)preg_match( "/$root/D", strtolower( $code ) );
}
394
395 /**
396 * Returns true if a language code string is of a valid form, whether or
397 * not it exists. This includes codes which are used solely for
398 * customisation via the MediaWiki namespace.
399 *
400 * @param string $code
401 *
402 * @return bool
403 */
public static function isValidCode( $code ) {
	// Per-request memo of earlier answers, keyed by code.
	static $cache = array();

	if ( !isset( $cache[$code] ) ) {
		// People think language codes are html safe, so enforce it.
		// Ideally we should only allow a-zA-Z0-9-
		// but, .+ and other chars are often used for {{int:}} hacks
		// see bugs 37564, 37587, 36938
		$cache[$code] =
			strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code )
			&& !preg_match( Title::getTitleInvalidRegex(), $code );
	}

	return $cache[$code];
}
419
420 /**
421 * Returns true if a language code is of a valid form for the purposes of
422 * internal customisation of MediaWiki, via Messages*.php or *.json.
423 *
424 * @param string $code
425 *
426 * @throws MWException
427 * @since 1.18
428 * @return bool
429 */
public static function isValidBuiltInCode( $code ) {
	// Anything but a string is a programming error on the caller's side;
	// report the offending type (and class, for objects).
	if ( !is_string( $code ) ) {
		$addmsg = is_object( $code ) ? " of class " . get_class( $code ) : '';
		$type = gettype( $code );
		throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" );
	}

	// At least two characters out of ASCII letters, digits and '-'.
	// The D modifier anchors "$" at the very end of the string; without it
	// a code followed by a newline ("en\n") would be accepted.
	return (bool)preg_match( '/^[a-z0-9-]{2,}$/iD', $code );
}
444
/**
 * Returns true if a language code is an IETF tag known to MediaWiki.
 *
 * A tag counts as known when it is a key of $coreLanguageNames (loaded
 * from languages/Names.php) or when fetchLanguageName() yields a
 * non-empty name for it.
 *
 * @param string $tag
 *
 * @since 1.21
 * @return bool
 */
public static function isKnownLanguageTag( $tag ) {
	global $IP;
	static $coreLanguageNames;

	// Quick escape for invalid input to avoid exceptions down the line
	// when code tries to process tags which are not valid at all.
	if ( !self::isValidBuiltInCode( $tag ) ) {
		return false;
	}

	// Load the names file only while the static list is still unset.
	if ( $coreLanguageNames === null ) {
		include "$IP/languages/Names.php";
	}

	return isset( $coreLanguageNames[$tag] )
		|| self::fetchLanguageName( $tag, $tag ) !== '';
}
475
476 /**
477 * @param string $code
478 * @return string Name of the language class
479 */
public static function classFromCode( $code ) {
	// English is handled by the base class itself.
	if ( $code != 'en' ) {
		// "Language" + code with first letter upper-cased and '-' turned into '_'.
		return 'Language' . str_replace( '-', '_', ucfirst( $code ) );
	}

	return 'Language';
}
487
488 /**
489 * Includes language class files
490 *
491 * @param string $class Name of the language class
492 */
public static function preloadLanguageClass( $class ) {
	global $IP;

	// The base class lives in this very file; only other classes need loading.
	if ( $class !== 'Language' ) {
		$classFile = "$IP/languages/classes/$class.php";
		if ( file_exists( $classFile ) ) {
			include_once $classFile;
		}
	}
}
504
505 /**
506 * Get the LocalisationCache instance
507 *
508 * @return LocalisationCache
509 */
public static function getLocalisationCache() {
	global $wgLocalisationCacheConf;

	// Create the shared cache on first use; the class to instantiate is
	// named by the 'class' entry of the configuration array.
	if ( self::$dataCache === null ) {
		$cacheClass = $wgLocalisationCacheConf['class'];
		self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	}

	return self::$dataCache;
}
518
function __construct() {
	$this->mConverter = new FakeConverter( $this );

	// Derive the language code from the class name: the base class is
	// English, any other class has the 8-character "Language" prefix
	// stripped, is lower-cased and gets '_' turned back into '-'.
	$className = get_class( $this );
	$this->mCode = $className == 'Language'
		? 'en'
		: str_replace( '_', '-', strtolower( substr( $className, 8 ) ) );

	// Make sure the shared localisation cache exists.
	self::getLocalisationCache();
}
529
530 /**
531 * Reduce memory usage
532 */
function __destruct() {
	// Drop every property visible from here, one by one.
	foreach ( array_keys( get_object_vars( $this ) ) as $property ) {
		unset( $this->$property );
	}
}
538
539 /**
540 * Hook which will be called if this is the content language.
541 * Descendants can use this to register hook functions or modify globals
542 */
function initContLang() {
	// Intentionally empty in the base class.
}
545
546 /**
547 * Same as getFallbacksFor for current language.
548 * @return array|bool
549 * @deprecated since 1.19
550 */
function getFallbackLanguageCode() {
	// Emit the deprecation notice before doing anything else.
	wfDeprecated( __METHOD__, '1.19' );

	$fallback = self::getFallbackFor( $this->mCode );
	return $fallback;
}
556
557 /**
558 * @return array
559 * @since 1.19
560 */
function getFallbackLanguages() {
	// Delegates to the static lookup using this object's own code.
	$fallbacks = self::getFallbacksFor( $this->mCode );
	return $fallbacks;
}
564
565 /**
566 * Exports $wgBookstoreListEn
567 * @return array
568 */
function getBookstoreList() {
	// Read the 'bookstoreList' item for this language from the localisation cache.
	$list = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $list;
}
572
573 /**
574 * Returns an array of localised namespaces indexed by their numbers. If the namespace is not
575 * available in localised form, it will be included in English.
576 *
577 * @return array
578 */
public function getNamespaces() {
	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	// Already built: hand back the memoised list.
	if ( !is_null( $this->namespaceNames ) ) {
		return $this->namespaceNames;
	}

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$validNamespaces = MWNamespace::getCanonicalNamespaces();

	// Array union keeps the first value per key, so site-configured names
	// win over localised ones, which win over canonical ones.
	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;

	// The project namespace and its talk namespace come from configuration;
	// without an explicit talk name the existing one is passed through
	// fixVariableInNamespace().
	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk
		? $wgMetaNamespaceTalk
		: $this->fixVariableInNamespace( $this->namespaceNames[NS_PROJECT_TALK] );

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( array_keys( $this->namespaceNames ) as $nsId ) {
		if ( !isset( $validNamespaces[$nsId] ) ) {
			unset( $this->namespaceNames[$nsId] );
		}
	}

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	wfRunHooks( 'LanguageGetNamespaces', array( &$this->namespaceNames ) );

	return $this->namespaceNames;
}
613
614 /**
615 * Arbitrarily set all of the namespace names at once. Mainly used for testing
616 * @param array $namespaces Array of namespaces (id => name)
617 */
public function setNamespaces( array $namespaces ) {
	// Forget the derived name => id map so it is rebuilt from the new names.
	$this->mNamespaceIds = null;
	$this->namespaceNames = $namespaces;
}
622
623 /**
624 * Resets all of the namespace caches. Mainly used for testing
625 */
public function resetNamespaces() {
	// Clear all three memoised namespace structures so they are rebuilt on demand.
	$this->namespaceAliases = null;
	$this->mNamespaceIds = null;
	$this->namespaceNames = null;
}
631
632 /**
633 * A convenience function that returns the same thing as
634 * getNamespaces() except with the array values changed to ' '
635 * where it found '_', useful for producing output to be displayed
636 * e.g. in <select> forms.
637 *
638 * @return array
639 */
function getFormattedNamespaces() {
	$formatted = array();

	// Same keys and order as getNamespaces(), with '_' shown as ' '.
	foreach ( $this->getNamespaces() as $id => $name ) {
		$formatted[$id] = strtr( $name, '_', ' ' );
	}

	return $formatted;
}
647
648 /**
649 * Get a namespace value by key
650 * <code>
651 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
652 * echo $mw_ns; // prints 'MediaWiki'
653 * </code>
654 *
655 * @param int $index The array key of the namespace to return
656 * @return string|bool String if the namespace value exists, otherwise false
657 */
function getNsText( $index ) {
	$names = $this->getNamespaces();

	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}

	// No name known for this index.
	return false;
}
663
664 /**
665 * A convenience function that returns the same thing as
666 * getNsText() except with '_' changed to ' ', useful for
667 * producing output.
668 *
669 * <code>
670 * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK );
671 * echo $mw_ns; // prints 'MediaWiki talk'
672 * </code>
673 *
674 * @param int $index The array key of the namespace to return
675 * @return string Namespace name without underscores (empty string if namespace does not exist)
676 */
function getFormattedNsText( $index ) {
	// Take the raw namespace text and show underscores as spaces.
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
682
683 /**
684 * Returns gender-dependent namespace alias if available.
685 * See https://www.mediawiki.org/wiki/Manual:$wgExtraGenderNamespaces
686 * @param int $index Namespace index
687 * @param string $gender Gender key (male, female... )
688 * @return string
689 * @since 1.18
690 */
function getGenderNsText( $index, $gender ) {
	global $wgExtraGenderNamespaces;

	// Site configuration takes precedence over the localisation data
	// (array union keeps the first value per key).
	$genderAliases = $wgExtraGenderNamespaces +
		self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );

	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}

	// No gendered form available: use the regular namespace text.
	return $this->getNsText( $index );
}
699
700 /**
701 * Whether this language uses gender-dependent namespace aliases.
702 * See https://www.mediawiki.org/wiki/Manual:$wgExtraGenderNamespaces
703 * @return bool
704 * @since 1.18
705 */
function needsGenderDistinction() {
	global $wgExtraGenderNamespaces, $wgExtraNamespaces;

	// $wgExtraGenderNamespaces overrides everything
	if ( count( $wgExtraGenderNamespaces ) > 0 ) {
		return true;
	}

	/// @todo There may be other gender namespace than NS_USER & NS_USER_TALK in the future
	// $wgExtraNamespaces overrides any gender aliases specified in i18n files
	if ( isset( $wgExtraNamespaces[NS_USER] ) && isset( $wgExtraNamespaces[NS_USER_TALK] ) ) {
		return false;
	}

	// Check what is in i18n files
	$aliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	return count( $aliases ) > 0;
}
721
722 /**
723 * Get a namespace key by value, case insensitive.
724 * Only matches namespace names for the current language, not the
725 * canonical ones defined in Namespace.php.
726 *
727 * @param string $text
728 * @return int|bool An integer if $text is a valid value otherwise false
729 */
function getLocalNsIndex( $text ) {
	// The id map is keyed by lower-cased names, so lower-case the query too.
	$lctext = $this->lc( $text );
	$ids = $this->getNamespaceIds();

	if ( isset( $ids[$lctext] ) ) {
		return $ids[$lctext];
	}

	return false;
}
735
736 /**
737 * @return array
738 */
function getNamespaceAliases() {
	global $wgExtraGenderNamespaces;

	// Already built: hand back the memoised map.
	if ( !is_null( $this->namespaceAliases ) ) {
		return $this->namespaceAliases;
	}

	$aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
	if ( !$aliases ) {
		$aliases = array();
	}

	// Aliases of the project talk namespace are re-keyed with the name
	// returned by fixVariableInNamespace().
	foreach ( $aliases as $name => $index ) {
		if ( $index !== NS_PROJECT_TALK ) {
			continue;
		}
		unset( $aliases[$name] );
		$aliases[$this->fixVariableInNamespace( $name )] = $index;
	}

	// Every gender-specific form is an alias of its namespace as well.
	$genders = $wgExtraGenderNamespaces +
		(array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	foreach ( $genders as $index => $forms ) {
		foreach ( $forms as $alias ) {
			$aliases[$alias] = $index;
		}
	}

	# Also add converted namespace names as aliases, to avoid confusion.
	$convertedNames = array();
	foreach ( $this->getVariants() as $variant ) {
		if ( $variant === $this->mCode ) {
			continue;
		}
		foreach ( $this->getNamespaces() as $ns => $_ ) {
			$convertedNames[$this->getConverter()->convertNamespace( $ns, $variant )] = $ns;
		}
	}

	// Explicit aliases take precedence over converted names.
	$this->namespaceAliases = $aliases + $convertedNames;

	return $this->namespaceAliases;
}
779
780 /**
781 * @return array
782 */
function getNamespaceIds() {
	global $wgNamespaceAliases;

	// Already built: hand back the memoised map.
	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	// Later sources overwrite earlier ones for the same lower-cased name:
	// names first, then language aliases, then site-configured aliases.
	foreach ( $this->getNamespaces() as $index => $name ) {
		$this->mNamespaceIds[$this->lc( $name )] = $index;
	}
	foreach ( $this->getNamespaceAliases() as $alias => $index ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $index;
	}
	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $index ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $index;
		}
	}

	return $this->mNamespaceIds;
}
805
806 /**
807 * Get a namespace key by value, case insensitive. Canonical namespace
808 * names override custom ones defined for the current language.
809 *
810 * @param string $text
811 * @return int|bool An integer if $text is a valid value otherwise false
812 */
function getNsIndex( $text ) {
	$lctext = $this->lc( $text );

	// Canonical names are consulted first.
	$canonicalIndex = MWNamespace::getCanonicalIndex( $lctext );
	if ( $canonicalIndex === null ) {
		// Not canonical: fall back to this language's names and aliases.
		$ids = $this->getNamespaceIds();
		return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
	}

	return $canonicalIndex;
}
822
823 /**
824 * short names for language variants used for language conversion links.
825 *
826 * @param string $code
827 * @param bool $usemsg Use the "variantname-xyz" message if it exists
828 * @return string
829 */
function getVariantname( $code, $usemsg = true ) {
	$msgKey = "variantname-$code";

	// A dedicated message wins when messages are allowed and it exists.
	if ( $usemsg && wfMessage( $msgKey )->exists() ) {
		return $this->getMessageFromDB( $msgKey );
	}

	// Otherwise use the language name if one is defined, else the bare code.
	$name = self::fetchLanguageName( $code );
	return $name ? $name : $code;
}
843
844 /**
845 * @param string $name
846 * @return string
847 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();

	// Use the first alias listed for this page, when there is one.
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;

	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
855
856 /**
857 * @return array
858 */
function getDatePreferences() {
	// Read the 'datePreferences' item for this language from the localisation cache.
	$preferences = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $preferences;
}
862
863 /**
864 * @return array
865 */
function getDateFormats() {
	// Read the 'dateFormats' item for this language from the localisation cache.
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
869
870 /**
871 * @return array|string
872 */
function getDefaultDateFormat() {
	global $wgAmericanDates;

	$df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $df !== 'dmy or mdy' ) {
		return $df;
	}

	// The special value leaves the choice to the site configuration.
	return $wgAmericanDates ? 'mdy' : 'dmy';
}
882
883 /**
884 * @return array
885 */
function getDatePreferenceMigrationMap() {
	// Read the 'datePreferenceMigrationMap' item for this language from the localisation cache.
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
889
890 /**
891 * @param string $image
892 * @return array|null
893 */
function getImageFile( $image ) {
	// Look up the $image entry inside the 'imageFiles' item for this language.
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
897
898 /**
899 * @return array
900 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );

	// The cast guarantees an array even when the cache item is not one.
	return (array)$toggles;
}
904
905 /**
906 * @param string $tog
907 * @return string
908 */
function getUserToggle( $tog ) {
	// Toggle labels are stored as messages named "tog-<toggle>".
	$msgKey = "tog-$tog";
	return $this->getMessageFromDB( $msgKey );
}
912
913 /**
914 * Get native language names, indexed by code.
915 * Only those defined in MediaWiki, no other data like CLDR.
916 * If $customisedOnly is true, only returns codes with a messages file
917 *
918 * @param bool $customisedOnly
919 *
920 * @return array
921 * @deprecated since 1.20, use fetchLanguageNames()
922 */
public static function getLanguageNames( $customisedOnly = false ) {
	// 'mwfile' limits the result to languages that have a messages file.
	$include = $customisedOnly ? 'mwfile' : 'mw';
	return self::fetchLanguageNames( null, $include );
}
926
927 /**
928 * Get translated language names. This is done on best effort and
929 * by default this is exactly the same as Language::getLanguageNames.
930 * The CLDR extension provides translated names.
931 * @param string $code Language code.
932 * @return array Language code => language name
933 * @since 1.18.0
934 * @deprecated since 1.20, use fetchLanguageNames()
935 */
public static function getTranslatedLanguageNames( $code ) {
	// Thin wrapper: all available names, in the language given by $code.
	$names = self::fetchLanguageNames( $code, 'all' );
	return $names;
}
939
940 /**
941 * Get an array of language names, indexed by code.
942 * @param null|string $inLanguage Code of language in which to return the names
943 * Use null for autonyms (native names)
944 * @param string $include One of:
945 * 'all' all available languages
946 * 'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
947 * 'mwfile' only if the language is in 'mw' *and* has a message file
948 * @return array Language code => language name
949 * @since 1.20
950 */
public static function fetchLanguageNames( $inLanguage = null, $include = 'mw' ) {
	global $wgExtraLanguageNames, $IP;
	static $coreLanguageNames;

	// Load the core names once; the included file is expected to fill
	// $coreLanguageNames in this scope.
	if ( $coreLanguageNames === null ) {
		include "$IP/languages/Names.php";
	}

	$names = array();

	if ( $inLanguage ) {
		# TODO: also include when $inLanguage is null, when this code is more efficient
		wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $inLanguage ) );
	}

	$mwNames = $wgExtraLanguageNames + $coreLanguageNames;
	foreach ( $mwNames as $mwCode => $mwName ) {
		# - Prefer own MediaWiki native name when not using the hook
		# - For other names just add if not added through the hook
		if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
			$names[$mwCode] = $mwName;
		}
	}

	if ( $include === 'all' ) {
		return $names;
	}

	// Restrict the result to codes MediaWiki itself defines, in that order.
	$returnMw = array();
	foreach ( $mwNames as $coreCode => $unused ) {
		$returnMw[$coreCode] = $names[$coreCode];
	}

	# 'mw' option; default if it's not one of the other two options (all/mwfile)
	if ( $include !== 'mwfile' ) {
		return $returnMw;
	}

	# We do this using a foreach over the codes instead of a directory
	# loop so that messages files in extensions will work correctly.
	$namesMwFile = array();
	foreach ( $returnMw as $code => $name ) {
		if ( is_readable( self::getMessagesFileName( $code ) )
			|| is_readable( self::getJsonMessagesFileName( $code ) )
		) {
			$namesMwFile[$code] = $name;
		}
	}

	return $namesMwFile;
}
1004
1005 /**
1006 * @param string $code The code of the language for which to get the name
1007 * @param null|string $inLanguage Code of language in which to return the name (null for autonyms)
1008 * @param string $include 'all', 'mw' or 'mwfile'; see fetchLanguageNames()
1009 * @return string Language name or empty
1010 * @since 1.20
1011 */
public static function fetchLanguageName( $code, $inLanguage = null, $include = 'all' ) {
	// The name list is looked up with the lower-cased code.
	$code = strtolower( $code );
	$names = self::fetchLanguageNames( $inLanguage, $include );

	if ( array_key_exists( $code, $names ) ) {
		return $names[$code];
	}

	return '';
}
1017
1018 /**
1019 * Get a message from the MediaWiki namespace.
1020 *
1021 * @param string $msg Message name
1022 * @return string
1023 */
function getMessageFromDB( $msg ) {
	// Resolve the message in this language and return its text form.
	$message = wfMessage( $msg )->inLanguage( $this );
	return $message->text();
}
1027
1028 /**
1029 * Get the native language name of $code.
1030 * Only if defined in MediaWiki, no other data like CLDR.
1031 * @param string $code
1032 * @return string
1033 * @deprecated since 1.20, use fetchLanguageName()
1034 */
function getLanguageName( $code ) {
	// Thin wrapper around fetchLanguageName() with its default arguments.
	$name = self::fetchLanguageName( $code );
	return $name;
}
1038
1039 /**
1040 * @param string $key
1041 * @return string
1042 */
function getMonthName( $key ) {
	// $key is 1-based while the message list is 0-based.
	$msgKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
1046
1047 /**
1048 * @return array
1049 */
function getMonthNamesArray() {
	// Index 0 is an empty placeholder so that month numbers 1-12 index directly.
	$monthNames = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$monthNames[] = $this->getMonthName( $month );
	}
	return $monthNames;
}
1057
1058 /**
1059 * @param string $key
1060 * @return string
1061 */
function getMonthNameGen( $key ) {
	// $key is 1-based while the message list is 0-based.
	$msgKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
1065
1066 /**
1067 * @param string $key
1068 * @return string
1069 */
function getMonthAbbreviation( $key ) {
	// $key is 1-based while the message list is 0-based.
	$msgKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
1073
/**
 * Get all twelve month abbreviations.
 *
 * @return array Index 0 is an empty string, indexes 1..12 hold getMonthAbbreviation( 1..12 )
 */
function getMonthAbbreviationsArray() {
	// Slot 0 is a placeholder so that the month number can be used as the index
	$abbreviations = array( '' );
	$month = 1;
	while ( $month <= 12 ) {
		$abbreviations[] = $this->getMonthAbbreviation( $month );
		$month++;
	}

	return $abbreviations;
}
1084
/**
 * Get the localised name of a weekday.
 *
 * @param string $key 1-based weekday number; entry $key - 1 of self::$mWeekdayMsgs is used
 * @return string
 */
function getWeekdayName( $key ) {
	$messageKey = self::$mWeekdayMsgs[$key - 1];

	return $this->getMessageFromDB( $messageKey );
}
1092
/**
 * Get the localised abbreviation of a weekday name.
 *
 * @param string $key 1-based weekday number; entry $key - 1 of self::$mWeekdayAbbrevMsgs is used
 * @return string
 */
function getWeekdayAbbreviation( $key ) {
	$messageKey = self::$mWeekdayAbbrevMsgs[$key - 1];

	return $this->getMessageFromDB( $messageKey );
}
1100
/**
 * Get the localised name of a month of the Iranian calendar.
 *
 * @param string $key 1-based month number; entry $key - 1 of
 *   self::$mIranianCalendarMonthMsgs is used
 * @return string
 */
function getIranianCalendarMonthName( $key ) {
	$messageKey = self::$mIranianCalendarMonthMsgs[$key - 1];

	return $this->getMessageFromDB( $messageKey );
}
1108
/**
 * Get the localised name of a month of the Hebrew calendar.
 *
 * @param string $key 1-based month number; entry $key - 1 of
 *   self::$mHebrewCalendarMonthMsgs is used
 * @return string
 */
function getHebrewCalendarMonthName( $key ) {
	$messageKey = self::$mHebrewCalendarMonthMsgs[$key - 1];

	return $this->getMessageFromDB( $messageKey );
}
1116
/**
 * Get the localised genitive form of a Hebrew calendar month name.
 *
 * @param string $key 1-based month number; entry $key - 1 of
 *   self::$mHebrewCalendarMonthGenMsgs is used
 * @return string
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$messageKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];

	return $this->getMessageFromDB( $messageKey );
}
1124
/**
 * Get the localised name of a month of the Hijri calendar.
 *
 * @param string $key 1-based month number; entry $key - 1 of
 *   self::$mHijriCalendarMonthMsgs is used
 * @return string
 */
function getHijriCalendarMonthName( $key ) {
	$messageKey = self::$mHijriCalendarMonthMsgs[$key - 1];

	return $this->getMessageFromDB( $messageKey );
}
1132
/**
 * This is a workalike of PHP's date() function, but with better
 * internationalisation, a reduced set of format characters, and a better
 * escaping format.
 *
 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrUeIOPTZ. See
 * the PHP manual for definitions. There are a number of extensions, which
 * start with "x":
 *
 *    xn   Do not translate digits of the next numeric format character
 *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 *    xr   Use roman numerals for the next numeric format character
 *    xh   Use hebrew numerals for the next numeric format character
 *    xx   Literal x
 *    xg   Genitive month name
 *
 *    xij  j (day number) in Iranian calendar
 *    xiF  F (month name) in Iranian calendar
 *    xin  n (month number) in Iranian calendar
 *    xiy  y (two digit year) in Iranian calendar
 *    xiY  Y (full year) in Iranian calendar
 *
 *    xjj  j (day number) in Hebrew calendar
 *    xjF  F (month name) in Hebrew calendar
 *    xjt  t (days in month) in Hebrew calendar
 *    xjx  xg (genitive month name) in Hebrew calendar
 *    xjn  n (month number) in Hebrew calendar
 *    xjY  Y (full year) in Hebrew calendar
 *
 *    xmj  j (day number) in Hijri calendar
 *    xmF  F (month name) in Hijri calendar
 *    xmn  n (month number) in Hijri calendar
 *    xmY  Y (full year) in Hijri calendar
 *
 *    xkY  Y (full year) in Thai solar calendar. Months and days are
 *                       identical to the Gregorian calendar
 *    xoY  Y (full year) in Minguo calendar or Juche year.
 *                       Months and days are identical to the
 *                       Gregorian calendar
 *    xtY  Y (full year) in Japanese nengo. Months and days are
 *                       identical to the Gregorian calendar
 *
 * Characters enclosed in double quotes will be considered literal (with
 * the quotes themselves removed). Unmatched quotes will be considered
 * literal quotes. Example:
 *
 * "The month is" F       => The month is January
 * i's"                   => 20'11"
 *
 * Backslash escaping is also supported.
 *
 * Input timestamp is assumed to be pre-normalized to the desired local
 * time zone, if any. Note that the format characters crUeIOPTZ will assume
 * $ts is UTC if $zone is not given.
 *
 * Numeric format characters do not append to the output directly: they set
 * $num, and the end of each loop iteration renders it raw, as a roman
 * numeral, as a Hebrew numeral, or through formatNum(), depending on the
 * xn/xN/xr/xh flags seen so far.
 *
 * @param string $format
 * @param string $ts 14-character timestamp
 *      YYYYMMDDHHMMSS
 *      01234567890123
 * @param DateTimeZone $zone Timezone of $ts; when omitted, UTC is used for
 *   the format characters that need a DateTime object
 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 *
 * @throws MWException If $ts is not exactly 14 characters long or is not all digits
 * @return string
 */
function sprintfDate( $format, $ts, DateTimeZone $zone = null ) {
	// Output accumulator
	$s = '';
	// One-shot flags (xn, xr, xh): apply to the next numeric format character
	$raw = false;
	$roman = false;
	$hebrewNum = false;
	// DateTime object for $ts, created on first use by the format characters that need it
	$dateTimeObj = false;
	// Persistent raw-digit flag, flipped by each xN
	$rawToggle = false;
	// Calendar conversions of $ts, each computed on first use and reused afterwards
	$iranian = false;
	$hebrew = false;
	$hijri = false;
	$thai = false;
	$minguo = false;
	$tenno = false;

	if ( strlen( $ts ) !== 14 ) {
		throw new MWException( __METHOD__ . ": The timestamp $ts should have 14 characters" );
	}

	if ( !ctype_digit( $ts ) ) {
		throw new MWException( __METHOD__ . ": The timestamp $ts should be a number" );
	}

	$formatLength = strlen( $format );
	for ( $p = 0; $p < $formatLength; $p++ ) {
		// Set by numeric format characters; stays false for everything else
		$num = false;
		$code = $format[$p];
		// "x" introduces a two-character code (unless it is the last character)
		if ( $code == 'x' && $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}

		// Calendar prefixes take a third character, e.g. "xij"
		if ( ( $code === 'xi'
				|| $code === 'xj'
				|| $code === 'xk'
				|| $code === 'xm'
				|| $code === 'xo'
				|| $code === 'xt' )
			&& $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}

		switch ( $code ) {
			case 'xx':
				$s .= 'x';
				break;
			case 'xn':
				$raw = true;
				break;
			case 'xN':
				$rawToggle = !$rawToggle;
				break;
			case 'xr':
				$roman = true;
				break;
			case 'xh':
				$hebrewNum = true;
				break;
			case 'xg':
				$s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
				break;
			case 'xjx':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
				break;
			case 'd':
				$num = substr( $ts, 6, 2 );
				break;
			case 'D':
				if ( !$dateTimeObj ) {
					$dateTimeObj = DateTime::createFromFormat(
						'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' )
					);
				}
				// format( 'w' ) is 0-based, getWeekdayAbbreviation() expects 1-based
				$s .= $this->getWeekdayAbbreviation( $dateTimeObj->format( 'w' ) + 1 );
				break;
			case 'j':
				$num = intval( substr( $ts, 6, 2 ) );
				break;
			case 'xij':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[2];
				break;
			case 'xmj':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[2];
				break;
			case 'xjj':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[2];
				break;
			case 'l':
				if ( !$dateTimeObj ) {
					$dateTimeObj = DateTime::createFromFormat(
						'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' )
					);
				}
				// format( 'w' ) is 0-based, getWeekdayName() expects 1-based
				$s .= $this->getWeekdayName( $dateTimeObj->format( 'w' ) + 1 );
				break;
			case 'F':
				$s .= $this->getMonthName( substr( $ts, 4, 2 ) );
				break;
			case 'xiF':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$s .= $this->getIranianCalendarMonthName( $iranian[1] );
				break;
			case 'xmF':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$s .= $this->getHijriCalendarMonthName( $hijri[1] );
				break;
			case 'xjF':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
				break;
			case 'm':
				$num = substr( $ts, 4, 2 );
				break;
			case 'M':
				$s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
				break;
			case 'n':
				$num = intval( substr( $ts, 4, 2 ) );
				break;
			case 'xin':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[1];
				break;
			case 'xmn':
				if ( !$hijri ) {
					$hijri = self::tsToHijri ( $ts );
				}
				$num = $hijri[1];
				break;
			case 'xjn':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[1];
				break;
			case 'xjt':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				// Fourth element of tsToHebrew() is the length of the resulting month
				$num = $hebrew[3];
				break;
			case 'Y':
				$num = substr( $ts, 0, 4 );
				break;
			case 'xiY':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[0];
				break;
			case 'xmY':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[0];
				break;
			case 'xjY':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[0];
				break;
			case 'xkY':
				if ( !$thai ) {
					$thai = self::tsToYear( $ts, 'thai' );
				}
				$num = $thai[0];
				break;
			case 'xoY':
				if ( !$minguo ) {
					$minguo = self::tsToYear( $ts, 'minguo' );
				}
				$num = $minguo[0];
				break;
			case 'xtY':
				if ( !$tenno ) {
					$tenno = self::tsToYear( $ts, 'tenno' );
				}
				$num = $tenno[0];
				break;
			case 'y':
				$num = substr( $ts, 2, 2 );
				break;
			case 'xiy':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = substr( $iranian[0], -2 );
				break;
			case 'a':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
				break;
			case 'A':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
				break;
			case 'g':
				$h = substr( $ts, 8, 2 );
				// 12-hour clock: hours 0 and 12 are both shown as 12
				$num = $h % 12 ? $h % 12 : 12;
				break;
			case 'G':
				$num = intval( substr( $ts, 8, 2 ) );
				break;
			case 'h':
				$h = substr( $ts, 8, 2 );
				// As 'g', but zero-padded to two digits
				$num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
				break;
			case 'H':
				$num = substr( $ts, 8, 2 );
				break;
			case 'i':
				$num = substr( $ts, 10, 2 );
				break;
			case 's':
				$num = substr( $ts, 12, 2 );
				break;
			case 'c':
			case 'r':
			case 'e':
			case 'O':
			case 'P':
			case 'T':
				// Pass through string from $dateTimeObj->format()
				if ( !$dateTimeObj ) {
					$dateTimeObj = DateTime::createFromFormat(
						'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' )
					);
				}
				$s .= $dateTimeObj->format( $code );
				break;
			case 'w':
			case 'N':
			case 'z':
			case 'W':
			case 't':
			case 'L':
			case 'o':
			case 'U':
			case 'I':
			case 'Z':
				// Pass through number from $dateTimeObj->format()
				if ( !$dateTimeObj ) {
					$dateTimeObj = DateTime::createFromFormat(
						'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' )
					);
				}
				$num = $dateTimeObj->format( $code );
				break;
			case '\\':
				# Backslash escaping
				if ( $p < $formatLength - 1 ) {
					$s .= $format[++$p];
				} else {
					$s .= '\\';
				}
				break;
			case '"':
				# Quoted literal
				if ( $p < $formatLength - 1 ) {
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote === false ) {
						# No terminating quote, assume literal "
						$s .= '"';
					} else {
						$s .= substr( $format, $p + 1, $endQuote - $p - 1 );
						// Continue after the closing quote
						$p = $endQuote;
					}
				} else {
					# Quote at end of string, assume literal "
					$s .= '"';
				}
				break;
			default:
				// Unrecognised code: emit the character at the current position only
				// (for an unknown "x" sequence this is its last character)
				$s .= $format[$p];
		}
		if ( $num !== false ) {
			// Raw output wins over roman/hebrew; only the one-shot flag that
			// was actually used is reset
			if ( $rawToggle || $raw ) {
				$s .= $num;
				$raw = false;
			} elseif ( $roman ) {
				$s .= Language::romanNumeral( $num );
				$roman = false;
			} elseif ( $hebrewNum ) {
				$s .= self::hebrewNumeral( $num );
				$hebrewNum = false;
			} else {
				$s .= $this->formatNum( $num, true );
			}
		}
	}

	return $s;
}
1508
// Number of days in each Gregorian month, January first, with February as 28;
// tsToIranian() adds the leap day separately.
private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
// Month lengths of the Iranian calendar, first month first, as used by tsToIranian().
private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1511
/**
 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 * Gregorian dates to Iranian dates. Originally written in C, it
 * is released under the terms of GNU Lesser General Public
 * License. Conversion to PHP was performed by Niklas Laxström.
 *
 * Link: http://www.farsiweb.info/jalali/jalali.c
 *
 * @param string $ts 14-character timestamp (only the YYYYMMDD part is read)
 *
 * @return array Iranian date as array( year, month, day ), month and day 1-based
 */
private static function tsToIranian( $ts ) {
	// Zero-based offsets; the year is counted from 1600
	$gregYear = substr( $ts, 0, 4 ) - 1600;
	$gregMonth = substr( $ts, 4, 2 ) - 1;
	$gregDay = substr( $ts, 6, 2 ) - 1;

	$isLeapYear = ( $gregYear % 4 === 0 && $gregYear % 100 !== 0 )
		|| $gregYear % 400 == 0;

	# Days in the whole years passed so far (including leap days)
	$gregDayNo = 365 * $gregYear
		+ floor( ( $gregYear + 3 ) / 4 )
		- floor( ( $gregYear + 99 ) / 100 )
		+ floor( ( $gregYear + 399 ) / 400 );

	// Plus the full months already passed in the current year
	$monthIndex = 0;
	while ( $monthIndex < $gregMonth ) {
		$gregDayNo += self::$GREG_DAYS[$monthIndex];
		$monthIndex++;
	}

	// Plus this year's leap day once February is over
	if ( $isLeapYear && $gregMonth > 1 ) {
		$gregDayNo++;
	}

	// Plus the days passed in the current month
	$gregDayNo += (int)$gregDay;

	$iranDayNo = $gregDayNo - 79;

	// Whole 12053-day blocks; each one advances the Iranian year by 33
	$blocks = floor( $iranDayNo / 12053 );
	$iranDayNo %= 12053;

	$iranYear = 979 + 33 * $blocks + 4 * floor( $iranDayNo / 1461 );
	$iranDayNo %= 1461;

	if ( $iranDayNo >= 366 ) {
		$iranYear += floor( ( $iranDayNo - 1 ) / 365 );
		$iranDayNo = floor( ( $iranDayNo - 1 ) % 365 );
	}

	// Walk through the months; the last month takes whatever is left over
	$iranMonthIndex = 0;
	while ( $iranMonthIndex < 11 && $iranDayNo >= self::$IRANIAN_DAYS[$iranMonthIndex] ) {
		$iranDayNo -= self::$IRANIAN_DAYS[$iranMonthIndex];
		$iranMonthIndex++;
	}

	return array( $iranYear, $iranMonthIndex + 1, $iranDayNo + 1 );
}
1570
/**
 * Converting Gregorian dates to Hijri dates.
 *
 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
 *
 * @see http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
 *
 * @param string $ts 14-character timestamp (only the YYYYMMDD part is read)
 *
 * @return array Hijri date as array( year, month, day )
 */
private static function tsToHijri( $ts ) {
	$gYear = substr( $ts, 0, 4 );
	$gMonth = substr( $ts, 4, 2 );
	$gDay = substr( $ts, 6, 2 );

	// Dates from 1582-10-15 onwards use the first formula, earlier ones the second
	$useFirstFormula = $gYear > 1582
		|| ( $gYear == 1582 && ( $gMonth > 10 || ( $gMonth == 10 && $gDay > 14 ) ) );

	// Day number on a continuous count
	// NOTE(review): looks like the Julian Day Number - confirm against the referenced source
	if ( $useFirstFormula ) {
		// -1 for January and February, 0 for the other months
		$monthShift = (int)( ( $gMonth - 14 ) / 12 );
		$dayNumber = (int)( ( 1461 * ( $gYear + 4800 + $monthShift ) ) / 4 ) +
			(int)( ( 367 * ( $gMonth - 2 - 12 * $monthShift ) ) / 12 ) -
			(int)( ( 3 * (int)( ( $gYear + 4900 + $monthShift ) / 100 ) ) / 4 ) +
			$gDay - 32075;
	} else {
		$dayNumber = 367 * $gYear -
			(int)( ( 7 * ( $gYear + 5001 + (int)( ( $gMonth - 9 ) / 7 ) ) ) / 4 ) +
			(int)( ( 275 * $gMonth ) / 9 ) + $gDay + 1729777;
	}

	$rest = $dayNumber - 1948440 + 10632;
	// Whole 10631-day blocks; each one advances the Hijri year by 30
	$blocks = (int)( ( $rest - 1 ) / 10631 );
	$rest = $rest - 10631 * $blocks + 354;
	$yearInBlock = ( (int)( ( 10985 - $rest ) / 5316 ) ) * ( (int)( ( 50 * $rest ) / 17719 ) ) +
		( (int)( $rest / 5670 ) ) * ( (int)( ( 43 * $rest ) / 15238 ) );
	$rest = $rest -
		( (int)( ( 30 - $yearInBlock ) / 15 ) ) * ( (int)( ( 17719 * $yearInBlock ) / 50 ) ) -
		( (int)( $yearInBlock / 16 ) ) * ( (int)( ( 15238 * $yearInBlock ) / 43 ) ) + 29;

	$hijriMonth = (int)( ( 24 * $rest ) / 709 );
	$hijriDay = $rest - (int)( ( 709 * $hijriMonth ) / 24 );
	$hijriYear = 30 * $blocks + $yearInBlock - 30;

	return array( $hijriYear, $hijriMonth, $hijriDay );
}
1618
/**
 * Converting Gregorian dates to Hebrew dates.
 *
 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
 * to translate the relevant functions into PHP and release them under
 * GNU GPL.
 *
 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
 * and Adar II is 14. In a non-leap year, Adar is 6.
 *
 * @param string $ts 14-character timestamp (only the YYYYMMDD part is read)
 *
 * @return array Hebrew date as array( year, month, day, days in that month )
 */
private static function tsToHebrew( $ts ) {
	# Parse date
	$year = substr( $ts, 0, 4 );
	$month = substr( $ts, 4, 2 );
	$day = substr( $ts, 6, 2 );

	# Calculate Hebrew year
	$hebrewYear = $year + 3760;

	# Month number when September = 1, August = 12
	$month += 4;
	if ( $month > 12 ) {
		# Next year
		$month -= 12;
		$year++;
		$hebrewYear++;
	}

	# Calculate day of year from 1 September
	$dayOfYear = $day;
	for ( $i = 1; $i < $month; $i++ ) {
		if ( $i == 6 ) {
			# February
			$dayOfYear += 28;
			# Check if the year is leap
			if ( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
				$dayOfYear++;
			}
		} elseif ( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
			# April, June, September and November (in the September = 1 numbering)
			$dayOfYear += 30;
		} else {
			$dayOfYear += 31;
		}
	}

	# Calculate the start of the Hebrew year
	$start = self::hebrewYearStart( $hebrewYear );

	# Calculate next year's start
	if ( $dayOfYear <= $start ) {
		# Day is before the start of the year - it is the previous year
		# Next year's start
		$nextStart = $start;
		# Previous year
		$year--;
		$hebrewYear--;
		# Add days since previous year's 1 September
		$dayOfYear += 365;
		if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
			# Leap year
			$dayOfYear++;
		}
		# Start of the new (previous) year
		$start = self::hebrewYearStart( $hebrewYear );
	} else {
		# Next year's start
		$nextStart = self::hebrewYearStart( $hebrewYear + 1 );
	}

	# Calculate Hebrew day of year
	$hebrewDayOfYear = $dayOfYear - $start;

	# Difference between year's days
	$diff = $nextStart - $start;
	# Add 12 (or 13 for leap years) days to ignore the difference between
	# Hebrew and Gregorian year (353 at least vs. 365/6) - now the
	# difference is only about the year type
	if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
		$diff += 13;
	} else {
		$diff += 12;
	}

	# Check the year pattern, and is leap year
	# 0 means an incomplete year, 1 means a regular year, 2 means a complete year
	# This is mod 30, to work on both leap years (which add 30 days of Adar I)
	# and non-leap years
	$yearPattern = $diff % 30;
	# Check if leap year
	$isLeap = $diff >= 30;

	# Calculate day in the month from number of day in the Hebrew year
	# Don't check Adar - if the day is not in Adar, we will stop before;
	# if it is in Adar, we will use it to check if it is Adar I or Adar II
	$hebrewDay = $hebrewDayOfYear;
	$hebrewMonth = 1;
	$days = 0;
	while ( $hebrewMonth <= 12 ) {
		# Calculate days in this month
		if ( $isLeap && $hebrewMonth == 6 ) {
			# Adar in a leap year
			# (the inner check repeats the outer condition and is always true here)
			if ( $isLeap ) {
				# Leap year - has Adar I, with 30 days, and Adar II, with 29 days
				$days = 30;
				if ( $hebrewDay <= $days ) {
					# Day in Adar I
					$hebrewMonth = 13;
				} else {
					# Subtract the days of Adar I
					$hebrewDay -= $days;
					# Try Adar II
					$days = 29;
					if ( $hebrewDay <= $days ) {
						# Day in Adar II
						$hebrewMonth = 14;
					}
				}
			}
		} elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
			# Cheshvan in a complete year (otherwise as the rule below)
			$days = 30;
		} elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
			# Kislev in an incomplete year (otherwise as the rule below)
			$days = 29;
		} else {
			# Odd months have 30 days, even have 29
			$days = 30 - ( $hebrewMonth - 1 ) % 2;
		}
		if ( $hebrewDay <= $days ) {
			# In the current month
			break;
		} else {
			# Subtract the days of the current month
			$hebrewDay -= $days;
			# Try in the next month
			$hebrewMonth++;
		}
	}

	# $days still holds the length of the month the loop stopped in
	return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
}
1765
/**
 * This calculates the Hebrew year start, as days since 1 September.
 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
 * Used for Hebrew date.
 *
 * @param int $year Hebrew year
 *
 * @return int Day offset from 1 September; tsToHebrew() treats a day-of-year
 *   less than or equal to this value as belonging to the previous Hebrew year
 */
private static function hebrewYearStart( $year ) {
	$a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
	$b = intval( ( $year - 1 ) % 4 );
	$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
	// Split $m into its integer part ($Mar) and fractional part ($m).
	// intval() truncates towards zero, so a negative $m is shifted down by one
	// before truncating and shifted back afterwards.
	if ( $m < 0 ) {
		$m--;
	}
	$Mar = intval( $m );
	if ( $m < 0 ) {
		$m++;
	}
	$m -= $Mar;

	// Adjust the start day depending on $c (a value modulo 7), $a and the fraction $m
	$c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7 );
	if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
		$Mar++;
	} elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
		$Mar += 2;
	} elseif ( $c == 2 || $c == 4 || $c == 6 ) {
		$Mar++;
	}

	// Century-based correction (3761 is subtracted from the Hebrew year first)
	$Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
	return $Mar;
}
1800
/**
 * Algorithm to convert Gregorian dates to Thai solar dates,
 * Minguo (or Juche) dates or Japanese nengo dates.
 *
 * Only the year is converted; month and day are returned as found in $ts.
 * An unknown calendar name leaves the year unchanged as well.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * @param string $ts 14-character timestamp
 * @param string $cName Calendar name: 'thai', 'minguo', 'juche' or 'tenno'
 * @return array Converted year, month, day
 */
private static function tsToYear( $ts, $cName ) {
	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );

	if ( $cName === 'thai' ) {
		# Thai solar dates: add 543 years to the Gregorian year
		return array( $gy + 543, $gm, $gd );
	}

	if ( $cName === 'minguo' || $cName === 'juche' ) {
		# Minguo dates: deduct 1911 years from the Gregorian year
		return array( $gy - 1911, $gm, $gd );
	}

	if ( $cName !== 'tenno' ) {
		# Unknown calendar: keep the Gregorian year
		return array( $gy, $gm, $gd );
	}

	# Nengō dates from the Meiji period on: pick the era name and the
	# Gregorian year in which the era is counted as year one
	if ( ( $gy < 1912 )
		|| ( ( $gy == 1912 ) && ( $gm < 7 ) )
		|| ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) )
	) {
		# Meiji period
		$eraName = '明治';
		$eraFirstYear = 1868;
	} elseif (
		( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
		( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
		( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
		( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
		( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
	) {
		# Taishō period
		$eraName = '大正';
		$eraFirstYear = 1912;
	} elseif (
		( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
		( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
		( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
	) {
		# Shōwa period
		$eraName = '昭和';
		$eraFirstYear = 1926;
	} else {
		# Heisei period
		$eraName = '平成';
		$eraFirstYear = 1989;
	}

	$eraYear = $gy - $eraFirstYear + 1;
	# The first year of an era is written with 元 instead of the number 1
	$yearLabel = ( $eraYear == 1 ) ? '元' : $eraYear;

	return array( $eraName . $yearLabel, $gm, $gd );
}
1885
/**
 * Roman number formatting up to 10000
 *
 * @param int $num
 *
 * @return string|int Roman numeral; numbers outside 1..10000 are returned
 *   unchanged as an integer
 */
static function romanNumeral( $num ) {
	// One row per decimal position (units, tens, hundreds, thousands),
	// indexed by the digit at that position
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM', 'MMMM', 'MMMMM', 'MMMMMM', 'MMMMMMM',
			'MMMMMMMM', 'MMMMMMMMM', 'MMMMMMMMMM' )
	);

	$remaining = intval( $num );
	if ( $remaining <= 0 || $remaining > 10000 ) {
		return $remaining;
	}

	$roman = '';
	// Table row => value of one unit at that position, highest position first
	foreach ( array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 ) as $row => $unit ) {
		if ( $remaining >= $unit ) {
			$roman .= $table[$row][(int)( $remaining / $unit )];
		}
		$remaining = $remaining % $unit;
	}

	return $roman;
}
1916
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * A one-letter number gets a geresh (') appended. A longer number gets
 * gershayim (") inserted before its last letter, and that last letter is
 * replaced by its final form where one exists. Round thousands use the
 * thousands letter, a geresh and the word for "thousand(s)".
 *
 * @param int $num
 *
 * @return string|int Hebrew numeral; numbers outside 1..9999 are returned
 *   unchanged as an integer
 */
static function hebrewNumeral( $num ) {
	// One row per decimal position (units, tens, hundreds, thousands),
	// indexed by the digit at that position
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);
	// Letters that take a different shape at the end of the numeral
	static $finalForms = array(
		'כ' => 'ך',
		'מ' => 'ם',
		'נ' => 'ן',
		'פ' => 'ף',
		'צ' => 'ץ',
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	// Round thousands have their own notation. They must not reach the
	// byte-based code below: there the numeral would end in "'" instead of a
	// two-byte letter, and splitting off the "last letter" would cut a
	// UTF-8 character in half.
	if ( $num === 1000 ) {
		return "א' אלף";
	} elseif ( $num % 1000 === 0 ) {
		return $table[0][$num / 1000] . "' אלפים";
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				// 15 and 16 are written as 9+6 and 9+7 rather than 10+5 and 10+6
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					// Separate the thousands letter from the rest
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	// Each Hebrew letter is two bytes in UTF-8
	if ( strlen( $s ) == 2 ) {
		// Single letter: append a geresh
		return $s . "'";
	}

	// Several letters: gershayim goes before the last letter, which is
	// switched to its final form if it has one
	$head = substr( $s, 0, -2 );
	$last = substr( $s, -2 );
	if ( isset( $finalForms[$last] ) ) {
		$last = $finalForms[$last];
	}

	return $head . '"' . $last;
}
1979
/**
 * Used by date() and time() to adjust the time output.
 *
 * The time correction is interpreted as one of:
 *  - "ZoneInfo|<offset in minutes>|<zone name>": convert from UTC to the named
 *    zone; if the zone name is not recognised, the stored offset is used
 *  - "System|..." or an empty string: use $wgLocalTZoffset (minutes), if set
 *  - "Offset|<offset in minutes>"
 *  - anything else: "hh:mm", or a plain number of hours
 *
 * @param string $ts The time in date('YmdHis') format
 * @param mixed $tz Adjust the time by this amount (default false, mean we
 *   get user timecorrection setting)
 * @return string Adjusted time in date('YmdHis') format; $ts itself is
 *   returned when the resulting offset is zero
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( $tz === false ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	$data = explode( '|', $tz, 3 );

	if ( $data[0] == 'ZoneInfo' ) {
		wfSuppressWarnings();
		// A missing zone name is handled like an unrecognised one
		$userTZ = isset( $data[2] ) ? timezone_open( $data[2] ) : false;
		wfRestoreWarnings();
		if ( $userTZ !== false ) {
			$date = date_create( $ts, timezone_open( 'UTC' ) );
			date_timezone_set( $date, $userTZ );
			$date = date_format( $date, 'YmdHis' );
			return $date;
		}
		# Unrecognized timezone, default to 'Offset' with the stored offset.
		$data[0] = 'Offset';
	}

	$minDiff = 0;
	if ( $data[0] == 'System' || $tz == '' ) {
		# Global offset in minutes.
		if ( isset( $wgLocalTZoffset ) ) {
			$minDiff = $wgLocalTZoffset;
		}
	} elseif ( $data[0] == 'Offset' ) {
		// A missing offset field counts as zero
		$minDiff = isset( $data[1] ) ? intval( $data[1] ) : 0;
	} else {
		$data = explode( ':', $tz );
		if ( count( $data ) == 2 ) {
			$hours = intval( $data[0] );
			$minutes = intval( $data[1] );
			$minDiff = abs( $hours ) * 60 + $minutes;
			// intval( '-0' ) is 0 and loses the sign, so "-0:30" has to be
			// recognised from the raw text to become -30 rather than +30
			if ( $hours < 0 || ( $hours === 0 && preg_match( '/^\s*-0/', $data[0] ) ) ) {
				$minDiff = -$minDiff;
			}
		} else {
			$minDiff = intval( $data[0] ) * 60;
		}
	}

	# No difference ? Return time unchanged
	if ( 0 == $minDiff ) {
		return $ts;
	}

	wfSuppressWarnings(); // E_STRICT system time bitching
	# Generate an adjusted date; take advantage of the fact that mktime
	# will normalize out-of-range values so we don't have to split $minDiff
	# into hours and minutes.
	$t = mktime(
		(int)substr( $ts, 8, 2 ), # Hours
		(int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
		(int)substr( $ts, 12, 2 ), # Seconds
		(int)substr( $ts, 4, 2 ), # Month
		(int)substr( $ts, 6, 2 ), # Day
		(int)substr( $ts, 0, 4 ) # Year
	);

	$date = date( 'YmdHis', $t );
	wfRestoreWarnings();

	return $date;
}
2055
/**
 * This is meant to be used by time(), date(), and timeanddate() to get
 * the date preference they're supposed to use, it should be used in
 * all children.
 *
 *<code>
 * function timeanddate([...], $format = true) {
 * 	$datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param int|string|bool $usePrefs If true, the user's preference is used
 *   if false, the site/language default is used
 *   if int/string, assumed to be a format.
 * @return string The date preference, or 'default' when it would be empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		// An explicit format was passed in
		$datePreference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$datePreference = $wgUser->getDatePreference();
	} else {
		$datePreference = (string)User::getDefaultOption( 'date' );
	}

	return $datePreference == '' ? 'default' : $datePreference;
}
2093
/**
 * Get a format string for a given type and preference
 *
 * The lookup falls back to the language's default date format when $pref is
 * 'default' or has no format string of the requested type. Results are cached
 * in $this->dateFormatStrings under the preference that was finally used.
 *
 * @param string $type May be date, time or both
 * @param string $pref The format name as it appears in Messages*.php
 *
 * @since 1.22 New type 'pretty' that provides a more readable timestamp format
 *
 * @return string
 */
function getDateFormatString( $type, $pref ) {
	if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
		return $this->dateFormatStrings[$type][$pref];
	}

	$df = null;
	if ( $pref != 'default' ) {
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );

		// A preference without a 'pretty' format uses its 'date' format instead
		if ( $df === null && $type === 'pretty' ) {
			$df = $this->getDateFormatString( 'date', $pref );
		}
	}

	if ( $df === null ) {
		// Either 'default' was requested or nothing was found for $pref
		$pref = $this->getDefaultDateFormat();
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}

	$this->dateFormatStrings[$type][$pref] = $df;

	return $df;
}
2124
/**
 * Format the date part of a timestamp.
 *
 * @param mixed $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj Whether to adjust the time output according to the
 *   user configured offset ($timecorrection)
 * @param mixed $format True to use user's date format preference
 * @param string|bool $timecorrection The time offset as returned by
 *   validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}

	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'date', $preference );

	return $this->sprintfDate( $formatString, $mwTimestamp );
}
2143
/**
 * Format the time part of a timestamp.
 *
 * @param mixed $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj Whether to adjust the time output according to the
 *   user configured offset ($timecorrection)
 * @param mixed $format True to use user's date format preference
 * @param string|bool $timecorrection The time offset as returned by
 *   validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}

	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'time', $preference );

	return $this->sprintfDate( $formatString, $mwTimestamp );
}
2162
/**
 * Format both the date and the time of a timestamp.
 *
 * @param mixed $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj Whether to adjust the time output according to the
 *   user configured offset ($timecorrection)
 * @param mixed $format What format to return, if it's false output the
 *   default one (default true)
 * @param string|bool $timecorrection The time offset as returned by
 *   validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}

	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'both', $preference );

	return $this->sprintfDate( $formatString, $mwTimestamp );
}
2182
/**
 * Render a number of seconds as text made of units such as hours and
 * minutes, e.g. one escaped "duration-*" message per unit joined with
 * listToText().
 *
 * @since 1.20
 *
 * @param int $seconds The amount of seconds.
 * @param array $chosenIntervals The intervals to enable; forwarded
 *   unchanged to getDurationIntervals().
 *
 * @return string
 */
public function formatDuration( $seconds, array $chosenIntervals = array() ) {
	$parts = array();

	// Messages: duration-seconds, duration-minutes, duration-hours, duration-days, duration-weeks,
	// duration-years, duration-decades, duration-centuries, duration-millennia
	foreach ( $this->getDurationIntervals( $seconds, $chosenIntervals ) as $unit => $count ) {
		$parts[] = wfMessage( 'duration-' . $unit )
			->numParams( $count )
			->inLanguage( $this )
			->escaped();
	}

	return $this->listToText( $parts );
}
2207
/**
 * Break a number of seconds down into a set of intervals.
 * For example 65 will be turned into array( minutes => 1, seconds => 5 ).
 *
 * Units whose count is zero are left out, except that the smallest enabled
 * unit is reported (with a count of zero) when nothing else was.
 *
 * @since 1.20
 *
 * @param int $seconds The amount of seconds.
 * @param array $chosenIntervals Names of the intervals to enable. When
 *   empty, every unit from millennia down to seconds except weeks is used.
 *
 * @return array Map of interval name => count
 */
public function getDurationIntervals( $seconds, array $chosenIntervals = array() ) {
	if ( count( $chosenIntervals ) === 0 ) {
		$chosenIntervals = array(
			'millennia',
			'centuries',
			'decades',
			'years',
			'days',
			'hours',
			'minutes',
			'seconds'
		);
	}

	// Keep only the enabled units, in the order self::$durationIntervals lists
	// them (presumably largest first, so the last one is the smallest).
	$unitLengths = array_intersect_key( self::$durationIntervals, array_flip( $chosenIntervals ) );
	$unitNames = array_keys( $unitLengths );
	$smallestUnit = array_pop( $unitNames );

	$result = array();

	foreach ( $unitLengths as $unit => $unitSeconds ) {
		$count = floor( $seconds / $unitSeconds );
		$isLastResort = $unit == $smallestUnit && !$result;

		if ( !( $count > 0 ) && !$isLastResort ) {
			continue;
		}

		// Carry the remainder on to the smaller units.
		$seconds -= $count * $unitSeconds;
		$result[$unit] = $count;
	}

	return $result;
}
2250
/**
 * Shared implementation behind userDate(), userTime() and userTimeAndDate().
 *
 * @param string $type Can be 'date', 'time' or 'both'
 * @param mixed $ts Timestamp in any form wfTimestamp() accepts; it is
 *   converted to TS_MW before formatting
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys (both default
 *   to true when absent):
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
private function internalUserTimeAndDate( $type, $ts, User $user, array $options ) {
	$ts = wfTimestamp( TS_MW, $ts );
	$options += array( 'timecorrection' => true, 'format' => true );

	$requestedCorrection = $options['timecorrection'];
	if ( $requestedCorrection !== false ) {
		$offset = ( $requestedCorrection === true )
			? $user->getOption( 'timecorrection' )
			: $requestedCorrection;
		$ts = $this->userAdjust( $ts, $offset );
	}

	$format = ( $options['format'] === true )
		? $user->getDatePreference()
		: $options['format'];

	$pattern = $this->getDateFormatString( $type, $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $ts );
}
2289
/**
 * Format the date part of a timestamp for a particular user.
 *
 * @param mixed $ts Timestamp in any form wfTimestamp() accepts
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys:
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
public function userDate( $ts, User $user, array $options = array() ) {
	$type = 'date';

	return $this->internalUserTimeAndDate( $type, $ts, $user, $options );
}
2312
/**
 * Format the time-of-day part of a timestamp for a particular user.
 *
 * @param mixed $ts Timestamp in any form wfTimestamp() accepts
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys:
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
public function userTime( $ts, User $user, array $options = array() ) {
	$type = 'time';

	return $this->internalUserTimeAndDate( $type, $ts, $user, $options );
}
2335
/**
 * Format both the date and the time of a timestamp for a particular user.
 *
 * @param mixed $ts Timestamp in any form wfTimestamp() accepts
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys:
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
public function userTimeAndDate( $ts, User $user, array $options = array() ) {
	$type = 'both';

	return $this->internalUserTimeAndDate( $type, $ts, $user, $options );
}
2358
/**
 * Convert an MWTimestamp into a pretty human-readable timestamp using
 * the given user preferences and relative base time.
 *
 * DO NOT USE THIS FUNCTION DIRECTLY. Instead, call MWTimestamp::getHumanTimestamp
 * on your timestamp object, which will then call this function. Calling
 * this function directly will cause hooks to be skipped over.
 *
 * The wording depends on how far $ts lies before $relativeTo:
 *  - in the future, or more than 5 days ago in a different year: full
 *    date and time
 *  - more than 5 days ago, same year: the 'pretty' date format
 *  - 2 to 5 days ago: "<weekday>-at" message with the time
 *  - 1 day ago: 'yesterday-at' message with the time
 *  - more than 90 minutes ago: 'today-at' message with the time
 *  - otherwise: 'hours-ago', 'minutes-ago', 'seconds-ago' or 'just-now'
 *
 * Every message is rendered in the language of this Language object.
 *
 * @see MWTimestamp::getHumanTimestamp
 * @param MWTimestamp $ts Timestamp to prettify
 * @param MWTimestamp $relativeTo Base timestamp
 * @param User $user User preferences to use
 * @return string Human timestamp
 * @since 1.22
 */
public function getHumanTimestamp( MWTimestamp $ts, MWTimestamp $relativeTo, User $user ) {
	$diff = $ts->diff( $relativeTo );
	// True when the two timestamps fall on different days of the week. Used
	// below so that a gap of less than one full day which still lands on
	// another weekday counts as one day.
	$diffDay = (bool)( (int)$ts->timestamp->format( 'w' ) -
		(int)$relativeTo->timestamp->format( 'w' ) );
	$days = $diff->days ?: (int)$diffDay;
	if ( $diff->invert
		|| ( $days > 5
			&& $ts->timestamp->format( 'Y' ) !== $relativeTo->timestamp->format( 'Y' ) )
	) {
		// Timestamps are in different years: use full timestamp
		// Also do full timestamp for future dates
		/**
		 * @FIXME Add better handling of future timestamps.
		 */
		$format = $this->getDateFormatString( 'both', $user->getDatePreference() ?: 'default' );
		$ts = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) );
	} elseif ( $days > 5 ) {
		// Timestamps are in same year, but more than 5 days ago: show day and month only.
		$format = $this->getDateFormatString( 'pretty', $user->getDatePreference() ?: 'default' );
		$ts = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) );
	} elseif ( $days > 1 ) {
		// Timestamp within the past week: show the day of the week and time
		$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
		$weekday = self::$mWeekdayMsgs[$ts->timestamp->format( 'w' )];
		// Messages:
		// sunday-at, monday-at, tuesday-at, wednesday-at, thursday-at, friday-at, saturday-at
		$ts = wfMessage( "$weekday-at" )
			->inLanguage( $this )
			->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
			->text();
	} elseif ( $days == 1 ) {
		// Timestamp was yesterday: say 'yesterday' and the time.
		$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
		$ts = wfMessage( 'yesterday-at' )
			->inLanguage( $this )
			->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
			->text();
	} elseif ( $diff->h > 1 || ( $diff->h == 1 && $diff->i > 30 ) ) {
		// Timestamp was today, but more than 90 minutes ago: say 'today' and the time.
		$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
		$ts = wfMessage( 'today-at' )
			->inLanguage( $this )
			->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
			->text();

	// From here on in, the timestamp was soon enough ago so that we can simply say
	// XX units ago, e.g., "2 hours ago" or "5 minutes ago"
	} elseif ( $diff->h == 1 ) {
		// Less than 90 minutes, but more than an hour ago.
		$ts = wfMessage( 'hours-ago' )->inLanguage( $this )->numParams( 1 )->text();
	} elseif ( $diff->i >= 1 ) {
		// A few minutes ago.
		$ts = wfMessage( 'minutes-ago' )->inLanguage( $this )->numParams( $diff->i )->text();
	} elseif ( $diff->s >= 30 ) {
		// Less than a minute, but more than 30 sec ago.
		$ts = wfMessage( 'seconds-ago' )->inLanguage( $this )->numParams( $diff->s )->text();
	} else {
		// Less than 30 seconds ago. Rendered in this language like every
		// other branch, rather than in whatever language wfMessage() defaults to.
		$ts = wfMessage( 'just-now' )->inLanguage( $this )->text();
	}

	return $ts;
}
2436
/**
 * Look up a single entry of this language's 'messages' item in the
 * localisation data cache.
 *
 * @param string $key Message key
 * @return mixed The subitem exactly as self::$dataCache->getSubitem() returns it
 */
function getMessage( $key ) {
	$langCode = $this->mCode;

	return self::$dataCache->getSubitem( $langCode, 'messages', $key );
}
2444
/**
 * Fetch the whole 'messages' item for this language from the
 * localisation data cache.
 *
 * @return array
 */
function getAllMessages() {
	$langCode = $this->mCode;

	return self::$dataCache->getItem( $langCode, 'messages' );
}
2451
/**
 * Convert a string between character encodings with PHP's iconv(),
 * dropping characters that cannot be represented (//IGNORE) and
 * silencing any warnings raised during the conversion.
 *
 * @param string $in Source encoding
 * @param string $out Target encoding
 * @param string $string Text to convert
 * @return string
 */
function iconv( $in, $out, $string ) {
	# This is a wrapper for iconv in all languages except esperanto,
	# which does some nasty x-conversions beforehand

	# iconv may still complain about illegal characters in the *input*
	# string even when //IGNORE is given, so warnings are suppressed too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	$target = $out . '//IGNORE';

	wfSuppressWarnings();
	$converted = iconv( $in, $target, $string );
	wfRestoreWarnings();

	return $converted;
}
2471
2472 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
2473
/**
 * preg_replace_callback() handler used by ucwordbreaks() for single-byte
 * input: upper-cases the first character of the captured word.
 *
 * @param array $matches Regex match; the word is in $matches[1]
 * @return mixed|string
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];

	return $this->ucfirst( $word );
}
2481
/**
 * preg_replace_callback() handler used by ucwordbreaks() when mbstring is
 * available: upper-cases the entire match.
 *
 * @param array $matches Regex match; the whole match is in $matches[0]
 * @return string
 */
function ucwordbreaksCallbackMB( $matches ) {
	$wholeMatch = $matches[0];

	return mb_strtoupper( $wholeMatch );
}
2489
/**
 * preg_replace_callback() handler used by uc() when mbstring is missing:
 * maps the captured character through the first table returned by
 * self::getCaseMaps().
 *
 * @param array $matches Regex match; the character is in $matches[1]
 * @return string
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$upperMap = $caseMaps[0];

	return strtr( $matches[1], $upperMap );
}
2498
/**
 * preg_replace_callback() handler used by lc() when mbstring is missing:
 * maps the captured character through the second table returned by
 * self::getCaseMaps().
 *
 * @param array $matches Regex match; the character is in $matches[1]
 * @return string
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$lowerMap = $caseMaps[1];

	return strtr( $matches[1], $lowerMap );
}
2507
/**
 * preg_replace_callback() handler used by ucwords() when mbstring is
 * available: upper-cases the entire match.
 *
 * @param array $matches Regex match; the whole match is in $matches[0]
 * @return string
 */
function ucwordsCallbackMB( $matches ) {
	$wholeMatch = $matches[0];

	return mb_strtoupper( $wholeMatch );
}
2515
/**
 * preg_replace_callback() handler used by ucwords() and ucwordbreaks()
 * when mbstring is missing: maps the entire match through the first
 * table returned by self::getCaseMaps().
 *
 * @param array $matches Regex match; the whole match is in $matches[0]
 * @return string
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	$upperMap = $caseMaps[0];

	return strtr( $matches[0], $upperMap );
}
2524
/**
 * Make a string's first character uppercase.
 *
 * The decision is taken on the first byte only: bytes below 96 (which
 * includes the ASCII capitals, digits and the empty string) are returned
 * untouched, other ASCII bytes go through PHP's ucfirst(), and anything
 * from 128 upwards is handed to uc() for multibyte handling.
 *
 * @param string $str
 *
 * @return string
 */
function ucfirst( $str ) {
	$firstByte = ord( $str );

	if ( $firstByte >= 128 ) {
		// fall back to more complex logic in case of multibyte strings
		return $this->uc( $str, true );
	}

	if ( $firstByte >= 96 ) {
		return ucfirst( $str ); // use PHP's ucfirst()
	}

	// if already uppercase...
	return $str;
}
2543
/**
 * Convert a string to uppercase.
 *
 * Strings without bytes >= 0x80 use PHP's byte-wise functions. Multibyte
 * strings use mbstring when it is available and otherwise a regex with
 * ucCallback().
 *
 * @param string $str
 * @param bool $first If true, only the first character is upper-cased
 *
 * @return string
 */
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return mb_strtoupper( $str );
		}

		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: match ASCII lower-case letters or a UTF-8 lead byte with
	// its continuation bytes; anchor at the start when only the first
	// character is wanted.
	$anchor = $first ? '^' : '';

	return preg_replace_callback(
		"/{$anchor}([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
2576
/**
 * Make a string's first character lowercase.
 *
 * The decision is taken on the first byte: a zero byte (including the
 * empty string) returns the input cast to string, bytes from 128 upwards
 * go to lc() for multibyte handling, bytes above 96 are returned as-is,
 * and anything else has its first byte lower-cased in place.
 *
 * @param string $str
 * @return mixed|string
 */
function lcfirst( $str ) {
	$firstByte = ord( $str );

	if ( $firstByte === 0 ) {
		return strval( $str );
	}

	if ( $firstByte >= 128 ) {
		return $this->lc( $str, true );
	}

	if ( $firstByte <= 96 ) {
		$str[0] = strtolower( $str[0] );
	}

	return $str;
}
2594
/**
 * Convert a string to lowercase.
 *
 * Strings without bytes >= 0x80 use PHP's byte-wise functions. Multibyte
 * strings use mbstring when it is available and otherwise a regex with
 * lcCallback().
 *
 * @param string $str
 * @param bool $first If true, only the first character is lower-cased
 * @return mixed|string
 */
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		return $first
			? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 )
			: strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return mb_strtolower( $str );
		}

		return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: match ASCII capitals or a UTF-8 lead byte with its
	// continuation bytes; anchor at the start when only the first character
	// is wanted.
	$anchor = $first ? '^' : '';

	return preg_replace_callback(
		"/{$anchor}([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
2624
/**
 * Tell whether a string contains at least one byte in the range
 * 0x80-0xff, i.e. anything outside 7-bit ASCII.
 *
 * @param string $str
 * @return bool
 */
function isMultibyte( $str ) {
	$found = preg_match( '/[\x80-\xff]/', $str );

	return $found === 1;
}
2632
/**
 * Lower-case a string, then upper-case the first letter of each word,
 * where a word starts at the beginning of the string or after a space.
 *
 * @param string $str
 * @return mixed|string
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$lowered = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $firstLetters, array( $this, $callback ), $lowered );
}
2662
/**
 * Capitalize words at word breaks.
 *
 * Multibyte input is lower-cased first and then capitalized after an
 * explicit list of break characters; other input is capitalized at
 * regex \b boundaries without being lower-cased.
 *
 * @param string $str
 * @return mixed
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$lowered = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|" .
		$breaks . "([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $firstLetters, array( $this, $callback ), $lowered );
}
2701
/**
 * Return a case-folded representation of $s.
 *
 * The result is such that caseFold($s1)==caseFold($s2) whenever $s1 and
 * $s2 differ only in the case of their characters. The returned value
 * does not need to make sense when displayed. This implementation folds
 * by upper-casing with uc().
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param string $s
 *
 * @return string
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );

	return $folded;
}
2720
/**
 * Make sure a title string is UTF-8: strings that StringUtils::isUtf8()
 * accepts are returned unchanged, anything else is converted from this
 * language's fallback 8-bit encoding.
 *
 * @param string $s
 * @throws MWException If $s is an array
 * @return string
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		throw new MWException( 'Given array to checkTitleEncoding.' );
	}

	if ( !StringUtils::isUtf8( $s ) ) {
		$s = $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
	}

	return $s;
}
2735
/**
 * Fetch this language's 'fallback8bitEncoding' item from the localisation
 * data cache. checkTitleEncoding() passes the value to iconv() as the
 * source encoding name.
 *
 * @return string
 */
function fallback8bitEncoding() {
	$langCode = $this->mCode;

	return self::$dataCache->getItem( $langCode, 'fallback8bitEncoding' );
}
2742
/**
 * Whether this language separates words with whitespace.
 *
 * Most writing systems do. Some languages such as Chinese
 * conventionally don't, which requires special handling when breaking
 * up words for searching etc. This base implementation always answers
 * true.
 *
 * @return bool
 */
function hasWordBreaks() {
	$usesWhitespace = true;

	return $usesWhitespace;
}
2754
/**
 * Hook point for word segmentation.
 *
 * Some languages such as Chinese require word segmentation; a derived
 * class can override this to provide it. This base implementation
 * returns the text untouched.
 *
 * @param string $string
 * @return string
 */
function segmentByWord( $string ) {
	$segmented = $string;

	return $segmented;
}
2765
/**
 * Normalize text before it is used for searching.
 *
 * Some languages have special punctuation that needs to be normalized;
 * such changes belong here. This base implementation only applies
 * convertDoubleWidth().
 *
 * @param string $string
 * @return string
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );

	return $normalized;
}
2776
/**
 * Convert double-width (fullwidth) roman letters and digits to their
 * single-width ASCII equivalents.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only 0-9, A-Z and a-z are converted. The lookup tables are built once
 * per process and kept in static variables.
 *
 * @param string $string UTF-8 text
 *
 * @return string
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$half = str_split( $halfWidth );

		// The fullwidth form of an ASCII character sits exactly 0xFEE0 code
		// points above it (U+FF10..U+FF5A for the characters handled here).
		// The table is computed rather than written as a literal so that it
		// cannot be damaged by editors or tools that normalise fullwidth
		// characters to ASCII.
		$full = array();
		foreach ( $half as $asciiChar ) {
			$codepoint = ord( $asciiChar ) + 0xFEE0;
			// Every code point in this range encodes to three UTF-8 bytes.
			$full[] = chr( 0xE0 | ( $codepoint >> 12 ) )
				. chr( 0x80 | ( ( $codepoint >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $codepoint & 0x3F ) );
		}
	}

	return str_replace( $full, $half, $string );
}
2799
/**
 * Surround every match of $pattern's first capture group with spaces,
 * then collapse each run of spaces into a single space.
 *
 * @param string $string
 * @param string $pattern Regular expression with a capture group 1
 * @return string
 */
protected static function insertSpace( $string, $pattern ) {
	$padded = preg_replace( $pattern, " $1 ", $string );

	return preg_replace( '/ +/', ' ', $padded );
}
2810
/**
 * Hook point for converting search terms before results are displayed.
 * This base implementation returns the terms unchanged.
 *
 * @param array $termsArray
 * @return array
 */
function convertForSearchResult( $termsArray ) {
	# some languages, e.g. Chinese, need to do a conversion
	# in order for search results to be displayed correctly
	$converted = $termsArray;

	return $converted;
}
2820
/**
 * Get the first character of a string.
 *
 * The first UTF-8 sequence (1 to 4 bytes) is extracted. A three-byte
 * character whose code point lies in 0xac00..0xd7a3 is replaced by a fixed
 * three-byte string chosen from the code point's sub-range (breaking a
 * Hangul syllable down to its first jamo). An empty string is returned
 * when the input does not start with a sequence the pattern accepts.
 *
 * @param string $s
 * @return string
 */
function firstChar( $s ) {
	// Exclusive upper bound of each sub-range => string returned for it.
	static $jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	// Break down Hangul syllables to grab the first jamo
	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}

	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}

	// Remaining range, 0xd558 up to 0xd7a3.
	return "\xe3\x85\x8e";
}
2878
/**
 * Startup hook for languages with an alternate character encoding
 * option. Intentionally empty in this base implementation.
 */
function initEncoding() {
	# Some languages may have an alternate char encoding option
	# (Esperanto X-coding, Japanese furigana conversion, etc)
	# If this language is used as the primary content language,
	# an override to the defaults can be set here on startup.
}
2885
/**
 * Recode UTF-8 text into $wgEditEncoding for display in the edit box.
 * The text is returned untouched when $wgEditEncoding is empty or 'UTF-8'.
 *
 * @param string $s
 * @return string
 */
function recodeForEdit( $s ) {
	# For some languages we'll want to explicitly specify
	# which characters make it into the edit box raw
	# or are converted in some way or another.
	global $wgEditEncoding;

	$needsRecoding = $wgEditEncoding != '' && $wgEditEncoding != 'UTF-8';

	return $needsRecoding
		? $this->iconv( 'UTF-8', $wgEditEncoding, $s )
		: $s;
}
2901
/**
 * Recode submitted text from $wgEditEncoding back to UTF-8; the inverse
 * of recodeForEdit(). The text is returned untouched when
 * $wgEditEncoding is empty or 'UTF-8'.
 *
 * @param string $s
 * @return string
 */
function recodeInput( $s ) {
	# Take the previous into account.
	global $wgEditEncoding;

	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

	return ( $enc == 'UTF-8' )
		? $s
		: $this->iconv( $enc, 'UTF-8', $s );
}
2920
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only. In this base
 * implementation the Arabic and Malayalam pair files are applied only
 * when $wgAllUnicodeFixes is set.
 *
 * @param string $s
 *
 * @return string
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}

	return $s;
}
2942
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The ReplacementArray built from each file is kept in
 * $this->transformData, so a file is loaded at most once per object.
 * This will go faster if you have the FastStringSearch extension.
 *
 * @param string $file
 * @param string $string
 *
 * @throws MWException If wfGetPrecompiledData() returns false for $file
 * @return string
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	$pairs = wfGetPrecompiledData( $file );
	if ( $pairs === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}

	$replacer = new ReplacementArray( $pairs );
	$this->transformData[$file] = $replacer;

	return $replacer->replace( $string );
}
2967
/**
 * For right-to-left language support: returns this language's 'rtl'
 * item from the localisation data cache.
 *
 * @return bool
 */
function isRTL() {
	$langCode = $this->mCode;

	return self::$dataCache->getItem( $langCode, 'rtl' );
}
2976
/**
 * Return the correct HTML 'dir' attribute value for this language:
 * 'rtl' when isRTL() is truthy, 'ltr' otherwise.
 * @return string
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}

	return 'ltr';
}
2984
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction ('right' for RTL, 'left' otherwise).
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return string
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}

	return 'left';
}
2996
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction ('left' for RTL, 'right' otherwise).
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return string
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}

	return 'right';
}
3008
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction.
 * Unlike getDirMark(), this function returns the character as an HTML entity.
 * Use it when the output is guaranteed to be HTML, because it keeps the
 * HTML source readable. When the output is plain text or can be escaped,
 * getDirMark() should be used.
 *
 * @param bool $opposite Get the direction mark opposite to your language
 * @return string '&rlm;' or '&lrm;'
 * @since 1.20
 */
function getDirMarkEntity( $opposite = false ) {
	$rtl = (bool)$this->isRTL();

	// Asking for the opposite mark simply flips the direction.
	$wantRlm = $opposite ? !$rtl : $rtl;

	return $wantRlm ? '&rlm;' : '&lrm;';
}
3026
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction.
 * The mark is returned as an invisible Unicode character, which makes
 * output hard to read and debug, so only use this when the output is
 * plain text or can be escaped. When the output is HTML, use
 * getDirMarkEntity() instead.
 *
 * @param bool $opposite Get the direction mark opposite to your language
 * @return string
 */
function getDirMark( $opposite = false ) {
	$marks = array(
		'ltr' => "\xE2\x80\x8E", # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
		'rtl' => "\xE2\x80\x8F", # RIGHT-TO-LEFT MARK, commonly abbreviated RLM
	);

	$rtl = (bool)$this->isRTL();
	if ( $opposite ) {
		$rtl = !$rtl;
	}

	return $rtl ? $marks['rtl'] : $marks['ltr'];
}
3045
/**
 * Fetch this language's 'capitalizeAllNouns' item from the localisation
 * data cache.
 *
 * @return mixed The cached item (presumably a boolean flag -- TODO confirm)
 */
function capitalizeAllNouns() {
	$langCode = $this->mCode;

	return self::$dataCache->getItem( $langCode, 'capitalizeAllNouns' );
}
3052
/**
 * An arrow, depending on the language direction.
 *
 * 'forwards' and 'backwards' are relative to the reading direction and
 * therefore swap for RTL languages; 'left', 'right', 'up' and 'down' are
 * absolute.
 *
 * @param string $direction The direction of the arrow: forwards (default),
 *   backwards, left, right, up, down.
 * @return string|null The arrow character, or null for any other direction
 */
function getArrow( $direction = 'forwards' ) {
	if ( $direction == 'forwards' ) {
		return $this->isRTL() ? '←' : '→';
	}
	if ( $direction == 'backwards' ) {
		return $this->isRTL() ? '→' : '←';
	}
	if ( $direction == 'left' ) {
		return '←';
	}
	if ( $direction == 'right' ) {
		return '→';
	}
	if ( $direction == 'up' ) {
		return '↑';
	}
	if ( $direction == 'down' ) {
		return '↓';
	}

	// Unrecognised direction: nothing is returned.
	return null;
}
3076
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar".
 * Returns this language's 'linkPrefixExtension' item from the
 * localisation data cache.
 *
 * @return bool
 */
function linkPrefixExtension() {
	$langCode = $this->mCode;

	return self::$dataCache->getItem( $langCode, 'linkPrefixExtension' );
}
3085
/**
 * Get all magic words for this language from the localisation data cache.
 * @return array
 */
function getMagicWords() {
	$langCode = $this->mCode;

	return self::$dataCache->getItem( $langCode, 'magicWords' );
}
3093
/**
 * Run the LanguageGetMagic hook once per object.
 *
 * The done-flag is set before the hook runs, and the hook receives
 * $this->mMagicExtensions by reference together with the language code.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		$this->mMagicHookDone = true;

		$section = 'LanguageGetMagic';
		wfProfileIn( $section );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( $section );
	}
}
3106
/**
 * Fill a MagicWord object with data from here.
 *
 * The entry for $mw->mId is taken from $this->mMagicExtensions when one
 * is set there, otherwise from the 'magicWords' item of the localisation
 * data cache. Its first element becomes $mw->mCaseSensitive and the rest
 * become $mw->mSynonyms. A non-array entry is reported with error_log()
 * and $mw is left untouched.
 *
 * @param MagicWord $mw
 */
function getMagic( $mw ) {
	// Saves a function call
	if ( !$this->mMagicHookDone ) {
		$this->doMagicHook();
	}

	$rawEntry = isset( $this->mMagicExtensions[$mw->mId] )
		? $this->mMagicExtensions[$mw->mId]
		: self::$dataCache->getSubitem( $this->mCode, 'magicWords', $mw->mId );

	if ( is_array( $rawEntry ) ) {
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
		return;
	}

	error_log( "\"$rawEntry\" is not a valid magic word for \"$mw->mId\"" );
}
3132
/**
 * Add magic words to the extension array.
 *
 * The fallback languages are walked in reverse order and each matching
 * entry of $newWords is merged in front of $this->mMagicExtensions, so
 * entries for languages earlier in the fallback chain override those for
 * later ones as well as anything already present.
 *
 * @param array $newWords Map of language code => array of magic words
 */
function addMagicWordsByLang( $newWords ) {
	$reversedChain = array_reverse( $this->getFallbackLanguages() );

	foreach ( $reversedChain as $code ) {
		if ( !isset( $newWords[$code] ) ) {
			continue;
		}

		$this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
	}
}
3147
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * The array is loaded from the localisation data cache on first use,
 * passed by reference to the LanguageGetSpecialPageAliases hook, and
 * then kept in $this->mExtendedSpecialPageAliases for later calls.
 *
 * @return array
 */
function getSpecialPageAliases() {
	// Cache aliases because it may be slow to load them
	if ( $this->mExtendedSpecialPageAliases !== null ) {
		return $this->mExtendedSpecialPageAliases;
	}

	// Initialise array
	$this->mExtendedSpecialPageAliases =
		self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
	wfRunHooks(
		'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
	);

	return $this->mExtendedSpecialPageAliases;
}
3164
/**
 * Wrap text in an <em> element. Italic is unsuitable for some languages,
 * which is why this lives on the language object.
 *
 * @param string $text The text to be emphasized; it is inserted as-is,
 *   without escaping.
 * @return string
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
3174
/**
 * Format a number for display in this language.
 *
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages: some, such as Punjabi, want their own digits, and others,
 * such as Icelandic, just want to use commas instead of dots and dots
 * instead of commas, like "293.291,235".
 *
 * Unless $nocommafy is set, the number goes through commafy() and the
 * separator transform table. The digit transform table is applied only
 * when $wgTranslateNumerals is set.
 *
 * An example of this function being called:
 * <code>
 * wfMessage( 'message' )->numParams( $num )->text()
 * </code>
 *
 * See $separatorTransformTable on MessagesIs.php for
 * the , => . and . => , implementation.
 *
 * @todo check if it's viable to use localeconv() for the decimal separator thing.
 * @param int|float $number The number to be formatted, should be an integer
 *   or a floating point number.
 * @param bool $nocommafy Set to true for special numbers like dates
 * @return string
 */
public function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
3216
/**
 * Front-end for formatNum() that skips digit grouping.
 *
 * @param int|float $number The number to be formatted, should be an integer
 *   or a floating point number.
 * @since 1.21
 * @return string
 */
public function formatNumNoSeparators( $number ) {
	$skipGrouping = true;
	return $this->formatNum( $number, $skipGrouping );
}
3228
/**
 * Undo the transformations applied by formatNum().
 *
 * The digit table and then the separator table are applied in the inverse
 * direction; finally every remaining comma is stripped.
 *
 * @param string $number Localised number
 * @return string
 */
function parseFormattedNumber( $number ) {
	$digitMap = $this->digitTransformTable();
	if ( $digitMap ) {
		$number = strtr( $number, array_flip( $digitMap ) );
	}

	$separatorMap = $this->separatorTransformTable();
	if ( $separatorMap ) {
		$number = strtr( $number, array_flip( $separatorMap ) );
	}

	// Whatever commas are left are grouping separators; drop them.
	return strtr( $number, array( ',' => '' ) );
}
3247
/**
 * Adds commas to a given number, following this language's digit grouping
 * pattern (thousands grouping when no pattern is defined).
 *
 * @since 1.19
 * @param mixed $number
 * @return string Empty string when $number is null
 */
function commafy( $number ) {
	$digitGroupingPattern = $this->digitGroupingPattern();
	if ( $number === null ) {
		return '';
	}

	if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
		// Default grouping is at thousands, use the same for ###,###,### pattern too.
		// The string is processed reversed; (?!\d*\.) rejects digit groups that
		// still have a dot ahead of them, i.e. the fractional digits.
		return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
	} else {
		// Ref: http://cldr.unicode.org/translation/number-patterns
		$sign = "";
		// Detect the sign from the text itself. intval() would miss values such
		// as "-0.5" or "-0", and the "-" would then be counted as a digit of the
		// integer part by the substr() arithmetic below.
		if ( substr( (string)$number, 0, 1 ) === '-' ) {
			// For negative numbers apply the algorithm like positive number and add sign.
			$sign = "-";
			$number = substr( $number, 1 );
		}
		$integerPart = array();
		$decimalPart = array();
		// Each run of '#' in the pattern is one group size; the rightmost run
		// applies to the least significant digits.
		$numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches );
		preg_match( "/\d+/", $number, $integerPart );
		preg_match( "/\.\d*/", $number, $decimalPart );
		$groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0] : "";
		if ( $groupedNumber === $number ) {
			// the string does not have any number part. Eg: .12345
			return $sign . $groupedNumber;
		}
		// Walk the integer part from right to left, one group at a time.
		$start = $end = strlen( $integerPart[0] );
		while ( $start > 0 ) {
			$match = $matches[0][$numMatches - 1];
			$matchLen = strlen( $match );
			$start = $end - $matchLen;
			if ( $start < 0 ) {
				$start = 0;
			}
			$groupedNumber = substr( $number, $start, $end - $start ) . $groupedNumber;
			$end = $start;
			if ( $numMatches > 1 ) {
				// use the last pattern for the rest of the number
				$numMatches--;
			}
			if ( $start > 0 ) {
				$groupedNumber = "," . $groupedNumber;
			}
		}
		return $sign . $groupedNumber;
	}
}
3302
/**
 * Get the 'digitGroupingPattern' localisation item for this language.
 *
 * @return string
 */
function digitGroupingPattern() {
	$pattern = self::$dataCache->getItem( $this->mCode, 'digitGroupingPattern' );
	return $pattern;
}
3309
/**
 * Get the 'digitTransformTable' localisation item for this language.
 *
 * @return array
 */
function digitTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
	return $table;
}
3316
/**
 * Get the 'separatorTransformTable' localisation item for this language.
 *
 * @return array
 */
function separatorTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
	return $table;
}
3323
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with the 'and' message followed by
 * the 'word-separator' message.
 * NOTE: This function will only work with standard numeric array keys (0, 1, 2…)
 * because elements are read by index.
 *
 * @param string[] $l
 * @return string Empty string for an empty list
 */
function listToText( array $l ) {
	$last = count( $l ) - 1;
	if ( $last < 0 ) {
		return '';
	}
	if ( $last == 0 ) {
		// A single element needs no separators at all.
		return $l[$last];
	}

	$and = $this->getMessageFromDB( 'and' );
	$space = $this->getMessageFromDB( 'word-separator' );
	$text = $l[$last - 1] . $and . $space . $l[$last];

	if ( $last > 1 ) {
		// Prepend the remaining elements, right to left, separated by commas.
		$comma = $this->getMessageFromDB( 'comma-separator' );
		for ( $i = $last - 2; $i >= 0; $i-- ) {
			$text = $l[$i] . $comma . $text;
		}
	}
	return $text;
}
3355
/**
 * Join a list of strings with this language's 'comma-separator' message
 * (HTML-escaped).
 *
 * @param string[] $list Array of strings to put in a comma list
 * @return string
 */
function commaList( array $list ) {
	$separator = wfMessage( 'comma-separator' )->inLanguage( $this )->escaped();
	return implode( $separator, $list );
}
3368
/**
 * Join a list of strings with this language's 'semicolon-separator' message
 * (HTML-escaped).
 *
 * @param string[] $list Array of strings to put in a semicolon list
 * @return string
 */
function semicolonList( array $list ) {
	$separator = wfMessage( 'semicolon-separator' )->inLanguage( $this )->escaped();
	return implode( $separator, $list );
}
3381
/**
 * Same as commaList(), but joins with the 'pipe-separator' message instead.
 *
 * @param string[] $list Array of strings to put in a pipe list
 * @return string
 */
function pipeList( array $list ) {
	$separator = wfMessage( 'pipe-separator' )->inLanguage( $this )->escaped();
	return implode( $separator, $list );
}
3393
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning
 *
 * @param string $string String to truncate
 * @param int $length Maximum length (including ellipses)
 * @param string $ellipsis String to append to the truncated text; the
 *   default '...' is replaced by the localised 'ellipsis' message
 * @param bool $adjustLength Subtract length of ellipsis from $length.
 *   $adjustLength was introduced in 1.18, before that behaved as if false.
 * @return string
 */
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
	}

	# Check if there is no need to truncate
	if ( $length == 0 ) {
		return $ellipsis; // convention
	}
	$limit = abs( $length );
	if ( strlen( $string ) <= $limit ) {
		return $string; // no need to truncate
	}

	$ellipsisBytes = strlen( $ellipsis );
	if ( $adjustLength && $ellipsisBytes >= $limit ) {
		# The ellipsis alone fills the budget, so $adjustLength cannot be applied.
		$truncated = $ellipsis; // this can be slightly unexpected
	} else {
		# Otherwise, truncate and add ellipsis...
		$reserved = $adjustLength ? $ellipsisBytes : 0;
		if ( $length > 0 ) {
			// Keep the head: xyz...
			$head = substr( $string, 0, $length - $reserved );
			$truncated = rtrim( $this->removeBadCharLast( $head ) ) . $ellipsis;
		} else {
			// Keep the tail: ...xyz
			$tail = substr( $string, $length + $reserved );
			$truncated = $ellipsis . ltrim( $this->removeBadCharFirst( $tail ) );
		}
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
	# This check is *not* redundant if $adjustLength, due to the single case where
	# LEN($ellipsis) > ABS($limit arg); the original could be shorter than the result.
	return strlen( $truncated ) < strlen( $string ) ? $truncated : $string;
}
3452
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param string $string
 * @return string
 */
protected function removeBadCharLast( $string ) {
	if ( $string != '' ) {
		$char = ord( $string[strlen( $string ) - 1] );
		$m = array();
		if ( $char >= 0xc0 ) {
			# We got the first byte only of a multibyte char; remove it.
			$string = substr( $string, 0, -1 );
		} elseif ( $char >= 0x80 &&
			// The 's' modifier lets (.*) span newlines; without it the pattern
			// never matches multi-line input and the broken bytes are kept.
			preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
				'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m )
		) {
			# We chopped in the middle of a character; remove it
			$string = $m[1];
		}
	}
	return $string;
}
3477
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param string $string
 * @return string
 */
protected function removeBadCharFirst( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$lead = ord( $string[0] );
	// Bytes 0x80-0xbf are continuation bytes; a string must not start with one.
	$startsMidChar = $lead >= 0x80 && $lead < 0xc0;
	if ( !$startsMidChar ) {
		return $string;
	}

	# We chopped in the middle of a character; remove the whole thing
	return preg_replace( '/^[\x80-\xbf]+/', '', $string );
}
3495
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
 * Also, this will not detect things like "display:none" CSS.
 *
 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
 *
 * @param string $text HTML string to truncate
 * @param int $length (zero/positive) Maximum length (including ellipses)
 * @param string $ellipsis String to append to the truncated text; the
 *   default '...' is replaced by the localised 'ellipsis' message
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
	}
	# Check if there is clearly no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format (convention)
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML (short-circuit)
	}

	$dispLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = ''; // accumulated tag name, accumulated result string
	$openTags = array(); // open tag stack
	$maybeState = null; // possible truncation state

	$textLen = strlen( $text );
	$neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
	for ( $pos = 0; true; ++$pos ) {
		# Consider truncation once the display length has reached the maximum.
		# We check if $dispLen > 0 to grab tags for the $neLength = 0 case.
		# Check that we're not in the middle of a bracket/entity...
		if ( $dispLen && $dispLen >= $neLength && $bracketState == 0 && !$entityState ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless there turn out to
				# be so few remaining characters that truncation is not necessary.
				if ( !$maybeState ) { // already saved? ($neLength = 0 case)
					$maybeState = array( $ret, $openTags ); // save state
				}
			} elseif ( $dispLen > $length && $dispLen > strlen( $ellipsis ) ) {
				# String in fact does need truncation, the truncation point was OK.
				list( $ret, $openTags ) = $maybeState; // reload state
				$ret = $this->removeBadCharLast( $ret ); // multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
		if ( $pos >= $textLen ) {
			break; // extra iteration just for above checks
		}

		# Read the next char...
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for slash)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$dispLen++; // entity is one displayed char
				}
			} else {
				if ( $neLength == 0 && !$maybeState ) {
					// Save state without $ch. We want to *hit* the first
					// display char (to get tags) but not *use* it if truncating.
					$maybeState = array( substr( $ret, 0, -1 ), $openTags );
				}
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$dispLen++; // this char is displayed
					// Add the next $max display text chars after this in one swoop...
					$max = ( $testingEllipsis ? $length : $neLength ) - $dispLen;
					$skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
					$dispLen += $skipped;
					$pos += $skipped;
				}
			}
		}
	}
	// Close the last tag if left unclosed by bad HTML.
	// Argument order must match truncate_endBracket( &$tag, $tagType, $lastCh,
	// &$openTags ): the tag type comes before the last character read.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
3617
/**
 * truncateHtml() helper function
 * like strcspn() but adds the skipped chars to $ret
 *
 * @param string &$ret String the skipped characters are appended to
 * @param string $text Text being scanned
 * @param string $search Characters that stop the scan
 * @param int $start Offset in $text at which scanning starts
 * @param null|int $len Maximum number of bytes to skip; null means no
 *   limit, negative values are treated as 0
 * @return int Number of bytes skipped (and appended to $ret)
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
	if ( $len !== null && $len < 0 ) {
		$len = 0; // sanity
	}
	$skipCount = 0;
	if ( $start < strlen( $text ) ) {
		// A negative length does not mean "no limit" for strcspn(): it stops
		// that many bytes before the end of the subject. Omit the argument
		// instead so the final byte can be skipped as well.
		$skipCount = ( $len === null )
			? strcspn( $text, $search, $start )
			: strcspn( $text, $search, $start, $len );
		$ret .= substr( $text, $start, $skipCount );
	}
	return $skipCount;
}
3642
/**
 * truncateHtml() helper function
 * (a) push or pop $tag from $openTags as needed
 * (b) clear $tag value
 *
 * Nothing happens when $tag is empty after stripping leading whitespace.
 * A closing tag is only popped when it matches the top of the stack.
 *
 * @param string &$tag Current HTML tag name we are looking at
 * @param int $tagType (0-open tag, 1-close tag)
 * @param string $lastCh Character before the '>' that ended this tag
 * @param array &$openTags Open tag stack (not accounting for $tag)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return;
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		// tag opened (didn't close itself)
		$openTags[] = $tag;
	} elseif ( $tagType == 1 && $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
		// tag closed
		array_pop( $openTags );
	}
	$tag = '';
}
3665
/**
 * Grammatical transformations, needed for inflected languages
 * Invoked by putting {{grammar:case|word}} in a message
 *
 * This base implementation only consults $wgGrammarForms for an entry
 * under this language's code, the case and the word; when there is none
 * the word is returned unchanged.
 *
 * @param string $word
 * @param string $case
 * @return string
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	$langCode = $this->getCode();
	return isset( $wgGrammarForms[$langCode][$case][$word] )
		? $wgGrammarForms[$langCode][$case][$word]
		: $word;
}
/**
 * Get the grammar forms configured in $wgGrammarForms for this language's code
 *
 * @return array Array of grammar forms; empty when none are configured or
 *   the configured value is not an array
 * @since 1.20
 */
function getGrammarForms() {
	global $wgGrammarForms;

	$langCode = $this->getCode();
	$configured = isset( $wgGrammarForms[$langCode] ) && is_array( $wgGrammarForms[$langCode] );

	return $configured ? $wgGrammarForms[$langCode] : array();
}
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|unknown}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 *
 * If no forms are given, an empty string is returned. If only one form is
 * given, it will be returned unconditionally. These details are implied by
 * the caller and cannot be overridden in subclasses.
 *
 * If three forms are given, the default is to use the third (unknown) form.
 * If fewer than three forms are given, the default is to use the first (masculine) form.
 * These details can be overridden in subclasses.
 *
 * @param string $gender 'male', 'female', or anything else for the default
 * @param array $forms
 *
 * @return string
 */
function gender( $gender, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	// Guarantee that indexes 0 and 1 exist before picking one.
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $gender === 'male' ) {
		$index = 0;
	} elseif ( $gender === 'female' ) {
		$index = 1;
	} else {
		$index = isset( $forms[2] ) ? 2 : 0;
	}
	return $forms[$index];
}
3729
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 form of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param int $count Non-localized number
 * @param array $forms Different plural forms
 * @return string Correct form of plural for $count in this language
 */
function convertPlural( $count, $forms ) {
	// Handle explicit n=pluralform cases
	$remaining = $this->handleExplicitPluralForms( $count, $forms );
	if ( is_string( $remaining ) ) {
		return $remaining;
	}

	$available = count( $remaining );
	if ( !$available ) {
		return '';
	}

	// Fall back to the last supplied form when the rule index is out of range.
	$index = min( $this->getPluralRuleIndexNumber( $count ), $available - 1 );
	return $remaining[$index];
}
3759
/**
 * Handles explicit plural forms for Language::convertPlural()
 *
 * In {{PLURAL:$1|0=nothing|one|many}}, 0=nothing will be returned if $1 equals zero.
 * If an explicitly defined plural form matches the $count, then
 * string value returned, otherwise array returned for further consideration
 * by CLDR rules or overridden convertPlural().
 *
 * Only forms that *start* with "<digits>=" are treated as explicit forms;
 * those that do not match $count are removed from the returned array.
 *
 * @since 1.23
 *
 * @param int $count non-localized number
 * @param array $forms different plural forms
 *
 * @return array|string
 */
protected function handleExplicitPluralForms( $count, array $forms ) {
	foreach ( $forms as $index => $form ) {
		// Anchored on purpose: an ordinary form that merely contains
		// "digit=" somewhere (e.g. "page 1=x") must not be discarded.
		if ( preg_match( '/^\d+=/', $form ) ) {
			$pos = strpos( $form, '=' );
			if ( substr( $form, 0, $pos ) === (string)$count ) {
				return substr( $form, $pos + 1 );
			}
			unset( $forms[$index] );
		}
	}
	// Reindex so the caller can address the remaining forms by rule index.
	return array_values( $forms );
}
3787
/**
 * Pads the array of forms to the requested amount of forms by repeatedly
 * appending a copy of the element at index (count - 1).
 *
 * No validation of $forms is performed here.
 *
 * @param array $forms Array of forms given to convertPlural
 * @param int $count How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	for ( $have = count( $forms ); $have < $count; $have++ ) {
		$forms[] = $forms[$have - 1];
	}
	return $forms;
}
3802
/**
 * @todo Maybe translate block durations. Note that this function is somewhat misnamed: it
 *   deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
 *   (which is an absolute timestamp). Please note: do NOT add this blindly, as it is used
 *   on old expiry lengths recorded in log entries. You'd need to provide the start date to
 *   match up with it.
 *
 * Lookup order: an exact match among the suggested durations, then the
 * synonyms for "infinite", then strtotime() parsing as a last resort.
 * Labels taken from the suggested durations are HTML-escaped; the other
 * return paths pass their value on as-is.
 *
 * @param string $str The validated block duration in English
 * @return string Somehow translated block duration
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$suggested = SpecialBlock::getSuggestedDurations( $this );
	foreach ( $suggested as $label => $value ) {
		if ( strcmp( $str, $value ) == 0 ) {
			return htmlspecialchars( trim( $label ) );
		}
	}

	// Since usually only infinite or indefinite is only on list, so try
	// equivalents if still here.
	$indefs = array( 'infinite', 'infinity', 'indefinite' );
	if ( in_array( $str, $indefs ) ) {
		foreach ( $indefs as $synonym ) {
			$label = array_search( $synonym, $suggested, true );
			if ( $label !== false ) {
				return htmlspecialchars( trim( $label ) );
			}
		}
	}

	// If all else fails, return a standard duration or timestamp description.
	$time = strtotime( $str, 0 );
	if ( $time === false ) {
		// Unknown format. Return it as-is in case.
		return $str;
	}
	if ( $time !== strtotime( $str, 1 ) ) {
		// The result moves with the base time, so it's a relative timestamp.
		// $time is relative to 0 so it's a duration length.
		return $this->formatDuration( $time );
	}

	// It's an absolute timestamp.
	// wfTimestamp() handles 0 as current time instead of epoch.
	return $this->timeanddate( $time === 0 ? '19700101000000' : $time );
}
3850
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation returns the text untouched.
 *
 * @param string $text
 * @return string
 */
public function segmentForDiff( $text ) {
	$segmented = $text;
	return $segmented;
}
3861
/**
 * Counterpart of segmentForDiff(): unsegment to show the result.
 * This base implementation returns the text untouched.
 *
 * @param string $text
 * @return string
 */
public function unsegmentForDiff( $text ) {
	$unsegmented = $text;
	return $unsegmented;
}
3871
/**
 * Return the converter object held in mConverter for this Language
 *
 * @since 1.19
 * @return LanguageConverter
 */
public function getConverter() {
	$converter = $this->mConverter;
	return $converter;
}
3881
/**
 * Convert text to all supported variants; delegates to the converter.
 *
 * @param string $text
 * @return array
 */
public function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
3891
/**
 * Convert text to different variants of a language; delegates to the converter.
 *
 * @param string $text
 * @return string
 */
public function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
3901
/**
 * Convert a Title object to a string in the preferred variant;
 * delegates to the converter.
 *
 * @param Title $title
 * @return string
 */
public function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
3911
/**
 * Convert a namespace index to a string in the preferred variant;
 * delegates to the converter.
 *
 * @param int $ns
 * @return string
 */
public function convertNamespace( $ns ) {
	$converter = $this->mConverter;
	return $converter->convertNamespace( $ns );
}
3921
/**
 * Check if this is a language with variants, i.e. whether getVariants()
 * lists more than one entry.
 *
 * @return bool
 */
public function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
3930
/**
 * Check if the language has the specific variant, judged by the
 * truthiness of the converter's validateVariant() result.
 *
 * @since 1.19
 * @param string $variant
 * @return bool
 */
public function hasVariant( $variant ) {
	$validated = $this->mConverter->validateVariant( $variant );
	return (bool)$validated;
}
3941
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion;
 * delegates to the converter.
 *
 * @param string $text
 * @return string
 * @deprecated since 1.22 is no longer used
 */
public function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
3952
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * $isTitle is forwarded to convert() as a second argument; note that
 * convert() as declared in this class only takes $text.
 *
 * @param string $text Text to be converted
 * @param bool $isTitle Whether this conversion is for the article title
 * @return string
 * @todo this should get integrated somewhere sane
 */
public function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
3963
/**
 * Convert a category key; delegates to the converter.
 *
 * @param string $key
 * @return string
 */
public function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
3971
/**
 * Get the list of variants supported by this language, as reported by
 * its converter. See sample implementation in LanguageZh.php
 *
 * @return array An array of language codes
 */
public function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
3981
/**
 * Get the preferred variant, as reported by the converter.
 *
 * @return string
 */
public function getPreferredVariant() {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
3988
/**
 * Get the default variant, as reported by the converter.
 *
 * @return string
 */
public function getDefaultVariant() {
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
3995
/**
 * Get the URL variant, as reported by the converter.
 *
 * @return string
 */
public function getURLVariant() {
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
4002
/**
 * If a language supports multiple variants, it is
 * possible that non-existing link in one variant
 * actually exists in another variant. This function
 * tries to find it by delegating to the converter.
 * See e.g. LanguageZh.php
 *
 * @param string &$link The name of the link
 * @param Title &$nt The title object of the link
 * @param bool $ignoreOtherCond To disable other conditions when
 *   we need to transclude a template or update a category's link
 * @return null The input parameters may be modified upon return
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
4018
/**
 * Returns language specific options used by User::getPageRenderHash()
 * for example, the preferred language variant. The value comes from
 * the converter.
 *
 * @return string
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
4028
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as provided by the converter.
 *
 * @return string
 */
public function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
4039
/**
 * Prepare external link text for conversion. When the text is
 * a URL, it shouldn't be converted, and it'll be wrapped in
 * the "raw" tag (-{R| }-) to prevent conversion.
 *
 * This function is called "markNoConversion" for historical
 * reasons.
 *
 * @param string $text Text to be used for external link
 * @param bool $noParse Wrap it without confirming it's a real URL first
 * @return string The tagged text, or $text unchanged when it is not
 *   recognised as a URL and $noParse is false
 */
public function markNoConversion( $text, $noParse = false ) {
	// Excluding protocol-relative URLs may avoid many false positives.
	$shouldWrap = $noParse
		|| preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text );

	if ( !$shouldWrap ) {
		return $text;
	}
	return $this->mConverter->markNoConversion( $text );
}
4060
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 * Read from the 'linkTrail' localisation item.
 *
 * @return string
 */
public function linkTrail() {
	$regex = self::$dataCache->getItem( $this->mCode, 'linkTrail' );
	return $regex;
}
4070
/**
 * A regular expression character set to match legal word-prefixing
 * characters which should be merged onto a link of the form foo[[bar]].
 * Read from the 'linkPrefixCharset' localisation item.
 *
 * @return string
 */
public function linkPrefixCharset() {
	$charset = self::$dataCache->getItem( $this->mCode, 'linkPrefixCharset' );
	return $charset;
}
4080
/**
 * Get the Language object itself.
 *
 * @return Language This very instance
 */
function getLangObj() {
	return $this;
}
4087
/**
 * Get the "parent" language which has a converter to convert a "compatible" language
 * (in another variant) to this language (eg. zh for zh-cn, but not en for en-gb).
 *
 * The result is cached in mParentLanguage, where false means "not yet
 * determined" and null means "no parent".
 *
 * @return Language|null
 * @since 1.22
 */
public function getParentLanguage() {
	if ( $this->mParentLanguage !== false ) {
		return $this->mParentLanguage;
	}

	$parent = null;
	// The candidate parent is named by the part of the code before the first dash.
	$pieces = explode( '-', $this->getCode() );
	$baseCode = $pieces[0];
	if ( in_array( $baseCode, LanguageConverter::$languagesWithVariants ) ) {
		$candidate = Language::factory( $baseCode );
		if ( $candidate->hasVariant( $this->getCode() ) ) {
			$parent = $candidate;
		}
	}

	$this->mParentLanguage = $parent;
	return $parent;
}
4115
/**
 * Get the RFC 3066 code for this language object
 *
 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
 * htmlspecialchars() or similar
 *
 * @return string
 */
public function getCode() {
	$code = $this->mCode;
	return $code;
}
4127
/**
 * Get the code in Bcp47 format which we can use
 * inside of html lang="" tags. The value is computed with wfBCP47()
 * on first use and cached in mHtmlCode.
 *
 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
 * htmlspecialchars() or similar.
 *
 * @since 1.19
 * @return string
 */
public function getHtmlCode() {
	if ( $this->mHtmlCode === null ) {
		$this->mHtmlCode = wfBCP47( $this->getCode() );
	}
	return $this->mHtmlCode;
}
4144
/**
 * Change the language code of this object and reset the values that
 * were derived from the previous code.
 *
 * @param string $code
 */
public function setCode( $code ) {
	// Ensure we don't leave incorrect cached data lying around
	$this->mParentLanguage = false;
	$this->mHtmlCode = null;

	$this->mCode = $code;
}
4154
/**
 * Get the name of a file for a certain language code
 *
 * The code is mangled by upper-casing its first character and replacing
 * dashes with underscores.
 *
 * @param string $prefix Prepend this to the filename
 * @param string $code Language code
 * @param string $suffix Append this to the filename
 * @throws MWException When $code is not a valid built-in code
 * @return string $prefix . $mangledCode . $suffix
 */
public static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
	if ( !self::isValidBuiltInCode( $code ) ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	$mangledCode = str_replace( '-', '_', ucfirst( $code ) );
	return $prefix . $mangledCode . $suffix;
}
4170
/**
 * Extract the language code from a file name. Inverse of getFileName().
 *
 * The part between $prefix and $suffix must be one upper-case ASCII letter
 * followed by at least one lower-case letter or underscore. It is
 * lower-cased and its underscores are turned back into hyphens.
 *
 * @param string $filename $prefix . $languageCode . $suffix
 * @param string $prefix Prefix before the language code
 * @param string $suffix Suffix after the language code
 * @return string|bool Language code, or false if the pattern is not found
 */
public static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';

	$matches = null;
	preg_match( $pattern, $filename, $matches );
	if ( count( $matches ) == 0 ) {
		return false;
	}

	$mangledCode = strtolower( $matches[1] );

	return str_replace( '_', '-', $mangledCode );
}
4187
/**
 * Get the path of the PHP messages file for a language code.
 *
 * The default path is "$IP/languages/messages/Messages<Code>.php"; handlers
 * of the 'Language::getMessagesFileName' hook receive the path by reference
 * and may replace it.
 *
 * @param string $code Language code
 * @throws MWException If getFileName() rejects $code
 * @return string
 */
public static function getMessagesFileName( $code ) {
	global $IP;

	$prefix = "$IP/languages/messages/Messages";
	$file = self::getFileName( $prefix, $code, '.php' );

	// $file is handed over by reference so that a handler can override it
	wfRunHooks( 'Language::getMessagesFileName', array( $code, &$file ) );

	return $file;
}
4198
/**
 * Get the path of the JSON messages file for a language code.
 *
 * Unlike getFileName(), the code is used verbatim (no case or hyphen
 * mangling): "$IP/languages/i18n/<code>.json".
 *
 * @param string $code Language code
 * @throws MWException If $code is rejected by isValidBuiltInCode()
 * @return string
 * @since 1.23
 */
public static function getJsonMessagesFileName( $code ) {
	global $IP;

	if ( self::isValidBuiltInCode( $code ) ) {
		return "$IP/languages/i18n/$code.json";
	}

	throw new MWException( "Invalid language code \"$code\"" );
}
4213
/**
 * Get the path of the language class file for a language code,
 * i.e. "$IP/languages/classes/Language<Code>.php".
 *
 * @param string $code Language code
 * @throws MWException If getFileName() rejects $code
 * @return string
 */
public static function getClassFileName( $code ) {
	global $IP;

	$prefix = "$IP/languages/classes/Language";

	return self::getFileName( $prefix, $code, '.php' );
}
4222
/**
 * Get the first fallback language for a given language code.
 *
 * @param string $code Language code
 *
 * @return bool|string First entry of the fallback chain, or false when
 *  $code is 'en' or is not a valid built-in code
 */
public static function getFallbackFor( $code ) {
	if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
		return false;
	}

	$chain = self::getFallbacksFor( $code );

	return array_shift( $chain );
}
4239
/**
 * Get the ordered list of fallback languages for a language code.
 *
 * The chain is read from the localisation cache item 'fallback' as a
 * comma-separated string; each entry is trimmed and 'en' is appended
 * unless it is already the last entry.
 *
 * @since 1.19
 * @param string $code Language code
 * @return array Empty when $code is 'en' or is not a valid built-in code
 */
public static function getFallbacksFor( $code ) {
	if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
		return array();
	}

	$rawChain = self::getLocalisationCache()->getItem( $code, 'fallback' );
	$chain = array_map( 'trim', explode( ',', $rawChain ) );

	// Every chain ends in English
	$lastIndex = count( $chain ) - 1;
	if ( $chain[$lastIndex] !== 'en' ) {
		$chain[] = 'en';
	}

	return $chain;
}
4259
/**
 * Get the fallback chain of a language together with the fallback chain
 * of the site language ($wgLanguageCode).
 *
 * The second list starts with the site language itself and omits every
 * code that already occurs in the first list. Because array_diff() is
 * used for that, the second list keeps its original keys and may therefore
 * have gaps in its numbering.
 *
 * @since 1.22
 * @param string $code Language code
 * @return array array( fallbacks, site fallbacks )
 */
public static function getFallbacksIncludingSiteLanguage( $code ) {
	global $wgLanguageCode;

	// Only a handful of distinct chains are expected per request, so they
	// are memoised in a static array keyed by both codes.
	$cacheKey = "{$code}-{$wgLanguageCode}";
	if ( array_key_exists( $cacheKey, self::$fallbackLanguageCache ) ) {
		return self::$fallbackLanguageCache[$cacheKey];
	}

	$ownChain = self::getFallbacksFor( $code );

	// Site chain: the site language first, then its own fallbacks
	$siteChain = self::getFallbacksFor( $wgLanguageCode );
	array_unshift( $siteChain, $wgLanguageCode );

	// Drop whatever the language's own chain already covers
	$siteChain = array_diff( $siteChain, $ownChain );

	self::$fallbackLanguageCache[$cacheKey] = array( $ownChain, $siteChain );

	return self::$fallbackLanguageCache[$cacheKey];
}
4289
/**
 * Get all messages for a given language.
 *
 * WARNING: this may take a long time. If you need all message *keys* but
 * the *contents* of only a few messages, consider getMessageKeysFor()
 * together with getMessageFor().
 *
 * @param string $code Language code
 *
 * @return array
 */
public static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();

	return $cache->getItem( $code, 'messages' );
}
4302
/**
 * Get a single message for a given language from the localisation cache.
 *
 * @param string $key Message key
 * @param string $code Language code
 *
 * @return string
 */
public static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();

	return $cache->getSubitem( $code, 'messages', $key );
}
4314
/**
 * Get all message keys for a given language. This is a faster alternative
 * to array_keys( Language::getMessagesFor( $code ) ).
 *
 * @since 1.19
 * @param string $code Language code
 * @return array of message keys (strings)
 */
public static function getMessageKeysFor( $code ) {
	$cache = self::getLocalisationCache();

	return $cache->getSubItemList( $code, 'messages' );
}
4326
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * If $talk contains no '$1' it is returned untouched. Otherwise '$1' is
 * replaced by $wgMetaNamespace, any {{grammar:...|...}} construct is
 * resolved through replaceGrammarInNamespace(), and finally spaces are
 * turned into underscores.
 *
 * @param string $talk Namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# Objects are passed by handle, so the callback needs plain $this;
	# taking a reference to $this is a PHP 4 leftover that newer PHP
	# versions no longer treat as a real reference.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );

	return str_replace( ' ', '_', $talk );
}
4347
/**
 * preg_replace_callback() handler used by fixVariableInNamespace() for
 * {{grammar:A|B}} constructs.
 *
 * @param array $m Regex match: $m[1] is the text before the pipe (A),
 *  $m[2] the text after it (B)
 * @return string Result of convertGrammar( B, A ), both trimmed
 */
function replaceGrammarInNamespace( $m ) {
	$afterPipe = trim( $m[2] );
	$beforePipe = trim( $m[1] );

	return $this->convertGrammar( $afterPipe, $beforePipe );
}
4355
/**
 * Load the upper-case and lower-case character maps from the precompiled
 * file 'Utf8Case.ser'.
 *
 * The maps are kept in static local variables, so the file is only read
 * on the first call.
 *
 * @throws MWException If the precompiled data cannot be loaded
 * @return array array( $wikiUpperChars, $wikiLowerChars )
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before bailing out so that the
		// wfProfileIn()/wfProfileOut() calls stay balanced
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );

	return array( $wikiUpperChars, $wikiLowerChars );
}
4377
/**
 * Decode an expiry (block, protection, etc) which has come from the DB.
 *
 * An empty expiry, or one equal to the database's "infinity" value, is
 * treated as infinite. The infinity value is fetched from the DB_SLAVE
 * connection once and kept in a static local.
 *
 * @todo FIXME: why are we returning DBMS-dependent strings???
 *
 * @param string $expiry Database expiry string
 * @param bool|int $format True to process using language functions, or a
 *  TS_ constant to return the expiry as a timestamp in that format
 * @return string For $format === true: the 'infiniteblock' message or the
 *  result of timeanddate(). Otherwise: the database infinity value or the
 *  result of wfTimestamp( $format, $expiry ).
 * @since 1.18
 */
public function formatExpiry( $expiry, $format = true ) {
	static $infinity;
	if ( $infinity === null ) {
		$infinity = wfGetDB( DB_SLAVE )->getInfinity();
	}

	$useLanguage = ( $format === true );

	if ( $expiry == '' || $expiry == $infinity ) {
		if ( $useLanguage ) {
			return $this->getMessageFromDB( 'infiniteblock' );
		}

		return $infinity;
	}

	if ( $useLanguage ) {
		// Second argument: adjust to the user preference timezone
		return $this->timeanddate( $expiry, true );
	}

	return wfTimestamp( $format, $expiry );
}
4405
/**
 * Format a duration given in seconds as localised text made of day, hour,
 * minute and second parts.
 *
 * The unit wording comes from the messages 'seconds', 'minutes', 'hours'
 * and 'days' or their '-abbrev' variants, rendered in this language; the
 * numbers go through formatNum(). Which parts appear depends on the size
 * of the (rounded) duration:
 *  - below 10 seconds: seconds with one decimal place
 *  - below 1 minute: whole seconds
 *  - below 1 hour: minutes and seconds
 *  - up to and including 2 days: hours, minutes and (unless avoided) seconds
 *  - above 2 days: days followed by a remainder, see $format
 *
 * @param int|float $seconds
 * @param array $format Optional
 *   If $format['avoid'] === 'avoidseconds': don't mention seconds if $seconds >= 1 hour.
 *   If $format['avoid'] === 'avoidminutes': don't mention seconds/minutes if $seconds > 48 hours
 *     (between 1 hour and 48 hours only the seconds are left out).
 *   If $format['noabbrevs'] is true: use 'seconds' and friends instead of 'seconds-abbrev'
 *     and friends.
 *   For backwards compatibility, $format may also be one of the strings 'avoidseconds'
 *     or 'avoidminutes'.
 * @return string
 */
function formatTimePeriod( $seconds, $format = array() ) {
	if ( !is_array( $format ) ) {
		$format = array( 'avoid' => $format ); // For backwards compatibility
	}
	// Fill in defaults so the lookups below never hit an undefined index
	if ( !isset( $format['avoid'] ) ) {
		$format['avoid'] = false;
	}
	if ( !isset( $format['noabbrevs' ] ) ) {
		$format['noabbrevs'] = false;
	}
	// One message object per unit, long or abbreviated form
	$secondsMsg = wfMessage(
		$format['noabbrevs'] ? 'seconds' : 'seconds-abbrev' )->inLanguage( $this );
	$minutesMsg = wfMessage(
		$format['noabbrevs'] ? 'minutes' : 'minutes-abbrev' )->inLanguage( $this );
	$hoursMsg = wfMessage(
		$format['noabbrevs'] ? 'hours' : 'hours-abbrev' )->inLanguage( $this );
	$daysMsg = wfMessage(
		$format['noabbrevs'] ? 'days' : 'days-abbrev' )->inLanguage( $this );

	if ( round( $seconds * 10 ) < 100 ) {
		// Less than 10 seconds after rounding to tenths: keep one decimal place
		$s = $this->formatNum( sprintf( "%.1f", round( $seconds * 10 ) / 10 ) );
		$s = $secondsMsg->params( $s )->text();
	} elseif ( round( $seconds ) < 60 ) {
		// Less than a minute: whole seconds only
		$s = $this->formatNum( round( $seconds ) );
		$s = $secondsMsg->params( $s )->text();
	} elseif ( round( $seconds ) < 3600 ) {
		// Less than an hour: minutes and seconds
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		// Rounding can push the seconds part up to 60; carry into minutes
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		$s = $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		$s .= ' ';
		$s .= $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
	} elseif ( round( $seconds ) <= 2 * 86400 ) {
		// Up to two days: hours, minutes and optionally seconds
		$hours = floor( $seconds / 3600 );
		$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
		$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
		// Carry rounded-up seconds into minutes, then minutes into hours
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		if ( $minutes == 60 ) {
			$minutes = 0;
			$hours++;
		}
		$s = $hoursMsg->params( $this->formatNum( $hours ) )->text();
		$s .= ' ';
		$s .= $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		// Either 'avoid' value suppresses the seconds here; they are dropped,
		// not rounded into the minutes.
		// NOTE(review): in_array() compares loosely, so a non-string 'avoid'
		// value such as boolean true would also match - confirm callers only
		// pass the documented strings.
		if ( !in_array( $format['avoid'], array( 'avoidseconds', 'avoidminutes' ) ) ) {
			$s .= ' ' . $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
		}
	} else {
		// More than two days
		$days = floor( $seconds / 86400 );
		if ( $format['avoid'] === 'avoidminutes' ) {
			// Days and hours only; the hours are rounded to the nearest hour
			$hours = round( ( $seconds - $days * 86400 ) / 3600 );
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$s = $daysMsg->params( $this->formatNum( $days ) )->text();
			$s .= ' ';
			$s .= $hoursMsg->params( $this->formatNum( $hours ) )->text();
		} elseif ( $format['avoid'] === 'avoidseconds' ) {
			// Days, hours and minutes; the minutes are rounded to the nearest minute
			$hours = floor( ( $seconds - $days * 86400 ) / 3600 );
			$minutes = round( ( $seconds - $days * 86400 - $hours * 3600 ) / 60 );
			// Carry rounded-up minutes into hours, then hours into days
			if ( $minutes == 60 ) {
				$minutes = 0;
				$hours++;
			}
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$s = $daysMsg->params( $this->formatNum( $days ) )->text();
			$s .= ' ';
			$s .= $hoursMsg->params( $this->formatNum( $hours ) )->text();
			$s .= ' ';
			$s .= $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		} else {
			// Nothing to avoid: days, then the remainder formatted recursively
			// by the branches above.
			// NOTE(review): nothing carries from the remainder back into $days,
			// so a fractional remainder that rounds up to a full day appears to
			// yield a 24-hour part instead of one more day - confirm.
			$s = $daysMsg->params( $this->formatNum( $days ) )->text();
			$s .= ' ';
			$s .= $this->formatTimePeriod( $seconds - $days * 86400, $format );
		}
	}
	return $s;
}
4506
/**
 * Format a bitrate for output, using an appropriate
 * unit (bps, kbps, Mbps, Gbps, Tbps, Pbps, Ebps, Zbps or Ybps) according to
 * the magnitude in question.
 *
 * This uses base 1000. For base 1024 use formatSize(), for another base
 * see formatComputingNumbers().
 *
 * @param int $bps Bitrate in bits per second
 * @return string
 */
function formatBitrate( $bps ) {
	// '$1' is filled with the magnitude prefix by formatComputingNumbers()
	$messageKey = "bitrate-$1bits";

	return $this->formatComputingNumbers( $bps, 1000, $messageKey );
}
4521
/**
 * Format a quantity with the largest fitting magnitude prefix.
 *
 * $size is divided by $boundary until it is smaller than $boundary or the
 * largest prefix is reached. The prefix name replaces '$1' in $messageKey
 * to select the message, and the scaled, formatted number replaces '$1' in
 * that message's text. Values up to the 'kilo' magnitude are rounded to
 * whole numbers, larger magnitudes to two decimal places. Sizes of zero or
 * less use the message with an empty prefix and are not scaled.
 *
 * @param int $size Size of the unit
 * @param int $boundary Size boundary (1000, or 1024 in most cases)
 * @param string $messageKey Message key to be used, with '$1' where the
 *  magnitude prefix belongs
 * @return string
 */
function formatComputingNumbers( $size, $boundary, $messageKey ) {
	if ( $size <= 0 ) {
		$number = $this->formatNum( $size );
		$text = $this->getMessageFromDB( str_replace( '$1', '', $messageKey ) );

		return str_replace( '$1', $number, $text );
	}

	// These names are parts of message keys, so the spelling must stay as is
	$prefixes = array( '', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zeta', 'yotta' );
	$lastIndex = count( $prefixes ) - 1;

	for ( $index = 0; $size >= $boundary && $index < $lastIndex; $index++ ) {
		$size /= $boundary;
	}

	// No decimal places for small magnitudes, two from 'mega' upwards
	$precision = ( $index > 1 ) ? 2 : 0;

	$msg = str_replace( '$1', $prefixes[$index], $messageKey );
	$size = round( $size, $precision );
	$text = $this->getMessageFromDB( $msg );

	return str_replace( '$1', $this->formatNum( $size ), $text );
}
4555
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB, GB, TB, PB, EB, ZB or YB) according to the magnitude in question.
 *
 * This method uses base 1024. For base 1000 use formatBitrate(), for
 * another base see formatComputingNumbers().
 *
 * @param int $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	// '$1' is filled with the magnitude prefix by formatComputingNumbers()
	$messageKey = "size-$1bytes";

	return $this->formatComputingNumbers( $size, 1024, $messageKey );
}
4569
/**
 * Make a list item, used by various special pages.
 *
 * When $details is non-empty, the result is $page followed by the
 * direction mark(s), the 'word-separator' message and $details wrapped in
 * the 'parentheses' message. $details is inserted as a raw parameter, so
 * it is not escaped here. A $details value that PHP treats as false
 * (including the string '0') yields just $page.
 *
 * @param string $page Page link
 * @param string $details Text between brackets
 * @param bool $oppositedm Add the direction mark opposite to your
 *  language, to display text properly
 * @return string
 */
function specialList( $page, $details, $oppositedm = true ) {
	$dirmark = '';
	if ( $oppositedm ) {
		$dirmark = $this->getDirMark( true );
	}
	$dirmark .= $this->getDirMark();

	$suffix = '';
	if ( $details ) {
		$separator = $this->getMessageFromDB( 'word-separator' );
		$bracketed = wfMessage( 'parentheses' )->rawParams( $details )
			->inLanguage( $this )->escaped();
		$suffix = $dirmark . $separator . $bracketed;
	}

	return $page . $suffix;
}
4586
/**
 * Generate (prev x| next x) (20|50|100...) type links for paging.
 *
 * The 'previous' entry is a link only when $offset is greater than zero,
 * and the 'next' entry is a link only when $atend is false; otherwise the
 * HTML-escaped message text is shown instead.
 *
 * @param Title $title Title object to link
 * @param int $offset
 * @param int $limit
 * @param array $query Optional URL query parameters
 * @param bool $atend Optional, whether this is the last page
 * @return string HTML built from the 'viewprevnext' message
 */
public function viewPrevNext( Title $title, $offset, $limit,
	array $query = array(), $atend = false
) {
	// @todo FIXME: Why on earth this needs one message for the text and another one for tooltip?

	# 'previous' entry: nothing to go back to when we are at the start
	$prevText = wfMessage( 'prevn' )->inLanguage( $this )->title( $title )
		->numParams( $limit )->text();
	$prevLink = ( $offset > 0 )
		? $this->numLink( $title, max( $offset - $limit, 0 ), $limit,
			$query, $prevText, 'prevn-title', 'mw-prevlink' )
		: htmlspecialchars( $prevText );

	# 'next' entry: nothing to advance to on the last page
	$nextText = wfMessage( 'nextn' )->inLanguage( $this )->title( $title )
		->numParams( $limit )->text();
	$nextLink = $atend
		? htmlspecialchars( $nextText )
		: $this->numLink( $title, $offset + $limit, $limit,
			$query, $nextText, 'nextn-title', 'mw-nextlink' );

	# One link per selectable number of items per page
	$pageSizeLinks = array();
	foreach ( array( 20, 50, 100, 250, 500 ) as $pageSize ) {
		$pageSizeLinks[] = $this->numLink( $title, $offset, $pageSize,
			$query, $this->formatNum( $pageSize ), 'shown-title', 'mw-numlink' );
	}

	$message = wfMessage( 'viewprevnext' )->inLanguage( $this )->title( $title );

	return $message->rawParams( $prevLink, $nextLink, $this->pipeList( $pageSizeLinks ) )
		->escaped();
}
4630
/**
 * Helper function for viewPrevNext() that generates a single link.
 *
 * 'limit' and 'offset' are set from the arguments and take precedence
 * over entries with the same keys in $query.
 *
 * @param Title $title Title object to link
 * @param int $offset
 * @param int $limit
 * @param array $query Extra query parameters
 * @param string $link Text to use for the link; will be escaped
 * @param string $tooltipMsg Name of the message to use as tooltip
 * @param string $class Value of the "class" attribute of the link
 * @return string HTML fragment
 */
private function numLink( Title $title, $offset, $limit, array $query, $link,
	$tooltipMsg, $class
) {
	// The array union keeps the left-hand entries on key collisions
	$query = array( 'limit' => $limit, 'offset' => $offset ) + $query;

	$tooltipMessage = wfMessage( $tooltipMsg )->inLanguage( $this )->title( $title );
	$tooltip = $tooltipMessage->numParams( $limit )->text();

	$attribs = array(
		'href' => $title->getLocalURL( $query ),
		'title' => $tooltip,
		'class' => $class,
	);

	return Html::element( 'a', $attribs, $link );
}
4653
/**
 * Get the conversion rule title, if any, as reported by this language's
 * converter object.
 *
 * @return string
 */
public function getConvRuleTitle() {
	$converter = $this->mConverter;

	return $converter->getConvRuleTitle();
}
4662
/**
 * Get the compiled plural rules for the language.
 *
 * If the localisation cache holds no compiled rules for this language, the
 * fallback languages are tried in order and the first non-empty result is
 * used.
 *
 * @since 1.20
 * @return array Associative array with plural form, and plural rule as key-value pairs.
 *  When neither the language nor any fallback has rules, the last (empty)
 *  value read from the cache is returned.
 */
public function getCompiledPluralRules() {
	$pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'compiledPluralRules' );
	if ( $pluralRules ) {
		return $pluralRules;
	}

	// The fallback chain is resolved only here, so the common case of a
	// language with its own rules does not pay for the lookup
	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'compiledPluralRules' );
		if ( $pluralRules ) {
			break;
		}
	}

	return $pluralRules;
}
4681
/**
 * Get the plural rules for the language.
 *
 * If the localisation cache holds no rules for this language, the fallback
 * languages are tried in order and the first non-empty result is used.
 *
 * @since 1.20
 * @return array Associative array with plural form number and plural rule as key-value pairs.
 *  When neither the language nor any fallback has rules, the last (empty)
 *  value read from the cache is returned.
 */
public function getPluralRules() {
	$pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRules' );
	if ( $pluralRules ) {
		return $pluralRules;
	}

	// The fallback chain is resolved only here, so the common case of a
	// language with its own rules does not pay for the lookup
	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRules' );
		if ( $pluralRules ) {
			break;
		}
	}

	return $pluralRules;
}
4700
/**
 * Get the plural rule types for the language.
 *
 * If the localisation cache holds no rule types for this language, the
 * fallback languages are tried in order and the first non-empty result is
 * used.
 *
 * @since 1.22
 * @return array Associative array with plural form number and plural rule type as key-value pairs.
 *  When neither the language nor any fallback has rule types, the last
 *  (empty) value read from the cache is returned.
 */
public function getPluralRuleTypes() {
	$pluralRuleTypes = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRuleTypes' );
	if ( $pluralRuleTypes ) {
		return $pluralRuleTypes;
	}

	// The fallback chain is resolved only here, so the common case of a
	// language with its own rule types does not pay for the lookup
	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRuleTypes = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRuleTypes' );
		if ( $pluralRuleTypes ) {
			break;
		}
	}

	return $pluralRuleTypes;
}
4719
/**
 * Find the index number of the plural rule appropriate for the given number.
 *
 * The compiled rules of this language are evaluated by
 * CLDRPluralRuleEvaluator::evaluateCompiled().
 *
 * @param int $number Number to find the plural form for
 * @return int The index number of the plural rule
 */
public function getPluralRuleIndexNumber( $number ) {
	$compiledRules = $this->getCompiledPluralRules();

	return CLDRPluralRuleEvaluator::evaluateCompiled( $number, $compiledRules );
}
4729
/**
 * Find the plural rule type appropriate for the given number.
 * For example, if the language is set to Arabic, getPluralRuleType( 5 )
 * should return 'few'.
 *
 * @since 1.22
 * @param int $number Number to find the plural rule type for
 * @return string The name of the plural rule type, e.g. one, two, few, many;
 *  'other' when no type is recorded for the matching rule index
 */
public function getPluralRuleType( $number ) {
	$index = $this->getPluralRuleIndexNumber( $number );
	$types = $this->getPluralRuleTypes();

	if ( !isset( $types[$index] ) ) {
		return 'other';
	}

	return $types[$index];
}
4746 }