3 * Internationalisation code.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
25 * @defgroup Language Language
28 if ( !defined( 'MEDIAWIKI' ) ) {
29 echo "This file is part of MediaWiki, it is not a valid entry point.\n";
34 global $wgLanguageNames;
35 require_once( __DIR__
. '/Names.php' );
37 if ( function_exists( 'mb_strtoupper' ) ) {
38 mb_internal_encoding( 'UTF-8' );
42 * a fake language converter
/**
 * Remember the language object that this no-op converter delegates to.
 * @param $langobj object providing getCode() and getFormattedNsText()
 */
public function __construct( $langobj ) {
	$this->mLang = $langobj;
}

/** @return array the text unchanged, keyed by the language's own code */
public function autoConvertToAllVariants( $text ) {
	return array( $this->mLang->getCode() => $text );
}

/** @return mixed $t, unchanged */
public function convert( $t ) {
	return $t;
}

/** @return mixed $text, unchanged; $variant is ignored */
public function convertTo( $text, $variant ) {
	return $text;
}

/** @return mixed result of $t->getPrefixedText() */
public function convertTitle( $t ) {
	return $t->getPrefixedText();
}

/** @return mixed the formatted namespace text as reported by the language object */
public function convertNamespace( $ns ) {
	return $this->mLang->getFormattedNsText( $ns );
}

/** @return array single-element list holding the language's own code */
public function getVariants() {
	return array( $this->mLang->getCode() );
}

/** @return mixed the language's own code */
public function getPreferredVariant() {
	return $this->mLang->getCode();
}

/** @return mixed the language's own code */
public function getDefaultVariant() {
	return $this->mLang->getCode();
}

/** @return string always the empty string */
public function getURLVariant() {
	return '';
}

/** @return bool always false */
public function getConvRuleTitle() {
	return false;
}

/** Intentionally does nothing; both reference arguments are left untouched. */
public function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
}

/** @return string always the empty string */
public function getExtraHashOptions() {
	return '';
}

/** @return string always the empty string */
public function getParsedTitle() {
	return '';
}

/** @return mixed $text, unchanged; $noParse is ignored */
public function markNoConversion( $text, $noParse = false ) {
	return $text;
}

/** @return mixed $key, unchanged */
public function convertCategoryKey( $key ) {
	return $key;
}

/** @return array same result as autoConvertToAllVariants() */
public function convertLinkToAllVariants( $text ) {
	return $this->autoConvertToAllVariants( $text );
}

/** @return mixed $text, unchanged */
public function armourMath( $text ) {
	return $text;
}
73 * Internationalisation code
79 * @var LanguageConverter
83 public $mVariants, $mCode, $mLoaded = false;
84 public $mMagicExtensions = array(), $mMagicHookDone = false;
85 private $mHtmlCode = null;
87 public $dateFormatStrings = array();
88 public $mExtendedSpecialPageAliases;
90 protected $namespaceNames, $mNamespaceIds, $namespaceAliases;
93 * ReplacementArray object caches
95 public $transformData = array();
98 * @var LocalisationCache
100 static public $dataCache;
102 static public $mLangObjCache = array();
104 static public $mWeekdayMsgs = array(
105 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
109 static public $mWeekdayAbbrevMsgs = array(
110 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
113 static public $mMonthMsgs = array(
114 'january', 'february', 'march', 'april', 'may_long', 'june',
115 'july', 'august', 'september', 'october', 'november',
118 static public $mMonthGenMsgs = array(
119 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
120 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
123 static public $mMonthAbbrevMsgs = array(
124 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
125 'sep', 'oct', 'nov', 'dec'
128 static public $mIranianCalendarMonthMsgs = array(
129 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
130 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
131 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
132 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
135 static public $mHebrewCalendarMonthMsgs = array(
136 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
137 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
138 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
139 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
140 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
143 static public $mHebrewCalendarMonthGenMsgs = array(
144 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
145 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
146 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
147 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
148 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
151 static public $mHijriCalendarMonthMsgs = array(
152 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
153 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
154 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
155 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
162 static public $durationIntervals = array(
163 'millennia' => 31556952000,
164 'centuries' => 3155695200,
165 'decades' => 315569520,
166 'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 )
/**
 * Get a cached or new language object for a given language code
 *
 * Codes listed in $wgDummyLanguageCodes are mapped to their target first.
 * The returned object is moved to the front of self::$mLangObjCache and the
 * cache is trimmed to $wgLangObjCacheSize entries.
 *
 * @param $code String
 * @return Language
 */
static function factory( $code ) {
	global $wgDummyLanguageCodes, $wgLangObjCacheSize;

	if ( isset( $wgDummyLanguageCodes[$code] ) ) {
		$code = $wgDummyLanguageCodes[$code];
	}

	// get the language object to process
	$langObj = isset( self::$mLangObjCache[$code] )
		? self::$mLangObjCache[$code]
		: self::newFromCode( $code );

	// Put the language object up front in the cache. Array union is used
	// rather than array_merge() because array_merge() renumbers integer-like
	// keys, which would make an all-digit code impossible to find again.
	self::$mLangObjCache = array( $code => $langObj ) + self::$mLangObjCache;
	// get rid of the oldest ones in case we have an overflow
	self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true );

	return $langObj;
}
/**
 * Create a language object for a given language code
 *
 * Codes that are well-formed but not valid built-in codes get a plain
 * Language object. Otherwise the class for the code is used if it exists,
 * else the first fallback code whose class exists.
 *
 * @param $code String
 * @throws MWException if the code, or a code in its fallback sequence, is invalid,
 *   or if no class exists anywhere in the fallback sequence
 * @return Language
 */
protected static function newFromCode( $code ) {
	// Protect against path traversal below
	$isSafe = Language::isValidCode( $code )
		&& strcspn( $code, ":/\\\000" ) === strlen( $code );
	if ( !$isSafe ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	if ( !Language::isValidBuiltInCode( $code ) ) {
		// It's not possible to customise this code with class files, so
		// just return a Language object. This is to support uselang= hacks.
		$plain = new Language;
		$plain->setCode( $code );
		return $plain;
	}

	// Check if there is a language class for the code
	$ownClass = self::classFromCode( $code );
	self::preloadLanguageClass( $ownClass );
	if ( MWInit::classExists( $ownClass ) ) {
		return new $ownClass;
	}

	// Keep trying the fallback list until we find an existing class
	foreach ( Language::getFallbacksFor( $code ) as $fallbackCode ) {
		if ( !Language::isValidBuiltInCode( $fallbackCode ) ) {
			throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" );
		}

		$fallbackClass = self::classFromCode( $fallbackCode );
		self::preloadLanguageClass( $fallbackClass );
		if ( !MWInit::classExists( $fallbackClass ) ) {
			continue;
		}

		// Build the fallback language, but label it with the requested code
		$substitute = Language::newFromCode( $fallbackCode );
		$substitute->setCode( $code );
		return $substitute;
	}

	throw new MWException( "Invalid fallback sequence for language '$code'" );
}
/**
 * Returns true if a language code string is of a valid form, whether or
 * not it exists. This includes codes which are used solely for
 * customisation via the MediaWiki namespace.
 *
 * @param $code string
 * @return bool
 */
public static function isValidCode( $code ) {
	// People think language codes are html safe, so enforce it.
	// Ideally we should only allow a-zA-Z0-9-
	// but, .+ and other chars are often used for {{int:}} hacks
	// see bugs 37564, 37587, 36938
	$forbidden = ":/\\\000&<>'\"";
	if ( strcspn( $code, $forbidden ) !== strlen( $code ) ) {
		return false;
	}

	return !preg_match( Title::getTitleInvalidRegex(), $code );
}
/**
 * Returns true if a language code is of a valid form for the purposes of
 * internal customisation of MediaWiki, via Messages*.php.
 *
 * Only ASCII letters, digits and hyphens are accepted, and the string must
 * not be empty.
 *
 * @param $code string
 * @throws MWException if $code is not a string
 * @return bool
 */
public static function isValidBuiltInCode( $code ) {
	if ( !is_string( $code ) ) {
		$type = gettype( $code );
		$addmsg = ( $type === 'object' ) ? " of class " . get_class( $code ) : '';
		throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" );
	}

	// The D modifier makes "$" match only at the very end of the subject.
	// Without it "$" also matches before a trailing newline, so "en\n"
	// would be accepted as a valid code.
	return (bool)preg_match( '/^[a-z0-9-]+$/iD', $code );
}
/**
 * Map a language code to the name of its language class: 'en' maps to
 * 'Language'; any other code is upper-cased on its first character, has
 * '-' replaced by '_', and is prefixed with 'Language'.
 *
 * @param $code string
 * @return String Name of the language class
 */
public static function classFromCode( $code ) {
	return ( $code == 'en' )
		? 'Language'
		: 'Language' . str_replace( '-', '_', ucfirst( $code ) );
}
/**
 * Includes language class files
 *
 * Nothing is done for the base 'Language' class or when MW_COMPILED is
 * defined. Otherwise the class file under $IP/languages/classes is
 * included once, if it exists.
 *
 * @param $class string Name of the language class
 */
public static function preloadLanguageClass( $class ) {
	global $IP;

	if ( $class === 'Language' || defined( 'MW_COMPILED' ) ) {
		return;
	}

	if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
		include_once( "$IP/languages/classes/$class.php" );
	}
}
/**
 * Get the LocalisationCache instance
 *
 * On first use the instance is created from $wgLocalisationCacheConf,
 * whose 'class' entry names the class to instantiate.
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	global $wgLocalisationCacheConf;

	if ( self::$dataCache === null ) {
		$cacheClass = $wgLocalisationCacheConf['class'];
		self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	}

	return self::$dataCache;
}
/**
 * Install a FakeConverter, derive the language code from the class name,
 * and make sure the localisation cache exists.
 */
function __construct() {
	$this->mConverter = new FakeConverter( $this );

	// Set the code to the name of the descendant
	$ownClass = get_class( $this );
	$this->mCode = ( $ownClass == 'Language' )
		? 'en'
		// Strip the 8-character "Language" prefix from the class name
		: str_replace( '_', '-', strtolower( substr( $ownClass, 8 ) ) );

	self::getLocalisationCache();
}
/**
 * Reduce memory usage by unsetting every property visible from this scope.
 */
function __destruct() {
	foreach ( $this as $property => $ignored ) {
		unset( $this->$property );
	}
}
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 * The base implementation is empty.
 */
function initContLang() {
}
/**
 * Same as getFallbackFor for the current language.
 *
 * @deprecated in 1.19
 * @return mixed whatever self::getFallbackFor() returns for this code
 */
function getFallbackLanguageCode() {
	wfDeprecated( __METHOD__, '1.19' );

	$fallback = self::getFallbackFor( $this->mCode );
	return $fallback;
}
/**
 * Same as getFallbacksFor for the current language.
 *
 * @return mixed whatever self::getFallbacksFor() returns for this code
 */
function getFallbackLanguages() {
	$fallbacks = self::getFallbacksFor( $this->mCode );
	return $fallbacks;
}
/**
 * Get the 'bookstoreList' localisation item for this language.
 *
 * @return mixed value stored in the localisation cache
 */
function getBookstoreList() {
	$list = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $list;
}
/**
 * Get the namespace names for this language, indexed by namespace ID.
 *
 * The list is built once and cached on the object. It combines
 * $wgExtraNamespaces, the localised names and the canonical names (in that
 * order of precedence), applies the meta namespace settings, drops IDs that
 * are not canonical namespaces, sorts by ID and finally runs the
 * 'LanguageGetNamespaces' hook.
 *
 * @return array
 */
public function getNamespaces() {
	if ( !is_null( $this->namespaceNames ) ) {
		return $this->namespaceNames;
	}

	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$canonical = MWNamespace::getCanonicalNamespaces();

	// Array union: the leftmost operand wins on duplicate IDs
	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $canonical;

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	if ( $wgMetaNamespaceTalk ) {
		$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
	} else {
		$localTalk = $this->namespaceNames[NS_PROJECT_TALK];
		$this->namespaceNames[NS_PROJECT_TALK] = $this->fixVariableInNamespace( $localTalk );
	}

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( $this->namespaceNames as $id => $unused ) {
		if ( !isset( $canonical[$id] ) ) {
			unset( $this->namespaceNames[$id] );
		}
	}

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	wfRunHooks( 'LanguageGetNamespaces', array( &$this->namespaceNames ) );

	return $this->namespaceNames;
}
/**
 * Arbitrarily set all of the namespace names at once. Mainly used for testing.
 * The cached name-to-ID map is cleared as well.
 *
 * @param $namespaces Array of namespaces (id => name)
 */
public function setNamespaces( array $namespaces ) {
	$this->mNamespaceIds = null;
	$this->namespaceNames = $namespaces;
}
/**
 * Resets all of the namespace caches (names, IDs and aliases).
 * Mainly used for testing.
 */
public function resetNamespaces() {
	$this->namespaceNames = $this->mNamespaceIds = $this->namespaceAliases = null;
}
/**
 * A convenience function that returns the same thing as
 * getNamespaces() except with the array values changed to ' '
 * where it found '_', useful for producing output to be displayed
 * e.g. in <select> forms.
 *
 * @return array
 */
function getFormattedNamespaces() {
	// array_map() preserves keys when given a single array
	return array_map(
		function ( $name ) {
			return strtr( $name, '_', ' ' );
		},
		$this->getNamespaces()
	);
}
/**
 * Get a namespace value by key
 * <code>
 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 * echo $mw_ns; // prints 'MediaWiki'
 * </code>
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( !isset( $names[$index] ) ) {
		return false;
	}
	return $names[$index];
}
/**
 * A convenience function that returns the same thing as
 * getNsText() except with '_' changed to ' '.
 * <code>
 * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK );
 * echo $mw_ns; // prints 'MediaWiki talk'
 * </code>
 *
 * @param int $index The array key of the namespace to return
 * @return string Namespace name without underscores (empty string if namespace does not exist)
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
/**
 * Returns gender-dependent namespace alias if available.
 *
 * $wgExtraGenderNamespaces takes precedence over the language's own
 * 'namespaceGenderAliases'. When no alias is defined for the given index
 * and gender, the result of getNsText() is returned.
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key (male, female... )
 * @return mixed
 */
function getGenderNsText( $index, $gender ) {
	global $wgExtraGenderNamespaces;

	// Cast to array, as getNamespaceAliases() does for the same cache item,
	// so that a missing item cannot break the array union.
	$ns = $wgExtraGenderNamespaces +
		(array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );

	return isset( $ns[$index][$gender] ) ? $ns[$index][$gender] : $this->getNsText( $index );
}
/**
 * Whether this language distinguishes genders, for example in namespace names.
 *
 * @return bool
 */
function needsGenderDistinction() {
	global $wgExtraGenderNamespaces, $wgExtraNamespaces;
	if ( count( $wgExtraGenderNamespaces ) > 0 ) {
		// $wgExtraGenderNamespaces overrides everything
		return true;
	} elseif ( isset( $wgExtraNamespaces[NS_USER] ) && isset( $wgExtraNamespaces[NS_USER_TALK] ) ) {
		/// @todo There may be other gender namespace than NS_USER & NS_USER_TALK in the future
		// $wgExtraNamespaces overrides any gender aliases specified in i18n files
		return false;
	} else {
		// Check what is in i18n files. Cast to array, as getNamespaceAliases()
		// does for the same cache item, so count() always receives an array.
		$aliases = (array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
		return count( $aliases ) > 0;
	}
}
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$needle = $this->lc( $text );
	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}
	return false;
}
/**
 * Get the namespace aliases for this language as a map of alias => namespace ID.
 *
 * The map is built once and cached. It holds the language's
 * 'namespaceAliases' plus every gender-specific form from
 * $wgExtraGenderNamespaces and 'namespaceGenderAliases'.
 *
 * @return array
 */
function getNamespaceAliases() {
	if ( !is_null( $this->namespaceAliases ) ) {
		return $this->namespaceAliases;
	}

	$aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
	if ( !$aliases ) {
		$aliases = array();
	}
	// foreach iterates over a copy, so re-keying entries inside the loop is safe
	foreach ( $aliases as $alias => $nsId ) {
		if ( $nsId !== NS_PROJECT_TALK ) {
			continue;
		}
		unset( $aliases[$alias] );
		$aliases[$this->fixVariableInNamespace( $alias )] = $nsId;
	}

	global $wgExtraGenderNamespaces;
	$genderForms = $wgExtraGenderNamespaces +
		(array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	foreach ( $genderForms as $nsId => $forms ) {
		foreach ( $forms as $form ) {
			$aliases[$form] = $nsId;
		}
	}

	$this->namespaceAliases = $aliases;
	return $this->namespaceAliases;
}
/**
 * Get a map of lower-cased namespace name or alias => namespace ID.
 *
 * Built once and cached. Later sources overwrite earlier ones: namespace
 * names first, then this language's aliases, then $wgNamespaceAliases.
 *
 * @return array
 */
function getNamespaceIds() {
	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	global $wgNamespaceAliases;
	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();
	foreach ( $this->getNamespaces() as $nsId => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $nsId;
	}
	foreach ( $this->getNamespaceAliases() as $alias => $nsId ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
	}
	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $nsId ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
		}
	}

	return $this->mNamespaceIds;
}
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$needle = $this->lc( $text );

	$canonicalId = MWNamespace::getCanonicalIndex( $needle );
	if ( $canonicalId !== null ) {
		return $canonicalId;
	}

	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}
	return false;
}
/**
 * short names for language variants used for language conversion links.
 *
 * Preference order: the "variantname-xyz" message (when $usemsg is set and
 * the message exists), then the language name for the code, then the code
 * itself.
 *
 * @param $code String
 * @param $usemsg bool Use the "variantname-xyz" message if it exists
 * @return string
 */
function getVariantname( $code, $usemsg = true ) {
	$msgKey = "variantname-$code";
	if ( $usemsg && wfMessage( $msgKey )->exists() ) {
		return $this->getMessageFromDB( $msgKey );
	}

	$langName = self::fetchLanguageName( $code );
	# if it's defined as a language name, show that;
	# otherwise, output the language code
	return $langName ? $langName : $code;
}
/**
 * Build the prefixed title text of a special page, using the first
 * localised alias for $name when one is defined.
 *
 * @param $name string
 * @return string
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;

	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
/**
 * Get the localised quickbar setting labels, in a fixed order.
 *
 * @return array
 */
function getQuickbarSettings() {
	$keys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright',
		'qbsettings-directionality'
	);

	$labels = array();
	foreach ( $keys as $key ) {
		$labels[] = $this->getMessage( $key );
	}
	return $labels;
}
/**
 * Get the 'datePreferences' localisation item for this language.
 *
 * @return mixed value stored in the localisation cache
 */
function getDatePreferences() {
	$prefs = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $prefs;
}
/**
 * Get the 'dateFormats' localisation item for this language.
 *
 * @return mixed value stored in the localisation cache
 */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
/**
 * Get the default date format for this language. The special value
 * 'dmy or mdy' is resolved through $wgAmericanDates.
 *
 * @return array|string
 */
function getDefaultDateFormat() {
	$format = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $format !== 'dmy or mdy' ) {
		return $format;
	}

	global $wgAmericanDates;
	if ( $wgAmericanDates ) {
		return 'mdy';
	}
	return 'dmy';
}
/**
 * Get the 'datePreferenceMigrationMap' localisation item for this language.
 *
 * @return mixed value stored in the localisation cache
 */
function getDatePreferenceMigrationMap() {
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
/**
 * Look up one entry of the 'imageFiles' localisation item.
 *
 * @param $image mixed sub-item key
 * @return mixed value stored in the localisation cache
 */
function getImageFile( $image ) {
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
/**
 * Get the 'extraUserToggles' localisation item, always as an array.
 *
 * @return array
 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return (array)$toggles;
}
/**
 * Get the text of the "tog-<name>" message in this language.
 *
 * @param $tog string toggle name, appended to "tog-"
 * @return string
 */
function getUserToggle( $tog ) {
	$msgKey = "tog-$tog";
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get native language names, indexed by code.
 * Only those defined in MediaWiki, no other data like CLDR.
 * If $customisedOnly is true, only returns codes with a messages file
 *
 * @param $customisedOnly bool
 * @return array
 * @deprecated in 1.20, use fetchLanguageNames()
 */
public static function getLanguageNames( $customisedOnly = false ) {
	if ( $customisedOnly ) {
		$include = 'mwfile';
	} else {
		$include = 'mw';
	}
	return self::fetchLanguageNames( null, $include );
}
/**
 * Get translated language names. This is done on best effort and
 * by default this is exactly the same as Language::getLanguageNames.
 * The CLDR extension provides translated names.
 *
 * @param $code String Language code.
 * @return Array language code => language name
 * @deprecated in 1.20, use fetchLanguageNames()
 */
public static function getTranslatedLanguageNames( $code ) {
	$names = self::fetchLanguageNames( $code, 'all' );
	return $names;
}
749 * Get an array of language names, indexed by code.
750 * @param $inLanguage null|string: Code of language in which to return the names
751 * Use null for autonyms (native names)
752 * @param $include string:
753 * 'all' all available languages
754 * 'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
755 * 'mwfile' only if the language is in 'mw' *and* has a message file
756 * @return array: language code => language name
759 public static function fetchLanguageNames( $inLanguage = null, $include = 'mw' ) {
760 global $wgExtraLanguageNames;
761 static $coreLanguageNames;
763 if ( $coreLanguageNames === null ) {
764 include( MWInit
::compiledPath( 'languages/Names.php' ) );
770 # TODO: also include when $inLanguage is null, when this code is more efficient
771 wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $inLanguage ) );
774 $mwNames = $wgExtraLanguageNames +
$coreLanguageNames;
775 foreach ( $mwNames as $mwCode => $mwName ) {
776 # - Prefer own MediaWiki native name when not using the hook
777 # - For other names just add if not added through the hook
778 if ( $mwCode === $inLanguage ||
!isset( $names[$mwCode] ) ) {
779 $names[$mwCode] = $mwName;
783 if ( $include === 'all' ) {
788 $coreCodes = array_keys( $mwNames );
789 foreach ( $coreCodes as $coreCode ) {
790 $returnMw[$coreCode] = $names[$coreCode];
793 if ( $include === 'mwfile' ) {
794 $namesMwFile = array();
795 # We do this using a foreach over the codes instead of a directory
796 # loop so that messages files in extensions will work correctly.
797 foreach ( $returnMw as $code => $value ) {
798 if ( is_readable( self
::getMessagesFileName( $code ) ) ) {
799 $namesMwFile[$code] = $names[$code];
804 # 'mw' option; default if it's not one of the other two options (all/mwfile)
/**
 * Get the name of a single language.
 *
 * @param $code string: The code of the language for which to get the name
 * @param $inLanguage null|string: Code of language in which to return the name (null for autonyms)
 * @param $include string: 'all', 'mw' or 'mwfile'; see fetchLanguageNames()
 * @return string: Language name or empty
 */
public static function fetchLanguageName( $code, $inLanguage = null, $include = 'all' ) {
	$names = self::fetchLanguageNames( $inLanguage, $include );
	if ( array_key_exists( $code, $names ) ) {
		return $names[$code];
	}
	return '';
}
/**
 * Get a message from the MediaWiki namespace.
 *
 * @param $msg String: message name
 * @return string text of the message in this language
 */
function getMessageFromDB( $msg ) {
	$message = wfMessage( $msg )->inLanguage( $this );
	return $message->text();
}
/**
 * Get the native language name of $code.
 * Only if defined in MediaWiki, no other data like CLDR.
 *
 * @param $code string
 * @return string
 * @deprecated in 1.20, use fetchLanguageName()
 */
function getLanguageName( $code ) {
	$name = self::fetchLanguageName( $code );
	return $name;
}
/**
 * Get the localised month name.
 *
 * @param $key mixed 1-based month number
 * @return string
 */
function getMonthName( $key ) {
	$msgKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get all twelve month names, indexed 1-12. Index 0 holds an empty
 * string so that the indexes line up with month numbers.
 *
 * @return array
 */
function getMonthNamesArray() {
	$names = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$names[] = $this->getMonthName( $month );
	}
	return $names;
}
/**
 * Get the localised genitive month name.
 *
 * @param $key mixed 1-based month number
 * @return string
 */
function getMonthNameGen( $key ) {
	$msgKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the localised month abbreviation.
 *
 * @param $key mixed 1-based month number
 * @return string
 */
function getMonthAbbreviation( $key ) {
	$msgKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get all twelve month abbreviations, indexed 1-12. Index 0 holds an
 * empty string so that the indexes line up with month numbers.
 *
 * @return array
 */
function getMonthAbbreviationsArray() {
	$abbreviations = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$abbreviations[] = $this->getMonthAbbreviation( $month );
	}
	return $abbreviations;
}
/**
 * Get the localised weekday name.
 *
 * @param $key mixed 1-based weekday number; 1 maps to the 'sunday' message
 * @return string
 */
function getWeekdayName( $key ) {
	$msgKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the localised weekday abbreviation.
 *
 * @param $key mixed 1-based weekday number; 1 maps to the 'sun' message
 * @return string
 */
function getWeekdayAbbreviation( $key ) {
	$msgKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the localised Iranian calendar month name.
 *
 * @param $key mixed 1-based month number
 * @return string
 */
function getIranianCalendarMonthName( $key ) {
	$msgKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the localised Hebrew calendar month name.
 *
 * @param $key mixed 1-based index into self::$mHebrewCalendarMonthMsgs
 * @return string
 */
function getHebrewCalendarMonthName( $key ) {
	$msgKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the localised genitive Hebrew calendar month name.
 *
 * @param $key mixed 1-based index into self::$mHebrewCalendarMonthGenMsgs
 * @return string
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$msgKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the localised Hijri calendar month name.
 *
 * @param $key mixed 1-based month number
 * @return string
 */
function getHijriCalendarMonthName( $key ) {
	$msgKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
936 * This is a workalike of PHP's date() function, but with better
937 * internationalisation, a reduced set of format characters, and a better
940 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
941 * PHP manual for definitions. There are a number of extensions, which
944 * xn Do not translate digits of the next numeric format character
945 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
946 * xr Use roman numerals for the next numeric format character
947 * xh Use hebrew numerals for the next numeric format character
949 * xg Genitive month name
951 * xij j (day number) in Iranian calendar
952 * xiF F (month name) in Iranian calendar
953 * xin n (month number) in Iranian calendar
954 * xiy y (two digit year) in Iranian calendar
955 * xiY Y (full year) in Iranian calendar
957 * xjj j (day number) in Hebrew calendar
958 * xjF F (month name) in Hebrew calendar
959 * xjt t (days in month) in Hebrew calendar
960 * xjx xg (genitive month name) in Hebrew calendar
961 * xjn n (month number) in Hebrew calendar
962 * xjY Y (full year) in Hebrew calendar
964 * xmj j (day number) in Hijri calendar
965 * xmF F (month name) in Hijri calendar
966 * xmn n (month number) in Hijri calendar
967 * xmY Y (full year) in Hijri calendar
969 * xkY Y (full year) in Thai solar calendar. Months and days are
970 * identical to the Gregorian calendar
971 * xoY Y (full year) in Minguo calendar or Juche year.
972 * Months and days are identical to the
974 * xtY Y (full year) in Japanese nengo. Months and days are
975 * identical to the Gregorian calendar
977 * Characters enclosed in double quotes will be considered literal (with
978 * the quotes themselves removed). Unmatched quotes will be considered
979 * literal quotes. Example:
981 * "The month is" F => The month is January
984 * Backslash escaping is also supported.
986 * Input timestamp is assumed to be pre-normalized to the desired local
989 * @param $format String
990 * @param $ts String: 14-character timestamp
993 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
997 function sprintfDate( $format, $ts ) {
1010 for ( $p = 0; $p < strlen( $format ); $p++
) {
1012 $code = $format[$p];
1013 if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
1014 $code .= $format[++
$p];
1017 if ( ( $code === 'xi' ||
$code == 'xj' ||
$code == 'xk' ||
$code == 'xm' ||
$code == 'xo' ||
$code == 'xt' ) && $p < strlen( $format ) - 1 ) {
1018 $code .= $format[++
$p];
1029 $rawToggle = !$rawToggle;
1038 $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
1041 if ( !$hebrew ) $hebrew = self
::tsToHebrew( $ts );
1042 $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
1045 $num = substr( $ts, 6, 2 );
1048 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
1049 $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) +
1 );
1052 $num = intval( substr( $ts, 6, 2 ) );
1056 $iranian = self
::tsToIranian( $ts );
1062 $hijri = self
::tsToHijri( $ts );
1068 $hebrew = self
::tsToHebrew( $ts );
1074 $unix = wfTimestamp( TS_UNIX
, $ts );
1076 $s .= $this->getWeekdayName( gmdate( 'w', $unix ) +
1 );
1080 $unix = wfTimestamp( TS_UNIX
, $ts );
1082 $w = gmdate( 'w', $unix );
1087 $unix = wfTimestamp( TS_UNIX
, $ts );
1089 $num = gmdate( 'w', $unix );
1093 $unix = wfTimestamp( TS_UNIX
, $ts );
1095 $num = gmdate( 'z', $unix );
1099 $unix = wfTimestamp( TS_UNIX
, $ts );
1101 $num = gmdate( 'W', $unix );
1104 $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
1108 $iranian = self
::tsToIranian( $ts );
1110 $s .= $this->getIranianCalendarMonthName( $iranian[1] );
1114 $hijri = self
::tsToHijri( $ts );
1116 $s .= $this->getHijriCalendarMonthName( $hijri[1] );
1120 $hebrew = self
::tsToHebrew( $ts );
1122 $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
1125 $num = substr( $ts, 4, 2 );
1128 $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
1131 $num = intval( substr( $ts, 4, 2 ) );
1135 $iranian = self
::tsToIranian( $ts );
1141 $hijri = self
::tsToHijri ( $ts );
1147 $hebrew = self
::tsToHebrew( $ts );
1153 $unix = wfTimestamp( TS_UNIX
, $ts );
1155 $num = gmdate( 't', $unix );
1159 $hebrew = self
::tsToHebrew( $ts );
1165 $unix = wfTimestamp( TS_UNIX
, $ts );
1167 $num = gmdate( 'L', $unix );
1171 $unix = wfTimestamp( TS_UNIX
, $ts );
1173 $num = gmdate( 'o', $unix );
1176 $num = substr( $ts, 0, 4 );
1180 $iranian = self
::tsToIranian( $ts );
1186 $hijri = self
::tsToHijri( $ts );
1192 $hebrew = self
::tsToHebrew( $ts );
1198 $thai = self
::tsToYear( $ts, 'thai' );
1204 $minguo = self
::tsToYear( $ts, 'minguo' );
1210 $tenno = self
::tsToYear( $ts, 'tenno' );
1215 $num = substr( $ts, 2, 2 );
1219 $iranian = self
::tsToIranian( $ts );
1221 $num = substr( $iranian[0], -2 );
1224 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'am' : 'pm';
1227 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'AM' : 'PM';
1230 $h = substr( $ts, 8, 2 );
1231 $num = $h %
12 ?
$h %
12 : 12;
1234 $num = intval( substr( $ts, 8, 2 ) );
1237 $h = substr( $ts, 8, 2 );
1238 $num = sprintf( '%02d', $h %
12 ?
$h %
12 : 12 );
1241 $num = substr( $ts, 8, 2 );
1244 $num = substr( $ts, 10, 2 );
1247 $num = substr( $ts, 12, 2 );
1251 $unix = wfTimestamp( TS_UNIX
, $ts );
1253 $s .= gmdate( 'c', $unix );
1257 $unix = wfTimestamp( TS_UNIX
, $ts );
1259 $s .= gmdate( 'r', $unix );
1263 $unix = wfTimestamp( TS_UNIX
, $ts );
1268 # Backslash escaping
1269 if ( $p < strlen( $format ) - 1 ) {
1270 $s .= $format[++
$p];
1277 if ( $p < strlen( $format ) - 1 ) {
1278 $endQuote = strpos( $format, '"', $p +
1 );
1279 if ( $endQuote === false ) {
1280 # No terminating quote, assume literal "
1283 $s .= substr( $format, $p +
1, $endQuote - $p - 1 );
1287 # Quote at end of string, assume literal "
1294 if ( $num !== false ) {
1295 if ( $rawToggle ||
$raw ) {
1298 } elseif ( $roman ) {
1299 $s .= Language
::romanNumeral( $num );
1301 } elseif ( $hebrewNum ) {
1302 $s .= self
::hebrewNumeral( $num );
1305 $s .= $this->formatNum( $num, true );
1312 private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1313 private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1316 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1317 * Gregorian dates to Iranian dates. Originally written in C, it
1318 * is released under the terms of GNU Lesser General Public
1319 * License. Conversion to PHP was performed by Niklas Laxström.
1321 * Link: http://www.farsiweb.info/jalali/jalali.c
1327 private static function tsToIranian( $ts ) {
1328 $gy = substr( $ts, 0, 4 ) -1600;
1329 $gm = substr( $ts, 4, 2 ) -1;
1330 $gd = substr( $ts, 6, 2 ) -1;
1332 # Days passed from the beginning (including leap years)
1334 +
floor( ( $gy +
3 ) / 4 )
1335 - floor( ( $gy +
99 ) / 100 )
1336 +
floor( ( $gy +
399 ) / 400 );
1338 // Add days of the past months of this year
1339 for ( $i = 0; $i < $gm; $i++
) {
1340 $gDayNo +
= self
::$GREG_DAYS[$i];
1344 if ( $gm > 1 && ( ( $gy %
4 === 0 && $gy %
100 !== 0 ||
( $gy %
400 == 0 ) ) ) ) {
1348 // Days passed in current month
1349 $gDayNo +
= (int)$gd;
1351 $jDayNo = $gDayNo - 79;
1353 $jNp = floor( $jDayNo / 12053 );
1356 $jy = 979 +
33 * $jNp +
4 * floor( $jDayNo / 1461 );
1359 if ( $jDayNo >= 366 ) {
1360 $jy +
= floor( ( $jDayNo - 1 ) / 365 );
1361 $jDayNo = floor( ( $jDayNo - 1 ) %
365 );
1364 for ( $i = 0; $i < 11 && $jDayNo >= self
::$IRANIAN_DAYS[$i]; $i++
) {
1365 $jDayNo -= self
::$IRANIAN_DAYS[$i];
1371 return array( $jy, $jm, $jd );
/**
 * Converting Gregorian dates to Hijri dates.
 *
 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
 *
 * @see http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return Array: Hijri year, month and day
 */
private static function tsToHijri( $ts ) {
	$zy = substr( $ts, 0, 4 );
	$zm = substr( $ts, 4, 2 );
	$zd = substr( $ts, 6, 2 );

	// Dates after 14 October 1582 use the Gregorian day-number formula,
	// earlier dates use the second formula
	$isGregorian = ( $zy > 1582 )
		|| ( ( $zy == 1582 ) && ( $zm > 10 ) )
		|| ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) );

	if ( $isGregorian ) {
		// -1 for January and February, 0 for every other month
		$shift = (int)( ( $zm - 14 ) / 12 );
		$zjd = (int)( ( 1461 * ( $zy + 4800 + $shift ) ) / 4 ) +
			(int)( ( 367 * ( $zm - 2 - 12 * ( $shift ) ) ) / 12 ) -
			(int)( ( 3 * (int)( ( ( $zy + 4900 + $shift ) / 100 ) ) ) / 4 ) +
			$zd - 32075;
	} else {
		$zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
			(int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
	}

	// Reduce the day number to a position inside a 10631-day block;
	// each block contributes 30 years to the result below
	$zl = $zjd - 1948440 + 10632;
	$zn = (int)( ( $zl - 1 ) / 10631 );
	$zl = $zl - 10631 * $zn + 354;
	$zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) )
		+ ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
	$zl = $zl
		- ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) )
		- ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) )
		+ 29;

	$hijriMonth = (int)( ( 24 * $zl ) / 709 );
	$hijriDay = $zl - (int)( ( 709 * $hijriMonth ) / 24 );
	$hijriYear = 30 * $zn + $zj - 30;

	return array( $hijriYear, $hijriMonth, $hijriDay );
}
1422 * Converting Gregorian dates to Hebrew dates.
1424 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1425 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1426 * to translate the relevant functions into PHP and release them under
1429 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1430 * and Adar II is 14. In a non-leap year, Adar is 6.
1436 private static function tsToHebrew( $ts ) {
1438 $year = substr( $ts, 0, 4 );
1439 $month = substr( $ts, 4, 2 );
1440 $day = substr( $ts, 6, 2 );
1442 # Calculate Hebrew year
1443 $hebrewYear = $year +
3760;
1445 # Month number when September = 1, August = 12
1447 if ( $month > 12 ) {
1454 # Calculate day of year from 1 September
1456 for ( $i = 1; $i < $month; $i++
) {
1460 # Check if the year is leap
1461 if ( $year %
400 == 0 ||
( $year %
4 == 0 && $year %
100 > 0 ) ) {
1464 } elseif ( $i == 8 ||
$i == 10 ||
$i == 1 ||
$i == 3 ) {
1471 # Calculate the start of the Hebrew year
1472 $start = self
::hebrewYearStart( $hebrewYear );
1474 # Calculate next year's start
1475 if ( $dayOfYear <= $start ) {
1476 # Day is before the start of the year - it is the previous year
1478 $nextStart = $start;
1482 # Add days since previous year's 1 September
1484 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1488 # Start of the new (previous) year
1489 $start = self
::hebrewYearStart( $hebrewYear );
1492 $nextStart = self
::hebrewYearStart( $hebrewYear +
1 );
1495 # Calculate Hebrew day of year
1496 $hebrewDayOfYear = $dayOfYear - $start;
1498 # Difference between year's days
1499 $diff = $nextStart - $start;
1500 # Add 12 (or 13 for leap years) days to ignore the difference between
1501 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1502 # difference is only about the year type
1503 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1509 # Check the year pattern, and is leap year
1510 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1511 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1512 # and non-leap years
1513 $yearPattern = $diff %
30;
1514 # Check if leap year
1515 $isLeap = $diff >= 30;
1517 # Calculate day in the month from number of day in the Hebrew year
1518 # Don't check Adar - if the day is not in Adar, we will stop before;
1519 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1520 $hebrewDay = $hebrewDayOfYear;
1523 while ( $hebrewMonth <= 12 ) {
1524 # Calculate days in this month
1525 if ( $isLeap && $hebrewMonth == 6 ) {
1526 # Adar in a leap year
1528 # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1530 if ( $hebrewDay <= $days ) {
1534 # Subtract the days of Adar I
1535 $hebrewDay -= $days;
1538 if ( $hebrewDay <= $days ) {
1544 } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1545 # Cheshvan in a complete year (otherwise as the rule below)
1547 } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1548 # Kislev in an incomplete year (otherwise as the rule below)
1551 # Odd months have 30 days, even have 29
1552 $days = 30 - ( $hebrewMonth - 1 ) %
2;
1554 if ( $hebrewDay <= $days ) {
1555 # In the current month
1558 # Subtract the days of the current month
1559 $hebrewDay -= $days;
1560 # Try in the next month
1565 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
/**
 * This calculates the Hebrew year start, as days since 1 September.
 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
 * Used for Hebrew date.
 *
 * @param $year Integer: Hebrew year
 * @return Integer: number of days since 1 September
 */
private static function hebrewYearStart( $year ) {
	$prev = $year - 1;
	$a = intval( ( 12 * $prev + 17 ) % 19 );
	$b = intval( $prev % 4 );
	$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * $prev;

	// Split $m into its integer part and the remaining fraction
	$wholeDays = intval( $m );
	if ( $m < 0 ) {
		$m--;
	}
	$m -= $wholeDays;

	// Postponement in days, chosen from the value of $c
	$c = intval( ( $wholeDays + 3 * $prev + 5 * $b + 5 ) % 7 );
	$delay = 0;
	if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
		$delay = 1;
	} elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
		$delay = 2;
	} elseif ( in_array( $c, array( 2, 4, 6 ) ) ) {
		$delay = 1;
	}

	// Century-based correction term
	$correction = intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;

	return $wholeDays + $delay + $correction;
}
/**
 * Algorithm to convert Gregorian dates to Thai solar dates,
 * Minguo dates, Juche dates or Japanese nengō dates.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * Only the year is converted; months and days are identical to the
 * Gregorian ones and are returned as the two-character strings cut from $ts.
 *
 * @param $ts String: 14-character timestamp
 * @param $cName String: calendar name ('thai', 'minguo', 'juche' or 'tenno');
 *   any other name leaves the Gregorian year unchanged
 * @return Array: converted year, month, day
 */
private static function tsToYear( $ts, $cName ) {
	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );

	if ( !strcmp( $cName, 'thai' ) ) {
		# Thai solar dates
		# Add 543 years to the Gregorian calendar
		$gy_offset = $gy + 543;
	} elseif ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
		# Minguo dates
		# Deduct 1911 years from the Gregorian calendar
		$gy_offset = $gy - 1911;
	} elseif ( !strcmp( $cName, 'tenno' ) ) {
		# Nengō dates: deduct years from the Gregorian calendar
		# depending on the nengō period.
		# Each entry: first day of the era as YYYYMMDD, Gregorian year
		# counted as year 1 of the era, era name. Newest era first.
		$eras = array(
			array( 20190501, 2019, '令和' ), # Reiwa period
			array( 19890108, 1989, '平成' ), # Heisei period
			array( 19261226, 1926, '昭和' ), # Shōwa period
			array( 19120731, 1912, '大正' ), # Taishō period
		);

		# Everything before the Taishō period is counted as Meiji
		$firstYear = 1868;
		$eraName = '明治';

		$ymd = (int)( $gy . $gm . $gd );
		foreach ( $eras as $era ) {
			if ( $ymd >= $era[0] ) {
				$firstYear = $era[1];
				$eraName = $era[2];
				break;
			}
		}

		$gy_gannen = $gy - $firstYear + 1;
		# The first year of an era is written 元 (gannen) instead of 1
		$gy_offset = $eraName . ( $gy_gannen == 1 ? '元' : $gy_gannen );
	} else {
		$gy_offset = $gy;
	}

	return array( $gy_offset, $gm, $gd );
}
/**
 * Roman number formatting up to 10000
 *
 * @param $num Mixed: number to format; it is converted with intval()
 * @return String|Integer: the Roman numeral, or the integer itself when it
 *   is not in the range 1..10000
 */
static function romanNumeral( $num ) {
	// One row per decimal position (units, tens, hundreds, thousands),
	// indexed by the digit at that position
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM', 'MMMM', 'MMMMM', 'MMMMMM', 'MMMMMMM', 'MMMMMMMM', 'MMMMMMMMM', 'MMMMMMMMMM' )
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 10000 ) {
		return $num;
	}

	$roman = '';
	$divisor = 1000;
	for ( $row = 3; $row >= 0; $row-- ) {
		if ( $num >= $divisor ) {
			$roman .= $table[$row][(int)floor( $num / $divisor )];
		}
		$num = $num % $divisor;
		$divisor /= 10;
	}

	return $roman;
}
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * Round thousands are written as the thousands letter followed by the
 * word for "thousand(s)". The generic byte-based punctuation below would
 * otherwise cut the two-byte thousands letter in half and return invalid
 * UTF-8 for these values.
 *
 * @param $num Mixed: number to format; it is converted with intval()
 * @return String|Integer: the Hebrew numeral, or the integer itself when it
 *   is not in the range 1..9999
 */
static function hebrewNumeral( $num ) {
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);
	// Letters that take a different shape at the end of a word
	static $finalForms = array(
		'כ' => 'ך',
		'מ' => 'ם',
		'נ' => 'ן',
		'פ' => 'ף',
		'צ' => 'ץ',
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	// Round thousands have their own notation
	if ( $num % 1000 === 0 ) {
		$word = ( $num === 1000 ) ? 'אלף' : 'אלפים';
		return $table[3][intval( $num / 1000 )] . "' " . $word;
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				// Written as 9+6 and 9+7 rather than 10+5 and 10+6
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					// Marks the preceding letter as a count of thousands
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	// Every Hebrew letter in $table is two bytes in UTF-8, and from here on
	// $s always ends with a letter, so the last two bytes are one character.
	if ( strlen( $s ) == 2 ) {
		// A single letter gets a trailing geresh and no final-form change
		return $s . "'";
	}

	$start = substr( $s, 0, strlen( $s ) - 2 );
	$end = substr( $s, strlen( $s ) - 2 );
	if ( isset( $finalForms[$end] ) ) {
		$end = $finalForms[$end];
	}

	// Gershayim goes before the last letter
	return $start . '"' . $end;
}
/**
 * Used by date() and time() to adjust the time output.
 *
 * $tz is a preference string in one of these shapes, as parsed below:
 *  - 'ZoneInfo|<minutes>|<zone name>'
 *  - 'Offset|<minutes>'
 *  - 'System' or an empty string (uses $wgLocalTZoffset when it is set)
 *  - 'H:M' or a plain hour count
 * Missing '|' segments are treated as absent instead of being read as
 * undefined array elements.
 *
 * @param $ts Int the time in date('YmdHis') format
 * @param $tz Mixed: adjust the time by this amount (default false, mean we
 *            get user timecorrection setting)
 * @return String: the adjusted time in date('YmdHis') format, or $ts
 *   unchanged when the offset is zero
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( $tz === false ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	$data = explode( '|', $tz, 3 );

	if ( $data[0] == 'ZoneInfo' ) {
		$userTZ = false;
		if ( isset( $data[2] ) ) {
			wfSuppressWarnings();
			$userTZ = timezone_open( $data[2] );
			wfRestoreWarnings();
		}
		if ( $userTZ !== false ) {
			$date = date_create( $ts, timezone_open( 'UTC' ) );
			date_timezone_set( $date, $userTZ );
			$date = date_format( $date, 'YmdHis' );
			return $date;
		}
		# Unrecognized timezone, default to 'Offset' with the stored offset.
		$data[0] = 'Offset';
	}

	$minDiff = 0;
	if ( $data[0] == 'System' || $tz == '' ) {
		# Global offset in minutes.
		if ( isset( $wgLocalTZoffset ) ) {
			$minDiff = $wgLocalTZoffset;
		}
	} elseif ( $data[0] == 'Offset' ) {
		$minDiff = isset( $data[1] ) ? intval( $data[1] ) : 0;
	} else {
		$data = explode( ':', $tz );
		if ( count( $data ) == 2 ) {
			$data[0] = intval( $data[0] );
			$data[1] = intval( $data[1] );
			$minDiff = abs( $data[0] ) * 60 + $data[1];
			if ( $data[0] < 0 ) {
				$minDiff = -$minDiff;
			}
		} else {
			$minDiff = intval( $data[0] ) * 60;
		}
	}

	# No difference ? Return time unchanged
	if ( 0 == $minDiff ) {
		return $ts;
	}

	wfSuppressWarnings(); // E_STRICT system time bitching
	# Generate an adjusted date; take advantage of the fact that mktime
	# will normalize out-of-range values so we don't have to split $minDiff
	# into hours and minutes.
	$t = mktime(
		(int)substr( $ts, 8, 2 ), # Hours
		(int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
		(int)substr( $ts, 12, 2 ), # Seconds
		(int)substr( $ts, 4, 2 ), # Month
		(int)substr( $ts, 6, 2 ), # Day
		(int)substr( $ts, 0, 4 ) ); # Year

	$date = date( 'YmdHis', $t );
	wfRestoreWarnings();

	return $date;
}
/**
 * This is meant to be used by time(), date(), and timeanddate() to get
 * the date preference they're supposed to use. Typical use:
 *
 *   function timeanddate([...], $format = true) {
 *       $datePreference = $this->dateFormat($format);
 *       [...]
 *   }
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return String: the format name, 'default' when it would be empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		$datePreference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$datePreference = $wgUser->getDatePreference();
	} else {
		$datePreference = (string)User::getDefaultOption( 'date' );
	}

	return ( $datePreference == '' ) ? 'default' : $datePreference;
}
/**
 * Get a format string for a given type and preference
 *
 * The result is cached both under the requested preference name and under
 * the name it resolved to, so repeated requests for 'default' (or for a
 * name that falls back to the default format) hit the cache.
 *
 * @param $type string May be date, time or both
 * @param $pref string The format name as it appears in Messages*.php
 * @return Mixed: the 'dateFormats' sub-item read from the data cache
 */
function getDateFormatString( $type, $pref ) {
	if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
		return $this->dateFormatStrings[$type][$pref];
	}

	$requested = $pref;
	if ( $pref == 'default' ) {
		$pref = $this->getDefaultDateFormat();
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	} else {
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		if ( is_null( $df ) ) {
			// No such format for this language: use the default one
			$pref = $this->getDefaultDateFormat();
			$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		}
	}

	$this->dateFormatStrings[$type][$pref] = $df;
	$this->dateFormatStrings[$type][$requested] = $df;

	return $df;
}
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String|bool the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTs = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTs = $this->userAdjust( $mwTs, $timecorrection );
	}
	$pattern = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $mwTs );
}
/**
 * Format the time part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String|bool the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTs = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTs = $this->userAdjust( $mwTs, $timecorrection );
	}
	$pattern = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $mwTs );
}
/**
 * Format both the date and the time of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String|bool the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTs = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTs = $this->userAdjust( $mwTs, $timecorrection );
	}
	$pattern = $this->getDateFormatString( 'both', $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $mwTs );
}
/**
 * Takes a number of seconds and turns it into a text using values such as hours and minutes.
 *
 * Each interval returned by getDurationIntervals() is rendered through the
 * 'duration-<interval name>' message in this language; the pieces are
 * then joined with listToText().
 *
 * @param integer $seconds The amount of seconds.
 * @param array $chosenIntervals The intervals to enable.
 *
 * @return string
 */
public function formatDuration( $seconds, array $chosenIntervals = array() ) {
	$parts = array();

	foreach ( $this->getDurationIntervals( $seconds, $chosenIntervals ) as $unit => $amount ) {
		$parts[] = wfMessage( 'duration-' . $unit )
			->numParams( $amount )
			->inLanguage( $this )
			->escaped();
	}

	return $this->listToText( $parts );
}
/**
 * Takes a number of seconds and returns an array with a set of corresponding intervals.
 * For example 65 will be turned into array( minutes => 1, seconds => 5 ).
 *
 * Intervals are taken in the order of self::$durationIntervals. An
 * interval whose count is zero is left out, except the last enabled one
 * when nothing else has been recorded.
 *
 * @param integer $seconds The amount of seconds.
 * @param array $chosenIntervals The intervals to enable.
 *
 * @return array interval name => count (as produced by floor())
 */
public function getDurationIntervals( $seconds, array $chosenIntervals = array() ) {
	if ( !$chosenIntervals ) {
		$chosenIntervals = array( 'millennia', 'centuries', 'decades', 'years', 'days', 'hours', 'minutes', 'seconds' );
	}

	$enabled = array_intersect_key( self::$durationIntervals, array_flip( $chosenIntervals ) );

	// Name of the last enabled interval (null when none is enabled)
	end( $enabled );
	$lastName = key( $enabled );

	$result = array();
	foreach ( $enabled as $unit => $unitLength ) {
		$count = floor( $seconds / $unitLength );
		$isFallback = ( $unit == $lastName && count( $result ) === 0 );

		if ( $count > 0 || $isFallback ) {
			$seconds -= $count * $unitLength;
			$result[$unit] = $count;
		}
	}

	return $result;
}
/**
 * Internal helper function for userDate(), userTime() and userTimeAndDate()
 *
 * @param $type String: can be 'date', 'time' or 'both'
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $user User object used to get preferences for timezone and format
 * @param $options Array, can contain the following keys:
 *        - 'timecorrection': time correction, can have the following values:
 *             - true: use user's preference
 *             - false: don't use time correction
 *             - integer: value of time correction in minutes
 *        - 'format': format to use, can have the following values:
 *             - true: use user's preference
 *             - false: use default preference
 *             - string: format to use
 * @return String
 */
private function internalUserTimeAndDate( $type, $ts, User $user, array $options ) {
	$ts = wfTimestamp( TS_MW, $ts );
	// Keys missing from $options default to true
	$options += array( 'timecorrection' => true, 'format' => true );

	$correction = $options['timecorrection'];
	if ( $correction !== false ) {
		$offset = ( $correction === true )
			? $user->getOption( 'timecorrection' )
			: $correction;
		$ts = $this->userAdjust( $ts, $offset );
	}

	$format = ( $options['format'] === true )
		? $user->getDatePreference()
		: $options['format'];

	$df = $this->getDateFormatString( $type, $this->dateFormat( $format ) );

	return $this->sprintfDate( $df, $ts );
}
/**
 * Get the formatted date for the given timestamp, formatted for
 * the given user.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $user User object used to get preferences for timezone and format
 * @param $options Array, can contain the following keys:
 *        - 'timecorrection': time correction, can have the following values:
 *             - true: use user's preference
 *             - false: don't use time correction
 *             - integer: value of time correction in minutes
 *        - 'format': format to use, can have the following values:
 *             - true: use user's preference
 *             - false: use default preference
 *             - string: format to use
 * @return String
 */
public function userDate( $ts, User $user, array $options = array() ) {
	$type = 'date';

	return $this->internalUserTimeAndDate( $type, $ts, $user, $options );
}
/**
 * Get the formatted time for the given timestamp, formatted for
 * the given user.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $user User object used to get preferences for timezone and format
 * @param $options Array, can contain the following keys:
 *        - 'timecorrection': time correction, can have the following values:
 *             - true: use user's preference
 *             - false: don't use time correction
 *             - integer: value of time correction in minutes
 *        - 'format': format to use, can have the following values:
 *             - true: use user's preference
 *             - false: use default preference
 *             - string: format to use
 * @return String
 */
public function userTime( $ts, User $user, array $options = array() ) {
	$type = 'time';

	return $this->internalUserTimeAndDate( $type, $ts, $user, $options );
}
/**
 * Get the formatted date and time for the given timestamp, formatted for
 * the given user.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $user User object used to get preferences for timezone and format
 * @param $options Array, can contain the following keys:
 *        - 'timecorrection': time correction, can have the following values:
 *             - true: use user's preference
 *             - false: don't use time correction
 *             - integer: value of time correction in minutes
 *        - 'format': format to use, can have the following values:
 *             - true: use user's preference
 *             - false: use default preference
 *             - string: format to use
 * @return String
 */
public function userTimeAndDate( $ts, User $user, array $options = array() ) {
	$type = 'both';

	return $this->internalUserTimeAndDate( $type, $ts, $user, $options );
}
/**
 * Read one entry of this language's 'messages' item from the data cache.
 *
 * @param $key string
 * @return array|null
 */
function getMessage( $key ) {
	$code = $this->mCode;

	return self::$dataCache->getSubitem( $code, 'messages', $key );
}
/**
 * Read this language's whole 'messages' item from the data cache.
 *
 * @return Mixed: whatever the data cache holds for 'messages'
 */
function getAllMessages() {
	$code = $this->mCode;

	return self::$dataCache->getItem( $code, 'messages' );
}
/**
 * Wrapper around PHP's iconv() that ignores unconvertible characters.
 *
 * This is a wrapper for iconv in all languages except esperanto,
 * which does some nasty x-conversions beforehand.
 *
 * @param $in string Source encoding
 * @param $out string Target encoding; '//IGNORE' is appended to it
 * @param $string string Text to convert
 * @return Mixed: the result of iconv()
 */
function iconv( $in, $out, $string ) {
	$target = $out . '//IGNORE';

	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	wfSuppressWarnings();
	$converted = iconv( $in, $target, $string );
	wfRestoreWarnings();

	return $converted;
}
2176 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
/**
 * Regex callback: upper-case the first character of the first capture group.
 *
 * @param $matches array
 * @return mixed|string
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];

	return $this->ucfirst( $word );
}
/**
 * Regex callback: upper-case the whole match with mb_strtoupper().
 *
 * @param $matches array
 * @return string
 */
function ucwordbreaksCallbackMB( $matches ) {
	$matched = $matches[0];

	return mb_strtoupper( $matched );
}
/**
 * Regex callback: translate the first capture group through the first map
 * returned by getCaseMaps().
 *
 * @param $matches array
 * @return string
 */
function ucCallback( $matches ) {
	$maps = self::getCaseMaps();

	return strtr( $matches[1], $maps[0] );
}
/**
 * Regex callback: translate the first capture group through the second map
 * returned by getCaseMaps().
 *
 * @param $matches array
 * @return string
 */
function lcCallback( $matches ) {
	$maps = self::getCaseMaps();

	return strtr( $matches[1], $maps[1] );
}
/**
 * Regex callback: upper-case the whole match with mb_strtoupper().
 *
 * @param $matches array
 * @return string
 */
function ucwordsCallbackMB( $matches ) {
	$matched = $matches[0];

	return mb_strtoupper( $matched );
}
/**
 * Regex callback: translate the whole match through the first map
 * returned by getCaseMaps().
 *
 * @param $matches array
 * @return string
 */
function ucwordsCallbackWiki( $matches ) {
	$maps = self::getCaseMaps();

	return strtr( $matches[0], $maps[0] );
}
/**
 * Make a string's first character uppercase
 *
 * The decision is made on the first byte: below 96 the string is returned
 * as is, other single-byte values go through PHP's ucfirst(), and bytes
 * from 128 up are handed to uc().
 *
 * @param $str string
 *
 * @return string
 */
function ucfirst( $str ) {
	$firstByte = ord( $str );

	if ( $firstByte >= 128 ) {
		// fall back to more complex logic in case of multibyte strings
		return $this->uc( $str, true );
	}
	if ( $firstByte >= 96 ) {
		return ucfirst( $str ); // use PHP's ucfirst()
	}

	// if already uppercase...
	return $str;
}
/**
 * Convert a string to uppercase
 *
 * @param $str string
 * @param $first bool Only convert the first character
 *
 * @return string
 */
function uc( $str, $first = false ) {
	// Text without any byte >= 0x80 is handled by PHP's own functions
	if ( !$this->isMultibyte( $str ) ) {
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		return $first
			? mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 )
			: mb_strtoupper( $str );
	}

	// No mbstring: convert through ucCallback()
	$anchor = $first ? '^' : '';

	return preg_replace_callback(
		"/$anchor([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
/**
 * Make a string's first character lowercase
 *
 * @param $str string
 * @return mixed|string
 */
function lcfirst( $str ) {
	$firstByte = ord( $str );

	if ( !$firstByte ) {
		return strval( $str );
	}
	if ( $firstByte >= 128 ) {
		// Leave multibyte first characters to lc()
		return $this->lc( $str, true );
	}
	if ( $firstByte <= 96 ) {
		$str[0] = strtolower( $str[0] );
	}

	return $str;
}
/**
 * Convert a string to lowercase
 *
 * @param $str string
 * @param $first bool Only convert the first character
 * @return mixed|string
 */
function lc( $str, $first = false ) {
	// Text without any byte >= 0x80 is handled by PHP's own functions
	if ( !$this->isMultibyte( $str ) ) {
		return $first
			? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 )
			: strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		return $first
			? mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 )
			: mb_strtolower( $str );
	}

	// No mbstring: convert through lcCallback()
	$anchor = $first ? '^' : '';

	return preg_replace_callback(
		"/$anchor([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
/**
 * Tell whether a string contains at least one byte in the range 0x80-0xff.
 *
 * @param $str string
 * @return bool
 */
function isMultibyte( $str ) {
	$found = preg_match( '/[\x80-\xff]/', $str );

	return (bool)$found;
}
/**
 * Lower-case a string, then upper-case the first letter of each word.
 *
 * @param $str string
 * @return mixed|string
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
/**
 * capitalize words at word breaks
 *
 * @param $str string
 * @return mixed
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s string
 *
 * @return string
 */
function caseFold( $s ) {
	// Folding is done by upper-casing
	$folded = $this->uc( $s );

	return $folded;
}
/**
 * Return $s unchanged when it is valid UTF-8; otherwise convert it from
 * this language's fallback 8-bit encoding to UTF-8.
 *
 * @param $s string
 * @return string
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	if ( !StringUtils::isUtf8( $s ) ) {
		$legacyEncoding = $this->fallback8bitEncoding();
		return $this->iconv( $legacyEncoding, 'utf-8', $s );
	}

	return $s;
}
/**
 * Read this language's 'fallback8bitEncoding' item from the data cache.
 *
 * @return Mixed: whatever the data cache holds for that item
 */
function fallback8bitEncoding() {
	$code = $this->mCode;

	return self::$dataCache->getItem( $code, 'fallback8bitEncoding' );
}
2447 * Most writing systems use whitespace to break up words.
2448 * Some languages such as Chinese don't conventionally do this,
2449 * which requires special handling when breaking up words for
2454 function hasWordBreaks() {
2459 * Some languages such as Chinese require word segmentation,
2460 * Specify such segmentation when overridden in derived class.
2462 * @param $string String
2465 function segmentByWord( $string ) {
/**
 * Some languages have special punctuation need to be normalized.
 * Make such changes here.
 *
 * This implementation only applies convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );

	return $normalized;
}
/**
 * convert double-width roman characters to single-width.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only digits and the letters A-Z and a-z are converted. The full-width
 * forms are built from their code points rather than written as a
 * literal, so the table cannot be damaged by an editor or tool that
 * normalises full-width characters to ASCII.
 *
 * @param $string string
 *
 * @return string
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$half = str_split( $halfWidth );

		$full = array();
		foreach ( $half as $char ) {
			// The full-width form of an ASCII character sits exactly
			// 0xFEE0 code points higher (U+FF10 for "0", U+FF21 for "A", ...)
			$code = ord( $char ) + 0xFEE0;
			// Encode the code point as a three-byte UTF-8 sequence
			$full[] = chr( 0xE0 | ( $code >> 12 ) )
				. chr( 0x80 | ( ( $code >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $code & 0x3F ) );
		}
	}

	$string = str_replace( $full, $half, $string );
	return $string;
}
/**
 * Surround every match of $pattern's first capture group with spaces,
 * then collapse runs of spaces into a single space.
 *
 * @param $string string
 * @param $pattern string Regular expression with at least one capture group
 *
 * @return string
 */
protected static function insertSpace( $string, $pattern ) {
	$spaced = preg_replace( $pattern, " $1 ", $string );

	return preg_replace( '/ +/', ' ', $spaced );
}
2515 * @param $termsArray array
2518 function convertForSearchResult( $termsArray ) {
2519 # some languages, e.g. Chinese, need to do a conversion
2520 # in order for search results to be displayed correctly
/**
 * Get the first character of a string.
 *
 * A three-byte character whose code point lies in 0xac00..0xd7a3 (Hangul
 * syllables, per the comment in the original code) is not returned as is:
 * a fixed three-byte sequence selected by code point range is returned
 * instead, described in the original as the syllable's first jamo.
 *
 * @param $s string
 * @return string The first character, or '' when $s does not start with a
 *   sequence matched by the pattern below
 */
function firstChar( $s ) {
	// Upper bound (exclusive) of each code point range => returned sequence
	static $jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	// Break down Hangul syllables to grab the first jamo
	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}

	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}

	return "\xe3\x85\x8e";
}
2582 function initEncoding() {
2583 # Some languages may have an alternate char encoding option
2584 # (Esperanto X-coding, Japanese furigana conversion, etc)
2585 # If this language is used as the primary content language,
2586 # an override to the defaults can be set here on startup.
/**
 * Convert text from UTF-8 to $wgEditEncoding for the edit box.
 *
 * For some languages we'll want to explicitly specify which characters
 * make it into the edit box raw or are converted in some way or another.
 *
 * @param $s string
 * @return string $s unchanged when $wgEditEncoding is empty or 'UTF-8'
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$needsConversion = !( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	if ( $needsConversion ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}

	return $s;
}
/**
 * Convert submitted text from $wgEditEncoding back to UTF-8; the reverse
 * of recodeForEdit().
 *
 * @param $s string
 * @return string $s unchanged when the edit encoding is empty or 'UTF-8'
 */
function recodeInput( $s ) {
	# Take the previous into account.
	global $wgEditEncoding;

	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';
	if ( $enc != 'UTF-8' ) {
		return $this->iconv( $enc, 'UTF-8', $s );
	}

	return $s;
}
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only. Here the
 * Arabic and Malayalam pair files are applied only when
 * $wgAllUnicodeFixes is set.
 *
 * @param $s string
 *
 * @return string
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}

	return $s;
}
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param $file string
 * @param $string string
 *
 * @throws MWException when wfGetPrecompiledData() returns false for $file
 * @return string
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	// First use of this file on this object: load it and keep the replacer
	$pairs = wfGetPrecompiledData( $file );
	if ( $pairs === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}
	$replacer = new ReplacementArray( $pairs );
	$this->transformData[$file] = $replacer;

	return $replacer->replace( $string );
}
/**
 * For right-to-left language support: look up the 'rtl' item of this
 * language in the localisation data cache.
 *
 * @return bool
 */
function isRTL() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'rtl' );
}
/**
 * Return the correct HTML 'dir' attribute value for this language.
 *
 * @return String 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}
	return 'ltr';
}
/**
 * Return 'left' or 'right' as the line-start alignment for this
 * language's text direction.
 *
 * Should be equivalent to the CSS3 'start' text-align value.
 *
 * @return String
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
/**
 * Return 'right' or 'left' as the line-end alignment for this
 * language's text direction.
 *
 * Should be equivalent to the CSS3 'end' text-align value.
 *
 * @return String
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction.
 * Unlike getDirMark(), this function returns the character as an HTML entity.
 * This function should be used when the output is guaranteed to be HTML,
 * because it makes the output HTML source code more readable. When
 * the output is plain text or can be escaped, getDirMark() should be used.
 *
 * @param $opposite Boolean Get the direction mark opposite to your language
 * @return string '&lrm;' or '&rlm;'
 */
function getDirMarkEntity( $opposite = false ) {
	// The marks are spelled as named entities on purpose: raw U+200E/U+200F
	// characters are invisible in source and are what getDirMark() is for.
	if ( $opposite ) {
		return $this->isRTL() ? '&lrm;' : '&rlm;';
	}
	return $this->isRTL() ? '&rlm;' : '&lrm;';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction.
 * The mark is returned as an invisible UTF-8 encoded character, which is
 * hard to read and debug, so use this only when the output is plain text
 * or can be escaped. For HTML output use getDirMarkEntity() instead.
 *
 * @param $opposite Boolean Get the direction mark opposite to your language
 * @return string
 */
function getDirMark( $opposite = false ) {
	$marks = array(
		'ltr' => "\xE2\x80\x8E", // LEFT-TO-RIGHT MARK (LRM)
		'rtl' => "\xE2\x80\x8F", // RIGHT-TO-LEFT MARK (RLM)
	);
	$own = $this->isRTL() ? 'rtl' : 'ltr';
	if ( $opposite ) {
		return $marks[ $own === 'rtl' ? 'ltr' : 'rtl' ];
	}
	return $marks[$own];
}
/**
 * Look up the 'capitalizeAllNouns' item of this language in the
 * localisation data cache.
 *
 * @return mixed the cached item
 */
function capitalizeAllNouns() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'capitalizeAllNouns' );
}
/**
 * An arrow, depending on the language direction.
 *
 * 'forwards' and 'backwards' follow the reading direction of the language;
 * 'left', 'right', 'up' and 'down' are absolute. Any other value yields null.
 *
 * @param $direction String: the direction of the arrow: forwards (default), backwards, left, right, up, down.
 * @return string|null
 */
function getArrow( $direction = 'forwards' ) {
	// Loose comparisons in this order mirror a plain switch statement.
	if ( $direction == 'forwards' ) {
		return $this->isRTL() ? '←' : '→';
	} elseif ( $direction == 'backwards' ) {
		return $this->isRTL() ? '→' : '←';
	} elseif ( $direction == 'left' ) {
		return '←';
	} elseif ( $direction == 'right' ) {
		return '→';
	} elseif ( $direction == 'up' ) {
		return '↑';
	} elseif ( $direction == 'down' ) {
		return '↓';
	}
	return null;
}
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar".
 * Returns the 'linkPrefixExtension' item of this language from the
 * localisation data cache.
 *
 * @return mixed the cached item
 */
function linkPrefixExtension() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'linkPrefixExtension' );
}
/**
 * Get the 'magicWords' item of this language from the localisation
 * data cache.
 *
 * @return array
 */
function getMagicWords() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'magicWords' );
}
/**
 * Run the 'LanguageGetMagic' hook once per object so that handlers can
 * add entries to $this->mMagicExtensions.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		// Flag first, so a re-entrant call from a hook handler is a no-op.
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		$hookArgs = array( &$this->mMagicExtensions, $this->getCode() );
		wfRunHooks( 'LanguageGetMagic', $hookArgs );
		wfProfileOut( 'LanguageGetMagic' );
	}
}
/**
 * Fill a MagicWord object with data from here.
 *
 * Entries in $this->mMagicExtensions take precedence over the language's
 * own magic words. A valid entry is an array whose first element is the
 * case-sensitivity flag and whose remaining elements are the synonyms.
 * Anything else is reported through error_log() and $mw is left untouched.
 *
 * @param $mw MagicWord object; mId is read, mCaseSensitive and mSynonyms are set
 */
function getMagic( $mw ) {
	$this->doMagicHook();

	$rawEntry = false;
	if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId];
	} else {
		$magicWords = $this->getMagicWords();
		if ( isset( $magicWords[$mw->mId] ) ) {
			$rawEntry = $magicWords[$mw->mId];
		}
	}

	if ( is_array( $rawEntry ) ) {
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
		return;
	}
	error_log( "\"$rawEntry\" is not a valid magic word for \"$mw->mId\"" );
}
/**
 * Add magic words to the extension array.
 *
 * The fallback chain is walked in reverse, and each language's words are
 * merged in front of what is already there (array union, so existing keys
 * lose to the newly merged language).
 *
 * @param $newWords array Magic word definitions keyed by language code
 */
function addMagicWordsByLang( $newWords ) {
	$reversedChain = array_reverse( $this->getFallbackLanguages() );
	foreach ( $reversedChain as $langCode ) {
		if ( !isset( $newWords[$langCode] ) ) {
			continue;
		}
		$this->mMagicExtensions = $newWords[$langCode] + $this->mMagicExtensions;
	}
}
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * The result is built once (data cache item plus the
 * 'LanguageGetSpecialPageAliases' hook) and then kept on the object,
 * because loading it may be slow.
 *
 * @return array
 */
function getSpecialPageAliases() {
	if ( $this->mExtendedSpecialPageAliases !== null ) {
		return $this->mExtendedSpecialPageAliases;
	}

	$cache = self::$dataCache;
	$this->mExtendedSpecialPageAliases = $cache->getItem( $this->mCode, 'specialPageAliases' );
	// Hook handlers may extend the list in place.
	wfRunHooks(
		'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
	);

	return $this->mExtendedSpecialPageAliases;
}
/**
 * Wrap text in an emphasis element. Italic is unsuitable for some
 * languages, which can override this.
 *
 * @param $text String: the text to be emphasized.
 * @return string
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
/**
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages, some such as Punjabi want ੨੯੩,੨੯੫.੨੩੫ and others such as
 * Icelandic just want to use commas instead of dots, and dots instead
 * of commas like "293.291,235".
 *
 * An example of this function being called:
 * <code>
 * wfMessage( 'message' )->numParams( $num )->text()
 * </code>
 *
 * See LanguageGu.php for the Gujarati implementation and
 * $separatorTransformTable on MessagesIs.php for
 * the , => . and . => , implementation.
 *
 * @todo check if it's viable to use localeconv() for the decimal separator thing.
 * @param $number Mixed: the string to be formatted, should be an integer
 *   or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates
 * @return string
 */
public function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
/**
 * Undo formatNum(): map localized digits and separators back through the
 * flipped transform tables, then drop any remaining commas.
 *
 * @param $number string
 * @return string
 */
function parseFormattedNumber( $number ) {
	$tables = array(
		$this->digitTransformTable(),
		$this->separatorTransformTable(),
	);
	foreach ( $tables as $table ) {
		if ( $table ) {
			$number = strtr( $number, array_flip( $table ) );
		}
	}

	return strtr( $number, array( ',' => '' ) );
}
/**
 * Adds commas to a given number.
 *
 * With no grouping pattern, or the "###,###,###" pattern, digits are
 * grouped in thousands. Otherwise the pattern's runs of '#' give the group
 * sizes, read from the right; the leftmost run is reused for all remaining
 * digits (see http://cldr.unicode.org/translation/number-patterns).
 *
 * @param $number mixed
 * @return string '' when $number is null
 */
function commafy( $number ) {
	$digitGroupingPattern = $this->digitGroupingPattern();
	if ( $number === null ) {
		return '';
	}

	if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
		// default grouping is at thousands, use the same for ###,###,### pattern too.
		return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
	}

	$number = (string)$number;
	$sign = "";
	// Detect the sign on the string itself: intval() is 0 for values such
	// as "-0.5", which would leave the '-' in place and mangle the result.
	if ( $number !== '' && $number[0] === '-' ) {
		// For negative numbers apply the algorithm like positive number and add sign.
		$sign = "-";
		$number = substr( $number, 1 );
	}

	$integerPart = array();
	$decimalPart = array();
	$numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches );
	preg_match( "/\d+/", $number, $integerPart );
	preg_match( "/\.\d*/", $number, $decimalPart );
	$groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0] : "";
	if ( $groupedNumber === $number ) {
		// the string does not have any number part. Eg: .12345
		return $sign . $groupedNumber;
	}

	// Walk the integer digits right to left, one pattern group at a time.
	$start = $end = strlen( $integerPart[0] );
	while ( $start > 0 ) {
		$match = $matches[0][$numMatches - 1];
		$matchLen = strlen( $match );
		$start = $end - $matchLen;
		if ( $start < 0 ) {
			$start = 0;
		}
		$groupedNumber = substr( $number, $start, $end - $start ) . $groupedNumber;
		$end = $start;
		if ( $numMatches > 1 ) {
			// use the last pattern for the rest of the number
			$numMatches--;
		}
		if ( $start > 0 ) {
			$groupedNumber = "," . $groupedNumber;
		}
	}

	return $sign . $groupedNumber;
}
/**
 * Get the 'digitGroupingPattern' item of this language from the
 * localisation data cache.
 *
 * @return String
 */
function digitGroupingPattern() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'digitGroupingPattern' );
}
/**
 * Get the 'digitTransformTable' item of this language from the
 * localisation data cache.
 *
 * @return array
 */
function digitTransformTable() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'digitTransformTable' );
}
/**
 * Get the 'separatorTransformTable' item of this language from the
 * localisation data cache.
 *
 * @return array
 */
function separatorTransformTable() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'separatorTransformTable' );
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with an "and".
 *
 * Array keys are ignored; only the order of the values matters.
 *
 * @param $l Array
 * @return string '' for an empty list
 */
function listToText( array $l ) {
	// Reindex so that lists with string or sparse keys work too.
	$l = array_values( $l );
	$m = count( $l ) - 1;
	if ( $m < 0 ) {
		return '';
	}

	// Only fetch the messages that will actually be used.
	if ( $m > 0 ) {
		$and = $this->getMessageFromDB( 'and' );
		$space = $this->getMessageFromDB( 'word-separator' );
		if ( $m > 1 ) {
			$comma = $this->getMessageFromDB( 'comma-separator' );
		}
	}

	// Build from the last item backwards.
	$s = $l[$m];
	for ( $i = $m - 1; $i >= 0; $i-- ) {
		if ( $i == $m - 1 ) {
			$s = $l[$i] . $and . $space . $s;
		} else {
			$s = $l[$i] . $comma . $s;
		}
	}
	return $s;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 *
 * @param $list array of strings to put in a comma list
 * @return string
 */
function commaList( array $list ) {
	$separator = wfMessage( 'comma-separator' )->inLanguage( $this )->escaped();
	return implode( $separator, $list );
}
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 *
 * @param $list array of strings to put in a semicolon list
 * @return string
 */
function semicolonList( array $list ) {
	$separator = wfMessage( 'semicolon-separator' )->inLanguage( $this )->escaped();
	return implode( $separator, $list );
}
/**
 * Same as commaList, but separate it with the pipe instead.
 *
 * @param $list array of strings to put in a pipe list
 * @return string
 */
function pipeList( array $list ) {
	$separator = wfMessage( 'pipe-separator' )->inLanguage( $this )->escaped();
	return implode( $separator, $list );
}
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (including ellipses)
 * @param $ellipsis String to append to the truncated text
 * @param $adjustLength Boolean: Subtract length of ellipsis from $length.
 *   $adjustLength was introduced in 1.18, before that behaved as if false.
 * @return string
 */
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
	// Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
	}

	if ( $length == 0 ) {
		return $ellipsis; // convention
	}
	$limit = abs( $length );
	if ( strlen( $string ) <= $limit ) {
		return $string; // no need to truncate
	}

	$ellipsisLen = strlen( $ellipsis );
	if ( $adjustLength && $ellipsisLen >= $limit ) {
		// No room for any text next to the ellipsis; this can be slightly unexpected.
		$truncated = $ellipsis;
	} else {
		$reserved = $adjustLength ? $ellipsisLen : 0;
		if ( $length > 0 ) {
			// Keep the head: xyz...
			$head = substr( $string, 0, $length - $reserved );
			$truncated = $this->removeBadCharLast( $head ) . $ellipsis;
		} else {
			// Keep the tail: ...xyz
			$tail = substr( $string, $length + $reserved );
			$truncated = $ellipsis . $this->removeBadCharFirst( $tail );
		}
	}

	// Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
	// This is not redundant with $adjustLength: when the ellipsis is longer
	// than the limit, the original may still be shorter than the result.
	return ( strlen( $truncated ) < strlen( $string ) ) ? $truncated : $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharLast( $string ) {
	if ( $string != '' ) {
		$char = ord( $string[strlen( $string ) - 1] );
		$m = array();
		if ( $char >= 0xc0 ) {
			# We got the first byte only of a multibyte char; remove it.
			$string = substr( $string, 0, -1 );
		} elseif ( $char >= 0x80 &&
			// The 's' modifier lets (.*) span newlines; without it the
			// pattern can never match a multi-line string and the partial
			// character would be left in place.
			preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
				'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
		{
			# We chopped in the middle of a character; remove it
			$string = $m[1];
		}
	}
	return $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharFirst( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$firstByte = ord( $string[0] );
	$isContinuationByte = ( $firstByte >= 0x80 && $firstByte < 0xc0 );
	if ( $isContinuationByte ) {
		// We chopped in the middle of a character; drop every leading
		// continuation byte.
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
 * Also, this will not detect things like "display:none" CSS.
 *
 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
 *
 * @param string $text HTML string to truncate
 * @param int $length (zero/positive) Maximum length (including ellipses)
 * @param string $ellipsis String to append to the truncated text
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
	}
	# Check if there is clearly no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format (convention)
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML (short-circuit)
	}

	$dispLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = ''; // accumulated tag name, accumulated result string
	$openTags = array(); // open tag stack
	$maybeState = null; // possible truncation state

	$textLen = strlen( $text );
	$neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
	for ( $pos = 0; true; ++$pos ) {
		# Consider truncation once the display length has reached the maximum.
		# We check if $dispLen > 0 to grab tags for the $neLength = 0 case.
		# Check that we're not in the middle of a bracket/entity...
		if ( $dispLen && $dispLen >= $neLength && $bracketState == 0 && !$entityState ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless there turn out to
				# be so few remaining characters that truncation is not necessary.
				if ( !$maybeState ) { // already saved? ($neLength = 0 case)
					$maybeState = array( $ret, $openTags ); // save state
				}
			} elseif ( $dispLen > $length && $dispLen > strlen( $ellipsis ) ) {
				# String in fact does need truncation, the truncation point was OK.
				list( $ret, $openTags ) = $maybeState; // reload state
				$ret = $this->removeBadCharLast( $ret ); // multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
		if ( $pos >= $textLen ) {
			break; // extra iteration just for above checks
		}

		# Read the next char...
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for backslash)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$dispLen++; // entity is one displayed char
				}
			} else {
				if ( $neLength == 0 && !$maybeState ) {
					// Save state without $ch. We want to *hit* the first
					// display char (to get tags) but not *use* it if truncating.
					$maybeState = array( substr( $ret, 0, -1 ), $openTags );
				}
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$dispLen++; // this char is displayed
					// Add the next $max display text chars after this in one swoop...
					$max = ( $testingEllipsis ? $length : $neLength ) - $dispLen;
					$skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
					$dispLen += $skipped;
					$pos += $skipped;
				}
			}
		}
	}
	// Close the last tag if left unclosed by bad HTML. Argument order is
	// ( &$tag, $tagType, $lastCh, &$openTags ), as in the calls above.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
/**
 * truncateHtml() helper function
 * like strcspn() but adds the skipped chars to $ret
 *
 * @param &$ret string Result string; the skipped bytes are appended to it
 * @param $text string Text being scanned
 * @param $search string Bytes that stop the scan
 * @param $start int Offset in $text to start scanning from
 * @param $len null|int Maximum number of bytes to scan; negative values
 *   are treated as 0
 * @return int Number of bytes skipped
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
	if ( $len === null ) {
		// Passed to strcspn() as a negative length, which makes the scan
		// stop one byte before the end of $text.
		$len = -1;
	} elseif ( $len < 0 ) {
		$len = 0; // sanity
	}

	if ( $start >= strlen( $text ) ) {
		return 0;
	}
	$skipCount = strcspn( $text, $search, $start, $len );
	$ret .= substr( $text, $start, $skipCount );
	return $skipCount;
}
/**
 * truncateHtml() helper function
 * (a) push or pop $tag from $openTags as needed
 * (b) clear $tag value
 *
 * @param &$tag string Current HTML tag name we are looking at
 * @param $tagType int (0-open tag, 1-close tag)
 * @param $lastCh string Character before the '>' that ended this tag
 * @param &$openTags array Open tag stack (not accounting for $tag)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return;
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		// Tag opened and did not close itself.
		$openTags[] = $tag;
	} elseif ( $tagType == 1 && $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
		// Closing tag matches the innermost open tag.
		array_pop( $openTags );
	}
	$tag = '';
}
/**
 * Grammatical transformations, needed for inflected languages
 * Invoked by putting {{grammar:case|word}} in a message
 *
 * The base implementation only consults $wgGrammarForms for this language.
 *
 * @param $word string
 * @param $case string
 * @return string The configured form, or $word unchanged when none is set
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	$langCode = $this->getCode();
	if ( !isset( $wgGrammarForms[$langCode][$case][$word] ) ) {
		return $word;
	}
	return $wgGrammarForms[$langCode][$case][$word];
}
/**
 * Get the grammar forms configured in $wgGrammarForms for this language.
 *
 * @return array of grammar forms; empty when none are configured
 */
function getGrammarForms() {
	global $wgGrammarForms;

	$langCode = $this->getCode();
	$hasForms = isset( $wgGrammarForms[$langCode] ) && is_array( $wgGrammarForms[$langCode] );
	return $hasForms ? $wgGrammarForms[$langCode] : array();
}
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 *
 * If no forms are given, an empty string is returned. If only one form is
 * given, it will be returned unconditionally. These details are implied by
 * the caller and cannot be overridden in subclasses.
 *
 * If more than one form is given, the default is to use the neutral one
 * if it is specified, and to use the masculine one otherwise. These
 * details can be overridden in subclasses.
 *
 * @param $gender string
 * @param $forms array
 * @return string
 */
function gender( $gender, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	// Guarantee that a masculine and a feminine slot exist.
	$forms = $this->preConvertPlural( $forms, 2 );
	if ( $gender === 'male' ) {
		return $forms[0];
	} elseif ( $gender === 'female' ) {
		return $forms[1];
	} elseif ( isset( $forms[2] ) ) {
		return $forms[2];
	}
	return $forms[0];
}
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 form of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * Forms written as "0=text" or "1=text" are explicit: they are used only
 * when $count is exactly that digit, and are otherwise discarded before the
 * regular plural rules are applied.
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language, or ''
 *   when no usable form is left
 */
function convertPlural( $count, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	// Handle explicit 0= and 1= forms
	foreach ( $forms as $index => $form ) {
		if ( isset( $form[1] ) && $form[1] === '=' ) {
			if ( $form[0] === (string) $count ) {
				return substr( $form, 2 );
			}
			unset( $forms[$index] );
		}
	}
	$forms = array_values( $forms );

	// All forms were explicit and none matched: nothing left to pick from.
	if ( !count( $forms ) ) {
		return '';
	}

	$pluralForm = $this->getPluralForm( $count );
	$pluralForm = min( $pluralForm, count( $forms ) - 1 );
	return $forms[$pluralForm];
}
/**
 * Pad the list of forms to at least the requested amount by repeating
 * the last form. No type check is performed on $forms.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	for ( $have = count( $forms ); $have < $count; $have = count( $forms ) ) {
		$forms[] = $forms[$have - 1];
	}
	return $forms;
}
/**
 * @todo Maybe translate block durations. Note that this function is somewhat misnamed: it
 * deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
 * (which is an absolute timestamp). Please note: do NOT add this blindly, as it is used
 * on old expiry lengths recorded in log entries. You'd need to provide the start date to
 * match up with it.
 *
 * @param $str String: the validated block duration in English
 * @return string Somehow translated block duration
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$suggested = SpecialBlock::getSuggestedDurations( $this );

	// 1. Exact match against the suggested durations (label => value).
	foreach ( $suggested as $label => $value ) {
		if ( strcmp( $str, $value ) == 0 ) {
			return htmlspecialchars( trim( $label ) );
		}
	}

	// 2. Usually only one of infinite/indefinite is on the list, so try
	//    the equivalent spellings as well.
	$indefs = array( 'infinite', 'infinity', 'indefinite' );
	if ( in_array( $str, $indefs ) ) {
		foreach ( $indefs as $synonym ) {
			$label = array_search( $synonym, $suggested, true );
			if ( $label !== false ) {
				return htmlspecialchars( trim( $label ) );
			}
		}
	}

	// 3. If all else fails, return a standard duration or timestamp description.
	$time = strtotime( $str, 0 );
	if ( $time === false ) {
		// Unknown format. Return it as-is in case.
		return $str;
	}
	if ( $time !== strtotime( $str, 1 ) ) {
		// The result moves with the base time, so it's a relative timestamp;
		// $time is relative to 0 and therefore a duration length.
		return $this->formatDuration( $time );
	}
	// It's an absolute timestamp.
	// wfTimestamp() handles 0 as current time instead of epoch.
	return $this->timeanddate( $time === 0 ? '19700101000000' : $time );
}
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. The base implementation leaves the text untouched.
 *
 * @param $text String
 * @return String
 */
public function segmentForDiff( $text ) {
	$segmented = $text;
	return $segmented;
}
/**
 * Counterpart of segmentForDiff(): undo the segmentation to show the
 * result. The base implementation leaves the text untouched.
 *
 * @param $text String
 * @return String
 */
public function unsegmentForDiff( $text ) {
	$unsegmented = $text;
	return $unsegmented;
}
/**
 * Return the LanguageConverter used in the Language
 *
 * @return LanguageConverter
 */
public function getConverter() {
	$converter = $this->mConverter;
	return $converter;
}
/**
 * Convert text to all supported variants (delegates to the converter).
 *
 * @param $text string
 * @return array
 */
public function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
/**
 * Convert text to different variants of a language (delegates to the
 * converter).
 *
 * @param $text string
 * @return string
 */
public function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
/**
 * Convert a Title object to a string in the preferred variant
 *
 * @param $title Title
 * @return string
 */
public function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
/**
 * Convert a namespace index to a string in the preferred variant
 *
 * @param $ns int
 * @return string
 */
public function convertNamespace( $ns ) {
	$converter = $this->mConverter;
	return $converter->convertNamespace( $ns );
}
/**
 * Check if this is a language with variants, i.e. getVariants() lists
 * more than one code.
 *
 * @return bool
 */
public function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
/**
 * Check if the language has the specific variant
 *
 * @param $variant string
 * @return bool
 */
public function hasVariant( $variant ) {
	$validated = $this->mConverter->validateVariant( $variant );
	return (bool)$validated;
}
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion
 *
 * @param $text string
 * @return string
 */
public function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title
 * @return string
 * @todo this should get integrated somewhere sane
 */
public function convertHtml( $text, $isTitle = false ) {
	// $isTitle is forwarded as-is; convert() in this class declares only $text.
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
/**
 * Convert a category key through the language converter.
 *
 * @param $key string
 * @return string
 */
public function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
/**
 * Get the list of variants supported by this language
 * see sample implementation in LanguageZh.php
 *
 * @return array an array of language codes
 */
public function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
/**
 * Get the preferred variant as reported by the language converter.
 *
 * @return String
 */
public function getPreferredVariant() {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
/**
 * Get the default variant as reported by the language converter.
 *
 * @return String
 */
public function getDefaultVariant() {
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
/**
 * Get the URL variant as reported by the language converter.
 *
 * @return string
 */
public function getURLVariant() {
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
/**
 * If a language supports multiple variants, it is
 * possible that non-existing link in one variant
 * actually exists in another variant. this function
 * tries to find it. See e.g. LanguageZh.php
 *
 * @param $link String: the name of the link
 * @param $nt Mixed: the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *   we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	// $link and $nt are passed on by reference so the converter can update them.
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * @deprecated since 1.17 Use autoConvertToAllVariants()
 *
 * @param $text string
 * @return array
 */
public function convertLinkToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
/**
 * Returns language specific options used by User::getPageRenderHash()
 * for example, the preferred language variant
 *
 * @return string
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the article.
 *
 * @return string
 */
public function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
/**
 * Prepare external link text for conversion. When the text is
 * a URL, it shouldn't be converted, and it'll be wrapped in
 * the "raw" tag (-{R| }-) to prevent conversion.
 *
 * This function is called "markNoConversion" for historical
 * reasons.
 *
 * @param $text String: text to be used for external link
 * @param $noParse bool: wrap it without confirming it's a real URL first
 * @return string the tagged text
 */
public function markNoConversion( $text, $noParse = false ) {
	// Excluding protocol-relative URLs may avoid many false positives.
	$looksLikeUrl = $noParse
		|| preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text );
	if ( !$looksLikeUrl ) {
		return $text;
	}
	return $this->mConverter->markNoConversion( $text );
}
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * @return string
 */
public function linkTrail() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'linkTrail' );
}
/**
 * Return this language object itself.
 *
 * @return Language
 */
function getLangObj() {
	$self = $this;
	return $self;
}
/**
 * Get the RFC 3066 code for this language object
 *
 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
 * htmlspecialchars() or similar
 *
 * @return string
 */
public function getCode() {
	$code = $this->mCode;
	return $code;
}
/**
 * Get the code in Bcp47 format which we can use
 * inside of html lang="" tags.
 *
 * The value is computed with wfBCP47() on first use and then kept on the
 * object until setCode() resets it.
 *
 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
 * htmlspecialchars() or similar.
 *
 * @return string
 */
public function getHtmlCode() {
	if ( $this->mHtmlCode === null ) {
		$this->mHtmlCode = wfBCP47( $this->getCode() );
	}
	return $this->mHtmlCode;
}
/**
 * Change the language code of this object.
 *
 * @param $code string
 */
public function setCode( $code ) {
	// Drop the derived HTML code so it is recomputed for the new code.
	$this->mHtmlCode = null;
	$this->mCode = $code;
}
/**
 * Get the name of a file for a certain language code
 *
 * $prefix carries no default: it precedes the required $code, so a default
 * could never be used by callers and is deprecated syntax since PHP 8.0.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @throws MWException when $code is not a valid language code
 * @return string $prefix . $mangledCode . $suffix
 */
public static function getFileName( $prefix, $code, $suffix = '.php' ) {
	// Protect against path traversal
	if ( !Language::isValidCode( $code )
		|| strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
	{
		throw new MWException( "Invalid language code \"$code\"" );
	}

	// "zh-hans" becomes "Zh_hans"
	return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
}
/**
 * Get the language code from a file name. Inverse of getFileName()
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return string Language code, or false if $prefix or $suffix isn't found
 */
public static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';

	$m = null;
	preg_match( $pattern, $filename, $m );
	if ( count( $m ) === 0 ) {
		return false;
	}
	// "Zh_hans" becomes "zh-hans"
	$mangledCode = strtolower( $m[1] );
	return str_replace( '_', '-', $mangledCode );
}
/**
 * Get the path of the messages file for a language code. The
 * 'Language::getMessagesFileName' hook may replace the path.
 *
 * @param $code string
 * @return string
 */
public static function getMessagesFileName( $code ) {
	global $IP;

	$path = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
	wfRunHooks( 'Language::getMessagesFileName', array( $code, &$path ) );
	return $path;
}
/**
 * Get the path of the class file for a language code.
 *
 * @param $code string
 * @return string
 */
public static function getClassFileName( $code ) {
	global $IP;

	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
/**
 * Get the first fallback for a given language.
 *
 * @param $code string
 *
 * @return bool|string First entry of getFallbacksFor( $code ), or false when
 *   $code is 'en' or is rejected by Language::isValidBuiltInCode()
 */
public static function getFallbackFor( $code ) {
	if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
		return false;
	}

	$chain = self::getFallbacksFor( $code );
	return array_shift( $chain );
}
/**
 * Get the ordered list of fallback languages.
 *
 * The list comes from the comma-separated 'fallback' item of the
 * localisation cache; 'en' is appended when it is not already the last
 * entry.
 *
 * @param $code string Language code
 * @return array Empty when $code is 'en' or is rejected by
 *   Language::isValidBuiltInCode()
 */
public static function getFallbacksFor( $code ) {
	if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
		return array();
	}

	$raw = self::getLocalisationCache()->getItem( $code, 'fallback' );
	$chain = array_map( 'trim', explode( ',', $raw ) );
	if ( end( $chain ) !== 'en' ) {
		$chain[] = 'en';
	}
	return $chain;
}
/**
 * Get all messages for a given language
 * WARNING: this may take a long time. If you need every message *key* but
 * the *contents* of only a few messages, consider getMessageKeysFor().
 *
 * @param $code string
 *
 * @return array The 'messages' item of the localisation cache for $code
 */
public static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
/**
 * Get a single message for a given language, read as the sub-item $key of
 * the 'messages' item in the localisation cache.
 *
 * @param $key string
 * @param $code string
 *
 * @return string
 */
public static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
/**
 * Get all message keys for a given language. This is a faster alternative to
 * array_keys( Language::getMessagesFor( $code ) )
 *
 * @param $code string Language code
 * @return array of message keys (strings)
 */
public static function getMessageKeysFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubItemList( $code, 'messages' );
}
/**
 * Expand the '$1' placeholder and {{grammar:...}} constructs in a namespace
 * name.
 *
 * A name without '$1' is returned untouched. Otherwise '$1' is replaced by
 * $wgMetaNamespace, each {{grammar:a|b}} construct is passed to
 * replaceGrammarInNamespace(), and spaces are turned into underscores.
 *
 * @param $talk string Namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback takes $this by value: objects are passed as handles, so the
	# old PHP 4 style "&$this" is unnecessary.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
/**
 * preg_replace_callback() handler used by fixVariableInNamespace() for one
 * {{grammar:a|b}} match.
 *
 * @param $m array Match array: [1] is the text before the pipe, [2] the text
 *   after it
 * @return string Result of convertGrammar() on the trimmed captures
 */
function replaceGrammarInNamespace( $m ) {
	$afterPipe = trim( $m[2] );
	$beforePipe = trim( $m[1] );
	return $this->convertGrammar( $afterPipe, $beforePipe );
}
/**
 * Get the upper-case and lower-case character maps.
 *
 * The maps are read from the precompiled file 'Utf8Case.ser' on the first
 * call and kept in static locals, so later calls return the cached copies.
 *
 * @throws MWException if Utf8Case.ser cannot be loaded
 * @return array array( $wikiUpperChars, $wikiLowerChars )
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before bailing out, so the
		// wfProfileIn() above is balanced on the error path too.
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );
	return array( $wikiUpperChars, $wikiLowerChars );
}
/**
 * Decode an expiry (block, protection, etc) which has come from the DB
 *
 * @todo FIXME: why are we returning DBMS-dependent strings???
 *
 * @param $expiry String: Database expiry String
 * @param $format Bool|Int true to process using language functions, or TS_ constant
 *     to return the expiry in a given timestamp
 * @return mixed For an empty or infinite expiry: the 'infiniteblock' message
 *     when $format is true, otherwise the database's infinity value. For any
 *     other expiry: timeanddate() output when $format is true, otherwise
 *     wfTimestamp( $format, $expiry ).
 *     NOTE(review): the infinite/$format === true case returns what
 *     wfMessage() gave back, not a plain string - confirm callers expect that.
 */
public function formatExpiry( $expiry, $format = true ) {
	// Looked up once and shared by every later call.
	static $infinity, $infinityMsg;
	if ( $infinity === null ) {
		$infinityMsg = wfMessage( 'infiniteblock' );
		$infinity = wfGetDB( DB_SLAVE )->getInfinity();
	}

	$isInfinite = ( $expiry == '' || $expiry == $infinity );

	if ( $format === true ) {
		if ( $isInfinite ) {
			return $infinityMsg;
		}
		return $this->timeanddate( $expiry, /* User preference timezone */ true );
	}

	if ( $isInfinite ) {
		return $infinity;
	}
	return wfTimestamp( $format, $expiry );
}
/**
 * Format a duration given in seconds as text, using the seconds, minutes,
 * hours and days messages of this language.
 *
 * @param $seconds int|float
 * @param $format Array Optional
 *     If $format['avoid'] == 'avoidseconds' - don't mention seconds if $seconds >= 1 hour
 *     If $format['avoid'] == 'avoidminutes' - don't mention seconds/minutes if $seconds > 48 hours
 *     If $format['noabbrevs'] is true - use 'seconds' and friends instead of 'seconds-abbrev' and friends
 *     For backwards compatibility, $format may also be one of the strings 'avoidseconds' or 'avoidminutes'
 * @return string
 */
function formatTimePeriod( $seconds, $format = array() ) {
	if ( !is_array( $format ) ) {
		$format = array( 'avoid' => $format ); // For backwards compatibility
	}
	foreach ( array( 'avoid', 'noabbrevs' ) as $option ) {
		if ( !isset( $format[$option] ) ) {
			$format[$option] = false;
		}
	}

	// Message keys are 'seconds', 'minutes', ... or their '-abbrev' variants.
	$keySuffix = $format['noabbrevs'] ? '' : '-abbrev';
	$secondsMsg = wfMessage( 'seconds' . $keySuffix )->inLanguage( $this );
	$minutesMsg = wfMessage( 'minutes' . $keySuffix )->inLanguage( $this );
	$hoursMsg = wfMessage( 'hours' . $keySuffix )->inLanguage( $this );
	$daysMsg = wfMessage( 'days' . $keySuffix )->inLanguage( $this );

	$tenths = round( $seconds * 10 );
	$whole = round( $seconds );

	// The formatted units, joined with single spaces at the end.
	$pieces = array();

	if ( $tenths < 100 ) {
		// Under ten seconds: keep one decimal place.
		$number = $this->formatNum( sprintf( "%.1f", $tenths / 10 ) );
		$pieces[] = $secondsMsg->params( $number )->text();
	} elseif ( $whole < 60 ) {
		$number = $this->formatNum( $whole );
		$pieces[] = $secondsMsg->params( $number )->text();
	} elseif ( $whole < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		// Rounding can yield 60 seconds; carry that into the minutes.
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		$pieces[] = $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		$pieces[] = $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
	} elseif ( $whole <= 2 * 86400 ) {
		$hours = floor( $seconds / 3600 );
		$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
		$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		if ( $minutes == 60 ) {
			$minutes = 0;
			$hours++;
		}
		$pieces[] = $hoursMsg->params( $this->formatNum( $hours ) )->text();
		$pieces[] = $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		if ( !in_array( $format['avoid'], array( 'avoidseconds', 'avoidminutes' ) ) ) {
			$pieces[] = $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
		}
	} else {
		$days = floor( $seconds / 86400 );
		$remainder = $seconds - $days * 86400;
		if ( $format['avoid'] === 'avoidminutes' ) {
			// Days and hours only, hours rounded to nearest.
			$hours = round( $remainder / 3600 );
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$pieces[] = $daysMsg->params( $this->formatNum( $days ) )->text();
			$pieces[] = $hoursMsg->params( $this->formatNum( $hours ) )->text();
		} elseif ( $format['avoid'] === 'avoidseconds' ) {
			// Days, hours and minutes, minutes rounded to nearest.
			$hours = floor( $remainder / 3600 );
			$minutes = round( ( $seconds - $days * 86400 - $hours * 3600 ) / 60 );
			if ( $minutes == 60 ) {
				$minutes = 0;
				$hours++;
			}
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$pieces[] = $daysMsg->params( $this->formatNum( $days ) )->text();
			$pieces[] = $hoursMsg->params( $this->formatNum( $hours ) )->text();
			$pieces[] = $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		} else {
			// Days, then the rest of the period formatted recursively.
			$pieces[] = $daysMsg->params( $this->formatNum( $days ) )->text();
			$pieces[] = $this->formatTimePeriod( $seconds - $days * 86400, $format );
		}
	}

	return implode( ' ', $pieces );
}
/**
 * Format a bitrate for output, using an appropriate
 * unit (bps, kbps, Mbps, Gbps, Tbps, Pbps, Ebps, Zbps or Ybps) according to the magnitude in question
 *
 * This uses base 1000. For base 1024 use formatSize(), for another base
 * see formatComputingNumbers()
 *
 * @param $bps int
 * @return string
 */
function formatBitrate( $bps ) {
	// '$1' is a placeholder that formatComputingNumbers() fills with the unit prefix.
	$messageKeyPattern = "bitrate-$1bits";
	return $this->formatComputingNumbers( $bps, 1000, $messageKeyPattern );
}
/**
 * Format a number with a magnitude prefix (kilo, mega, ...) chosen by
 * repeatedly dividing by $boundary.
 *
 * The '$1' in $messageKey is replaced by the prefix to obtain the message to
 * use; the '$1' in that message's text is replaced by the formatted number.
 *
 * @param $size int Size of the unit
 * @param $boundary int Size boundary (1000, or 1024 in most cases)
 * @param $messageKey string Message key to be used
 * @return string
 */
function formatComputingNumbers( $size, $boundary, $messageKey ) {
	if ( $size <= 0 ) {
		// Nothing to scale: use the unprefixed message and the number as given.
		$plainText = $this->getMessageFromDB( str_replace( '$1', '', $messageKey ) );
		return str_replace( '$1', $this->formatNum( $size ), $plainText );
	}

	$prefixes = array( '', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zeta', 'yotta' );
	$lastIndex = count( $prefixes ) - 1;

	// Step up one prefix per division, stopping at the largest prefix.
	for ( $index = 0; $size >= $boundary && $index < $lastIndex; $index++ ) {
		$size /= $boundary;
	}

	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter
	$decimals = ( $index > 1 ) ? 2 : 0;

	$msg = str_replace( '$1', $prefixes[$index], $messageKey );
	$text = $this->getMessageFromDB( $msg );

	return str_replace( '$1', $this->formatNum( round( $size, $decimals ) ), $text );
}
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB, GB, TB, PB, EB, ZB or YB) according to the magnitude in question
 *
 * This method uses base 1024. For base 1000 use formatBitrate(), for
 * another base see formatComputingNumbers()
 *
 * @param $size int Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	// '$1' is a placeholder that formatComputingNumbers() fills with the unit prefix.
	$messageKeyPattern = "size-$1bytes";
	return $this->formatComputingNumbers( $size, 1024, $messageKeyPattern );
}
/**
 * Make a list item, used by various special pages
 *
 * When $details is non-empty it is appended to $page after the direction
 * mark(s) and the 'word-separator' message, wrapped by the 'parentheses'
 * message.
 *
 * @param $page String Page link
 * @param $details String Text between brackets
 * @param $oppositedm Boolean Add the direction mark opposite to your
 *     language, to display text properly
 * @return String
 */
function specialList( $page, $details, $oppositedm = true ) {
	$dirmark = '';
	if ( $oppositedm ) {
		$dirmark = $this->getDirMark( true );
	}
	$dirmark .= $this->getDirMark();

	$suffix = '';
	if ( $details ) {
		$separator = $this->getMessageFromDB( 'word-separator' );
		// $details goes in as a raw parameter: it is not escaped again here.
		$bracketed = wfMessage( 'parentheses' )->rawParams( $details )
			->inLanguage( $this )->escaped();
		$suffix = $dirmark . $separator . $bracketed;
	}

	return $page . $suffix;
}
/**
 * Generate (prev x| next x) (20|50|100...) type links for paging
 *
 * The "previous" entry is plain escaped text when $offset is 0, and the
 * "next" entry is plain escaped text when $atend is true; otherwise each is
 * a link built by numLink().
 *
 * @param $title Title object to link
 * @param $offset Integer offset parameter
 * @param $limit Integer limit parameter
 * @param $query array optional extra URL query parameters
 * @param $atend Bool optional param for specified if this is the last page
 * @return String HTML of the 'viewprevnext' message
 */
public function viewPrevNext( Title $title, $offset, $limit, array $query = array(), $atend = false ) {
	// @todo FIXME: Why on earth this needs one message for the text and another one for tooltip?

	# Make 'previous' link
	$prev = wfMessage( 'prevn' )->inLanguage( $this )->title( $title )->numParams( $limit )->text();
	if ( $offset > 0 ) {
		// Never step back past the first result.
		$plink = $this->numLink( $title, max( $offset - $limit, 0 ), $limit,
			$query, $prev, 'prevn-title', 'mw-prevlink' );
	} else {
		$plink = htmlspecialchars( $prev );
	}

	# Make 'next' link
	$next = wfMessage( 'nextn' )->inLanguage( $this )->title( $title )->numParams( $limit )->text();
	if ( $atend ) {
		$nlink = htmlspecialchars( $next );
	} else {
		// Use the "next" tooltip message here, not the "previous" one.
		$nlink = $this->numLink( $title, $offset + $limit, $limit,
			$query, $next, 'nextn-title', 'mw-nextlink' );
	}

	# Make links to set number of items per page
	$numLinks = array();
	foreach ( array( 20, 50, 100, 250, 500 ) as $num ) {
		$numLinks[] = $this->numLink( $title, $offset, $num,
			$query, $this->formatNum( $num ), 'shown-title', 'mw-numlink' );
	}

	// The links are already HTML, so they are passed as raw parameters.
	return wfMessage( 'viewprevnext' )->inLanguage( $this )->title( $title
		)->rawParams( $plink, $nlink, $this->pipeList( $numLinks ) )->escaped();
}
/**
 * Helper function for viewPrevNext() that generates links
 *
 * @param $title Title object to link
 * @param $offset Integer offset parameter
 * @param $limit Integer limit parameter
 * @param $query Array extra query parameters; 'limit' and 'offset' entries
 *     in it are overridden by $limit and $offset
 * @param $link String text to use for the link; will be escaped
 * @param $tooltipMsg String name of the message to use as tooltip
 * @param $class String value of the "class" attribute of the link
 * @return String HTML fragment
 */
private function numLink( Title $title, $offset, $limit, array $query, $link, $tooltipMsg, $class ) {
	// Array union keeps the left-hand entries, so limit/offset win over $query.
	$linkQuery = array( 'limit' => $limit, 'offset' => $offset ) + $query;

	$tooltip = wfMessage( $tooltipMsg )->inLanguage( $this )->title( $title )
		->numParams( $limit )->text();

	$attribs = array(
		'href' => $title->getLocalURL( $linkQuery ),
		'title' => $tooltip,
		'class' => $class
	);
	return Html::element( 'a', $attribs, $link );
}
/**
 * Get the conversion rule title, if any.
 *
 * Delegates to the converter held in $this->mConverter.
 *
 * @return string
 */
public function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
/**
 * Get the compiled plural rules for the language
 *
 * The rules stored for this language are tried first; when they are empty,
 * each fallback language is tried in order until one yields rules.
 *
 * @return array Associative array with plural form, and plural rule as key-value pairs
 */
public function getCompiledPluralRules() {
	$rules = self::$dataCache->getItem( strtolower( $this->mCode ), 'compiledPluralRules' );
	$remaining = Language::getFallbacksFor( $this->mCode );

	// Walk the fallback chain only while nothing usable has been found.
	while ( !$rules && $remaining ) {
		$fallbackCode = array_shift( $remaining );
		$rules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'compiledPluralRules' );
	}

	return $rules;
}
/**
 * Get the plural rules for the language
 *
 * The rules stored for this language are tried first; when they are empty,
 * each fallback language is tried in order until one yields rules.
 *
 * @return array Associative array with plural form, and plural rule as key-value pairs
 */
public function getPluralRules() {
	$rules = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRules' );
	$remaining = Language::getFallbacksFor( $this->mCode );

	// Walk the fallback chain only while nothing usable has been found.
	while ( !$rules && $remaining ) {
		$fallbackCode = array_shift( $remaining );
		$rules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRules' );
	}

	return $rules;
}
4282 * Find the plural form matching to the given number
4283 * It returns the form index.
4284 * @return int The index of the plural form
4286 private function getPluralForm( $number ) {
4287 $pluralRules = $this->getCompiledPluralRules();
4288 $form = CLDRPluralRuleEvaluator
::evaluateCompiled( $number, $pluralRules );