3 * Internationalisation code
10 * @defgroup Language Language
13 if ( !defined( 'MEDIAWIKI' ) ) {
14 echo "This file is part of MediaWiki, it is not a valid entry point.\n";
19 global $wgLanguageNames;
20 require_once( dirname( __FILE__
) . '/Names.php' );
22 if ( function_exists( 'mb_strtoupper' ) ) {
23 mb_internal_encoding( 'UTF-8' );
27 * a fake language converter
/**
 * Remember the language object this converter stands in for.
 *
 * @param $langobj Language object; only its getCode() is used below
 */
function __construct( $langobj ) {
	$this->mLang = $langobj;
}

/**
 * @param $text String
 * @return Array with a single entry: language code => unchanged text
 */
function autoConvertToAllVariants( $text ) {
	return array( $this->mLang->getCode() => $text );
}

/** No conversion is performed: the input is handed back unchanged. */
function convert( $t ) {
	return $t;
}

/** Returns the prefixed text of the given title object, unconverted. */
function convertTitle( $t ) {
	return $t->getPrefixedText();
}

/** The only "variant" is the language's own code. */
function getVariants() {
	return array( $this->mLang->getCode() );
}

/** @return String the language's own code */
function getPreferredVariant() {
	return $this->mLang->getCode();
}

/** @return String the language's own code */
function getDefaultVariant() {
	return $this->mLang->getCode();
}

/** @return String always the empty string */
function getURLVariant() {
	return '';
}

/** @return Boolean always false */
function getConvRuleTitle() {
	return false;
}

/** Intentionally a no-op: neither reference argument is modified. */
function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
}

/** @return String always the empty string */
function getExtraHashOptions() {
	return '';
}

/** @return String always the empty string */
function getParsedTitle() {
	return '';
}

/** Returns $text unchanged; $noParse is ignored. */
function markNoConversion( $text, $noParse = false ) {
	return $text;
}

/** Returns $key unchanged. */
function convertCategoryKey( $key ) {
	return $key;
}

/** Delegates to autoConvertToAllVariants(). */
function convertLinkToAllVariants( $text ) {
	return $this->autoConvertToAllVariants( $text );
}

/** Returns $text unchanged. */
function armourMath( $text ) {
	return $text;
}
52 * Internationalisation code
56 var $mConverter, $mVariants, $mCode, $mLoaded = false;
57 var $mMagicExtensions = array(), $mMagicHookDone = false;
59 var $mNamespaceIds, $namespaceNames, $namespaceAliases;
60 var $dateFormatStrings = array();
61 var $mExtendedSpecialPageAliases;
64 * ReplacementArray object caches
66 var $transformData = array();
69 * @var LocalisationCache
71 static public $dataCache;
73 static public $mLangObjCache = array();
75 static public $mWeekdayMsgs = array(
76 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
80 static public $mWeekdayAbbrevMsgs = array(
81 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
84 static public $mMonthMsgs = array(
85 'january', 'february', 'march', 'april', 'may_long', 'june',
86 'july', 'august', 'september', 'october', 'november',
89 static public $mMonthGenMsgs = array(
90 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
91 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
94 static public $mMonthAbbrevMsgs = array(
95 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
96 'sep', 'oct', 'nov', 'dec'
99 static public $mIranianCalendarMonthMsgs = array(
100 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
101 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
102 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
103 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
106 static public $mHebrewCalendarMonthMsgs = array(
107 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
108 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
109 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
110 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
111 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
114 static public $mHebrewCalendarMonthGenMsgs = array(
115 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
116 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
117 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
118 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
119 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
122 static public $mHijriCalendarMonthMsgs = array(
123 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
124 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
125 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
126 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
/**
 * Get a cached language object for a given language code.
 *
 * Objects are memoised in self::$mLangObjCache. When the cache already
 * holds more than 10 entries at the time of a miss, it is emptied before
 * the new object is stored.
 *
 * @param $code String
 * @return the object produced by newFromCode() for $code
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	if ( count( self::$mLangObjCache ) > 10 ) {
		// Don't keep a billion objects around, that's stupid.
		self::$mLangObjCache = array();
	}

	$langObj = self::newFromCode( $code );
	self::$mLangObjCache[$code] = $langObj;
	return $langObj;
}
146 * Create a language object for a given language code
147 * @param $code String
150 protected static function newFromCode( $code ) {
152 static $recursionLevel = 0;
154 // Protect against path traversal below
155 if ( !Language
::isValidCode( $code )
156 ||
strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
158 throw new MWException( "Invalid language code \"$code\"" );
161 if ( !Language
::isValidBuiltInCode( $code ) ) {
162 // It's not possible to customise this code with class files, so
163 // just return a Language object. This is to support uselang= hacks.
164 $lang = new Language
;
165 $lang->setCode( $code );
169 if ( $code == 'en' ) {
172 $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
173 if ( !defined( 'MW_COMPILED' ) ) {
174 // Preload base classes to work around APC/PHP5 bug
175 if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
176 include_once( "$IP/languages/classes/$class.deps.php" );
178 if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
179 include_once( "$IP/languages/classes/$class.php" );
184 if ( $recursionLevel > 5 ) {
185 throw new MWException( "Language fallback loop detected when creating class $class\n" );
188 if ( !MWInit
::classExists( $class ) ) {
189 $fallback = Language
::getFallbackFor( $code );
191 $lang = Language
::newFromCode( $fallback );
193 $lang->setCode( $code );
201 * Returns true if a language code string is of a valid form, whether or
202 * not it exists. This includes codes which are used solely for
203 * customisation via the MediaWiki namespace.
207 public static function isValidCode( $code ) {
209 strcspn( $code, ":/\\\000" ) === strlen( $code )
210 && !preg_match( Title
::getTitleInvalidRegex(), $code );
/**
 * Returns true if a language code is of a valid form for the purposes of
 * internal customisation of MediaWiki, via Messages*.php.
 *
 * Only ASCII letters, digits and hyphens are accepted (case-insensitively).
 * The empty string is accepted, as before. The pattern uses the D modifier
 * so that "$" matches only at the very end of the subject; without it a
 * code with a trailing newline (e.g. "en\n") would be reported as valid.
 *
 * @param $code String
 * @return Boolean
 */
public static function isValidBuiltInCode( $code ) {
	// preg_match() returns an integer (or false on error); normalise to
	// the boolean this method is documented to return.
	return (bool)preg_match( '/^[a-z0-9-]*$/iD', $code );
}
/**
 * Get the LocalisationCache instance.
 *
 * The instance is created on first use from $wgLocalisationCacheConf:
 * its 'class' entry names the class to instantiate and the whole
 * configuration array is passed to that class's constructor.
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	if ( !is_null( self::$dataCache ) ) {
		return self::$dataCache;
	}

	global $wgLocalisationCacheConf;
	$cacheClass = $wgLocalisationCacheConf['class'];
	self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );

	return self::$dataCache;
}
235 function __construct() {
236 $this->mConverter
= new FakeConverter( $this );
237 // Set the code to the name of the descendant
238 if ( get_class( $this ) == 'Language' ) {
241 $this->mCode
= str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
243 self
::getLocalisationCache();
/**
 * Reduce memory usage by unsetting every instance property of this
 * object when it is destroyed.
 */
function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 * The base implementation does nothing.
 */
function initContLang() {
	// Intentionally empty.
}
261 function getFallbackLanguageCode() {
262 if ( $this->mCode
=== 'en' ) {
265 return self
::$dataCache->getItem( $this->mCode
, 'fallback' );
/**
 * Exports $wgBookstoreListEn: returns the 'bookstoreList' item of the
 * localisation cache for this language's code.
 */
function getBookstoreList() {
	$bookstores = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $bookstores;
}
280 function getNamespaces() {
281 if ( is_null( $this->namespaceNames
) ) {
282 global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;
284 $this->namespaceNames
= self
::$dataCache->getItem( $this->mCode
, 'namespaceNames' );
285 $validNamespaces = MWNamespace
::getCanonicalNamespaces();
287 $this->namespaceNames
= $wgExtraNamespaces +
$this->namespaceNames +
$validNamespaces;
289 $this->namespaceNames
[NS_PROJECT
] = $wgMetaNamespace;
290 if ( $wgMetaNamespaceTalk ) {
291 $this->namespaceNames
[NS_PROJECT_TALK
] = $wgMetaNamespaceTalk;
293 $talk = $this->namespaceNames
[NS_PROJECT_TALK
];
294 $this->namespaceNames
[NS_PROJECT_TALK
] =
295 $this->fixVariableInNamespace( $talk );
298 # Sometimes a language will be localised but not actually exist on this wiki.
299 foreach( $this->namespaceNames
as $key => $text ) {
300 if ( !isset( $validNamespaces[$key] ) ) {
301 unset( $this->namespaceNames
[$key] );
305 # The above mixing may leave namespaces out of canonical order.
306 # Re-order by namespace ID number...
307 ksort( $this->namespaceNames
);
309 return $this->namespaceNames
;
313 * A convenience function that returns the same thing as
314 * getNamespaces() except with the array values changed to ' '
315 * where it found '_', useful for producing output to be displayed
316 * e.g. in <select> forms.
320 function getFormattedNamespaces() {
321 $ns = $this->getNamespaces();
322 foreach ( $ns as $k => $v ) {
323 $ns[$k] = strtr( $v, '_', ' ' );
/**
 * Get a namespace value by key.
 *
 * Example:
 *   $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 *   echo $mw_ns; // prints 'MediaWiki'
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}
	return false;
}
/**
 * A convenience function that returns the same thing as
 * getNsText() except with '_' changed to ' '.
 *
 * The result of getNsText() is passed to strtr() as-is, including the
 * false it returns for an unknown index.
 *
 * @param $index Int: namespace index
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
/**
 * Returns gender-dependent namespace alias if available.
 *
 * Looks up $index and $gender in the 'namespaceGenderAliases' item of the
 * localisation cache; when no such entry is set, falls back to
 * getNsText( $index ).
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key (male, female... )
 */
function getGenderNsText( $index, $gender ) {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}
	return $this->getNsText( $index );
}
/**
 * Whether this language distinguishes genders, for example in
 * namespace names: true when the 'namespaceGenderAliases' item of the
 * localisation cache for this language has at least one entry.
 */
function needsGenderDistinction() {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	$aliasCount = count( $genderAliases );
	return $aliasCount > 0;
}
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	// Keys of getNamespaceIds() are lower-cased with lc(), so the lookup
	// key has to be lower-cased the same way.
	$lookupKey = $this->lc( $text );
	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$lookupKey] ) ) {
		return $idsByName[$lookupKey];
	}
	return false;
}
392 function getNamespaceAliases() {
393 if ( is_null( $this->namespaceAliases
) ) {
394 $aliases = self
::$dataCache->getItem( $this->mCode
, 'namespaceAliases' );
398 foreach ( $aliases as $name => $index ) {
399 if ( $index === NS_PROJECT_TALK
) {
400 unset( $aliases[$name] );
401 $name = $this->fixVariableInNamespace( $name );
402 $aliases[$name] = $index;
407 $genders = self
::$dataCache->getItem( $this->mCode
, 'namespaceGenderAliases' );
408 foreach ( $genders as $index => $forms ) {
409 foreach ( $forms as $alias ) {
410 $aliases[$alias] = $index;
414 $this->namespaceAliases
= $aliases;
416 return $this->namespaceAliases
;
/**
 * Get the map from lower-cased namespace name or alias to namespace index.
 *
 * Built once and kept in $this->mNamespaceIds. Sources are applied in
 * order, later ones overwriting earlier ones on a key clash:
 * getNamespaces(), getNamespaceAliases(), then $wgNamespaceAliases.
 */
function getNamespaceIds() {
	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	global $wgNamespaceAliases;
	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	foreach ( $this->getNamespaces() as $nsIndex => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $nsIndex;
	}

	foreach ( $this->getNamespaceAliases() as $aliasName => $nsIndex ) {
		$this->mNamespaceIds[$this->lc( $aliasName )] = $nsIndex;
	}

	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $aliasName => $nsIndex ) {
			$this->mNamespaceIds[$this->lc( $aliasName )] = $nsIndex;
		}
	}

	return $this->mNamespaceIds;
}
444 * Get a namespace key by value, case insensitive. Canonical namespace
445 * names override custom ones defined for the current language.
447 * @param $text String
448 * @return mixed An integer if $text is a valid value otherwise false
450 function getNsIndex( $text ) {
451 $lctext = $this->lc( $text );
452 if ( ( $ns = MWNamespace
::getCanonicalIndex( $lctext ) ) !== null ) {
455 $ids = $this->getNamespaceIds();
456 return isset( $ids[$lctext] ) ?
$ids[$lctext] : false;
/**
 * Short names for language variants used for language conversion links.
 * Returns the "variantname-$code" message via getMessageFromDB().
 *
 * @param $code String
 */
function getVariantname( $code ) {
	$messageKey = "variantname-$code";
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Build "<NS_SPECIAL namespace text>:<page name>" for a special page.
 *
 * If getSpecialPageAliases() has a first alias for $name, that alias is
 * used as the page name; otherwise $name is used unchanged.
 *
 * @param $name String
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$pageName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	return $this->getNsText( NS_SPECIAL ) . ':' . $pageName;
}
477 function getQuickbarSettings() {
479 $this->getMessage( 'qbsettings-none' ),
480 $this->getMessage( 'qbsettings-fixedleft' ),
481 $this->getMessage( 'qbsettings-fixedright' ),
482 $this->getMessage( 'qbsettings-floatingleft' ),
483 $this->getMessage( 'qbsettings-floatingright' )
/** Returns the 'datePreferences' localisation cache item for this language. */
function getDatePreferences() {
	$preferences = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $preferences;
}
/** Returns the 'dateFormats' localisation cache item for this language. */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
495 function getDefaultDateFormat() {
496 $df = self
::$dataCache->getItem( $this->mCode
, 'defaultDateFormat' );
497 if ( $df === 'dmy or mdy' ) {
498 global $wgAmericanDates;
499 return $wgAmericanDates ?
'mdy' : 'dmy';
/** Returns the 'datePreferenceMigrationMap' localisation cache item for this language. */
function getDatePreferenceMigrationMap() {
	$migrationMap = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $migrationMap;
}
/**
 * Returns the $image sub-item of the 'imageFiles' localisation cache
 * item for this language.
 *
 * @param $image String: sub-item key
 */
function getImageFile( $image ) {
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
/** Returns the 'defaultUserOptionOverrides' localisation cache item for this language. */
function getDefaultUserOptionOverrides() {
	$overrides = self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
	return $overrides;
}
/** Returns the 'extraUserToggles' localisation cache item for this language. */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return $toggles;
}
/**
 * Returns the "tog-$tog" message via getMessageFromDB().
 *
 * @param $tog String: toggle name, appended to the "tog-" prefix
 */
function getUserToggle( $tog ) {
	$messageKey = "tog-$tog";
	return $this->getMessageFromDB( $messageKey );
}
526 * Get language names, indexed by code.
527 * If $customisedOnly is true, only returns codes with a messages file
529 public static function getLanguageNames( $customisedOnly = false ) {
530 global $wgExtraLanguageNames;
531 static $coreLanguageNames;
533 if ( $coreLanguageNames === null ) {
534 include( MWInit
::compiledPath( 'languages/Names.php' ) );
537 $allNames = $wgExtraLanguageNames +
$coreLanguageNames;
538 if ( !$customisedOnly ) {
544 $dir = opendir( "$IP/languages/messages" );
545 while ( false !== ( $file = readdir( $dir ) ) ) {
546 $code = self
::getCodeFromFileName( $file, 'Messages' );
547 if ( $code && isset( $allNames[$code] ) ) {
548 $names[$code] = $allNames[$code];
556 * Get translated language names. This is done on best effort and
557 * by default this is exactly the same as Language::getLanguageNames.
558 * The CLDR extension provides translated names.
559 * @param $code String Language code.
560 * @return Array language code => language name
563 public static function getTranslatedLanguageNames( $code ) {
565 wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );
567 foreach ( self
::getLanguageNames() as $code => $name ) {
568 if ( !isset( $names[$code] ) ) $names[$code] = $name;
/**
 * Get a message from the MediaWiki namespace.
 *
 * Calls wfMsgExt() with the 'parsemag' option and this object as the
 * 'language' option.
 *
 * @param $msg String: message name
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
584 function getLanguageName( $code ) {
585 $names = self
::getLanguageNames();
586 if ( !array_key_exists( $code, $names ) ) {
589 return $names[$code];
/**
 * Returns the message for a month name.
 *
 * @param $key 1-based position; the message key is self::$mMonthMsgs[$key - 1]
 */
function getMonthName( $key ) {
	$messageKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
596 function getMonthNamesArray() {
597 $monthNames = array( '' );
598 for ( $i=1; $i < 13; $i++
) {
599 $monthNames[] = $this->getMonthName( $i );
/**
 * Returns the message for a genitive month name.
 *
 * @param $key 1-based position; the message key is self::$mMonthGenMsgs[$key - 1]
 */
function getMonthNameGen( $key ) {
	$messageKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Returns the message for a month abbreviation.
 *
 * @param $key 1-based position; the message key is self::$mMonthAbbrevMsgs[$key - 1]
 */
function getMonthAbbreviation( $key ) {
	$messageKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
612 function getMonthAbbreviationsArray() {
613 $monthNames = array('');
614 for ( $i=1; $i < 13; $i++
) {
615 $monthNames[] = $this->getMonthAbbreviation( $i );
/**
 * Returns the message for a weekday name.
 *
 * @param $key 1-based position; the message key is self::$mWeekdayMsgs[$key - 1]
 */
function getWeekdayName( $key ) {
	$messageKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Returns the message for a weekday abbreviation.
 *
 * @param $key 1-based position; the message key is self::$mWeekdayAbbrevMsgs[$key - 1]
 */
function getWeekdayAbbreviation( $key ) {
	$messageKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Returns the message for an Iranian calendar month name.
 *
 * @param $key 1-based position; the message key is self::$mIranianCalendarMonthMsgs[$key - 1]
 */
function getIranianCalendarMonthName( $key ) {
	$messageKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Returns the message for a Hebrew calendar month name.
 *
 * @param $key 1-based position; the message key is self::$mHebrewCalendarMonthMsgs[$key - 1]
 */
function getHebrewCalendarMonthName( $key ) {
	$messageKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Returns the message for a genitive Hebrew calendar month name.
 *
 * @param $key 1-based position; the message key is self::$mHebrewCalendarMonthGenMsgs[$key - 1]
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$messageKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Returns the message for a Hijri calendar month name.
 *
 * @param $key 1-based position; the message key is self::$mHijriCalendarMonthMsgs[$key - 1]
 */
function getHijriCalendarMonthName( $key ) {
	$messageKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
645 * Used by date() and time() to adjust the time output.
647 * @param $ts Int the time in date('YmdHis') format
648 * @param $tz Mixed: adjust the time by this amount (default false, which means
649 * the user's 'timecorrection' option is used)
652 function userAdjust( $ts, $tz = false ) {
653 global $wgUser, $wgLocalTZoffset;
655 if ( $tz === false ) {
656 $tz = $wgUser->getOption( 'timecorrection' );
659 $data = explode( '|', $tz, 3 );
661 if ( $data[0] == 'ZoneInfo' ) {
662 if ( function_exists( 'timezone_open' ) && @timezone_open
( $data[2] ) !== false ) {
663 $date = date_create( $ts, timezone_open( 'UTC' ) );
664 date_timezone_set( $date, timezone_open( $data[2] ) );
665 $date = date_format( $date, 'YmdHis' );
668 # Unrecognized timezone, default to 'Offset' with the stored offset.
673 if ( $data[0] == 'System' ||
$tz == '' ) {
674 # Global offset in minutes.
675 if ( isset( $wgLocalTZoffset ) ) {
676 $minDiff = $wgLocalTZoffset;
678 } else if ( $data[0] == 'Offset' ) {
679 $minDiff = intval( $data[1] );
681 $data = explode( ':', $tz );
682 if ( count( $data ) == 2 ) {
683 $data[0] = intval( $data[0] );
684 $data[1] = intval( $data[1] );
685 $minDiff = abs( $data[0] ) * 60 +
$data[1];
686 if ( $data[0] < 0 ) {
687 $minDiff = -$minDiff;
690 $minDiff = intval( $data[0] ) * 60;
694 # No difference ? Return time unchanged
695 if ( 0 == $minDiff ) {
699 wfSuppressWarnings(); // E_STRICT system time bitching
700 # Generate an adjusted date; take advantage of the fact that mktime
701 # will normalize out-of-range values so we don't have to split $minDiff
702 # into hours and minutes.
704 (int)substr( $ts, 8, 2 ) ), # Hours
705 (int)substr( $ts, 10, 2 ) +
$minDiff, # Minutes
706 (int)substr( $ts, 12, 2 ), # Seconds
707 (int)substr( $ts, 4, 2 ), # Month
708 (int)substr( $ts, 6, 2 ), # Day
709 (int)substr( $ts, 0, 4 ) ); # Year
711 $date = date( 'YmdHis', $t );
718 * This is a workalike of PHP's date() function, but with better
719 * internationalisation, a reduced set of format characters, and a better
722 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
723 * PHP manual for definitions. There are a number of extensions, which
726 * xn Do not translate digits of the next numeric format character
727 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
728 * xr Use roman numerals for the next numeric format character
729 * xh Use hebrew numerals for the next numeric format character
731 * xg Genitive month name
733 * xij j (day number) in Iranian calendar
734 * xiF F (month name) in Iranian calendar
735 * xin n (month number) in Iranian calendar
736 * xiY Y (full year) in Iranian calendar
738 * xjj j (day number) in Hebrew calendar
739 * xjF F (month name) in Hebrew calendar
740 * xjt t (days in month) in Hebrew calendar
741 * xjx xg (genitive month name) in Hebrew calendar
742 * xjn n (month number) in Hebrew calendar
743 * xjY Y (full year) in Hebrew calendar
745 * xmj j (day number) in Hijri calendar
746 * xmF F (month name) in Hijri calendar
747 * xmn n (month number) in Hijri calendar
748 * xmY Y (full year) in Hijri calendar
750 * xkY Y (full year) in Thai solar calendar. Months and days are
751 * identical to the Gregorian calendar
752 * xoY Y (full year) in Minguo calendar or Juche year.
753 * Months and days are identical to the
755 * xtY Y (full year) in Japanese nengo. Months and days are
756 * identical to the Gregorian calendar
758 * Characters enclosed in double quotes will be considered literal (with
759 * the quotes themselves removed). Unmatched quotes will be considered
760 * literal quotes. Example:
762 * "The month is" F => The month is January
765 * Backslash escaping is also supported.
767 * Input timestamp is assumed to be pre-normalized to the desired local
770 * @param $format String
771 * @param $ts String: 14-character timestamp
774 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
776 function sprintfDate( $format, $ts ) {
789 for ( $p = 0; $p < strlen( $format ); $p++
) {
792 if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
793 $code .= $format[++
$p];
796 if ( ( $code === 'xi' ||
$code == 'xj' ||
$code == 'xk' ||
$code == 'xm' ||
$code == 'xo' ||
$code == 'xt' ) && $p < strlen( $format ) - 1 ) {
797 $code .= $format[++
$p];
808 $rawToggle = !$rawToggle;
817 $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
820 if ( !$hebrew ) $hebrew = self
::tsToHebrew( $ts );
821 $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
824 $num = substr( $ts, 6, 2 );
827 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
828 $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) +
1 );
831 $num = intval( substr( $ts, 6, 2 ) );
835 $iranian = self
::tsToIranian( $ts );
841 $hijri = self
::tsToHijri( $ts );
847 $hebrew = self
::tsToHebrew( $ts );
853 $unix = wfTimestamp( TS_UNIX
, $ts );
855 $s .= $this->getWeekdayName( gmdate( 'w', $unix ) +
1 );
859 $unix = wfTimestamp( TS_UNIX
, $ts );
861 $w = gmdate( 'w', $unix );
866 $unix = wfTimestamp( TS_UNIX
, $ts );
868 $num = gmdate( 'w', $unix );
872 $unix = wfTimestamp( TS_UNIX
, $ts );
874 $num = gmdate( 'z', $unix );
878 $unix = wfTimestamp( TS_UNIX
, $ts );
880 $num = gmdate( 'W', $unix );
883 $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
887 $iranian = self
::tsToIranian( $ts );
889 $s .= $this->getIranianCalendarMonthName( $iranian[1] );
893 $hijri = self
::tsToHijri( $ts );
895 $s .= $this->getHijriCalendarMonthName( $hijri[1] );
899 $hebrew = self
::tsToHebrew( $ts );
901 $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
904 $num = substr( $ts, 4, 2 );
907 $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
910 $num = intval( substr( $ts, 4, 2 ) );
914 $iranian = self
::tsToIranian( $ts );
920 $hijri = self
::tsToHijri ( $ts );
926 $hebrew = self
::tsToHebrew( $ts );
932 $unix = wfTimestamp( TS_UNIX
, $ts );
934 $num = gmdate( 't', $unix );
938 $hebrew = self
::tsToHebrew( $ts );
944 $unix = wfTimestamp( TS_UNIX
, $ts );
946 $num = gmdate( 'L', $unix );
950 $unix = wfTimestamp( TS_UNIX
, $ts );
952 $num = date( 'o', $unix );
955 $num = substr( $ts, 0, 4 );
959 $iranian = self
::tsToIranian( $ts );
965 $hijri = self
::tsToHijri( $ts );
971 $hebrew = self
::tsToHebrew( $ts );
977 $thai = self
::tsToYear( $ts, 'thai' );
983 $minguo = self
::tsToYear( $ts, 'minguo' );
989 $tenno = self
::tsToYear( $ts, 'tenno' );
994 $num = substr( $ts, 2, 2 );
997 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'am' : 'pm';
1000 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'AM' : 'PM';
1003 $h = substr( $ts, 8, 2 );
1004 $num = $h %
12 ?
$h %
12 : 12;
1007 $num = intval( substr( $ts, 8, 2 ) );
1010 $h = substr( $ts, 8, 2 );
1011 $num = sprintf( '%02d', $h %
12 ?
$h %
12 : 12 );
1014 $num = substr( $ts, 8, 2 );
1017 $num = substr( $ts, 10, 2 );
1020 $num = substr( $ts, 12, 2 );
1024 $unix = wfTimestamp( TS_UNIX
, $ts );
1026 $s .= gmdate( 'c', $unix );
1030 $unix = wfTimestamp( TS_UNIX
, $ts );
1032 $s .= gmdate( 'r', $unix );
1036 $unix = wfTimestamp( TS_UNIX
, $ts );
1041 # Backslash escaping
1042 if ( $p < strlen( $format ) - 1 ) {
1043 $s .= $format[++
$p];
1050 if ( $p < strlen( $format ) - 1 ) {
1051 $endQuote = strpos( $format, '"', $p +
1 );
1052 if ( $endQuote === false ) {
1053 # No terminating quote, assume literal "
1056 $s .= substr( $format, $p +
1, $endQuote - $p - 1 );
1060 # Quote at end of string, assume literal "
1067 if ( $num !== false ) {
1068 if ( $rawToggle ||
$raw ) {
1071 } elseif ( $roman ) {
1072 $s .= self
::romanNumeral( $num );
1074 } elseif ( $hebrewNum ) {
1075 $s .= self
::hebrewNumeral( $num );
1078 $s .= $this->formatNum( $num, true );
1085 private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1086 private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1088 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1089 * Gregorian dates to Iranian dates. Originally written in C, it
1090 * is released under the terms of GNU Lesser General Public
1091 * License. Conversion to PHP was performed by Niklas Laxström.
1093 * Link: http://www.farsiweb.info/jalali/jalali.c
1095 private static function tsToIranian( $ts ) {
1096 $gy = substr( $ts, 0, 4 ) -1600;
1097 $gm = substr( $ts, 4, 2 ) -1;
1098 $gd = (int)substr( $ts, 6, 2 ) -1;
1100 # Days passed from the beginning (including leap years)
1102 +
floor( ( $gy +
3 ) / 4 )
1103 - floor( ( $gy +
99 ) / 100 )
1104 +
floor( ( $gy +
399 ) / 400 );
1107 // Add days of the past months of this year
1108 for ( $i = 0; $i < $gm; $i++
) {
1109 $gDayNo +
= self
::$GREG_DAYS[$i];
1113 if ( $gm > 1 && ( ( $gy %
4 === 0 && $gy %
100 !== 0 ||
( $gy %
400 == 0 ) ) ) ) {
1117 // Days passed in current month
1120 $jDayNo = $gDayNo - 79;
1122 $jNp = floor( $jDayNo / 12053 );
1125 $jy = 979 +
33 * $jNp +
4 * floor( $jDayNo / 1461 );
1128 if ( $jDayNo >= 366 ) {
1129 $jy +
= floor( ( $jDayNo - 1 ) / 365 );
1130 $jDayNo = floor( ( $jDayNo - 1 ) %
365 );
1133 for ( $i = 0; $i < 11 && $jDayNo >= self
::$IRANIAN_DAYS[$i]; $i++
) {
1134 $jDayNo -= self
::$IRANIAN_DAYS[$i];
1140 return array( $jy, $jm, $jd );
1144 * Converting Gregorian dates to Hijri dates.
1146 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1148 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1150 private static function tsToHijri( $ts ) {
1151 $year = substr( $ts, 0, 4 );
1152 $month = substr( $ts, 4, 2 );
1153 $day = substr( $ts, 6, 2 );
1161 ( $zy > 1582 ) ||
( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1162 ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1165 $zjd = (int)( ( 1461 * ( $zy +
4800 +
(int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1166 (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1167 (int)( ( 3 * (int)( ( ( $zy +
4900 +
(int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1170 $zjd = 367 * $zy - (int)( ( 7 * ( $zy +
5001 +
(int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1171 (int)( ( 275 * $zm ) / 9 ) +
$zd +
1729777;
1174 $zl = $zjd -1948440 +
10632;
1175 $zn = (int)( ( $zl - 1 ) / 10631 );
1176 $zl = $zl - 10631 * $zn +
354;
1177 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) +
( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1178 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) +
29;
1179 $zm = (int)( ( 24 * $zl ) / 709 );
1180 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1181 $zy = 30 * $zn +
$zj - 30;
1183 return array( $zy, $zm, $zd );
1187 * Converting Gregorian dates to Hebrew dates.
1189 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1190 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1191 * to translate the relevant functions into PHP and release them under
1194 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1195 * and Adar II is 14. In a non-leap year, Adar is 6.
1197 private static function tsToHebrew( $ts ) {
1199 $year = substr( $ts, 0, 4 );
1200 $month = substr( $ts, 4, 2 );
1201 $day = substr( $ts, 6, 2 );
1203 # Calculate Hebrew year
1204 $hebrewYear = $year +
3760;
1206 # Month number when September = 1, August = 12
1208 if ( $month > 12 ) {
1215 # Calculate day of year from 1 September
1217 for ( $i = 1; $i < $month; $i++
) {
1221 # Check if the year is leap
1222 if ( $year %
400 == 0 ||
( $year %
4 == 0 && $year %
100 > 0 ) ) {
1225 } elseif ( $i == 8 ||
$i == 10 ||
$i == 1 ||
$i == 3 ) {
1232 # Calculate the start of the Hebrew year
1233 $start = self
::hebrewYearStart( $hebrewYear );
1235 # Calculate next year's start
1236 if ( $dayOfYear <= $start ) {
1237 # Day is before the start of the year - it is the previous year
1239 $nextStart = $start;
1243 # Add days since previous year's 1 September
1245 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1249 # Start of the new (previous) year
1250 $start = self
::hebrewYearStart( $hebrewYear );
1253 $nextStart = self
::hebrewYearStart( $hebrewYear +
1 );
1256 # Calculate Hebrew day of year
1257 $hebrewDayOfYear = $dayOfYear - $start;
1259 # Difference between year's days
1260 $diff = $nextStart - $start;
1261 # Add 12 (or 13 for leap years) days to ignore the difference between
1262 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1263 # difference is only about the year type
1264 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1270 # Check the year pattern, and is leap year
1271 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1272 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1273 # and non-leap years
1274 $yearPattern = $diff %
30;
1275 # Check if leap year
1276 $isLeap = $diff >= 30;
1278 # Calculate day in the month from number of day in the Hebrew year
1279 # Don't check Adar - if the day is not in Adar, we will stop before;
1280 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1281 $hebrewDay = $hebrewDayOfYear;
1284 while ( $hebrewMonth <= 12 ) {
1285 # Calculate days in this month
1286 if ( $isLeap && $hebrewMonth == 6 ) {
1287 # Adar in a leap year
1289 # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1291 if ( $hebrewDay <= $days ) {
1295 # Subtract the days of Adar I
1296 $hebrewDay -= $days;
1299 if ( $hebrewDay <= $days ) {
1305 } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1306 # Cheshvan in a complete year (otherwise as the rule below)
1308 } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1309 # Kislev in an incomplete year (otherwise as the rule below)
1312 # Odd months have 30 days, even have 29
1313 $days = 30 - ( $hebrewMonth - 1 ) %
2;
1315 if ( $hebrewDay <= $days ) {
1316 # In the current month
1319 # Subtract the days of the current month
1320 $hebrewDay -= $days;
1321 # Try in the next month
1326 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
/**
 * Calculate the start of a Hebrew year, expressed as a number of days
 * counted from 1 September.
 *
 * Based on Carl Friedrich Gauss' algorithm for finding the Easter date.
 * Used for Hebrew date conversion.
 *
 * @param $year Integer: the year whose start is wanted
 * @return Integer: day offset of the year start
 */
private static function hebrewYearStart( $year ) {
	$prev = $year - 1;
	$a = intval( ( 12 * $prev + 17 ) % 19 );
	$b = intval( $prev % 4 );
	$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * $prev;

	# Split $m into its whole part ($Mar) and the remaining fraction,
	# rounding the whole part downwards when $m is negative
	$negative = $m < 0;
	if ( $negative ) {
		$m--;
	}
	$Mar = intval( $m );
	if ( $negative ) {
		$m++;
	}
	$m -= $Mar;

	# Postponement of the year start, selected by $c (a value modulo 7)
	$c = intval( ( $Mar + 3 * $prev + 5 * $b + 5 ) % 7 );
	if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
		$shift = 1;
	} elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
		$shift = 2;
	} elseif ( $c == 2 || $c == 4 || $c == 6 ) {
		$shift = 1;
	} else {
		$shift = 0;
	}

	$offset = $year - 3761;
	return $Mar + $shift + intval( $offset / 100 ) - intval( $offset / 400 ) - 24;
}
/**
 * Algorithm to convert Gregorian dates to Thai solar dates,
 * Minguo/Juche dates or Japanese nengō (era) dates.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * Only the year is converted; month and day are returned exactly as they
 * appear in the timestamp. Unknown calendar names leave the year untouched.
 *
 * @param $ts String: 14-character timestamp
 * @param $cName String: calendar name ('thai', 'minguo', 'juche' or 'tenno')
 * @return Array: converted year, month, day
 */
private static function tsToYear( $ts, $cName ) {
	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );

	if ( !strcmp( $cName, 'thai' ) ) {
		# Thai solar dates: add 543 years to the Gregorian calendar
		$gy_offset = $gy + 543;
	} elseif ( !strcmp( $cName, 'minguo' ) || !strcmp( $cName, 'juche' ) ) {
		# Minguo/Juche dates: deduct 1911 years from the Gregorian calendar
		$gy_offset = $gy - 1911;
	} elseif ( !strcmp( $cName, 'tenno' ) ) {
		# Nengō dates: the year is counted from the first year of the era
		# the date falls in, and prefixed with the era name.
		# Eras are selected on a single YYYYMMDD key.
		$date = intval( $gy ) * 10000 + intval( $gm ) * 100 + intval( $gd );
		if ( $date >= 20190501 ) {
			# Reiwa period
			$era = '令和';
			$firstYear = 2019;
		} elseif ( $date >= 19890108 ) {
			# Heisei period
			$era = '平成';
			$firstYear = 1989;
		} elseif ( $date >= 19261226 ) {
			# Shōwa period
			$era = '昭和';
			$firstYear = 1926;
		} elseif ( $date >= 19120731 ) {
			# Taishō period
			$era = '大正';
			$firstYear = 1912;
		} else {
			# Meiji period (also used for anything earlier)
			$era = '明治';
			$firstYear = 1868;
		}

		$gy_gannen = $gy - $firstYear + 1;
		# The first year of an era is written 元 ("gannen") instead of 1
		$gy_offset = $era . ( $gy_gannen == 1 ? '元' : $gy_gannen );
	} else {
		$gy_offset = $gy;
	}

	return array( $gy_offset, $gm, $gd );
}
/**
 * Roman number formatting up to 3000.
 *
 * @param $num Mixed: number to format; it is converted with intval()
 * @return Mixed: the Roman numeral string, or the integer value unchanged
 *   when it is not in the range 1..3000
 */
static function romanNumeral( $num ) {
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 3000 ) {
		return $num;
	}

	# Walk the decimal places from thousands down to units; a zero digit
	# selects the empty string in column 0 of its row.
	$roman = '';
	foreach ( array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 ) as $row => $unit ) {
		$digit = (int)floor( $num / $unit );
		$roman .= $table[$row][$digit];
		$num -= $digit * $unit;
	}
	return $roman;
}
/**
 * Hebrew Gematria number formatting up to 9999.
 *
 * A one-letter number gets a trailing geresh ('), a longer one gets
 * gershayim (") before its last letter, and a last letter that has a final
 * form is written in that form. Thousands are written as a units letter
 * followed by a geresh.
 *
 * @param $num Mixed: number to format; it is converted with intval()
 * @return Mixed: the Hebrew numeral string, or the integer value unchanged
 *   when it is not in the range 1..9999
 */
static function hebrewNumeral( $num ) {
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	# Round thousands are only "letter + geresh" (3 bytes). The quote
	# placement below cuts the last two bytes off the string, which would
	# split that letter in half and produce invalid UTF-8, so they get their
	# own spelled-out notation instead.
	if ( $num % 1000 == 0 ) {
		$thousands = $table[0][intval( $num / 1000 )];
		return $thousands . "' " . ( $num == 1000 ? 'אלף' : 'אלפים' );
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				# 15 and 16 are written 9+6 and 9+7 rather than 10+5 and 10+6
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	# Every letter in $table is two bytes long, so a two-byte string is a
	# single letter.
	if ( strlen( $s ) == 2 ) {
		return $s . "'";
	}

	# From here on $s ends with a whole two-byte letter
	$head = substr( $s, 0, -2 );
	$last = substr( $s, -2 );
	$finalForms = array(
		'כ' => 'ך',
		'מ' => 'ם',
		'נ' => 'ן',
		'פ' => 'ף',
		'צ' => 'ץ',
	);
	if ( isset( $finalForms[$last] ) ) {
		$last = $finalForms[$last];
	}
	return $head . '"' . $last;
}
/**
 * Resolve the date preference that time(), date() and timeanddate() should
 * use. Typical use:
 *
 *   function timeanddate([...], $format = true) {
 *     $datePreference = $this->dateFormat($format);
 *     [...]
 *   }
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return String: the preference name, or 'default' when it is empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		# An explicit format was passed in
		$datePreference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$datePreference = $wgUser->getDatePreference();
	} else {
		$datePreference = (string)User::getDefaultOption( 'date' );
	}

	return $datePreference == '' ? 'default' : $datePreference;
}
/**
 * Get a format string for a given type and preference.
 *
 * The preference 'default', and any preference that has no format string of
 * its own, is resolved through getDefaultDateFormat(). Results are kept in
 * $this->dateFormatStrings.
 *
 * @param $type string May be date, time or both
 * @param $pref string The format name as it appears in Messages*.php
 * @return the format string stored in the localisation cache for the
 *   resolved preference
 */
function getDateFormatString( $type, $pref ) {
	if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
		$requested = $pref;
		$df = null;
		if ( $pref != 'default' ) {
			$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		}
		if ( is_null( $df ) ) {
			$pref = $this->getDefaultDateFormat();
			$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		}
		$this->dateFormatStrings[$type][$pref] = $df;
		# Also remember the answer under the name that was asked for.
		# Otherwise a request for 'default' (or for a preference that fell
		# back) is stored only under the resolved name and misses the cache
		# on every later call.
		$this->dateFormatStrings[$type][$requested] = $df;
	}
	return $this->dateFormatStrings[$type][$pref];
}
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the 'date' format string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'date', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Format the time-of-day part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the 'time' format string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'time', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Format both the date and the time of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the 'both' format string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'both', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Fetch a single entry of this language's 'messages' item from the
 * localisation cache.
 *
 * @param $key String: message key
 * @return the cached sub-item for $key
 */
function getMessage( $key ) {
	$message = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
	return $message;
}
/**
 * Fetch the whole 'messages' item of this language from the localisation
 * cache.
 *
 * @return the cached 'messages' item
 */
function getAllMessages() {
	$messages = self::$dataCache->getItem( $this->mCode, 'messages' );
	return $messages;
}
/**
 * Wrapper around PHP's iconv() that always appends //IGNORE to the target
 * encoding. Per the original note, this is a plain wrapper in all languages
 * except Esperanto, which does some x-conversions beforehand.
 *
 * @param $in String: source encoding
 * @param $out String: target encoding
 * @param $string String: text to convert
 * @return the value returned by iconv()
 */
function iconv( $in, $out, $string ) {
	$target = $out . '//IGNORE';

	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	wfSuppressWarnings();
	$converted = iconv( $in, $target, $string );
	wfRestoreWarnings();

	return $converted;
}
// callback functions for uc(), lc(), ucwords(), ucwordbreaks()

/**
 * preg_replace_callback() handler: upper-case the first character of the
 * first captured group.
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
/**
 * preg_replace_callback() handler: upper-case the whole match with
 * mb_strtoupper().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucwordbreaksCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
/**
 * preg_replace_callback() handler: translate the first captured group with
 * the first map returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[0] );
}
/**
 * preg_replace_callback() handler: translate the first captured group with
 * the second map returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[1] );
}
/**
 * preg_replace_callback() handler: upper-case the whole match with
 * mb_strtoupper().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucwordsCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
/**
 * preg_replace_callback() handler: translate the whole match with the first
 * map returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[0], $caseMaps[0] );
}
/**
 * Make a string's first character uppercase.
 *
 * @param $str String
 * @return String
 */
function ucfirst( $str ) {
	$o = ord( $str );
	if ( $o >= 128 ) {
		// first byte is non-ASCII: use the multibyte-aware logic
		return $this->uc( $str, true );
	}
	if ( $o >= 96 ) {
		return ucfirst( $str ); // use PHP's ucfirst()
	}
	// byte value below 96: treated as already uppercase
	return $str;
}
/**
 * Convert a string to uppercase.
 *
 * @param $str String
 * @param $first Bool: when true, only the first character is converted
 * @return String
 */
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// Pure ASCII: PHP's byte-oriented functions are enough
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( $first ) {
			return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return mb_strtoupper( $str );
	}

	// No mbstring: convert through ucCallback(), anchored at the start of
	// the string when only the first character is wanted
	$x = $first ? '^' : '';
	return preg_replace_callback(
		"/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
/**
 * Make a string's first character lowercase.
 *
 * @param $str String
 * @return String
 */
function lcfirst( $str ) {
	$o = ord( $str );
	if ( !$o ) {
		return strval( $str );
	}
	if ( $o >= 128 ) {
		// first byte is non-ASCII: use the multibyte-aware logic
		return $this->lc( $str, true );
	}
	if ( $o <= 96 ) {
		$str[0] = strtolower( $str[0] );
	}
	return $str;
}
/**
 * Convert a string to lowercase.
 *
 * @param $str String
 * @param $first Bool: when true, only the first character is converted
 * @return String
 */
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// Pure ASCII: PHP's byte-oriented functions are enough
		return $first
			? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 )
			: strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( $first ) {
			return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return mb_strtolower( $str );
	}

	// No mbstring: convert through lcCallback(), anchored at the start of
	// the string when only the first character is wanted
	$x = $first ? '^' : '';
	return preg_replace_callback(
		"/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
/**
 * Tell whether a string contains at least one byte in the range 0x80-0xff.
 *
 * @param $str String
 * @return Bool
 */
function isMultibyte( $str ) {
	$found = preg_match( '/[\x80-\xff]/', $str );
	return (bool)$found;
}
/**
 * Lower-case a string, then capitalize the first letter of every
 * space-separated word.
 *
 * @param $str String
 * @return String
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback(
		$replaceRegexp,
		array( $this, $callback ),
		$str
	);
}
/**
 * Capitalize words at word breaks.
 *
 * @param $str String
 * @return String
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		// ASCII only: \b works, capitalize every word through ucfirst()
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback(
		$replaceRegexp,
		array( $this, $callback ),
		$str
	);
}
/**
 * Return a case-folded representation of $s.
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s String
 * @return String: currently the result of uc()
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
/**
 * Return $s unchanged when it is pure ASCII or matches the UTF-8 byte
 * pattern below; otherwise convert it to UTF-8 from the language's fallback
 * 8-bit encoding.
 *
 * @param $s String
 * @return String
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	$utf8Pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';

	# Check for non-UTF-8 URLs: nothing to do without high bytes, or when
	# the high bytes already form UTF-8 sequences
	if ( !preg_match( '/[\x80-\xff]/', $s ) || preg_match( $utf8Pattern, $s ) ) {
		return $s;
	}

	return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
}
/**
 * Fetch this language's 'fallback8bitEncoding' item from the localisation
 * cache.
 *
 * @return the cached item
 */
function fallback8bitEncoding() {
	$encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
	return $encoding;
}
/**
 * Most writing systems use whitespace to break up words.
 * Some languages such as Chinese don't conventionally do this,
 * which requires special handling when breaking up words for
 * searching etc.
 *
 * @return Bool: always true in this base implementation
 */
function hasWordBreaks() {
	return true;
}
/**
 * Some languages such as Chinese require word segmentation.
 * Specify such segmentation when overridden in a derived class; this base
 * implementation hands the text back untouched.
 *
 * @param $string String
 * @return String
 */
function segmentByWord( $string ) {
	return $string;
}
/**
 * Some languages have special punctuation that needs to be normalized.
 * Make such changes here. This implementation only applies
 * convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
/**
 * Convert double-width (full-width) roman letters and digits to their
 * single-width ASCII counterparts.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only 0-9, A-Z and a-z are mapped.
 *
 * @param $string String
 * @return String
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$half = str_split( "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" );
		$full = array();
		foreach ( $half as $char ) {
			# The full-width form sits 0xfee0 code points above the ASCII
			# character and always encodes to three UTF-8 bytes. Building it
			# from the code point keeps the table correct even if this file
			# is passed through a tool that folds full-width literals to
			# ASCII, which would turn the search list into 3-character ASCII
			# chunks and corrupt ordinary text.
			$codepoint = ord( $char ) + 0xfee0;
			$full[] = chr( 0xe0 | ( $codepoint >> 12 ) ) .
				chr( 0x80 | ( ( $codepoint >> 6 ) & 0x3f ) ) .
				chr( 0x80 | ( $codepoint & 0x3f ) );
		}
	}

	return str_replace( $full, $half, $string );
}
/**
 * Surround every match of $pattern's first group with spaces, then squeeze
 * runs of spaces down to one.
 *
 * @param $string String
 * @param $pattern String: regular expression with at least one group
 * @return String
 */
protected static function insertSpace( $string, $pattern ) {
	$spaced = preg_replace( $pattern, " $1 ", $string );
	return preg_replace( '/ +/', ' ', $spaced );
}
/**
 * Some languages, e.g. Chinese, need to do a conversion in order for search
 * results to be displayed correctly. This base implementation does nothing.
 *
 * @param $termsArray Array
 * @return Array: the argument, unchanged
 */
function convertForSearchResult( $termsArray ) {
	return $termsArray;
}
/**
 * Get the first character of a string.
 *
 * For a Hangul syllable (code points 0xac00-0xd7a3) the corresponding
 * leading jamo is returned instead of the syllable itself.
 *
 * @param $s String
 * @return String: the first character, or '' when $s does not start with a
 *   byte sequence matching the pattern below
 */
function firstChar( $s ) {
	// Upper bound (exclusive) of each syllable block => its first jamo
	static $jamoByBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}
	$char = $matches[1];

	// Hangul syllables are three bytes long; anything else is final
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	// Break down Hangul syllables to grab the first jamo
	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}
	foreach ( $jamoByBound as $bound => $jamo ) {
		if ( $code < $bound ) {
			return $jamo;
		}
	}
	return "\xe3\x85\x8e";
}
/**
 * Hook for languages with an alternate character encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 *
 * If this language is used as the primary content language, an override to
 * the defaults can be set here on startup. The base implementation is
 * intentionally empty.
 */
function initEncoding() {
	# Nothing to do by default
}
/**
 * Convert text for display in the edit box.
 *
 * For some languages we'll want to explicitly specify which characters make
 * it into the edit box raw or are converted in some way or another.
 * Note that if wgOutputEncoding is different from wgInputEncoding, this
 * text will be further converted to wgOutputEncoding.
 *
 * @param $s String: UTF-8 text
 * @return String: $s itself when $wgEditEncoding is empty or 'UTF-8',
 *   otherwise $s converted to $wgEditEncoding
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$needsConversion = !( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	if ( $needsConversion ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}
	return $s;
}
/**
 * Convert submitted text back to UTF-8, taking the edit encoding used by
 * recodeForEdit() into account.
 *
 * @param $s String
 * @return String: $s itself when the effective encoding is 'UTF-8',
 *   otherwise $s converted from that encoding to UTF-8
 */
function recodeInput( $s ) {
	global $wgEditEncoding;

	# An empty setting means UTF-8
	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';
	if ( $enc != 'UTF-8' ) {
		return $this->iconv( $enc, 'UTF-8', $s );
	}
	return $s;
}
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only. In this base
 * class the Arabic and Malayalam fixes are applied only when
 * $wgAllUnicodeFixes is set.
 *
 * @param $s String
 * @return String
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}
	return $s;
}
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory ($this->transformData). This will go
 * faster if you have the FastStringSearch extension.
 *
 * @param $file String: name of the data file
 * @param $string String: text to transform
 * @return the result of ReplacementArray::replace()
 * @throws MWException when wfGetPrecompiledData() returns false
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	$data = wfGetPrecompiledData( $file );
	if ( $data === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}

	$replacer = new ReplacementArray( $data );
	$this->transformData[$file] = $replacer;
	return $replacer->replace( $string );
}
/**
 * For right-to-left language support.
 *
 * @return the 'rtl' item of this language from the localisation cache
 */
function isRTL() {
	$rtl = self::$dataCache->getItem( $this->mCode, 'rtl' );
	return $rtl;
}
/**
 * Return the correct HTML 'dir' attribute value for this language.
 *
 * @return String: 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}
	return 'ltr';
}
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String: 'right' for right-to-left languages, otherwise 'left'
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String: 'left' for right-to-left languages, otherwise 'right'
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction.
 *
 * @return String: UTF-8 bytes of U+200F for right-to-left languages,
 *   otherwise of U+200E
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
/**
 * Fetch this language's 'capitalizeAllNouns' item from the localisation
 * cache.
 *
 * @return the cached item
 */
function capitalizeAllNouns() {
	$flag = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
	return $flag;
}
/**
 * An arrow, depending on the language direction.
 *
 * @return String: '←' for right-to-left languages, otherwise '→'
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar".
 *
 * @return the 'linkPrefixExtension' item of this language from the
 *   localisation cache
 */
function linkPrefixExtension() {
	$setting = self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
	return $setting;
}
/**
 * Fetch this language's 'magicWords' item from the localisation cache.
 *
 * @return the cached item
 */
function getMagicWords() {
	$magicWords = self::$dataCache->getItem( $this->mCode, 'magicWords' );
	return $magicWords;
}
/**
 * Run the 'LanguageGetMagic' hook once per object, letting handlers fill
 * $this->mMagicExtensions. Later calls return immediately.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		# Set the flag first so the hook runs at most once
		$this->mMagicHookDone = true;

		wfProfileIn( 'LanguageGetMagic' );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( 'LanguageGetMagic' );
	}
}
/**
 * Fill a MagicWord object with data from here.
 *
 * The entry for $mw->mId is looked up in the hook-provided extensions
 * first and in getMagicWords() second. A usable entry is an array whose
 * first element is the case-sensitivity flag and whose remaining elements
 * are the synonyms; anything else is reported through error_log().
 *
 * @param $mw object whose mCaseSensitive and mSynonyms are set
 */
function getMagic( $mw ) {
	$this->doMagicHook();

	$rawEntry = false;
	if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId];
	} else {
		$magicWords = $this->getMagicWords();
		if ( isset( $magicWords[$mw->mId] ) ) {
			$rawEntry = $magicWords[$mw->mId];
		}
	}

	if ( is_array( $rawEntry ) ) {
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
		return;
	}

	error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
}
/**
 * Add magic words to the extension array.
 *
 * $newWords is keyed by language code. Entries are merged for 'en' and for
 * every language in this language's fallback chain, applied from the most
 * distant fallback to this language itself, so closer languages take
 * precedence on conflicting keys.
 *
 * @param $newWords Array: language code => array of magic word entries
 */
function addMagicWordsByLang( $newWords ) {
	# Collect this language and its fallbacks, guarding against loops
	$fallbackChain = array();
	for ( $code = $this->getCode();
		$code && !in_array( $code, $fallbackChain );
		$code = self::getFallbackFor( $code )
	) {
		$fallbackChain[] = $code;
	}
	if ( !in_array( 'en', $fallbackChain ) ) {
		$fallbackChain[] = 'en';
	}

	# Apply from the end of the chain towards this language
	for ( $i = count( $fallbackChain ) - 1; $i >= 0; $i-- ) {
		$lang = $fallbackChain[$i];
		if ( isset( $newWords[$lang] ) ) {
			$this->mMagicExtensions = $newWords[$lang] + $this->mMagicExtensions;
		}
	}
}
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * The array is loaded from the localisation cache and passed through the
 * 'LanguageGetSpecialPageAliases' hook on first use, then kept in
 * $this->mExtendedSpecialPageAliases.
 *
 * @return Array
 */
function getSpecialPageAliases() {
	// Cache aliases because it may be slow to load them
	if ( $this->mExtendedSpecialPageAliases === null ) {
		$this->mExtendedSpecialPageAliases =
			self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );

		$hookArgs = array( &$this->mExtendedSpecialPageAliases, $this->getCode() );
		wfRunHooks( 'LanguageGetSpecialPageAliases', $hookArgs );
	}

	return $this->mExtendedSpecialPageAliases;
}
/**
 * Italic is unsuitable for some languages; this is the place to override
 * how emphasis is marked up.
 *
 * @param $text String: the text to be emphasized.
 * @return String: $text wrapped in an <em> element
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
/**
 * Format a number for display in this language.
 *
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235. This is not suitable for all languages: some use their own
 * digits, others swap the roles of commas and dots ("293.291,235").
 *
 * An example of this function being called:
 *   wfMsg( 'message', $wgLang->formatNum( $num ) )
 *
 * Thousands separators are inserted with commafy() and then rewritten with
 * separatorTransformTable(); digits are rewritten with
 * digitTransformTable() when $wgTranslateNumerals is set.
 *
 * @todo check if it's viable to use localeconv() for the decimal separator
 *
 * @param $number Mixed: the string to be formatted, should be an integer
 *                or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates
 * @return String
 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
/**
 * Undo the digit and separator transformations of this language and strip
 * commas from the result.
 *
 * @param $number String: a localised number
 * @return String
 */
function parseFormattedNumber( $number ) {
	$digits = $this->digitTransformTable();
	if ( $digits ) {
		$number = strtr( $number, array_flip( $digits ) );
	}

	$separators = $this->separatorTransformTable();
	if ( $separators ) {
		$number = strtr( $number, array_flip( $separators ) );
	}

	# Drop the thousands separators
	return strtr( $number, array( ',' => '' ) );
}
/**
 * Adds commas to a given number, one after every third digit of the
 * integer part counted from the right.
 *
 * @param $_ Mixed: the number
 * @return String
 */
function commafy( $_ ) {
	# Work on the reversed string so that groups are counted from the right;
	# the negative look-ahead skips digits that are followed by a dot in the
	# reversed text, i.e. the fractional part.
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
/**
 * Fetch this language's 'digitTransformTable' item from the localisation
 * cache.
 *
 * @return the cached item
 */
function digitTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
	return $table;
}
/**
 * Fetch this language's 'separatorTransformTable' item from the
 * localisation cache.
 *
 * @return the cached item
 */
function separatorTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
	return $table;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with an "and".
 *
 * @param $l Array: list of strings, indexed 0..n-1
 * @return String: '' for an empty list
 */
function listToText( $l ) {
	$last = count( $l ) - 1;
	$text = '';

	# Build the text from the last item backwards
	for ( $i = $last; $i >= 0; $i-- ) {
		if ( $i == $last ) {
			$text = $l[$i];
			continue;
		}
		if ( $i == $last - 1 ) {
			$glue = $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' );
		} else {
			$glue = $this->getMessageFromDB( 'comma-separator' );
		}
		$text = $l[$i] . $glue . $text;
	}

	return $text;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 *
 * @param $list array of strings to put in a comma list
 * @return String
 */
function commaList( $list ) {
	$separator = wfMsgExt(
		'comma-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	# Separator first: the (array, separator) argument order is a legacy
	# form that current PHP versions no longer accept.
	return implode( $separator, $list );
}
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 *
 * @param $list array of strings to put in a semicolon list
 * @return String
 */
function semicolonList( $list ) {
	$separator = wfMsgExt(
		'semicolon-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	# Separator first: the (array, separator) argument order is a legacy
	# form that current PHP versions no longer accept.
	return implode( $separator, $list );
}
/**
 * Same as commaList, but separate it with the pipe instead.
 *
 * @param $list array of strings to put in a pipe list
 * @return String
 */
function pipeList( $list ) {
	$separator = wfMsgExt(
		'pipe-separator',
		array( 'escapenoentities', 'language' => $this )
	);
	# Separator first: the (array, separator) argument order is a legacy
	# form that current PHP versions no longer accept.
	return implode( $separator, $list );
}
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (including ellipses)
 * @param $ellipsis String to append to the truncated text
 * @param $adjustLength Boolean: Subtract length of ellipsis from $length.
 *	$adjustLength was introduced in 1.18, before that behaved as if false.
 * @return String: the truncated text, or the original string when
 *   truncating would not make it shorter
 */
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Check if there is no need to truncate
	if ( $length == 0 ) {
		return $ellipsis; // convention
	}
	$limit = abs( $length );
	if ( strlen( $string ) <= $limit ) {
		return $string; // no need to truncate
	}

	$ellipsisLen = strlen( $ellipsis );
	if ( $adjustLength && $ellipsisLen >= $limit ) {
		# The ellipsis alone fills the budget, so $adjustLength can't be
		# applied; this can be slightly unexpected
		$truncated = $ellipsis;
	} else {
		$reserved = $adjustLength ? $ellipsisLen : 0;
		if ( $length > 0 ) {
			# Keep the start: xyz...
			$kept = substr( $string, 0, $length - $reserved );
			$truncated = $this->removeBadCharLast( $kept ) . $ellipsis;
		} else {
			# Keep the end: ...xyz
			$kept = substr( $string, $length + $reserved );
			$truncated = $ellipsis . $this->removeBadCharFirst( $kept );
		}
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
	# This check is *not* redundant if $adjustLength, due to the single case
	# where the ellipsis is longer than the absolute limit; the original
	# could then be shorter than the result.
	return strlen( $truncated ) < strlen( $string ) ? $truncated : $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return String
 */
protected function removeBadCharLast( $string ) {
	if ( $string != '' ) {
		$char = ord( $string[strlen( $string ) - 1] );
		$m = array();
		if ( $char >= 0xc0 ) {
			# We got the first byte only of a multibyte char; remove it.
			$string = substr( $string, 0, -1 );
		} elseif ( $char >= 0x80 &&
			# The "s" modifier lets (.*) run across line breaks; without it
			# a partial character after a newline would never match and
			# would be left in place.
			preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
				'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
		{
			# We chopped in the middle of a character; remove it
			$string = $m[1];
		}
	}
	return $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return String
 */
protected function removeBadCharFirst( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$lead = ord( $string[0] );
	$startsInsideChar = $lead >= 0x80 && $lead < 0xc0;
	if ( $startsInsideChar ) {
		# We chopped in the middle of a character; remove the whole thing
		return preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
 * Also, this will not detect things like "display:none" CSS.
 *
 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
 *
 * @param string $text HTML string to truncate
 * @param int $length (zero/positive) Maximum length (including ellipses)
 * @param string $ellipsis String to append to the truncated text
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}
	# Check if there is clearly no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format (convention)
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML (short-circuit)
	}

	$displayLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = $pRet = ''; // accumulated tag name, accumulated result string
	$openTags = array(); // open tag stack
	$pOpenTags = array();

	$textLen = strlen( $text );
	$neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
	for ( $pos = 0; true; ++$pos ) {
		# Consider truncation once the display length has reached the maximum.
		# Check that we're not in the middle of a bracket/entity...
		if ( $displayLen >= $neLength && $bracketState == 0 && $entityState == 0 ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless there turn out to
				# be so few remaining characters that truncation is not necessary.
				$pOpenTags = $openTags; // save state
				$pRet = $ret; // save state
			} elseif ( $displayLen > $length && $displayLen > strlen( $ellipsis ) ) {
				# String in fact does need truncation, the truncation point was OK.
				$openTags = $pOpenTags; // reload state
				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
		if ( $pos >= $textLen ) {
			break; // extra iteration just for above checks
		}

		# Read the next char...
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for backslash)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$displayLen++; // entity is one displayed char
				}
			} else {
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$displayLen++; // this char is displayed
					// Add the next $max display text chars after this in one swoop...
					$max = ( $testingEllipsis ? $length : $neLength ) - $displayLen;
					$skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
					$displayLen += $skipped;
					$pos += $skipped;
				}
			}
		}
	}
	if ( $displayLen == 0 ) {
		return ''; // no text shown, nothing to format
	}
	// Close the last tag if left unclosed by bad HTML.
	// Arguments go in the same order as in the two calls inside the loop:
	// tag type first, then the character preceding the (virtual) bracket.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
/**
 * truncateHtml() helper function.
 * Works like strcspn(): measures the run of bytes in $text, beginning at
 * $start, that contains none of the bytes listed in $search. Unlike
 * strcspn(), the skipped run is also appended to $ret.
 *
 * @param $ret String: (by reference) accumulated result string
 * @param $text String: string being scanned
 * @param $search String: bytes that terminate the run
 * @param $start Integer: offset in $text at which scanning starts
 * @param $len Integer|null: maximum number of bytes to scan; null scans
 *   up to the end of $text, negative values are treated as 0
 * @return Integer: number of bytes skipped (and appended to $ret)
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
	if ( $start >= strlen( $text ) ) {
		return 0; // nothing left to scan
	}
	if ( $len === null ) {
		// Omit the length argument entirely: strcspn() treats a negative
		// length as "stop that many bytes before the end", so passing -1
		// would silently leave the final byte of $text unexamined.
		$skipCount = strcspn( $text, $search, $start );
	} else {
		$skipCount = strcspn( $text, $search, $start, max( 0, $len ) );
	}
	$ret .= substr( $text, $start, $skipCount );
	return $skipCount;
}
/**
 * truncateHtml() helper function.
 * Called when a tag has ended: updates the stack of open tags according
 * to the tag name that was collected, then clears $tag.
 *
 * @param $tag String: (by reference) name of the tag just read; emptied afterwards
 * @param $tagType Integer: 0 for an open tag, 1 for a close tag
 * @param $lastCh String: character that preceded the '>' ending this tag
 * @param $openTags Array: (by reference) stack of open tag names, not counting $tag
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return; // no tag name was collected, nothing to record
	}
	if ( $tagType == 0 ) {
		if ( $lastCh != '/' ) {
			// Opening tag that did not close itself
			$openTags[] = $tag;
		}
	} elseif ( $tagType == 1 ) {
		$top = count( $openTags ) - 1;
		if ( $openTags && $tag == $openTags[$top] ) {
			// Closing tag that matches the innermost open tag
			array_pop( $openTags );
		}
	}
	$tag = '';
}
/**
 * Grammatical transformations, needed for inflected languages.
 * Invoked by putting {{grammar:case|word}} in a message.
 *
 * Looks the word up in $wgGrammarForms under this language's code and the
 * requested case; when no entry exists the word is returned unchanged.
 *
 * @param $word string
 * @param $case string
 * @return string
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;
	$code = $this->getCode();
	return isset( $wgGrammarForms[$code][$case][$word] )
		? $wgGrammarForms[$code][$case][$word]
		: $word;
}
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If the second or third parameter is not specified, masculine is used.
 * These details may be overridden per language.
 *
 * @param $gender string: 'male', 'female', or anything else for neutral
 * @param $forms Array: the forms given after the username
 * @return string: the selected form, or '' when no forms were given
 */
function gender( $gender, $forms ) {
	if ( count( $forms ) === 0 ) {
		return '';
	}
	$forms = $this->preConvertPlural( $forms, 2 );
	if ( $gender === 'male' ) {
		$index = 0;
	} elseif ( $gender === 'female' ) {
		$index = 1;
	} else {
		// Neutral/unknown: third form if supplied, otherwise the masculine one
		$index = isset( $forms[2] ) ? 2 : 0;
	}
	return $forms[$index];
}
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 forms of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]].
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language
 */
function convertPlural( $count, $forms ) {
	if ( count( $forms ) === 0 ) {
		return '';
	}
	$forms = $this->preConvertPlural( $forms, 2 );

	// This implementation only distinguishes "one" from "everything else"
	if ( $count == 1 ) {
		return $forms[0];
	}
	return $forms[1];
}
/**
 * Pads the list of forms given to convertPlural up to the requested
 * number of entries by repeatedly appending a copy of the last one.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	for ( $missing = $count - count( $forms ); $missing > 0; $missing-- ) {
		$lastIndex = count( $forms ) - 1;
		$forms[] = $forms[$lastIndex];
	}
	return $forms;
}
/**
 * Maybe translate block durations. Note that this function is somewhat misnamed: it
 * deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
 * (which is an absolute timestamp).
 *
 * @param $str String: the validated block duration in English
 * @return string: HTML-escaped label of the matching suggested duration, or
 *   $str itself (not escaped) when nothing matches
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$duration = SpecialBlock::getSuggestedDurations( $this );

	// First try an exact match against the suggested durations
	foreach ( $duration as $show => $value ) {
		if ( strcmp( $str, $value ) != 0 ) {
			continue;
		}
		return htmlspecialchars( trim( $show ) );
	}

	// The list usually carries only one of the spellings for "forever",
	// so when $str is one of them, accept whichever equivalent is listed.
	$indefs = array( 'infinite', 'infinity', 'indefinite' );
	if ( !in_array( $str, $indefs ) ) {
		// Not a known duration at all: hand back the original string.
		return $str;
	}
	foreach ( $indefs as $val ) {
		$show = array_search( $val, $duration, true );
		if ( $show === false ) {
			continue;
		}
		return htmlspecialchars( trim( $show ) );
	}

	// If all else fails, return the original string.
	return $str;
}
2758 * languages like Chinese need to be segmented in order for the diff
2761 * @param $text String
2764 function segmentForDiff( $text ) {
2769 * and unsegment to show the result
2771 * @param $text String
2774 function unsegmentForDiff( $text ) {
/**
 * Convert text to all supported variants.
 * Forwards to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return mixed: whatever the converter returns
 */
function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
/**
 * Convert text to different variants of a language.
 * Forwards to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return mixed: whatever the converter returns
 */
function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
/**
 * Convert a Title object to a string in the preferred variant.
 * Forwards to the converter object held in $this->mConverter.
 *
 * @param $title Title
 * @return mixed: whatever the converter returns
 */
function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
/**
 * Check if this is a language with variants, i.e. whether getVariants()
 * lists more than one entry.
 *
 * @return bool
 */
function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Forwards to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return mixed: whatever the converter returns
 */
function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * NOTE(review): $isTitle is forwarded to convert(), but convert() as
 * declared in this class only accepts $text, so the flag appears to have
 * no effect here; confirm against subclasses before relying on it.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title
 * @return string
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
/**
 * Forwards a category key to the converter object held in
 * $this->mConverter and returns its result.
 *
 * @param $key string
 * @return mixed: whatever the converter returns
 */
function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
/**
 * Get the list of variants supported by this language,
 * as reported by the converter object in $this->mConverter.
 * See the sample implementation in LanguageZh.php.
 *
 * @return array an array of language codes
 */
function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
/**
 * Get the preferred variant as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed: whatever the converter returns
 */
function getPreferredVariant() {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
/**
 * Get the default variant as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed: whatever the converter returns
 */
function getDefaultVariant() {
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
/**
 * Get the URL variant as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed: whatever the converter returns
 */
function getURLVariant() {
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
/**
 * If a language supports multiple variants, it is possible that a
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it by handing the arguments to the
 * converter object in $this->mConverter. See e.g. LanguageZh.php
 *
 * @param $link String: (by reference) the name of the link
 * @param $nt Mixed: (by reference) the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *   we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 * Forwards to the converter object held in $this->mConverter.
 *
 * @deprecated since 1.17 Use autoConvertToAllVariants()
 * @param $text String
 * @return mixed: whatever the converter returns
 */
function convertLinkToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant.
 * Forwards to the converter object held in $this->mConverter.
 *
 * @return mixed: whatever the converter returns
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter object in $this->mConverter.
 *
 * @return mixed: whatever the converter returns
 */
function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 * Forwards to the converter object held in $this->mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse Boolean: passed through to the converter unchanged
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 * Read from the 'linkTrail' item of the localisation data cache for
 * this language's code.
 *
 * @return mixed: the cached 'linkTrail' item
 */
function linkTrail() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'linkTrail' );
}
2926 function getLangObj() {
/**
 * Get the RFC 3066 code for this language object
 * (the value stored in $this->mCode).
 *
 * @return string
 */
function getCode() {
	$code = $this->mCode;
	return $code;
}
/**
 * Set the language code stored on this object ($this->mCode).
 *
 * @param $code string
 */
function setCode( $code ) {
	$this->mCode = $code;
}
/**
 * Get the name of a file for a certain language code.
 *
 * The code is mangled by upper-casing its first letter and replacing
 * every '-' with '_'.
 *
 * $prefix carries no default value: a default on a parameter that comes
 * before the required $code can never actually be used by a caller, and
 * PHP 8.0 and later report such a declaration as deprecated.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws MWException when $code is not a valid language code or contains
 *   ':', '/', '\' or a NUL byte
 */
static function getFileName( $prefix, $code, $suffix = '.php' ) {
	// Protect against path traversal
	$hasPathChars = strcspn( $code, ":/\\\000" ) !== strlen( $code );
	if ( !Language::isValidCode( $code ) || $hasPathChars ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
}
/**
 * Get the language code from a file name. Inverse of getFileName().
 *
 * The code part must consist of one upper-case ASCII letter followed by
 * lower-case ASCII letters and underscores; it is lower-cased and its
 * underscores are turned into hyphens.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return string Language code, or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';
	$matches = null;
	preg_match( $pattern, $filename, $matches );
	if ( count( $matches ) === 0 ) {
		return false;
	}
	$mangled = strtolower( $matches[1] );
	return str_replace( '_', '-', $mangled );
}
/**
 * Build the path of the messages file for a language code:
 * "$IP/languages/messages/Messages" + mangled code + ".php".
 *
 * @param $code string
 * @return string
 */
static function getMessagesFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
/**
 * Build the path of the language class file for a language code:
 * "$IP/languages/classes/Language" + mangled code + ".php".
 *
 * @param $code string
 * @return string
 */
static function getClassFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
/**
 * Get the fallback for a given language.
 * 'en' is short-circuited to false without consulting the localisation
 * cache; every other code is answered from the cache's 'fallback' item.
 *
 * @param $code string
 * @return false|string
 */
static function getFallbackFor( $code ) {
	if ( $code === 'en' ) {
		// Shortcut
		return false;
	}
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'fallback' );
}
/**
 * Get all messages for a given language (the 'messages' item of the
 * localisation cache).
 * WARNING: this may take a long time
 *
 * @param $code string
 * @return mixed: the cached 'messages' item
 */
static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
/**
 * Get a message for a given language (a sub-item of the 'messages' item
 * of the localisation cache).
 *
 * @param $key string
 * @param $code string
 * @return mixed: the cached sub-item
 */
static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * When $talk contains '$1', it is replaced by $wgMetaNamespace, any
 * {{grammar:case|word}} constructs are resolved through
 * replaceGrammarInNamespace(), and spaces are turned into underscores.
 * A name without '$1' is returned untouched.
 *
 * @param $talk string
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	global $wgMetaNamespace;

	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback uses plain $this: objects are passed by handle, so the
	# PHP 4-style reference (&$this) is not needed to call the method.
	$talk = preg_replace_callback(
		'/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ),
		$talk
	);
	return str_replace( ' ', '_', $talk );
}
/**
 * preg_replace_callback() handler used by fixVariableInNamespace().
 *
 * @param $m Array: regex match; $m[1] is the grammar case, $m[2] the word
 * @return string: result of convertGrammar() for the trimmed word and case
 */
function replaceGrammarInNamespace( $m ) {
	$case = trim( $m[1] );
	$word = trim( $m[2] );
	return $this->convertGrammar( $word, $case );
}
/**
 * Get the upper- and lower-case character maps loaded from the
 * precompiled data file 'Utf8Case.ser'. The maps are kept in static
 * variables, so the file is only read on the first call.
 *
 * @return array: array( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException when the precompiled data cannot be loaded
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;

	if ( !isset( $wikiUpperChars ) ) {
		wfProfileIn( __METHOD__ );
		$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
		if ( $arr === false ) {
			// Close the profiling section before bailing out so that the
			// wfProfileIn() above is not left unbalanced.
			wfProfileOut( __METHOD__ );
			throw new MWException(
				"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
		}
		$wikiUpperChars = $arr['wikiUpperChars'];
		$wikiLowerChars = $arr['wikiLowerChars'];
		wfProfileOut( __METHOD__ );
	}

	return array( $wikiUpperChars, $wikiLowerChars );
}
/**
 * Decode an expiry (block, protection, etc) which has come from the DB
 *
 * The database's "infinity" value and the 'infiniteblock' message are
 * fetched once and kept in static variables.
 *
 * @param $expiry String: Database expiry String
 * @param $format Bool|Int true to process using language functions, or TS_ constant
 *     to return the expiry in a given timestamp
 * @return mixed: for an empty or infinite expiry, the value returned by
 *     wfMessage( 'infiniteblock' ) when $format is true, otherwise the
 *     database's infinity value; for any other expiry, timeanddate() output
 *     when $format is true, otherwise wfTimestamp( $format, $expiry )
 */
public function formatExpiry( $expiry, $format = true ) {
	static $infinity, $infinityMsg;
	if ( $infinity === null ) {
		$infinityMsg = wfMessage( 'infiniteblock' );
		$infinity = wfGetDB( DB_SLAVE )->getInfinity();
	}

	$useLanguage = ( $format === true );

	if ( $expiry == '' || $expiry == $infinity ) {
		return $useLanguage ? $infinityMsg : $infinity;
	}
	if ( $useLanguage ) {
		return $this->timeanddate( $expiry );
	}
	return wfTimestamp( $format, $expiry );
}
/**
 * Format a number of seconds as a short duration using the
 * 'seconds-abbrev', 'minutes-abbrev' and 'hours-abbrev' messages.
 *
 * Below ten seconds one decimal place is shown; below a minute whole
 * seconds; below an hour minutes and seconds; otherwise hours, minutes
 * and seconds.
 *
 * @param $seconds String
 * @return string
 */
function formatTimePeriod( $seconds ) {
	$tenths = round( $seconds * 10 );
	if ( $tenths < 100 ) {
		return $this->formatNum( sprintf( "%.1f", $tenths / 10 ) )
			. $this->getMessageFromDB( 'seconds-abbrev' );
	}

	$wholeSeconds = round( $seconds );
	if ( $wholeSeconds < 60 ) {
		return $this->formatNum( $wholeSeconds )
			. $this->getMessageFromDB( 'seconds-abbrev' );
	}

	if ( $wholeSeconds < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		if ( $secondsPart == 60 ) {
			// Rounding pushed the seconds up to a full minute: carry over
			$secondsPart = 0;
			$minutes++;
		}
		return $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' )
			. ' '
			. $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	$hours = floor( $seconds / 3600 );
	$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
	$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
	if ( $secondsPart == 60 ) {
		// Carry rounded-up seconds into the minutes...
		$secondsPart = 0;
		$minutes++;
	}
	if ( $minutes == 60 ) {
		// ...and a full hour of minutes into the hours
		$minutes = 0;
		$hours++;
	}
	return $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' )
		. ' '
		. $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' )
		. ' '
		. $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
}
/**
 * Format a bitrate for output, using an appropriate unit
 * (bps, kbps, Mbps or Gbps) according to the magnitude in question.
 *
 * Mantissas below 10 keep one decimal place, larger ones are rounded to
 * whole numbers. Rates beyond the largest unit are expressed in Gbps and
 * positive rates below 1 in bps, instead of indexing outside the unit list.
 *
 * @param $bps Number: bits per second
 * @return string
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		return $this->formatNum( $bps ) . $units[0];
	}

	// Power-of-1000 bucket, clamped to the units that actually exist:
	// log10() is negative for rates below 1 and reaches 4 and above from
	// 10^12 on, neither of which has an entry in $units.
	$unitIndex = (int)floor( log10( $bps ) / 3 );
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );

	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question.
 *
 * A unit is only stepped up when the value is strictly greater than 1024.
 * Bytes and kilobytes are rounded to whole numbers, megabytes and
 * gigabytes to two decimal places.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	// For small sizes no decimal places necessary
	$round = 0;
	$msg = 'size-bytes';

	if ( $size > 1024 ) {
		$size /= 1024;
		$msg = 'size-kilobytes';
		if ( $size > 1024 ) {
			$size /= 1024;
			// For MB and bigger two decimal places are smarter
			$round = 2;
			$msg = 'size-megabytes';
			if ( $size > 1024 ) {
				$size /= 1024;
				$msg = 'size-gigabytes';
			}
		}
	}

	$rounded = round( $size, $round );
	$text = $this->getMessageFromDB( $msg );
	return str_replace( '$1', $this->formatNum( $rounded ), $text );
}
3174 * Get the conversion rule title, if any.
3176 function getConvRuleTitle() {
3177 return $this->mConverter
->getConvRuleTitle();