3 * Internationalisation code
10 * @defgroup Language Language
// Entry-point guard: when the MEDIAWIKI constant is not defined, print a
// notice saying this file is not a valid entry point.
13 if ( !defined( 'MEDIAWIKI' ) ) {
14 echo "This file is part of MediaWiki, it is not a valid entry point.\n";
// Names.php is loaded from the directory containing this file; presumably it
// populates $wgLanguageNames - verify against Names.php.
19 global $wgLanguageNames;
20 require_once( dirname( __FILE__
) . '/Names.php' );
22 global $wgInputEncoding, $wgOutputEncoding;
25 * These are always UTF-8, they exist only for backwards compatibility
27 $wgInputEncoding = 'UTF-8';
28 $wgOutputEncoding = 'UTF-8';
// Only touch mbstring settings when mb_strtoupper() exists.
30 if ( function_exists( 'mb_strtoupper' ) ) {
31 mb_internal_encoding( 'UTF-8' );
35 * a fake language converter
/**
 * Remember the language object this pass-through converter belongs to.
 *
 * @param $langobj Object whose getCode() names the single variant
 */
function __construct( $langobj ) {
	$this->mLang = $langobj;
}

/** Map the language's own code to the unchanged text. */
function autoConvertToAllVariants( $text ) {
	$code = $this->mLang->getCode();
	return array( $code => $text );
}

/** Return the text untouched. */
function convert( $t ) {
	return $t;
}

/** Return the prefixed text of the given title object. */
function convertTitle( $t ) {
	return $t->getPrefixedText();
}

/** The only variant is the language's own code. */
function getVariants() {
	$code = $this->mLang->getCode();
	return array( $code );
}

/** The preferred variant is the language's own code. */
function getPreferredVariant() {
	return $this->mLang->getCode();
}

/** The default variant is the language's own code. */
function getDefaultVariant() {
	return $this->mLang->getCode();
}

/** Always an empty string. */
function getURLVariant() {
	return '';
}

/** Always false. */
function getConvRuleTitle() {
	return false;
}

/** Does nothing; both reference arguments are left as they are. */
function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
}

/** Always an empty string. */
function getExtraHashOptions() {
	return '';
}

/** Always an empty string. */
function getParsedTitle() {
	return '';
}

/** Return the text untouched; $noParse is ignored. */
function markNoConversion( $text, $noParse = false ) {
	return $text;
}

/** Return the key untouched. */
function convertCategoryKey( $key ) {
	return $key;
}

/** Same result as autoConvertToAllVariants(). */
function convertLinkToAllVariants( $text ) {
	return $this->autoConvertToAllVariants( $text );
}

/** Return the text untouched. */
function armourMath( $text ) {
	return $text;
}
60 * Internationalisation code
64 var $mConverter, $mVariants, $mCode, $mLoaded = false;
65 var $mMagicExtensions = array(), $mMagicHookDone = false;
67 var $mNamespaceIds, $namespaceNames, $namespaceAliases;
68 var $dateFormatStrings = array();
69 var $mExtendedSpecialPageAliases;
72 * ReplacementArray object caches
74 var $transformData = array();
76 static public $dataCache;
77 static public $mLangObjCache = array();
79 static public $mWeekdayMsgs = array(
80 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
84 static public $mWeekdayAbbrevMsgs = array(
85 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
88 static public $mMonthMsgs = array(
89 'january', 'february', 'march', 'april', 'may_long', 'june',
90 'july', 'august', 'september', 'october', 'november',
93 static public $mMonthGenMsgs = array(
94 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
95 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
98 static public $mMonthAbbrevMsgs = array(
99 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
100 'sep', 'oct', 'nov', 'dec'
103 static public $mIranianCalendarMonthMsgs = array(
104 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
105 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
106 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
107 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
110 static public $mHebrewCalendarMonthMsgs = array(
111 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
112 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
113 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
114 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
115 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
118 static public $mHebrewCalendarMonthGenMsgs = array(
119 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
120 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
121 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
122 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
123 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
126 static public $mHijriCalendarMonthMsgs = array(
127 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
128 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
129 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
130 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
134 * Get a cached language object for a given language code
135 * @param $code String
/**
 * Return the cached language object for a code, creating it on first use.
 *
 * @param $code String
 * @return Object produced by newFromCode()
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	// Cap the cache: once it holds more than 10 objects, drop them all
	// before adding the new one.
	if ( count( self::$mLangObjCache ) > 10 ) {
		self::$mLangObjCache = array();
	}

	$lang = self::newFromCode( $code );
	self::$mLangObjCache[$code] = $lang;
	return $lang;
}
150 * Create a language object for a given language code
151 * @param $code String
// Builds a language object for $code. The visible steps are: reject codes
// containing '/', '\' or NUL, derive the class name from the code, include
// the class file(s) if they exist, and, when the class still does not exist,
// build the fallback language's object and relabel it with setCode( $code ).
// Throws MWException for an invalid code or when the fallback chain recurses
// more than 5 levels deep.
154 protected static function newFromCode( $code ) {
156 static $recursionLevel = 0;
158 // Protect against path traversal below
// NOTE(review): the second strcspn() test repeats the check isValidCode()
// already performs with the same character list.
159 if ( !Language
::isValidCode( $code )
160 ||
strcspn( $code, "/\\\000" ) !== strlen( $code ) )
162 throw new MWException( "Invalid language code \"$code\"" );
165 if ( $code == 'en' ) {
// Class name is 'Language' + the code with its first letter upper-cased and
// every '-' replaced by '_'.
168 $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
169 // Preload base classes to work around APC/PHP5 bug
170 if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
171 include_once( "$IP/languages/classes/$class.deps.php" );
173 if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
174 include_once( "$IP/languages/classes/$class.php" );
178 if ( $recursionLevel > 5 ) {
179 throw new MWException( "Language fallback loop detected when creating class $class\n" );
// No class for this code: recurse with the fallback code, then make the
// resulting object report the requested code.
182 if ( !class_exists( $class ) ) {
183 $fallback = Language
::getFallbackFor( $code );
185 $lang = Language
::newFromCode( $fallback );
187 $lang->setCode( $code );
195 * Returns true if a language code string is of a valid form, whether or
/**
 * Tell whether a language code is free of '/', '\' and NUL characters.
 *
 * @param $code String
 * @return Boolean
 */
public static function isValidCode( $code ) {
	// strcspn() gives the length of the leading run without forbidden
	// characters; the code is valid when that run is the whole string.
	$cleanPrefixLength = strcspn( $code, "/\\\000" );
	$totalLength = strlen( $code );
	return $cleanPrefixLength === $totalLength;
}
203 * Get the LocalisationCache instance
205 * @return LocalisationCache
/**
 * Return the shared localisation cache, creating it on first call from
 * $wgLocalisationCacheConf (its 'class' entry names the class to build).
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	if ( self::$dataCache !== null ) {
		return self::$dataCache;
	}

	global $wgLocalisationCacheConf;
	$cacheClass = $wgLocalisationCacheConf['class'];
	self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );

	return self::$dataCache;
}
// Sets up a new language object: installs a FakeConverter as the converter,
// derives the language code from the class name, and makes sure the shared
// localisation cache exists.
216 function __construct() {
217 $this->mConverter
= new FakeConverter( $this );
218 // Set the code to the name of the descendant
219 if ( get_class( $this ) == 'Language' ) {
// substr( ..., 8 ) drops the 8-character 'Language' prefix of the class name;
// the rest is lower-cased and '_' is turned back into '-'.
222 $this->mCode
= str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
// Called for its side effect of creating self::$dataCache; the return value
// is not used.
224 self
::getLocalisationCache();
228 * Reduce memory usage
/**
 * Reduce memory usage by unsetting every property of this object.
 */
function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
237 * Hook which will be called if this is the content language.
238 * Descendants can use this to register hook functions or modify globals
/**
 * Hook for the content language; the base implementation does nothing.
 */
function initContLang() {
	// Intentionally empty.
}
243 * @deprecated Use User::getDefaultOptions()
/**
 * Deprecated wrapper around User::getDefaultOptions().
 *
 * @deprecated Use User::getDefaultOptions()
 */
function getDefaultUserOptions() {
	// Record the deprecated call before delegating.
	wfDeprecated( __METHOD__ );
	$defaults = User::getDefaultOptions();
	return $defaults;
}
// Returns the 'fallback' item stored in the localisation cache for this
// language's code. The code 'en' is special-cased and does not reach the
// cache lookup.
251 function getFallbackLanguageCode() {
252 if ( $this->mCode
=== 'en' ) {
255 return self
::$dataCache->getItem( $this->mCode
, 'fallback' );
260 * Exports $wgBookstoreListEn
/**
 * Return the 'bookstoreList' item of the localisation cache for this language.
 */
function getBookstoreList() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'bookstoreList' );
}
// Returns the namespace-id => name array for this language, building and
// memoising it in $this->namespaceNames on first use.
270 function getNamespaces() {
271 if ( is_null( $this->namespaceNames
) ) {
272 global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;
274 $this->namespaceNames
= self
::$dataCache->getItem( $this->mCode
, 'namespaceNames' );
275 $validNamespaces = MWNamespace
::getCanonicalNamespaces();
// Array union keeps the left-most value for each key: $wgExtraNamespaces
// wins over the localised names, which win over the canonical names.
277 $this->namespaceNames
= $wgExtraNamespaces +
$this->namespaceNames +
$validNamespaces;
// The project namespace always takes its name from $wgMetaNamespace.
279 $this->namespaceNames
[NS_PROJECT
] = $wgMetaNamespace;
280 if ( $wgMetaNamespaceTalk ) {
281 $this->namespaceNames
[NS_PROJECT_TALK
] = $wgMetaNamespaceTalk;
// The localised talk name is passed through fixVariableInNamespace().
283 $talk = $this->namespaceNames
[NS_PROJECT_TALK
];
284 $this->namespaceNames
[NS_PROJECT_TALK
] =
285 $this->fixVariableInNamespace( $talk );
288 # Sometimes a language will be localised but not actually exist on this wiki.
289 foreach( $this->namespaceNames
as $key => $text ) {
290 if ( !isset( $validNamespaces[$key] ) ) {
291 unset( $this->namespaceNames
[$key] );
295 # The above mixing may leave namespaces out of canonical order.
296 # Re-order by namespace ID number...
297 ksort( $this->namespaceNames
);
299 return $this->namespaceNames
;
303 * A convenience function that returns the same thing as
304 * getNamespaces() except with the array values changed to ' '
305 * where it found '_', useful for producing output to be displayed
306 * e.g. in <select> forms.
/**
 * Same array as getNamespaces(), with every '_' in the names replaced by
 * a space.
 *
 * @return Array
 */
function getFormattedNamespaces() {
	$formatted = array();
	foreach ( $this->getNamespaces() as $index => $name ) {
		$formatted[$index] = strtr( $name, '_', ' ' );
	}
	return $formatted;
}
319 * Get a namespace value by key
321 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
322 * echo $mw_ns; // prints 'MediaWiki'
325 * @param $index Int: the array key of the namespace to return
326 * @return mixed, string if the namespace value exists, otherwise false
/**
 * Look up a namespace name by its index.
 *
 * @param $index Int: key into the array returned by getNamespaces()
 * @return mixed The name if the index exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( !isset( $names[$index] ) ) {
		return false;
	}
	return $names[$index];
}
334 * A convenience function that returns the same thing as
335 * getNsText() except with '_' changed to ' ', useful for
/**
 * Same as getNsText(), with '_' replaced by a space.
 *
 * @param $index Int: namespace index
 * @return String
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
346 * Returns gender-dependent namespace alias if available.
347 * @param $index Int: namespace index
348 * @param $gender String: gender key (male, female... )
/**
 * Return the gender-specific namespace alias when one is defined,
 * otherwise the plain namespace name from getNsText().
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key
 */
function getGenderNsText( $index, $gender ) {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}
	return $this->getNsText( $index );
}
358 * Whether this language distinguishes genders, for example in
/**
 * Tell whether the language defines any gender-specific namespace aliases.
 *
 * @return Boolean
 */
function needsGenderDistinction() {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	$aliasCount = count( $genderAliases );
	return $aliasCount > 0;
}
369 * Get a namespace key by value, case insensitive.
370 * Only matches namespace names for the current language, not the
371 * canonical ones defined in Namespace.php.
373 * @param $text String
374 * @return mixed An integer if $text is a valid value otherwise false
/**
 * Find a namespace index by name, case-insensitively, using the ids from
 * getNamespaceIds().
 *
 * @param $text String
 * @return mixed The index if $text is known, otherwise false
 */
function getLocalNsIndex( $text ) {
	$key = $this->lc( $text );
	$ids = $this->getNamespaceIds();
	if ( isset( $ids[$key] ) ) {
		return $ids[$key];
	}
	return false;
}
// Returns the alias-name => namespace-index array for this language,
// building and memoising it in $this->namespaceAliases on first use.
382 function getNamespaceAliases() {
383 if ( is_null( $this->namespaceAliases
) ) {
384 $aliases = self
::$dataCache->getItem( $this->mCode
, 'namespaceAliases' );
// Aliases of the project talk namespace are re-keyed with the name returned
// by fixVariableInNamespace().
388 foreach ( $aliases as $name => $index ) {
389 if ( $index === NS_PROJECT_TALK
) {
390 unset( $aliases[$name] );
391 $name = $this->fixVariableInNamespace( $name );
392 $aliases[$name] = $index;
// Every gender-specific namespace form is registered as an alias as well.
397 $genders = self
::$dataCache->getItem( $this->mCode
, 'namespaceGenderAliases' );
398 foreach ( $genders as $index => $forms ) {
399 foreach ( $forms as $alias ) {
400 $aliases[$alias] = $index;
404 $this->namespaceAliases
= $aliases;
406 return $this->namespaceAliases
;
/**
 * Return the lower-cased-name => namespace-index lookup table, built once
 * and memoised in $this->mNamespaceIds.
 *
 * Later sources overwrite earlier ones: namespace names first, then the
 * language's aliases, then $wgNamespaceAliases.
 *
 * @return Array
 */
function getNamespaceIds() {
	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	global $wgNamespaceAliases;

	# Names and aliases go into one hashtable. If this turns out to be too
	# slow it could be moved before caching, but the class-specific fixup
	# has not been applied at pre-cache time.
	$this->mNamespaceIds = array();
	foreach ( $this->getNamespaces() as $nsIndex => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $nsIndex;
	}
	foreach ( $this->getNamespaceAliases() as $alias => $nsIndex ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $nsIndex;
	}
	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $nsIndex ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $nsIndex;
		}
	}

	return $this->mNamespaceIds;
}
434 * Get a namespace key by value, case insensitive. Canonical namespace
435 * names override custom ones defined for the current language.
437 * @param $text String
438 * @return mixed An integer if $text is a valid value otherwise false
/**
 * Find a namespace index by name, case-insensitively. A canonical match
 * from MWNamespace::getCanonicalIndex() takes precedence over this
 * language's own names and aliases.
 *
 * @param $text String
 * @return mixed The index if $text is known, otherwise false
 */
function getNsIndex( $text ) {
	$lctext = $this->lc( $text );

	$canonicalIndex = MWNamespace::getCanonicalIndex( $lctext );
	if ( $canonicalIndex !== null ) {
		return $canonicalIndex;
	}

	$ids = $this->getNamespaceIds();
	if ( isset( $ids[$lctext] ) ) {
		return $ids[$lctext];
	}
	return false;
}
450 * short names for language variants used for language conversion links.
452 * @param $code String
/**
 * Return the "variantname-<code>" message for a variant code.
 *
 * @param $code String
 */
function getVariantname( $code ) {
	$messageKey = "variantname-$code";
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Build "<special namespace name>:<page name>", using the first alias of
 * the page when getSpecialPageAliases() defines one.
 *
 * @param $name String
 * @return String
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	$prefix = $this->getNsText( NS_SPECIAL );
	return $prefix . ':' . $localName;
}
/**
 * Return the five quickbar setting labels, in the order none, fixed left,
 * fixed right, floating left, floating right.
 *
 * @return Array
 */
function getQuickbarSettings() {
	$messageKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright'
	);
	$settings = array();
	foreach ( $messageKeys as $messageKey ) {
		$settings[] = $this->getMessage( $messageKey );
	}
	return $settings;
}
/**
 * Return the 'mathNames' item of the localisation cache for this language.
 */
function getMathNames() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'mathNames' );
}
/**
 * Return the 'datePreferences' item of the localisation cache for this language.
 */
function getDatePreferences() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'datePreferences' );
}
/**
 * Return the 'dateFormats' item of the localisation cache for this language.
 */
function getDateFormats() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'dateFormats' );
}
/**
 * Return the language's default date format. The special cached value
 * 'dmy or mdy' is resolved through $wgAmericanDates: 'mdy' when it is
 * true, 'dmy' otherwise.
 *
 * @return String
 */
function getDefaultDateFormat() {
	$format = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $format !== 'dmy or mdy' ) {
		return $format;
	}

	global $wgAmericanDates;
	if ( $wgAmericanDates ) {
		return 'mdy';
	}
	return 'dmy';
}
/**
 * Return the 'datePreferenceMigrationMap' item of the localisation cache
 * for this language.
 */
function getDatePreferenceMigrationMap() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
}
/**
 * Return the entry for $image inside the 'imageFiles' item of the
 * localisation cache for this language.
 *
 * @param $image String: sub-item key
 */
function getImageFile( $image ) {
	$cache = self::$dataCache;
	return $cache->getSubitem( $this->mCode, 'imageFiles', $image );
}
/**
 * Return the 'defaultUserOptionOverrides' item of the localisation cache
 * for this language.
 */
function getDefaultUserOptionOverrides() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
}
/**
 * Return the 'extraUserToggles' item of the localisation cache for this language.
 */
function getExtraUserToggles() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'extraUserToggles' );
}
/**
 * Return the "tog-<name>" message for a user toggle.
 *
 * @param $tog String: toggle name
 */
function getUserToggle( $tog ) {
	$messageKey = "tog-$tog";
	return $this->getMessageFromDB( $messageKey );
}
520 * Get language names, indexed by code.
521 * If $customisedOnly is true, only returns codes with a messages file
// Returns language names indexed by code. With $customisedOnly set, the
// visible code scans "$IP/languages/messages" and keeps only the codes that
// getCodeFromFileName() extracts from a file name and that have a known name.
523 public static function getLanguageNames( $customisedOnly = false ) {
524 global $wgLanguageNames, $wgExtraLanguageNames;
// Array union: entries of $wgExtraLanguageNames win over $wgLanguageNames.
525 $allNames = $wgExtraLanguageNames +
$wgLanguageNames;
526 if ( !$customisedOnly ) {
// NOTE(review): the opendir() result is used without a visible failure check,
// and no closedir() is visible here - confirm both.
532 $dir = opendir( "$IP/languages/messages" );
533 while ( false !== ( $file = readdir( $dir ) ) ) {
534 $code = self
::getCodeFromFileName( $file, 'Messages' );
535 if ( $code && isset( $allNames[$code] ) ) {
536 $names[$code] = $allNames[$code];
544 * Get translated language names. This is done on best effort and
545 * by default this is exactly the same as Language::getLanguageNames.
546 * The CLDR extension provides translated names.
547 * @param $code String Language code.
548 * @return Array language code => language name
/**
 * Get translated language names on a best-effort basis.
 *
 * Handlers of the 'LanguageGetTranslatedLanguageNames' hook may fill in
 * names translated into $code; every language they leave out gets the name
 * from getLanguageNames().
 *
 * @param $code String Language code the names should be translated into.
 * @return Array language code => language name
 */
public static function getTranslatedLanguageNames( $code ) {
	$names = array();
	wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );

	// A dedicated loop variable keeps the $code parameter intact; the
	// original loop reused $code as its key and overwrote it.
	foreach ( self::getLanguageNames() as $langCode => $name ) {
		if ( !isset( $names[$langCode] ) ) {
			$names[$langCode] = $name;
		}
	}

	return $names;
}
563 * Get a message from the MediaWiki namespace.
565 * @param $msg String: message name
/**
 * Fetch a message through wfMsgExt() with the 'parsemag' option, in this
 * language.
 *
 * @param $msg String: message name
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
// Returns the name registered for $code in getLanguageNames(). Codes that are
// not a key of that array are handled by the array_key_exists() branch before
// the lookup.
572 function getLanguageName( $code ) {
573 $names = self
::getLanguageNames();
574 if ( !array_key_exists( $code, $names ) ) {
577 return $names[$code];
/**
 * Return the message for a month name.
 *
 * @param $key Int: 1-based month number (1 = 'january')
 */
function getMonthName( $key ) {
	$messageKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for the genitive form of a month name.
 *
 * @param $key Int: 1-based month number (1 = 'january-gen')
 */
function getMonthNameGen( $key ) {
	$messageKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for an abbreviated month name.
 *
 * @param $key Int: 1-based month number (1 = 'jan')
 */
function getMonthAbbreviation( $key ) {
	$messageKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for a weekday name.
 *
 * @param $key Int: 1-based weekday number (1 = 'sunday')
 */
function getWeekdayName( $key ) {
	$messageKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for an abbreviated weekday name.
 *
 * @param $key Int: 1-based weekday number (1 = 'sun')
 */
function getWeekdayAbbreviation( $key ) {
	$messageKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for an Iranian calendar month name.
 *
 * @param $key Int: 1-based month number (1 = 'iranian-calendar-m1')
 */
function getIranianCalendarMonthName( $key ) {
	$messageKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for a Hebrew calendar month name.
 *
 * @param $key Int: 1-based position in $mHebrewCalendarMonthMsgs
 *   (13 and 14 select the 'm6a' and 'm6b' entries)
 */
function getHebrewCalendarMonthName( $key ) {
	$messageKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for the genitive form of a Hebrew calendar month name.
 *
 * @param $key Int: 1-based position in $mHebrewCalendarMonthGenMsgs
 *   (13 and 14 select the 'm6a-gen' and 'm6b-gen' entries)
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$messageKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Return the message for a Hijri calendar month name.
 *
 * @param $key Int: 1-based month number (1 = 'hijri-calendar-m1')
 */
function getHijriCalendarMonthName( $key ) {
	$messageKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
617 * Used by date() and time() to adjust the time output.
619 * @param $ts Int the time in date('YmdHis') format
620 * @param $tz Mixed: adjust the time by this amount (default false, meaning
621 * the user's timecorrection setting is used)
// Shifts a 'YmdHis' timestamp by a time correction. $tz is a '|'-separated
// setting whose first field selects the mode ('ZoneInfo', 'System' or
// 'Offset'); any other value is parsed as "hours:minutes" or plain hours.
// When $tz is false the current user's 'timecorrection' option is used.
624 function userAdjust( $ts, $tz = false ) {
625 global $wgUser, $wgLocalTZoffset;
627 if ( $tz === false ) {
628 $tz = $wgUser->getOption( 'timecorrection' );
631 $data = explode( '|', $tz, 3 );
// ZoneInfo mode: the third field is used as the timezone name. The timestamp
// is read as UTC and re-expressed in that zone.
// NOTE(review): $data[2] is read without checking that three fields exist,
// and the probe call is silenced with '@' - confirm this is acceptable.
633 if ( $data[0] == 'ZoneInfo' ) {
634 if ( function_exists( 'timezone_open' ) && @timezone_open
( $data[2] ) !== false ) {
635 $date = date_create( $ts, timezone_open( 'UTC' ) );
636 date_timezone_set( $date, timezone_open( $data[2] ) );
637 $date = date_format( $date, 'YmdHis' );
640 # Unrecognized timezone, default to 'Offset' with the stored offset.
645 if ( $data[0] == 'System' ||
$tz == '' ) {
646 # Global offset in minutes.
647 if ( isset( $wgLocalTZoffset ) ) {
648 $minDiff = $wgLocalTZoffset;
// Offset mode: the second field holds the offset in minutes.
650 } else if ( $data[0] == 'Offset' ) {
651 $minDiff = intval( $data[1] );
// Otherwise: "H:M" gives |H| * 60 + M with the sign of H; a lone number is
// taken as whole hours.
653 $data = explode( ':', $tz );
654 if ( count( $data ) == 2 ) {
655 $data[0] = intval( $data[0] );
656 $data[1] = intval( $data[1] );
657 $minDiff = abs( $data[0] ) * 60 +
$data[1];
658 if ( $data[0] < 0 ) {
659 $minDiff = -$minDiff;
662 $minDiff = intval( $data[0] ) * 60;
666 # No difference ? Return time unchanged
667 if ( 0 == $minDiff ) {
671 wfSuppressWarnings(); // suppress E_STRICT warnings about system time settings
672 # Generate an adjusted date; take advantage of the fact that mktime
673 # will normalize out-of-range values so we don't have to split $minDiff
674 # into hours and minutes.
676 (int)substr( $ts, 8, 2 ) ), # Hours
677 (int)substr( $ts, 10, 2 ) +
$minDiff, # Minutes
678 (int)substr( $ts, 12, 2 ), # Seconds
679 (int)substr( $ts, 4, 2 ), # Month
680 (int)substr( $ts, 6, 2 ), # Day
681 (int)substr( $ts, 0, 4 ) ); # Year
683 $date = date( 'YmdHis', $t );
690 * This is a workalike of PHP's date() function, but with better
691 * internationalisation, a reduced set of format characters, and a better
694 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
695 * PHP manual for definitions. There are a number of extensions, which
698 * xn Do not translate digits of the next numeric format character
699 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
700 * xr Use roman numerals for the next numeric format character
701 * xh Use hebrew numerals for the next numeric format character
703 * xg Genitive month name
705 * xij j (day number) in Iranian calendar
706 * xiF F (month name) in Iranian calendar
707 * xin n (month number) in Iranian calendar
708 * xiY Y (full year) in Iranian calendar
710 * xjj j (day number) in Hebrew calendar
711 * xjF F (month name) in Hebrew calendar
712 * xjt t (days in month) in Hebrew calendar
713 * xjx xg (genitive month name) in Hebrew calendar
714 * xjn n (month number) in Hebrew calendar
715 * xjY Y (full year) in Hebrew calendar
717 * xmj j (day number) in Hijri calendar
718 * xmF F (month name) in Hijri calendar
719 * xmn n (month number) in Hijri calendar
720 * xmY Y (full year) in Hijri calendar
722 * xkY Y (full year) in Thai solar calendar. Months and days are
723 * identical to the Gregorian calendar
724 * xoY Y (full year) in Minguo calendar or Juche year.
725 * Months and days are identical to the
727 * xtY Y (full year) in Japanese nengo. Months and days are
728 * identical to the Gregorian calendar
730 * Characters enclosed in double quotes will be considered literal (with
731 * the quotes themselves removed). Unmatched quotes will be considered
732 * literal quotes. Example:
734 * "The month is" F => The month is January
737 * Backslash escaping is also supported.
739 * Input timestamp is assumed to be pre-normalized to the desired local
742 * @param $format String
743 * @param $ts String: 14-character timestamp
746 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
// Formats the 14-character timestamp $ts according to $format, scanning the
// format one code at a time. Text results are appended to $s directly;
// numeric results are stored in $num and appended at the end of each
// iteration after digit transformation.
748 function sprintfDate( $format, $ts ) {
761 for ( $p = 0; $p < strlen( $format ); $p++
) {
// Codes starting with 'x' are two characters long; the calendar prefixes
// xi, xj, xk, xm, xo and xt take one more character.
764 if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
765 $code .= $format[++
$p];
768 if ( ( $code === 'xi' ||
$code == 'xj' ||
$code == 'xk' ||
$code == 'xm' ||
$code == 'xo' ||
$code == 'xt' ) && $p < strlen( $format ) - 1 ) {
769 $code .= $format[++
$p];
780 $rawToggle = !$rawToggle;
789 $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
792 if ( !$hebrew ) $hebrew = self
::tsToHebrew( $ts );
793 $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
796 $num = substr( $ts, 6, 2 );
799 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
// gmdate( 'w' ) is 0-based starting at Sunday; the weekday getters take a
// 1-based key, hence the + 1.
800 $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) +
1 );
803 $num = intval( substr( $ts, 6, 2 ) );
807 $iranian = self
::tsToIranian( $ts );
813 $hijri = self
::tsToHijri( $ts );
819 $hebrew = self
::tsToHebrew( $ts );
825 $unix = wfTimestamp( TS_UNIX
, $ts );
827 $s .= $this->getWeekdayName( gmdate( 'w', $unix ) +
1 );
831 $unix = wfTimestamp( TS_UNIX
, $ts );
833 $w = gmdate( 'w', $unix );
838 $unix = wfTimestamp( TS_UNIX
, $ts );
840 $num = gmdate( 'w', $unix );
844 $unix = wfTimestamp( TS_UNIX
, $ts );
846 $num = gmdate( 'z', $unix );
850 $unix = wfTimestamp( TS_UNIX
, $ts );
852 $num = gmdate( 'W', $unix );
855 $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
859 $iranian = self
::tsToIranian( $ts );
861 $s .= $this->getIranianCalendarMonthName( $iranian[1] );
865 $hijri = self
::tsToHijri( $ts );
867 $s .= $this->getHijriCalendarMonthName( $hijri[1] );
871 $hebrew = self
::tsToHebrew( $ts );
873 $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
876 $num = substr( $ts, 4, 2 );
879 $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
882 $num = intval( substr( $ts, 4, 2 ) );
886 $iranian = self
::tsToIranian( $ts );
892 $hijri = self
::tsToHijri ( $ts );
898 $hebrew = self
::tsToHebrew( $ts );
904 $unix = wfTimestamp( TS_UNIX
, $ts );
906 $num = gmdate( 't', $unix );
910 $hebrew = self
::tsToHebrew( $ts );
916 $unix = wfTimestamp( TS_UNIX
, $ts );
918 $num = gmdate( 'L', $unix );
922 $unix = wfTimestamp( TS_UNIX
, $ts );
// NOTE(review): this is the only place that calls date() instead of gmdate(),
// so the ISO year depends on the server's default timezone - confirm whether
// gmdate( 'o', ... ) was intended.
924 $num = date( 'o', $unix );
927 $num = substr( $ts, 0, 4 );
931 $iranian = self
::tsToIranian( $ts );
937 $hijri = self
::tsToHijri( $ts );
943 $hebrew = self
::tsToHebrew( $ts );
949 $thai = self
::tsToYear( $ts, 'thai' );
955 $minguo = self
::tsToYear( $ts, 'minguo' );
961 $tenno = self
::tsToYear( $ts, 'tenno' );
966 $num = substr( $ts, 2, 2 );
969 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'am' : 'pm';
972 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'AM' : 'PM';
// 12-hour clock: hour modulo 12, with 0 shown as 12.
975 $h = substr( $ts, 8, 2 );
976 $num = $h %
12 ?
$h %
12 : 12;
979 $num = intval( substr( $ts, 8, 2 ) );
982 $h = substr( $ts, 8, 2 );
983 $num = sprintf( '%02d', $h %
12 ?
$h %
12 : 12 );
986 $num = substr( $ts, 8, 2 );
989 $num = substr( $ts, 10, 2 );
992 $num = substr( $ts, 12, 2 );
996 $unix = wfTimestamp( TS_UNIX
, $ts );
998 $s .= gmdate( 'c', $unix );
1002 $unix = wfTimestamp( TS_UNIX
, $ts );
1004 $s .= gmdate( 'r', $unix );
1008 $unix = wfTimestamp( TS_UNIX
, $ts );
1013 # Backslash escaping
1014 if ( $p < strlen( $format ) - 1 ) {
1015 $s .= $format[++
$p];
1022 if ( $p < strlen( $format ) - 1 ) {
1023 $endQuote = strpos( $format, '"', $p +
1 );
1024 if ( $endQuote === false ) {
1025 # No terminating quote, assume literal "
// Copy the text between the two quotes, without the quotes themselves.
1028 $s .= substr( $format, $p +
1, $endQuote - $p - 1 );
1032 # Quote at end of string, assume literal "
// Emit the pending number: the raw, roman and Hebrew-numeral flags are tested
// in that order; with none set the digits go through formatNum().
1039 if ( $num !== false ) {
1040 if ( $rawToggle ||
$raw ) {
1043 } elseif ( $roman ) {
1044 $s .= self
::romanNumeral( $num );
1046 } elseif ( $hebrewNum ) {
1047 $s .= self
::hebrewNumeral( $num );
1050 $s .= $this->formatNum( $num, true );
1057 private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1058 private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1060 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1061 * Gregorian dates to Iranian dates. Originally written in C, it
1062 * is released under the terms of GNU Lesser General Public
1063 * License. Conversion to PHP was performed by Niklas Laxström.
1065 * Link: http://www.farsiweb.info/jalali/jalali.c
// Converts the Gregorian date at the start of timestamp $ts (YYYYMMDD...)
// into an Iranian calendar date, returned as array( year, month, day ).
1067 private static function tsToIranian( $ts ) {
// Year is taken relative to 1600; month and day become 0-based.
1068 $gy = substr( $ts, 0, 4 ) -1600;
1069 $gm = substr( $ts, 4, 2 ) -1;
1070 $gd = substr( $ts, 6, 2 ) -1;
1072 # Days passed from the beginning (including leap years)
1074 +
floor( ( $gy +
3 ) / 4 )
1075 - floor( ( $gy +
99 ) / 100 )
1076 +
floor( ( $gy +
399 ) / 400 );
1079 // Add days of the past months of this year
1080 for ( $i = 0; $i < $gm; $i++
) {
1081 $gDayNo +
= self
::$GREG_DAYS[$i];
// Leap-year test for dates after February. $gy is offset by 1600, a multiple
// of 400, so the modulo tests give the same result as on the real year.
1085 if ( $gm > 1 && ( ( $gy %
4 === 0 && $gy %
100 !== 0 ||
( $gy %
400 == 0 ) ) ) ) {
1089 // Days passed in current month
// NOTE(review): 79, 12053, 1461 and 979 are constants of the cited jalali.c
// algorithm; their meaning is not derivable from this code - see the source.
1092 $jDayNo = $gDayNo - 79;
1094 $jNp = floor( $jDayNo / 12053 );
1097 $jy = 979 +
33 * $jNp +
4 * floor( $jDayNo / 1461 );
1100 if ( $jDayNo >= 366 ) {
1101 $jy +
= floor( ( $jDayNo - 1 ) / 365 );
1102 $jDayNo = floor( ( $jDayNo - 1 ) %
365 );
// Walk through the month lengths until the remaining day count fits.
1105 for ( $i = 0; $i < 11 && $jDayNo >= self
::$IRANIAN_DAYS[$i]; $i++
) {
1106 $jDayNo -= self
::$IRANIAN_DAYS[$i];
1112 return array( $jy, $jm, $jd );
1116 * Converting Gregorian dates to Hijri dates.
1118 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1120 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
// Converts the Gregorian date at the start of timestamp $ts (YYYYMMDD...)
// into a Hijri calendar date, returned as array( year, month, day ).
1122 private static function tsToHijri( $ts ) {
1123 $year = substr( $ts, 0, 4 );
1124 $month = substr( $ts, 4, 2 );
1125 $day = substr( $ts, 6, 2 );
// The condition below is true for dates after 14 October 1582; two different
// formulas for $zjd follow.
// NOTE(review): $zjd is presumably a Julian Day Number - confirm.
1133 ( $zy > 1582 ) ||
( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1134 ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1137 $zjd = (int)( ( 1461 * ( $zy +
4800 +
(int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1138 (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1139 (int)( ( 3 * (int)( ( ( $zy +
4900 +
(int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1142 $zjd = 367 * $zy - (int)( ( 7 * ( $zy +
5001 +
(int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1143 (int)( ( 275 * $zm ) / 9 ) +
$zd +
1729777;
// From here on $zy, $zm and $zd are overwritten with the Hijri year, month
// and day derived from $zjd; every division is truncated with an (int) cast.
1146 $zl = $zjd -1948440 +
10632;
1147 $zn = (int)( ( $zl - 1 ) / 10631 );
1148 $zl = $zl - 10631 * $zn +
354;
1149 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) +
( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1150 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) +
29;
1151 $zm = (int)( ( 24 * $zl ) / 709 );
1152 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1153 $zy = 30 * $zn +
$zj - 30;
1155 return array( $zy, $zm, $zd );
1159 * Converting Gregorian dates to Hebrew dates.
1161 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1162 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1163 * to translate the relevant functions into PHP and release them under
1166 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1167 * and Adar II is 14. In a non-leap year, Adar is 6.
// Converts the Gregorian date at the start of timestamp $ts (YYYYMMDD...)
// into a Hebrew calendar date. Returns array( Hebrew year, Hebrew month,
// Hebrew day, $days ), where $days is the length computed for the month the
// search loop stopped in.
1169 private static function tsToHebrew( $ts ) {
1171 $year = substr( $ts, 0, 4 );
1172 $month = substr( $ts, 4, 2 );
1173 $day = substr( $ts, 6, 2 );
1175 # Calculate Hebrew year
1176 $hebrewYear = $year +
3760;
1178 # Month number when September = 1, August = 12
1180 if ( $month > 12 ) {
1187 # Calculate day of year from 1 September
1189 for ( $i = 1; $i < $month; $i++
) {
1193 # Check if the year is leap
1194 if ( $year %
400 == 0 ||
( $year %
4 == 0 && $year %
100 > 0 ) ) {
1197 } elseif ( $i == 8 ||
$i == 10 ||
$i == 1 ||
$i == 3 ) {
1204 # Calculate the start of the Hebrew year
1205 $start = self
::hebrewYearStart( $hebrewYear );
1207 # Calculate next year's start
1208 if ( $dayOfYear <= $start ) {
1209 # Day is before the start of the year - it is the previous year
1211 $nextStart = $start;
1215 # Add days since previous year's 1 September
1217 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1221 # Start of the new (previous) year
1222 $start = self
::hebrewYearStart( $hebrewYear );
1225 $nextStart = self
::hebrewYearStart( $hebrewYear +
1 );
1228 # Calculate Hebrew day of year
1229 $hebrewDayOfYear = $dayOfYear - $start;
1231 # Difference between year's days
1232 $diff = $nextStart - $start;
1233 # Add 12 (or 13 for leap years) days to ignore the difference between
1234 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1235 # difference is only about the year type
1236 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1242 # Check the year pattern, and is leap year
1243 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1244 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1245 # and non-leap years
1246 $yearPattern = $diff %
30;
1247 # Check if leap year
1248 $isLeap = $diff >= 30;
1250 # Calculate day in the month from number of day in the Hebrew year
1251 # Don't check Adar - if the day is not in Adar, we will stop before;
1252 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1253 $hebrewDay = $hebrewDayOfYear;
// Month search: subtract each month's length from $hebrewDay until the
// remainder fits inside the current month.
1256 while ( $hebrewMonth <= 12 ) {
1257 # Calculate days in this month
1258 if ( $isLeap && $hebrewMonth == 6 ) {
1259 # Adar in a leap year
1261 # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1263 if ( $hebrewDay <= $days ) {
1267 # Subtract the days of Adar I
1268 $hebrewDay -= $days;
1271 if ( $hebrewDay <= $days ) {
1277 } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1278 # Cheshvan in a complete year (otherwise as the rule below)
1280 } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1281 # Kislev in an incomplete year (otherwise as the rule below)
1284 # Odd months have 30 days, even have 29
1285 $days = 30 - ( $hebrewMonth - 1 ) %
2;
1287 if ( $hebrewDay <= $days ) {
1288 # In the current month
1291 # Subtract the days of the current month
1292 $hebrewDay -= $days;
1293 # Try in the next month
1298 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1302 * This calculates the Hebrew year start, as days since 1 September.
1303 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1304 * Used for Hebrew date.
// Computes the start of Hebrew year $year; per the method's doc comment the
// value is expressed as days since 1 September.
// NOTE(review): the floating-point constants come from the Gauss-based
// formula named in the doc comment - verify them against that algorithm.
1306 private static function hebrewYearStart( $year ) {
// $a: ( 12 * ( year - 1 ) + 17 ) mod 19; $b: ( year - 1 ) mod 4.
1307 $a = intval( ( 12 * ( $year - 1 ) +
17 ) %
19 );
1308 $b = intval( ( $year - 1 ) %
4 );
1309 $m = 32.044093161144 +
1.5542417966212 * $a +
$b / 4.0 - 0.0031777940220923 * ( $year - 1 );
// Integer part of $m.
1313 $Mar = intval( $m );
// $c is a value modulo 7; the branches below depend on $c, $a and $m.
1319 $c = intval( ( $Mar +
3 * ( $year - 1 ) +
5 * $b +
5 ) %
7 );
1320 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1322 } else if ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1324 } else if ( $c == 2 ||
$c == 4 ||
$c == 6 ) {
// Century-based correction using ( $year - 3761 ).
1328 $Mar +
= intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1333 * Algorithm to convert Gregorian dates to Thai solar dates,
1334 * Minguo dates or Japanese nengo dates.
1336 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1337 * http://en.wikipedia.org/wiki/Minguo_calendar
1338 * http://en.wikipedia.org/wiki/Japanese_era_name
1340 * @param $ts String: 14-character timestamp
1341 * @param $cName String: calendar name
1342 * @return Array: converted year, month, day
// Converts the Gregorian year of timestamp $ts into the year of the calendar
// named by $cName ('thai', 'minguo', 'juche' or 'tenno'). Month and day are
// passed through unchanged. Returns array( year, month, day ); for 'tenno'
// the year is a string prefixed with the era name.
1344 private static function tsToYear( $ts, $cName ) {
1345 $gy = substr( $ts, 0, 4 );
1346 $gm = substr( $ts, 4, 2 );
1347 $gd = substr( $ts, 6, 2 );
1349 if ( !strcmp( $cName, 'thai' ) ) {
1351 # Add 543 years to the Gregorian calendar
1352 # Months and days are identical
1353 $gy_offset = $gy +
543;
1354 } else if ( ( !strcmp( $cName, 'minguo' ) ) ||
!strcmp( $cName, 'juche' ) ) {
1356 # Deduct 1911 years from the Gregorian calendar
1357 # Months and days are identical
1358 $gy_offset = $gy - 1911;
1359 } else if ( !strcmp( $cName, 'tenno' ) ) {
1360 # Nengō dates up to Meiji period
1361 # Deduct years from the Gregorian calendar
1362 # depending on the nengo periods
1363 # Months and days are identical
// Meiji era: every date before 31 July 1912, counted from 1868.
// NOTE(review): there is no lower bound, so years before 1868 yield zero or
// negative era years - confirm callers never pass such dates.
1364 if ( ( $gy < 1912 ) ||
( ( $gy == 1912 ) && ( $gm < 7 ) ) ||
( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1366 $gy_gannen = $gy - 1868 +
1;
1367 $gy_offset = $gy_gannen;
1368 if ( $gy_gannen == 1 ) {
1371 $gy_offset = '明治' . $gy_offset;
// Taisho era: 31 July 1912 up to 25 December 1926, counted from 1912.
1373 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1374 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1375 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1376 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1377 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1381 $gy_gannen = $gy - 1912 +
1;
1382 $gy_offset = $gy_gannen;
1383 if ( $gy_gannen == 1 ) {
1386 $gy_offset = '大正' . $gy_offset;
// Showa era: 26 December 1926 up to 7 January 1989, counted from 1926.
1388 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1389 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1390 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1394 $gy_gannen = $gy - 1926 +
1;
1395 $gy_offset = $gy_gannen;
1396 if ( $gy_gannen == 1 ) {
1399 $gy_offset = '昭和' . $gy_offset;
// Heisei era: counted from 1989.
1402 $gy_gannen = $gy - 1989 +
1;
1403 $gy_offset = $gy_gannen;
1404 if ( $gy_gannen == 1 ) {
1407 $gy_offset = '平成' . $gy_offset;
1413 return array( $gy_offset, $gm, $gd );
/**
 * Roman number formatting up to 3000
 *
 * @param $num Mixed: number to format (converted with intval())
 * @return String Roman numeral, or the integer itself when it lies
 *   outside the range 1..3000
 */
static function romanNumeral( $num ) {
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 3000 ) {
		return $num;
	}

	# Emit one table entry per decimal place, most significant first
	$roman = '';
	$places = array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 );
	foreach ( $places as $row => $weight ) {
		if ( $num >= $weight ) {
			$roman .= $table[$row][(int)floor( $num / $weight )];
		}
		$num = $num % $weight;
	}
	return $roman;
}
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * A thousands digit is followed by an apostrophe. A single letter gets a
 * trailing apostrophe, longer results get a double quote before the last
 * letter, and a last letter that has a final form is replaced by it.
 *
 * @param $num Mixed: number to format (converted with intval())
 * @return String Hebrew numeral, or the integer itself when it lies
 *   outside the range 1..9999
 */
static function hebrewNumeral( $num ) {
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);
	# Letters that take a different shape at the end of a word
	static $finalForms = array(
		'כ' => 'ך',
		'מ' => 'ם',
		'נ' => 'ן',
		'פ' => 'ף',
		'צ' => 'ץ',
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				# 15 and 16 are written as 9+6 and 9+7
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	# Round thousands consist of one letter plus the thousands apostrophe.
	# The byte-based punctuation below would cut that two-byte letter in
	# half, so return the string as it is.
	if ( substr( $s, -1 ) === "'" ) {
		return $s;
	}

	# Every letter is two bytes long, so byte offsets address whole letters
	if ( strlen( $s ) == 2 ) {
		$str = $s . "'";
	} else {
		$str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
		$str .= substr( $s, strlen( $s ) - 2, 2 );
	}

	$start = substr( $str, 0, strlen( $str ) - 2 );
	$end = substr( $str, strlen( $str ) - 2 );
	if ( isset( $finalForms[$end] ) ) {
		$str = $start . $finalForms[$end];
	}
	return $str;
}
/**
 * This is meant to be used by time(), date(), and timeanddate() to get
 * the date preference they're supposed to use.
 *
 *<code>
 * function timeanddate([...], $format = true) {
 * 	$datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return String: the format name, 'default' when it would be empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		$datePreference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$datePreference = $wgUser->getDatePreference();
	} else {
		$datePreference = (string)User::getDefaultOption( 'date' );
	}

	return $datePreference == '' ? 'default' : $datePreference;
}
/**
 * Get a format string for a given type and preference
 *
 * Results are memoised in $this->dateFormatStrings. When the requested
 * preference is 'default' or has no format of its own, the language's
 * default date format is used instead; the result is stored under both
 * the requested and the resolved name so that repeated calls with the
 * same arguments hit the memo.
 *
 * @param $type May be date, time or both
 * @param $pref The format name as it appears in Messages*.php
 * @return the format string found in the data cache
 */
function getDateFormatString( $type, $pref ) {
	if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
		return $this->dateFormatStrings[$type][$pref];
	}

	$requested = $pref;
	$df = null;
	if ( $pref != 'default' ) {
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}
	if ( is_null( $df ) ) {
		# Unknown or 'default' preference: fall back to the language default
		$pref = $this->getDefaultDateFormat();
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}

	$this->dateFormatStrings[$type][$pref] = $df;
	$this->dateFormatStrings[$type][$requested] = $df;
	return $df;
}
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the 'date' format string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	return $this->sprintfDate( $this->getDateFormatString( 'date', $preference ), $mwTimestamp );
}
/**
 * Format the time-of-day part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the 'time' format string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	return $this->sprintfDate( $this->getDateFormatString( 'time', $preference ), $mwTimestamp );
}
/**
 * Format both the date and the time of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the 'both' format string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	return $this->sprintfDate( $this->getDateFormatString( 'both', $preference ), $mwTimestamp );
}
/**
 * Fetch one entry of this language's 'messages' item from the data cache.
 *
 * @param $key String: message key
 * @return whatever the data cache holds for that key
 */
function getMessage( $key ) {
	$code = $this->mCode;
	return self::$dataCache->getSubitem( $code, 'messages', $key );
}
/**
 * Fetch the complete 'messages' item of this language from the data cache.
 *
 * @return whatever the data cache holds for 'messages'
 */
function getAllMessages() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'messages' );
}
/**
 * Wrapper for PHP's iconv() that drops characters which cannot be
 * represented in the target encoding.
 *
 * This is a plain wrapper in all languages except Esperanto, which does
 * some nasty x-conversions beforehand.
 *
 * @param $in String: source encoding
 * @param $out String: target encoding
 * @param $string String: text to convert
 * @return the result of iconv()
 */
function iconv( $in, $out, $string ) {
	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	$target = $out . '//IGNORE';
	wfSuppressWarnings();
	$converted = iconv( $in, $target, $string );
	wfRestoreWarnings();
	return $converted;
}
// callback functions for uc(), lc(), ucwords(), ucwordbreaks()

/**
 * preg_replace_callback() handler: capitalise the first character of the
 * first captured group.
 *
 * @param $matches Array: regex match data
 * @return the capitalised group
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
/**
 * preg_replace_callback() handler: upper-case the whole match with
 * mb_strtoupper().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucwordbreaksCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
/**
 * preg_replace_callback() handler: translate the first captured group
 * with the first table returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$upperMap = $caseMaps[0];
	return strtr( $matches[1], $upperMap );
}
/**
 * preg_replace_callback() handler: translate the first captured group
 * with the second table returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$lowerMap = $caseMaps[1];
	return strtr( $matches[1], $lowerMap );
}
/**
 * preg_replace_callback() handler: upper-case the whole match with
 * mb_strtoupper().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucwordsCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
/**
 * preg_replace_callback() handler: translate the whole match with the
 * first table returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 * @return String
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	$upperMap = $caseMaps[0];
	return strtr( $matches[0], $upperMap );
}
/**
 * Make a string's first character uppercase
 *
 * @param $str String
 * @return String
 */
function ucfirst( $str ) {
	$firstByte = ord( $str );
	if ( $firstByte >= 128 ) {
		// fall back to more complex logic in case of multibyte strings
		return $this->uc( $str, true );
	}
	if ( $firstByte >= 96 ) {
		// single-byte first character: PHP's ucfirst() is enough
		return ucfirst( $str );
	}
	// first byte is below 96: nothing to do
	return $str;
}
/**
 * Convert a string to uppercase
 *
 * @param $str String
 * @param $first Bool: when true only the first character is converted
 * @return String
 */
function uc( $str, $first = false ) {
	$multibyte = $this->isMultibyte( $str );

	if ( !$multibyte ) {
		// Plain single-byte text: PHP's byte-wise functions are sufficient
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( $first ) {
			return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return mb_strtoupper( $str );
	}

	// No mbstring: convert character by character through ucCallback()
	$x = $first ? '^' : '';
	return preg_replace_callback(
		"/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
/**
 * Make a string's first character lowercase
 *
 * @param $str String
 * @return String
 */
function lcfirst( $str ) {
	$firstByte = ord( $str );
	if ( !$firstByte ) {
		// empty input, or input starting with a NUL byte
		return strval( $str );
	}
	if ( $firstByte >= 128 ) {
		// multibyte first character
		return $this->lc( $str, true );
	}
	if ( $firstByte <= 96 ) {
		$str[0] = strtolower( $str[0] );
	}
	return $str;
}
/**
 * Convert a string to lowercase
 *
 * @param $str String
 * @param $first Bool: when true only the first character is converted
 * @return String
 */
function lc( $str, $first = false ) {
	$multibyte = $this->isMultibyte( $str );

	if ( !$multibyte ) {
		// Plain single-byte text: PHP's byte-wise functions are sufficient
		if ( $first ) {
			return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
		}
		return strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( $first ) {
			return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return mb_strtolower( $str );
	}

	// No mbstring: convert character by character through lcCallback()
	$x = $first ? '^' : '';
	return preg_replace_callback(
		"/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
/**
 * Tell whether a string contains at least one byte in the range 0x80-0xff.
 *
 * @param $str String
 * @return Bool
 */
function isMultibyte( $str ) {
	$found = preg_match( '/[\x80-\xff]/', $str );
	return (bool)$found;
}
/**
 * Lower-case a string and then capitalise the first letter of every
 * space-separated word.
 *
 * @param $str String
 * @return String
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
/**
 * Capitalize words at word breaks
 *
 * @param $str String
 * @return String
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s String
 * @return String
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
/**
 * Return $s unchanged when it is plain 7-bit text or matches the UTF-8
 * byte pattern below; otherwise convert it to UTF-8 from the language's
 * fallback 8-bit encoding.
 *
 * @param $s String
 * @return String
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# Check for non-UTF-8 URLs: no high bytes means nothing to convert
	if ( !preg_match( '/[\x80-\xff]/', $s ) ) {
		return $s;
	}

	$utf8Pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	if ( preg_match( $utf8Pattern, $s ) ) {
		return $s;
	}

	$legacyEncoding = $this->fallback8bitEncoding();
	return $this->iconv( $legacyEncoding, 'utf-8', $s );
}
/**
 * Fetch the 'fallback8bitEncoding' item of this language from the data
 * cache.
 *
 * @return whatever the data cache holds for that item
 */
function fallback8bitEncoding() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'fallback8bitEncoding' );
}
/**
 * Most writing systems use whitespace to break up words.
 * Some languages such as Chinese don't conventionally do this,
 * which requires special handling when breaking up words for
 * searching etc.
 *
 * @return Bool
 */
function hasWordBreaks() {
	$usesWhitespace = true;
	return $usesWhitespace;
}
/**
 * Some languages such as Chinese require word segmentation.
 * Specify such segmentation when overridden in a derived class; this
 * base implementation hands the input back untouched.
 *
 * @param $string String
 * @return String
 */
function segmentByWord( $string ) {
	$segmented = $string;
	return $segmented;
}
/**
 * Some languages have special punctuation that needs to be normalized.
 * Make such changes here. This implementation only applies
 * convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
/**
 * Convert double-width roman characters to single-width.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only the digits 0-9 and the letters A-Z and a-z are converted.
 *
 * @param $string String
 * @return String
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$half = str_split( $halfWidth );
		$full = array();
		foreach ( $half as $char ) {
			# A full-width form sits 0xFEE0 above its ASCII counterpart and
			# always encodes to three UTF-8 bytes. Building the bytes from
			# the code point keeps this table independent of how the source
			# file itself is encoded or normalised.
			$codepoint = ord( $char ) + 0xFEE0;
			$full[] = chr( 0xE0 | ( $codepoint >> 12 ) )
				. chr( 0x80 | ( ( $codepoint >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $codepoint & 0x3F ) );
		}
	}

	$string = str_replace( $full, $half, $string );
	return $string;
}
/**
 * Surround the first capture group of every $pattern match with spaces,
 * then collapse each run of spaces into a single space.
 *
 * @param $string String
 * @param $pattern String: regular expression with at least one group
 * @return String
 */
protected static function insertSpace( $string, $pattern ) {
	$spaced = preg_replace( $pattern, " $1 ", $string );
	return preg_replace( '/ +/', ' ', $spaced );
}
/**
 * Some languages, e.g. Chinese, need to do a conversion in order for
 * search results to be displayed correctly. This base implementation
 * hands the terms back untouched.
 *
 * @param $termsArray Array
 * @return Array
 */
function convertForSearchResult( $termsArray ) {
	$converted = $termsArray;
	return $converted;
}
/**
 * Get the first character of a string.
 *
 * A three-byte character whose code point lies in 0xac00..0xd7a3 (Hangul
 * syllables) is replaced by the jamo selected from the table below.
 *
 * @param $s String
 * @return String: the first character, or '' when the string does not
 *   start with a sequence matching the pattern below
 */
function firstChar( $s ) {
	# Exclusive upper code point bound => jamo returned below that bound
	static $jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	// Break down Hangul syllables to grab the first jamo
	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}
	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}
	return "\xe3\x85\x8e";
}
/**
 * Hook for languages with an alternate character encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 *
 * If this language is used as the primary content language, an override
 * to the defaults can be set here on startup. The base implementation
 * does nothing.
 */
function initEncoding() {
	# Intentionally empty
}
/**
 * Recode text for the edit box.
 *
 * For some languages we'll want to explicitly specify which characters
 * make it into the edit box raw or are converted in some way or another.
 * Note that if wgOutputEncoding is different from wgInputEncoding, this
 * text will be further converted to wgOutputEncoding.
 *
 * @param $s String: UTF-8 text
 * @return String: $s itself when $wgEditEncoding is empty or 'UTF-8',
 *   otherwise $s converted to $wgEditEncoding
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$needsRecoding = $wgEditEncoding != '' && $wgEditEncoding != 'UTF-8';
	if ( !$needsRecoding ) {
		return $s;
	}
	return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
}
/**
 * Recode submitted text back to UTF-8; the counterpart of recodeForEdit().
 *
 * @param $s String: text in $wgEditEncoding
 * @return String: $s itself when the edit encoding is empty or 'UTF-8',
 *   otherwise $s converted to UTF-8
 */
function recodeInput( $s ) {
	# Take the previous into account.
	global $wgEditEncoding;

	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';
	if ( $enc == 'UTF-8' ) {
		return $s;
	}
	return $this->iconv( $enc, 'UTF-8', $s );
}
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only.
 *
 * @param $s String
 * @return String
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	# Apply the Arabic pair file first, then the Malayalam one
	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}
	return $s;
}
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param $file String: name of the serialized pair file
 * @param $string String: text to transform
 * @return the result of ReplacementArray::replace()
 * @throws MWException when the pair file cannot be loaded
 */
function transformUsingPairFile( $file, $string ) {
	if ( !isset( $this->transformData[$file] ) ) {
		$pairs = wfGetPrecompiledData( $file );
		if ( $pairs === false ) {
			throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
		}
		$this->transformData[$file] = new ReplacementArray( $pairs );
	}

	$replacer = $this->transformData[$file];
	return $replacer->replace( $string );
}
/**
 * For right-to-left language support
 *
 * @return the 'rtl' item of this language from the data cache
 */
function isRTL() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'rtl' );
}
/**
 * Return the correct HTML 'dir' attribute value for this language.
 *
 * @return String: 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}
	return 'ltr';
}
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction
 *
 * @return String: UTF-8 bytes of U+200F for right-to-left languages,
 *   of U+200E otherwise
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
/**
 * Fetch the 'capitalizeAllNouns' item of this language from the data cache.
 *
 * @return whatever the data cache holds for that item
 */
function capitalizeAllNouns() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'capitalizeAllNouns' );
}
/**
 * An arrow, depending on the language direction
 *
 * @return String: '←' for right-to-left languages, '→' otherwise
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
 *
 * @return the 'linkPrefixExtension' item of this language from the data
 *   cache
 */
function linkPrefixExtension() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'linkPrefixExtension' );
}
/**
 * Fetch the 'magicWords' item of this language from the data cache.
 *
 * @return whatever the data cache holds for that item
 */
function getMagicWords() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'magicWords' );
}
/**
 * Run the 'LanguageGetMagic' hook once per object, letting hook handlers
 * fill $this->mMagicExtensions. Later calls return immediately.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		# Set the flag before running the hook
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( 'LanguageGetMagic' );
	}
}
/**
 * Fill a MagicWord object with data from here
 *
 * Entries supplied through the LanguageGetMagic hook take precedence over
 * the language's own magic word table. The first element of an entry is
 * stored as the case-sensitivity flag, the rest as synonyms. When no array
 * entry exists for the id, a message is written with error_log() and the
 * object is left untouched.
 *
 * @param $mw object whose mId is read and whose mCaseSensitive and
 *   mSynonyms are set
 */
function getMagic( $mw ) {
	$this->doMagicHook();

	$rawEntry = false;
	if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId];
	} else {
		$magicWords = $this->getMagicWords();
		if ( isset( $magicWords[$mw->mId] ) ) {
			$rawEntry = $magicWords[$mw->mId];
		}
	}

	if ( !is_array( $rawEntry ) ) {
		error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
		return;
	}

	$mw->mCaseSensitive = $rawEntry[0];
	$mw->mSynonyms = array_slice( $rawEntry, 1 );
}
/**
 * Add magic words to the extension array
 *
 * The fallback chain of this language is walked from its far end ('en' is
 * appended when the chain does not contain it) towards the language
 * itself, so words defined for a code nearer to this language override
 * those of its fallbacks, and all of them override entries that were
 * already present.
 *
 * @param $newWords Array: language code => array of magic words
 */
function addMagicWordsByLang( $newWords ) {
	# Collect the fallback chain, stopping at the end or at a cycle
	$chain = array();
	for ( $code = $this->getCode(); $code && !in_array( $code, $chain ); $code = self::getFallbackFor( $code ) ) {
		$chain[] = $code;
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	foreach ( array_reverse( $chain ) as $code ) {
		if ( !isset( $newWords[$code] ) ) {
			continue;
		}
		$this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
	}
}
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * @return the 'specialPageAliases' item of this language after the
 *   LanguageGetSpecialPageAliases hook has run on it
 */
function getSpecialPageAliases() {
	// Cache aliases because it may be slow to load them
	if ( !is_null( $this->mExtendedSpecialPageAliases ) ) {
		return $this->mExtendedSpecialPageAliases;
	}

	// Initialise array
	$this->mExtendedSpecialPageAliases =
		self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
	wfRunHooks(
		'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
	);

	return $this->mExtendedSpecialPageAliases;
}
/**
 * Italic is unsuitable for some languages
 *
 * @param $text String: the text to be emphasized.
 * @return String: $text wrapped in an <em> element
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
/**
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages: some use their own digits (for example ੨੯੩,੨੯੧.੨੩੫) and
 * others such as Icelandic just want to use commas instead of dots, and
 * dots instead of commas like "293.291,235".
 *
 * An example of this function being called:
 * <code>
 * wfMsg( 'message', $wgLang->formatNum( $num ) )
 * </code>
 *
 * The separator table is applied only when $nocommafy is false; the digit
 * table is applied only when $wgTranslateNumerals is set.
 *
 * @todo check if it's viable to use localeconv() for the decimal
 *       separator thing.
 * @param $number Mixed: the string to be formatted, should be an integer
 *                or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates
 * @return the formatted number
 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
/**
 * Undo formatNum(): map localised digits and separators back through the
 * flipped transform tables, then strip every comma.
 *
 * @param $number String: formatted number
 * @return String
 */
function parseFormattedNumber( $number ) {
	# Digits first, separators second
	$tables = array(
		$this->digitTransformTable(),
		$this->separatorTransformTable(),
	);
	foreach ( $tables as $table ) {
		if ( $table ) {
			$number = strtr( $number, array_flip( $table ) );
		}
	}

	return strtr( $number, array( ',' => '' ) );
}
/**
 * Adds commas to a given number
 *
 * The number is reversed so that groups of three digits can be counted
 * from its end; digits after a decimal point are not grouped.
 *
 * @param $_ Mixed: number to format
 * @return String
 */
function commafy( $_ ) {
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
/**
 * Fetch the 'digitTransformTable' item of this language from the data
 * cache.
 *
 * @return whatever the data cache holds for that item
 */
function digitTransformTable() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'digitTransformTable' );
}
/**
 * Fetch the 'separatorTransformTable' item of this language from the data
 * cache.
 *
 * @return whatever the data cache holds for that item
 */
function separatorTransformTable() {
	$code = $this->mCode;
	return self::$dataCache->getItem( $code, 'separatorTransformTable' );
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with an "and".
 *
 * The array keys are ignored, so lists with gaps or string keys are
 * handled like plain lists.
 *
 * @param $l Array
 * @return String: '' for an empty list
 */
function listToText( $l ) {
	$items = array_values( $l );
	$last = count( $items ) - 1;
	if ( $last < 0 ) {
		return '';
	}

	$s = $items[$last];
	if ( $last >= 1 ) {
		$and = $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' );
		$s = $items[$last - 1] . $and . $s;
	}
	if ( $last >= 2 ) {
		# Fetch the separator once instead of once per item
		$comma = $this->getMessageFromDB( 'comma-separator' );
		for ( $i = $last - 2; $i >= 0; $i-- ) {
			$s = $items[$i] . $comma . $s;
		}
	}
	return $s;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 *
 * @param $list array of strings to put in a comma list
 * @return String
 */
function commaList( $list ) {
	$separator = wfMsgExt(
		'comma-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	# Separator first: PHP 8 no longer accepts the array as first argument
	return implode( $separator, $list );
}
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 *
 * @param $list array of strings to put in a semicolon list
 * @return String
 */
function semicolonList( $list ) {
	$separator = wfMsgExt(
		'semicolon-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	# Separator first: PHP 8 no longer accepts the array as first argument
	return implode( $separator, $list );
}
/**
 * Same as commaList, but separate it with the pipe instead.
 *
 * @param $list array of strings to put in a pipe list
 * @return String
 */
function pipeList( $list ) {
	$separator = wfMsgExt(
		'pipe-separator',
		array( 'escapenoentities', 'language' => $this )
	);
	# Separator first: PHP 8 no longer accepts the array as first argument
	return implode( $separator, $list );
}
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (excluding ellipses)
 * @param $ellipsis String to append to the truncated text
 * @return String: the ellipsis alone when $length is 0, and the original
 *   string when truncating would not make it shorter
 */
function truncate( $string, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Check if there is no need to truncate
	if ( $length == 0 ) {
		return $ellipsis;
	}
	if ( strlen( $string ) <= abs( $length ) ) {
		return $string;
	}

	if ( $length > 0 ) {
		# Keep the head: xyz...
		$truncated = $this->removeBadCharLast( substr( $string, 0, $length ) ) . $ellipsis;
	} else {
		# Keep the tail: ...xyz
		$truncated = $ellipsis . $this->removeBadCharFirst( substr( $string, $length ) );
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
	return strlen( $truncated ) < strlen( $string ) ? $truncated : $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return String
 */
protected function removeBadCharLast( $string ) {
	$length = strlen( $string );
	if ( $length == 0 ) {
		# Nothing to inspect; avoids reading offset -1
		return $string;
	}

	$char = ord( $string[$length - 1] );
	if ( $char >= 0xc0 ) {
		# We got the first byte only of a multibyte char; remove it.
		return substr( $string, 0, -1 );
	}
	if ( $char >= 0x80 ) {
		# We may have chopped in the middle of a character; remove a lead
		# byte that is followed by too few continuation bytes. Only the
		# tail is matched, so newlines earlier in the string cannot keep
		# the pattern from matching.
		return preg_replace(
			'/(?:[\xe0-\xef][\x80-\xbf]|' .
				'[\xf0-\xf7][\x80-\xbf]{1,2})$/D',
			'',
			$string
		);
	}
	return $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return String
 */
protected function removeBadCharFirst( $string ) {
	# ord() reads the first byte and yields 0 for an empty string, so no
	# string offset is read on empty input
	$char = ord( $string );
	if ( $char >= 0x80 && $char < 0xc0 ) {
		# We chopped in the middle of a character; remove the whole thing
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML)
 *
 * Note: tries to fix broken HTML with MWTidy
 *
 * @param string $text HTML string to truncate
 * @param int $length (zero/positive) Maximum length (excluding ellipses)
 * @param string $ellipsis String to append to the truncated text
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}
	# Check if there is no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML
	}
	$text = MWTidy::tidy( $text ); // fix tags
	$displayLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = '';
	$openTags = array(); // open tag stack
	$textLen = strlen( $text );
	for ( $pos = 0; $pos < $textLen; ++$pos ) {
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for backslash)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$displayLen++; // entity is one displayed char
				}
			} else {
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$displayLen++; // this char is displayed
					// Add on the other display text after this...
					$skipped = $this->truncate_skip(
						$ret, $text, "<>&", $pos + 1, $length - $displayLen );
					$displayLen += $skipped;
					$pos += $skipped;
				}
			}
		}
		# Consider truncation once the display length has reached the maximum.
		# Double-check that we're not in the middle of a bracket/entity...
		if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless
				# the ellipsis actually makes the string longer.
				$pOpenTags = $openTags; // save state
				$pRet = $ret; // save state
			} elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
				# Ellipsis won't make string longer/equal, the truncation point was OK.
				$openTags = $pOpenTags; // reload state
				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
	}
	if ( $displayLen == 0 ) {
		return ''; // no text shown, nothing to format
	}
	// Close the last tag if left unclosed by bad HTML.
	// Argument order is ( tag, tag type, last character, open tag stack ).
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
/**
 * truncateHtml() helper function
 * like strcspn() but adds the skipped chars to $ret
 *
 * @param String &$ret Result string the skipped characters are appended to
 * @param String $text Text being scanned
 * @param String $search Characters that stop the scan
 * @param int $start Offset in $text to start scanning from
 * @param int $len Length argument handed to strcspn() as it is
 * @return int Number of characters skipped, 0 when $start is not inside
 *   $text
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
	if ( $start >= strlen( $text ) ) {
		return 0;
	}
	$skipCount = strcspn( $text, $search, $start, $len );
	$ret .= substr( $text, $start, $skipCount );
	return $skipCount;
}
/**
 * truncateHtml() helper function
 * (a) push or pop $tag from $openTags as needed
 * (b) clear $tag value
 *
 * @param String &$tag Current HTML tag name we are looking at
 * @param int $tagType (0-open tag, 1-close tag)
 * @param char $lastCh Character before the '>' that ended this tag
 * @param array &$openTags Open tag stack (not accounting for $tag)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return;
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		$openTags[] = $tag; // tag opened (didn't close itself)
	} elseif ( $tagType == 1 ) {
		$top = count( $openTags ) - 1;
		if ( $openTags && $tag == $openTags[$top] ) {
			array_pop( $openTags ); // tag closed
		}
	}
	$tag = '';
}
/**
 * Grammatical transformations, needed for inflected languages.
 * Invoked by putting {{grammar:case|word}} in a message.
 *
 * Looks the word up in $wgGrammarForms under this language's code and the
 * requested case; a word with no configured form is returned unchanged.
 *
 * @param $word string
 * @param $case string
 * @return string
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;
	$langCode = $this->getCode();
	if ( !isset( $wgGrammarForms[$langCode][$case][$word] ) ) {
		return $word;
	}
	return $wgGrammarForms[$langCode][$case][$word];
}
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If second or third parameter are not specified, masculine is used.
 * These details may be overridden per language.
 *
 * @param $gender string: 'male' and 'female' select the first and second
 *   form; any other value selects the third form, or the first if absent
 * @param $forms Array: the masculine, feminine and neutral forms
 * @return string The selected form, or '' when no forms were given
 */
function gender( $gender, $forms ) {
	if ( count( $forms ) == 0 ) {
		return '';
	}
	// Make sure a feminine slot exists (copies the masculine form if needed)
	$forms = $this->preConvertPlural( $forms, 2 );
	if ( $gender === 'male' ) {
		$pick = 0;
	} elseif ( $gender === 'female' ) {
		$pick = 1;
	} else {
		$pick = isset( $forms[2] ) ? 2 : 0;
	}
	return $forms[$pick];
}
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 form of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language
 */
function convertPlural( $count, $forms ) {
	if ( count( $forms ) == 0 ) {
		return '';
	}
	$padded = $this->preConvertPlural( $forms, 2 );
	// First form for exactly one, second form for everything else
	$index = ( $count == 1 ) ? 0 : 1;
	return $padded[$index];
}
/**
 * Pads the forms given to convertPlural to the requested amount of forms
 * by copying the last one. Arrays that already hold at least $count
 * entries are returned as they are.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	$have = count( $forms );
	while ( $have < $count ) {
		// Repeat whatever currently sits in the last slot
		$forms[] = $forms[$have - 1];
		$have++;
	}
	return $forms;
}
/**
 * For translating of expiry times.
 *
 * Reads the 'ipboptions' message, a comma-separated list of "label:value"
 * pairs, and returns the HTML-escaped label whose value equals $str.
 * $str itself is returned when the message is '-' or nothing matches.
 *
 * @param $str String: the validated block time in English
 * @return Somehow translated block time
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$optionList = $this->getMessageFromDB( 'ipboptions' );

	if ( $optionList != '-' ) {
		foreach ( explode( ',', $optionList ) as $entry ) {
			// Entries without a "label:value" separator are ignored
			if ( strpos( $entry, ':' ) !== false ) {
				list( $label, $expiry ) = explode( ':', $entry );
				if ( strcmp( $str, $expiry ) == 0 ) {
					return htmlspecialchars( trim( $label ) );
				}
			}
		}
	}

	return $str;
}
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation hands the text back as is.
 *
 * @param $text String
 * @return String
 */
function segmentForDiff( $text ) {
	return $text;
}
/**
 * Counterpart of segmentForDiff(): unsegment to show the result.
 * This base implementation hands the text back as is.
 *
 * @param $text String
 * @return String
 */
function unsegmentForDiff( $text ) {
	return $text;
}
/**
 * Convert text to all supported variants.
 * Delegates to the converter held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's autoConvertToAllVariants() returns
 */
function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
/**
 * Convert text to different variants of a language.
 * Delegates to the converter held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's convert() returns
 */
function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
/**
 * Convert a Title object to a string in the preferred variant.
 * Delegates to the converter held in $this->mConverter.
 *
 * @param $title Title object to convert
 * @return whatever the converter's convertTitle() returns
 */
function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
/**
 * Check if this is a language with variants, i.e. whether getVariants()
 * reports more than one entry.
 *
 * @return bool
 */
function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Delegates to the converter held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's armourMath() returns
 */
function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title.
 *   It is forwarded to convert(); the convert() declared in this class
 *   only names a $text parameter.
 * @return string The converted text, passed through htmlspecialchars()
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
/**
 * Convert a category key.
 * Delegates to the converter held in $this->mConverter.
 *
 * @param $key String
 * @return whatever the converter's convertCategoryKey() returns
 */
function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
/**
 * Get the list of variants supported by this language,
 * see sample implementation in LanguageZh.php.
 * Delegates to the converter held in $this->mConverter.
 *
 * @return array an array of language codes
 */
function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
/**
 * Get the preferred variant.
 * Delegates to the converter held in $this->mConverter.
 *
 * @return whatever the converter's getPreferredVariant() returns
 */
function getPreferredVariant() {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
/**
 * Get the default variant.
 * Delegates to the converter held in $this->mConverter.
 *
 * @return whatever the converter's getDefaultVariant() returns
 */
function getDefaultVariant() {
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
/**
 * Get the URL variant.
 * Delegates to the converter held in $this->mConverter.
 *
 * @return whatever the converter's getURLVariant() returns
 */
function getURLVariant() {
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
/**
 * If a language supports multiple variants, it is
 * possible that non-existing link in one variant
 * actually exists in another variant. This function
 * tries to find it. See e.g. LanguageZh.php
 *
 * The work is done by the converter held in $this->mConverter, which
 * receives $link and $nt by reference.
 *
 * @param $link String: the name of the link
 * @param $nt Mixed: the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *   we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 * Delegates to the converter held in $this->mConverter.
 *
 * @deprecated Use autoConvertToAllVariants()
 * @param $text String
 * @return whatever the converter's convertLinkToAllVariants() returns
 */
function convertLinkToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example, the preferred language variant.
 * Delegates to the converter held in $this->mConverter.
 *
 * @return whatever the converter's getExtraHashOptions() returns
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the article.
 * Delegates to the converter held in $this->mConverter.
 *
 * @return whatever the converter's getParsedTitle() returns
 */
function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 * Delegates to the converter held in $this->mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse forwarded unchanged to the converter
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 * Read from the shared data cache as the 'linkTrail' item of this
 * language's code.
 *
 * @return string
 */
function linkTrail() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'linkTrail' );
}
/**
 * Get the language object, which is this object itself.
 *
 * @return Language
 */
function getLangObj() {
	return $this;
}
/**
 * Get the RFC 3066 code for this language object
 *
 * @return the value stored in $this->mCode
 */
function getCode() {
	$code = $this->mCode;
	return $code;
}
/**
 * Set the code for this language object.
 * The value is stored in $this->mCode without any validation.
 *
 * @param $code the new language code
 */
function setCode( $code ) {
	$this->mCode = $code;
}
/**
 * Get the name of a file for a certain language code.
 *
 * The code gets its first letter upper-cased and its dashes turned into
 * underscores before being placed between $prefix and $suffix.
 *
 * $prefix used to declare a default of 'Language', but because the
 * required $code follows it, that default could never be used by any
 * call; it has been dropped so that newer PHP versions do not report an
 * optional-before-required parameter.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws MWException when $code is rejected by Language::isValidCode()
 *   or contains a slash, backslash or NUL byte
 */
static function getFileName( $prefix, $code, $suffix = '.php' ) {
	// Protect against path traversal
	if ( !Language::isValidCode( $code )
		|| strcspn( $code, "/\\\000" ) !== strlen( $code ) )
	{
		throw new MWException( "Invalid language code \"$code\"" );
	}

	return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
}
/**
 * Get the language code from a file name. Inverse of getFileName()
 *
 * The code part must be one upper-case ASCII letter followed by one or
 * more lower-case ASCII letters or underscores; it is lower-cased and its
 * underscores are turned into dashes.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return Language code, or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';
	$m = null;
	preg_match( $pattern, $filename, $m );
	if ( empty( $m ) ) {
		return false;
	}
	$mangledCode = strtolower( $m[1] );
	return str_replace( '_', '-', $mangledCode );
}
/**
 * Get the path of the messages file for a language code, built with
 * getFileName() from the global $IP.
 *
 * @param $code string Language code
 * @return string
 */
static function getMessagesFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
/**
 * Get the path of the language class file for a language code, built
 * with getFileName() from the global $IP.
 *
 * @param $code string Language code
 * @return string
 */
static function getClassFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
/**
 * Get the fallback for a given language
 *
 * @param $code string Language code
 * @return false for 'en'; otherwise the 'fallback' item the localisation
 *   cache holds for $code
 */
static function getFallbackFor( $code ) {
	// Shortcut: 'en' is answered without consulting the cache
	if ( $code === 'en' ) {
		return false;
	}
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'fallback' );
}
/**
 * Get all messages for a given language
 * WARNING: this may take a long time
 *
 * @param $code string Language code
 * @return the 'messages' item the localisation cache holds for $code
 */
static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
/**
 * Get a message for a given language
 *
 * @param $key string Message key
 * @param $code string Language code
 * @return the $key sub-item of the 'messages' item the localisation cache
 *   holds for $code
 */
static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
/**
 * Expand the $1 placeholder in a talk namespace name.
 *
 * Names without '$1' are returned untouched. Otherwise '$1' is replaced
 * by $wgMetaNamespace, {{grammar:case|word}} constructs are resolved via
 * replaceGrammarInNamespace(), and spaces are turned into underscores.
 *
 * @param $talk String: namespace name, possibly containing '$1'
 * @return String
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback holds $this by value: objects are passed by handle, so
	# the reference that used to be taken here served no purpose.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
/**
 * preg_replace_callback() handler used by fixVariableInNamespace().
 *
 * @param $m Array: regex match; $m[1] is the grammatical case and $m[2]
 *   the word, both trimmed before use
 * @return the result of convertGrammar() for that word and case
 */
function replaceGrammarInNamespace( $m ) {
	$case = trim( $m[1] );
	$word = trim( $m[2] );
	return $this->convertGrammar( $word, $case );
}
/**
 * Get the upper- and lower-case character maps.
 *
 * The maps are read from the precompiled 'Utf8Case.ser' data on the first
 * call and kept in static variables for later calls.
 *
 * @return array( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException when the precompiled data cannot be loaded
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before leaving through the exception
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );
	return array( $wikiUpperChars, $wikiLowerChars );
}
/**
 * Format a duration given in seconds using the abbreviation messages
 * 'seconds-abbrev', 'minutes-abbrev' and 'hours-abbrev'.
 *
 * - rounds to under 10 s: seconds with one decimal place
 * - rounds to under 60 s: whole seconds
 * - rounds to under 3600 s: minutes and seconds
 * - otherwise: hours, minutes and seconds
 *
 * @param $seconds Number: duration in seconds
 * @return string
 */
function formatTimePeriod( $seconds ) {
	$tenths = round( $seconds * 10 );
	if ( $tenths < 100 ) {
		return $this->formatNum( sprintf( "%.1f", $tenths / 10 ) ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	$wholeSeconds = round( $seconds );
	if ( $wholeSeconds < 60 ) {
		return $this->formatNum( $wholeSeconds ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	if ( $wholeSeconds < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		// Rounding can produce a full minute; carry it over
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		return $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
			$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	$hours = floor( $seconds / 3600 );
	$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
	$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
	// Carry rounded-up seconds into minutes, then minutes into hours
	if ( $secondsPart == 60 ) {
		$secondsPart = 0;
		$minutes++;
	}
	if ( $minutes == 60 ) {
		$minutes = 0;
		$hours++;
	}
	return $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ' .
		$this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
		$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
}
/**
 * Format a bit rate for output, using the unit bps, kbps, Mbps or Gbps
 * that fits its magnitude (powers of 1000).
 *
 * Mantissas below 10 keep one decimal place, larger ones are rounded to
 * whole numbers. The unit is limited to the available ones: rates below
 * 1 are shown in bps and rates of 10^12 and above in Gbps.
 *
 * @param $bps Number: bit rate in bits per second
 * @return string
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		return $this->formatNum( $bps ) . $units[0];
	}
	$unitIndex = (int)floor( log10( $bps ) / 3 );
	// log10() is negative below 1 and exceeds the table for huge rates;
	// keep the index inside $units
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );
	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question
 *
 * The value moves up one unit each time it is strictly greater than 1024.
 * The result is substituted for '$1' in the matching 'size-*' message.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	$messages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );
	$unit = 0;
	while ( $unit < 3 && $size > 1024 ) {
		$size = $size / 1024;
		$unit++;
	}
	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter
	$round = ( $unit >= 2 ) ? 2 : 0;
	$size = round( $size, $round );
	$text = $this->getMessageFromDB( $messages[$unit] );
	return str_replace( '$1', $this->formatNum( $size ), $text );
}
/**
 * Get the conversion rule title, if any.
 * Delegates to the converter held in $this->mConverter.
 *
 * @return whatever the converter's getConvRuleTitle() returns
 */
function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}