3 * Internationalisation code
10 * @defgroup Language Language
13 if ( !defined( 'MEDIAWIKI' ) ) {
14 echo "This file is part of MediaWiki, it is not a valid entry point.\n";
19 global $wgLanguageNames;
20 require_once( dirname( __FILE__
) . '/Names.php' );
22 global $wgInputEncoding, $wgOutputEncoding;
25 * These are always UTF-8, they exist only for backwards compatibility
27 $wgInputEncoding = 'UTF-8';
28 $wgOutputEncoding = 'UTF-8';
30 if ( function_exists( 'mb_strtoupper' ) ) {
31 mb_internal_encoding( 'UTF-8' );
35 * a fake language converter
/**
 * Store the language object this fake converter reports on.
 *
 * @param $langobj Object: language object; its getCode() is used by the
 *   variant accessors of this class
 */
function __construct( $langobj ) {
	$this->mLang = $langobj;
}
/**
 * No real conversion: the only "variant" is the language's own code.
 *
 * @param $text String
 * @return Array: single entry mapping the language code to $text
 */
function autoConvertToAllVariants( $text ) {
	$code = $this->mLang->getCode();
	return array( $code => $text );
}
/**
 * Return the input untouched.
 *
 * @param $t Mixed
 * @return Mixed: $t, unchanged
 */
function convert( $t ) {
	return $t;
}
/**
 * Return the prefixed text of a title without converting it.
 *
 * @param $t Object: must provide getPrefixedText()
 * @return Mixed: whatever $t->getPrefixedText() returns
 */
function convertTitle( $t ) {
	$prefixedText = $t->getPrefixedText();
	return $prefixedText;
}
/**
 * List the available variants: only the language's own code.
 *
 * @return Array: one-element list holding the language code
 */
function getVariants() {
	$code = $this->mLang->getCode();
	return array( $code );
}
/**
 * The preferred variant is simply the language's own code.
 *
 * @return Mixed: result of getCode() on the wrapped language object
 */
function getPreferredVariant() {
	$code = $this->mLang->getCode();
	return $code;
}
/**
 * The default variant is simply the language's own code.
 *
 * @return Mixed: result of getCode() on the wrapped language object
 */
function getDefaultVariant() {
	$code = $this->mLang->getCode();
	return $code;
}
/**
 * The fake converter never reports a variant from the URL.
 *
 * @return String: always the empty string
 */
function getURLVariant() {
	return '';
}
/**
 * The fake converter has no conversion-rule title.
 *
 * @return Boolean: always false
 */
function getConvRuleTitle() {
	return false;
}
/**
 * Deliberate no-op: neither by-reference argument is modified and nothing
 * is returned.
 *
 * @param $l Mixed (by reference, left untouched)
 * @param $n Mixed (by reference, left untouched)
 * @param $ignoreOtherCond Boolean: unused
 */
function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
	// Intentionally empty.
}
/**
 * The fake converter contributes no extra hash options.
 *
 * @return String: always the empty string
 */
function getExtraHashOptions() {
	return '';
}
/**
 * The fake converter never has a parsed title.
 *
 * @return String: always the empty string
 */
function getParsedTitle() {
	return '';
}
/**
 * Return the text as-is; there is no conversion to protect it from.
 *
 * @param $text Mixed
 * @param $noParse Boolean: unused
 * @return Mixed: $text, unchanged
 */
function markNoConversion( $text, $noParse = false ) {
	return $text;
}
/**
 * Return the category key untouched.
 *
 * @param $key Mixed
 * @return Mixed: $key, unchanged
 */
function convertCategoryKey( $key ) {
	return $key;
}
/**
 * Alias of autoConvertToAllVariants().
 *
 * @param $text String
 * @return Array: whatever autoConvertToAllVariants() returns for $text
 */
function convertLinkToAllVariants( $text ) {
	$variants = $this->autoConvertToAllVariants( $text );
	return $variants;
}
/**
 * Return the text untouched.
 *
 * @param $text Mixed
 * @return Mixed: $text, unchanged
 */
function armourMath( $text ) {
	return $text;
}
60 * Internationalisation code
64 var $mConverter, $mVariants, $mCode, $mLoaded = false;
65 var $mMagicExtensions = array(), $mMagicHookDone = false;
67 var $mNamespaceIds, $namespaceNames, $namespaceAliases;
68 var $dateFormatStrings = array();
69 var $mExtendedSpecialPageAliases;
72 * ReplacementArray object caches
74 var $transformData = array();
77 * @var LocalisationCache
79 static public $dataCache;
81 static public $mLangObjCache = array();
83 static public $mWeekdayMsgs = array(
84 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
88 static public $mWeekdayAbbrevMsgs = array(
89 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
92 static public $mMonthMsgs = array(
93 'january', 'february', 'march', 'april', 'may_long', 'june',
94 'july', 'august', 'september', 'october', 'november',
97 static public $mMonthGenMsgs = array(
98 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
99 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
102 static public $mMonthAbbrevMsgs = array(
103 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
104 'sep', 'oct', 'nov', 'dec'
107 static public $mIranianCalendarMonthMsgs = array(
108 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
109 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
110 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
111 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
114 static public $mHebrewCalendarMonthMsgs = array(
115 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
116 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
117 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
118 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
119 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
122 static public $mHebrewCalendarMonthGenMsgs = array(
123 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
124 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
125 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
126 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
127 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
130 static public $mHijriCalendarMonthMsgs = array(
131 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
132 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
133 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
134 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
/**
 * Get a cached language object for a given language code.
 *
 * Objects are memoised in self::$mLangObjCache. When a new object is needed
 * and the cache already holds more than ten entries, the cache is emptied
 * before the new object is stored.
 *
 * @param $code String
 * @return Mixed: the object produced by self::newFromCode() for $code
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	if ( count( self::$mLangObjCache ) > 10 ) {
		// Don't keep a billion objects around, that's stupid.
		self::$mLangObjCache = array();
	}

	self::$mLangObjCache[$code] = self::newFromCode( $code );
	return self::$mLangObjCache[$code];
}
154 * Create a language object for a given language code
155 * @param $code String
158 protected static function newFromCode( $code ) {
160 static $recursionLevel = 0;
162 // Protect against path traversal below
163 if ( !Language
::isValidCode( $code )
164 ||
strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
166 throw new MWException( "Invalid language code \"$code\"" );
169 if ( !Language
::isValidBuiltInCode( $code ) ) {
170 // It's not possible to customise this code with class files, so
171 // just return a Language object. This is to support uselang= hacks.
172 $lang = new Language
;
173 $lang->setCode( $code );
177 if ( $code == 'en' ) {
180 $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
181 if ( !defined( 'MW_COMPILED' ) ) {
182 // Preload base classes to work around APC/PHP5 bug
183 if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
184 include_once( "$IP/languages/classes/$class.deps.php" );
186 if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
187 include_once( "$IP/languages/classes/$class.php" );
192 if ( $recursionLevel > 5 ) {
193 throw new MWException( "Language fallback loop detected when creating class $class\n" );
196 if ( !MWInit
::classExists( $class ) ) {
197 $fallback = Language
::getFallbackFor( $code );
199 $lang = Language
::newFromCode( $fallback );
201 $lang->setCode( $code );
/**
 * Returns true if a language code string is of a valid form, whether or
 * not it exists. This includes codes which are used solely for
 * customisation via the MediaWiki namespace.
 *
 * @param $code String
 * @return Boolean
 */
public static function isValidCode( $code ) {
	// Any colon, slash, backslash or NUL byte disqualifies the code outright
	if ( strcspn( $code, ":/\\\000" ) !== strlen( $code ) ) {
		return false;
	}
	// Otherwise it is valid unless the title invalid-character regex matches
	return !preg_match( Title::getTitleInvalidRegex(), $code );
}
/**
 * Returns true if a language code is of a valid form for the purposes of
 * internal customisation of MediaWiki, via Messages*.php.
 *
 * Only ASCII letters, digits and hyphens are accepted (the empty string
 * still passes, as before). The D modifier makes "$" match only at the very
 * end of the subject, so a code with a trailing newline is rejected. The
 * result is cast so callers get a real boolean rather than preg_match()'s
 * 1/0/false.
 *
 * @param $code String
 * @return Boolean
 */
public static function isValidBuiltInCode( $code ) {
	return (bool)preg_match( '/^[a-z0-9-]*$/iD', $code );
}
/**
 * Get the LocalisationCache instance, creating it on first use.
 *
 * The class to instantiate is read from $wgLocalisationCacheConf['class'],
 * and the whole configuration array is passed to its constructor.
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	if ( self::$dataCache !== null ) {
		return self::$dataCache;
	}

	global $wgLocalisationCacheConf;
	$cacheClass = $wgLocalisationCacheConf['class'];
	self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );

	return self::$dataCache;
}
241 function __construct() {
242 $this->mConverter
= new FakeConverter( $this );
243 // Set the code to the name of the descendant
244 if ( get_class( $this ) == 'Language' ) {
247 $this->mCode
= str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
249 self
::getLocalisationCache();
/**
 * Reduce memory usage by unsetting every property that is iterable on
 * this object when it is destroyed.
 */
function __destruct() {
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 * The base implementation does nothing.
 */
function initContLang() {
	// Intentionally empty.
}
267 function getFallbackLanguageCode() {
268 if ( $this->mCode
=== 'en' ) {
271 return self
::$dataCache->getItem( $this->mCode
, 'fallback' );
/**
 * Exports $wgBookstoreListEn: returns the 'bookstoreList' item of the
 * localisation cache for this language's code.
 *
 * @return Mixed: the cached 'bookstoreList' item
 */
function getBookstoreList() {
	$list = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $list;
}
/**
 * Get the namespace names for this language, indexed by namespace ID.
 *
 * The list is built once and kept in $this->namespaceNames:
 *  - localised names come from the localisation cache;
 *  - $wgExtraNamespaces takes precedence over them, and canonical names
 *    fill in whatever is still missing (array union, first key wins);
 *  - the project namespace is named by $wgMetaNamespace, and its talk
 *    namespace by $wgMetaNamespaceTalk when that is set, otherwise by the
 *    localised talk name passed through fixVariableInNamespace();
 *  - IDs that are not canonical namespaces are dropped;
 *  - the result is sorted by namespace ID.
 *
 * @return Array
 */
function getNamespaces() {
	if ( !is_null( $this->namespaceNames ) ) {
		return $this->namespaceNames;
	}

	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$validNamespaces = MWNamespace::getCanonicalNamespaces();

	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	if ( $wgMetaNamespaceTalk ) {
		$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
	} else {
		$localTalk = $this->namespaceNames[NS_PROJECT_TALK];
		$this->namespaceNames[NS_PROJECT_TALK] = $this->fixVariableInNamespace( $localTalk );
	}

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( $this->namespaceNames as $id => $unusedName ) {
		if ( !isset( $validNamespaces[$id] ) ) {
			unset( $this->namespaceNames[$id] );
		}
	}

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	return $this->namespaceNames;
}
/**
 * A convenience function that returns the same thing as
 * getNamespaces() except with the array values changed to ' '
 * where it found '_', useful for producing output to be displayed
 * e.g. in <select> forms.
 *
 * @return Array
 */
function getFormattedNamespaces() {
	$formatted = $this->getNamespaces();
	foreach ( array_keys( $formatted ) as $id ) {
		$formatted[$id] = strtr( $formatted[$id], '_', ' ' );
	}
	return $formatted;
}
/**
 * Get a namespace value by key
 * <code>
 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 * echo $mw_ns; // prints 'MediaWiki'
 * </code>
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}
	return false;
}
/**
 * A convenience function that returns the same thing as
 * getNsText() except with '_' changed to ' ', useful for
 * producing output.
 *
 * @param $index Int: namespace index, as for getNsText()
 * @return String
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
/**
 * Returns gender-dependent namespace alias if available.
 * Falls back to getNsText() when no alias is defined for the given
 * namespace and gender.
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key (male, female... )
 * @return Mixed: the alias, or the result of getNsText( $index )
 */
function getGenderNsText( $index, $gender ) {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}
	return $this->getNsText( $index );
}
/**
 * Whether this language distinguishes genders, for example in namespace
 * names. True when at least one 'namespaceGenderAliases' entry is defined
 * for this language's code.
 *
 * @return Boolean
 */
function needsGenderDistinction() {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	$aliasCount = count( $genderAliases );
	return $aliasCount > 0;
}
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$needle = $this->lc( $text );
	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}
	return false;
}
398 function getNamespaceAliases() {
399 if ( is_null( $this->namespaceAliases
) ) {
400 $aliases = self
::$dataCache->getItem( $this->mCode
, 'namespaceAliases' );
404 foreach ( $aliases as $name => $index ) {
405 if ( $index === NS_PROJECT_TALK
) {
406 unset( $aliases[$name] );
407 $name = $this->fixVariableInNamespace( $name );
408 $aliases[$name] = $index;
413 $genders = self
::$dataCache->getItem( $this->mCode
, 'namespaceGenderAliases' );
414 foreach ( $genders as $index => $forms ) {
415 foreach ( $forms as $alias ) {
416 $aliases[$alias] = $index;
420 $this->namespaceAliases
= $aliases;
422 return $this->namespaceAliases
;
/**
 * Get a map from lower-cased namespace name or alias to namespace ID.
 *
 * Built once and kept in $this->mNamespaceIds. Entries are written in this
 * order, so later sources overwrite earlier ones on a name clash:
 * getNamespaces(), then getNamespaceAliases(), then $wgNamespaceAliases.
 *
 * @return Array
 */
function getNamespaceIds() {
	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	global $wgNamespaceAliases;

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	foreach ( $this->getNamespaces() as $id => $localName ) {
		$this->mNamespaceIds[$this->lc( $localName )] = $id;
	}

	foreach ( $this->getNamespaceAliases() as $alias => $id ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $id;
	}

	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $id ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $id;
		}
	}

	return $this->mNamespaceIds;
}
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$lctext = $this->lc( $text );

	// Canonical names are checked first and win over localised ones
	$canonical = MWNamespace::getCanonicalIndex( $lctext );
	if ( $canonical !== null ) {
		return $canonical;
	}

	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$lctext] ) ) {
		return $idsByName[$lctext];
	}
	return false;
}
/**
 * Short names for language variants used for language conversion links.
 * Looks up the message "variantname-<code>".
 *
 * @param $code String
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getVariantname( $code ) {
	$msgKey = "variantname-$code";
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Build "<Special namespace name>:<page name>" for a special page, using
 * the first alias from getSpecialPageAliases() when one is defined for
 * $name and $name itself otherwise.
 *
 * @param $name String: special page name
 * @return String
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
/**
 * Get the quickbar setting labels, in fixed order: none, fixed left,
 * fixed right, floating left, floating right.
 *
 * @return Array: list of getMessage() results
 */
function getQuickbarSettings() {
	$msgKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright'
	);

	$settings = array();
	foreach ( $msgKeys as $msgKey ) {
		$settings[] = $this->getMessage( $msgKey );
	}
	return $settings;
}
/**
 * Get the 'datePreferences' localisation item for this language's code.
 *
 * @return Mixed: the cached item
 */
function getDatePreferences() {
	$prefs = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $prefs;
}
/**
 * Get the 'dateFormats' localisation item for this language's code.
 *
 * @return Mixed: the cached item
 */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
/**
 * Get the default date format for this language.
 *
 * The cached 'defaultDateFormat' item is returned as-is, except for the
 * special value 'dmy or mdy', which is resolved through $wgAmericanDates:
 * 'mdy' when it is truthy, 'dmy' otherwise.
 *
 * @return Mixed
 */
function getDefaultDateFormat() {
	$format = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $format !== 'dmy or mdy' ) {
		return $format;
	}

	global $wgAmericanDates;
	if ( $wgAmericanDates ) {
		return 'mdy';
	}
	return 'dmy';
}
/**
 * Get the 'datePreferenceMigrationMap' localisation item for this
 * language's code.
 *
 * @return Mixed: the cached item
 */
function getDatePreferenceMigrationMap() {
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
/**
 * Get one entry of the 'imageFiles' localisation item for this language's
 * code.
 *
 * @param $image Mixed: sub-item key within 'imageFiles'
 * @return Mixed: the cached sub-item
 */
function getImageFile( $image ) {
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
/**
 * Get the 'defaultUserOptionOverrides' localisation item for this
 * language's code.
 *
 * @return Mixed: the cached item
 */
function getDefaultUserOptionOverrides() {
	$overrides = self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
	return $overrides;
}
/**
 * Get the 'extraUserToggles' localisation item for this language's code.
 *
 * @return Mixed: the cached item
 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return $toggles;
}
/**
 * Get the message for a user toggle, i.e. the message "tog-<name>".
 *
 * @param $tog String: toggle name
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getUserToggle( $tog ) {
	$msgKey = "tog-$tog";
	return $this->getMessageFromDB( $msgKey );
}
532 * Get language names, indexed by code.
533 * If $customisedOnly is true, only returns codes with a messages file
535 public static function getLanguageNames( $customisedOnly = false ) {
536 global $wgExtraLanguageNames;
537 static $coreLanguageNames;
539 if ( $coreLanguageNames === null ) {
540 include( MWInit
::compiledPath( 'languages/Names.php' ) );
543 $allNames = $wgExtraLanguageNames +
$coreLanguageNames;
544 if ( !$customisedOnly ) {
550 $dir = opendir( "$IP/languages/messages" );
551 while ( false !== ( $file = readdir( $dir ) ) ) {
552 $code = self
::getCodeFromFileName( $file, 'Messages' );
553 if ( $code && isset( $allNames[$code] ) ) {
554 $names[$code] = $allNames[$code];
562 * Get translated language names. This is done on best effort and
563 * by default this is exactly the same as Language::getLanguageNames.
564 * The CLDR extension provides translated names.
565 * @param $code String Language code.
566 * @return Array language code => language name
569 public static function getTranslatedLanguageNames( $code ) {
571 wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );
573 foreach ( self
::getLanguageNames() as $code => $name ) {
574 if ( !isset( $names[$code] ) ) $names[$code] = $name;
/**
 * Get a message from the MediaWiki namespace.
 *
 * Delegates to wfMsgExt() with the 'parsemag' option and this object as
 * the 'language' option.
 *
 * @param $msg String: message name
 * @return Mixed: whatever wfMsgExt() returns
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
590 function getLanguageName( $code ) {
591 $names = self
::getLanguageNames();
592 if ( !array_key_exists( $code, $names ) ) {
595 return $names[$code];
/**
 * Get the name of a month.
 *
 * @param $key Mixed: 1-based month number; entry $key - 1 of
 *   self::$mMonthMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getMonthName( $key ) {
	$msgKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get all twelve month names.
 *
 * @return Array: index 0 is an empty string, indexes 1..12 hold the
 *   results of getMonthName() for the corresponding month
 */
function getMonthNamesArray() {
	$monthNames = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$monthNames[] = $this->getMonthName( $month );
	}
	return $monthNames;
}
/**
 * Get the genitive name of a month.
 *
 * @param $key Mixed: 1-based month number; entry $key - 1 of
 *   self::$mMonthGenMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getMonthNameGen( $key ) {
	$msgKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the abbreviated name of a month.
 *
 * @param $key Mixed: 1-based month number; entry $key - 1 of
 *   self::$mMonthAbbrevMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getMonthAbbreviation( $key ) {
	$msgKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get all twelve month abbreviations.
 *
 * @return Array: index 0 is an empty string, indexes 1..12 hold the
 *   results of getMonthAbbreviation() for the corresponding month
 */
function getMonthAbbreviationsArray() {
	$abbreviations = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$abbreviations[] = $this->getMonthAbbreviation( $month );
	}
	return $abbreviations;
}
/**
 * Get the name of a weekday.
 *
 * @param $key Mixed: 1-based weekday number, 1 being Sunday; entry
 *   $key - 1 of self::$mWeekdayMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getWeekdayName( $key ) {
	$msgKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the abbreviated name of a weekday.
 *
 * @param $key Mixed: 1-based weekday number, 1 being Sunday; entry
 *   $key - 1 of self::$mWeekdayAbbrevMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getWeekdayAbbreviation( $key ) {
	$msgKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the name of a month in the Iranian calendar.
 *
 * @param $key Mixed: 1-based month number; entry $key - 1 of
 *   self::$mIranianCalendarMonthMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getIranianCalendarMonthName( $key ) {
	$msgKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the name of a month in the Hebrew calendar.
 *
 * @param $key Mixed: 1-based month number; entry $key - 1 of
 *   self::$mHebrewCalendarMonthMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getHebrewCalendarMonthName( $key ) {
	$msgKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the genitive name of a month in the Hebrew calendar.
 *
 * @param $key Mixed: 1-based month number; entry $key - 1 of
 *   self::$mHebrewCalendarMonthGenMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$msgKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the name of a month in the Hijri calendar.
 *
 * @param $key Mixed: 1-based month number; entry $key - 1 of
 *   self::$mHijriCalendarMonthMsgs is the message looked up
 * @return Mixed: result of getMessageFromDB() for that message
 */
function getHijriCalendarMonthName( $key ) {
	$msgKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
651 * Used by date() and time() to adjust the time output.
653 * @param $ts Int the time in date('YmdHis') format
654 * @param $tz Mixed: adjust the time by this amount (default false, mean we
655 * get user timecorrection setting)
658 function userAdjust( $ts, $tz = false ) {
659 global $wgUser, $wgLocalTZoffset;
661 if ( $tz === false ) {
662 $tz = $wgUser->getOption( 'timecorrection' );
665 $data = explode( '|', $tz, 3 );
667 if ( $data[0] == 'ZoneInfo' ) {
668 if ( function_exists( 'timezone_open' ) && @timezone_open
( $data[2] ) !== false ) {
669 $date = date_create( $ts, timezone_open( 'UTC' ) );
670 date_timezone_set( $date, timezone_open( $data[2] ) );
671 $date = date_format( $date, 'YmdHis' );
674 # Unrecognized timezone, default to 'Offset' with the stored offset.
679 if ( $data[0] == 'System' ||
$tz == '' ) {
680 # Global offset in minutes.
681 if ( isset( $wgLocalTZoffset ) ) {
682 $minDiff = $wgLocalTZoffset;
684 } else if ( $data[0] == 'Offset' ) {
685 $minDiff = intval( $data[1] );
687 $data = explode( ':', $tz );
688 if ( count( $data ) == 2 ) {
689 $data[0] = intval( $data[0] );
690 $data[1] = intval( $data[1] );
691 $minDiff = abs( $data[0] ) * 60 +
$data[1];
692 if ( $data[0] < 0 ) {
693 $minDiff = -$minDiff;
696 $minDiff = intval( $data[0] ) * 60;
700 # No difference ? Return time unchanged
701 if ( 0 == $minDiff ) {
705 wfSuppressWarnings(); // E_STRICT system time bitching
706 # Generate an adjusted date; take advantage of the fact that mktime
707 # will normalize out-of-range values so we don't have to split $minDiff
708 # into hours and minutes.
710 (int)substr( $ts, 8, 2 ) ), # Hours
711 (int)substr( $ts, 10, 2 ) +
$minDiff, # Minutes
712 (int)substr( $ts, 12, 2 ), # Seconds
713 (int)substr( $ts, 4, 2 ), # Month
714 (int)substr( $ts, 6, 2 ), # Day
715 (int)substr( $ts, 0, 4 ) ); # Year
717 $date = date( 'YmdHis', $t );
724 * This is a workalike of PHP's date() function, but with better
725 * internationalisation, a reduced set of format characters, and a better
728 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
729 * PHP manual for definitions. There are a number of extensions, which
732 * xn Do not translate digits of the next numeric format character
733 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
734 * xr Use roman numerals for the next numeric format character
735 * xh Use hebrew numerals for the next numeric format character
737 * xg Genitive month name
739 * xij j (day number) in Iranian calendar
740 * xiF F (month name) in Iranian calendar
741 * xin n (month number) in Iranian calendar
742 * xiY Y (full year) in Iranian calendar
744 * xjj j (day number) in Hebrew calendar
745 * xjF F (month name) in Hebrew calendar
746 * xjt t (days in month) in Hebrew calendar
747 * xjx xg (genitive month name) in Hebrew calendar
748 * xjn n (month number) in Hebrew calendar
749 * xjY Y (full year) in Hebrew calendar
751 * xmj j (day number) in Hijri calendar
752 * xmF F (month name) in Hijri calendar
753 * xmn n (month number) in Hijri calendar
754 * xmY Y (full year) in Hijri calendar
756 * xkY Y (full year) in Thai solar calendar. Months and days are
757 * identical to the Gregorian calendar
758 * xoY Y (full year) in Minguo calendar or Juche year.
759 * Months and days are identical to the
761 * xtY Y (full year) in Japanese nengo. Months and days are
762 * identical to the Gregorian calendar
764 * Characters enclosed in double quotes will be considered literal (with
765 * the quotes themselves removed). Unmatched quotes will be considered
766 * literal quotes. Example:
768 * "The month is" F => The month is January
771 * Backslash escaping is also supported.
773 * Input timestamp is assumed to be pre-normalized to the desired local
776 * @param $format String
777 * @param $ts String: 14-character timestamp
780 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
782 function sprintfDate( $format, $ts ) {
795 for ( $p = 0; $p < strlen( $format ); $p++
) {
798 if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
799 $code .= $format[++
$p];
802 if ( ( $code === 'xi' ||
$code == 'xj' ||
$code == 'xk' ||
$code == 'xm' ||
$code == 'xo' ||
$code == 'xt' ) && $p < strlen( $format ) - 1 ) {
803 $code .= $format[++
$p];
814 $rawToggle = !$rawToggle;
823 $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
826 if ( !$hebrew ) $hebrew = self
::tsToHebrew( $ts );
827 $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
830 $num = substr( $ts, 6, 2 );
833 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
834 $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) +
1 );
837 $num = intval( substr( $ts, 6, 2 ) );
841 $iranian = self
::tsToIranian( $ts );
847 $hijri = self
::tsToHijri( $ts );
853 $hebrew = self
::tsToHebrew( $ts );
859 $unix = wfTimestamp( TS_UNIX
, $ts );
861 $s .= $this->getWeekdayName( gmdate( 'w', $unix ) +
1 );
865 $unix = wfTimestamp( TS_UNIX
, $ts );
867 $w = gmdate( 'w', $unix );
872 $unix = wfTimestamp( TS_UNIX
, $ts );
874 $num = gmdate( 'w', $unix );
878 $unix = wfTimestamp( TS_UNIX
, $ts );
880 $num = gmdate( 'z', $unix );
884 $unix = wfTimestamp( TS_UNIX
, $ts );
886 $num = gmdate( 'W', $unix );
889 $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
893 $iranian = self
::tsToIranian( $ts );
895 $s .= $this->getIranianCalendarMonthName( $iranian[1] );
899 $hijri = self
::tsToHijri( $ts );
901 $s .= $this->getHijriCalendarMonthName( $hijri[1] );
905 $hebrew = self
::tsToHebrew( $ts );
907 $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
910 $num = substr( $ts, 4, 2 );
913 $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
916 $num = intval( substr( $ts, 4, 2 ) );
920 $iranian = self
::tsToIranian( $ts );
926 $hijri = self
::tsToHijri ( $ts );
932 $hebrew = self
::tsToHebrew( $ts );
938 $unix = wfTimestamp( TS_UNIX
, $ts );
940 $num = gmdate( 't', $unix );
944 $hebrew = self
::tsToHebrew( $ts );
950 $unix = wfTimestamp( TS_UNIX
, $ts );
952 $num = gmdate( 'L', $unix );
956 $unix = wfTimestamp( TS_UNIX
, $ts );
958 $num = date( 'o', $unix );
961 $num = substr( $ts, 0, 4 );
965 $iranian = self
::tsToIranian( $ts );
971 $hijri = self
::tsToHijri( $ts );
977 $hebrew = self
::tsToHebrew( $ts );
983 $thai = self
::tsToYear( $ts, 'thai' );
989 $minguo = self
::tsToYear( $ts, 'minguo' );
995 $tenno = self
::tsToYear( $ts, 'tenno' );
1000 $num = substr( $ts, 2, 2 );
1003 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'am' : 'pm';
1006 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'AM' : 'PM';
1009 $h = substr( $ts, 8, 2 );
1010 $num = $h %
12 ?
$h %
12 : 12;
1013 $num = intval( substr( $ts, 8, 2 ) );
1016 $h = substr( $ts, 8, 2 );
1017 $num = sprintf( '%02d', $h %
12 ?
$h %
12 : 12 );
1020 $num = substr( $ts, 8, 2 );
1023 $num = substr( $ts, 10, 2 );
1026 $num = substr( $ts, 12, 2 );
1030 $unix = wfTimestamp( TS_UNIX
, $ts );
1032 $s .= gmdate( 'c', $unix );
1036 $unix = wfTimestamp( TS_UNIX
, $ts );
1038 $s .= gmdate( 'r', $unix );
1042 $unix = wfTimestamp( TS_UNIX
, $ts );
1047 # Backslash escaping
1048 if ( $p < strlen( $format ) - 1 ) {
1049 $s .= $format[++
$p];
1056 if ( $p < strlen( $format ) - 1 ) {
1057 $endQuote = strpos( $format, '"', $p +
1 );
1058 if ( $endQuote === false ) {
1059 # No terminating quote, assume literal "
1062 $s .= substr( $format, $p +
1, $endQuote - $p - 1 );
1066 # Quote at end of string, assume literal "
1073 if ( $num !== false ) {
1074 if ( $rawToggle ||
$raw ) {
1077 } elseif ( $roman ) {
1078 $s .= self
::romanNumeral( $num );
1080 } elseif ( $hebrewNum ) {
1081 $s .= self
::hebrewNumeral( $num );
1084 $s .= $this->formatNum( $num, true );
1091 private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1092 private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1094 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1095 * Gregorian dates to Iranian dates. Originally written in C, it
1096 * is released under the terms of GNU Lesser General Public
1097 * License. Conversion to PHP was performed by Niklas Laxström.
1099 * Link: http://www.farsiweb.info/jalali/jalali.c
1101 private static function tsToIranian( $ts ) {
1102 $gy = substr( $ts, 0, 4 ) -1600;
1103 $gm = substr( $ts, 4, 2 ) -1;
1104 $gd = (int)substr( $ts, 6, 2 ) -1;
1106 # Days passed from the beginning (including leap years)
1108 +
floor( ( $gy +
3 ) / 4 )
1109 - floor( ( $gy +
99 ) / 100 )
1110 +
floor( ( $gy +
399 ) / 400 );
1113 // Add days of the past months of this year
1114 for ( $i = 0; $i < $gm; $i++
) {
1115 $gDayNo +
= self
::$GREG_DAYS[$i];
1119 if ( $gm > 1 && ( ( $gy %
4 === 0 && $gy %
100 !== 0 ||
( $gy %
400 == 0 ) ) ) ) {
1123 // Days passed in current month
1126 $jDayNo = $gDayNo - 79;
1128 $jNp = floor( $jDayNo / 12053 );
1131 $jy = 979 +
33 * $jNp +
4 * floor( $jDayNo / 1461 );
1134 if ( $jDayNo >= 366 ) {
1135 $jy +
= floor( ( $jDayNo - 1 ) / 365 );
1136 $jDayNo = floor( ( $jDayNo - 1 ) %
365 );
1139 for ( $i = 0; $i < 11 && $jDayNo >= self
::$IRANIAN_DAYS[$i]; $i++
) {
1140 $jDayNo -= self
::$IRANIAN_DAYS[$i];
1146 return array( $jy, $jm, $jd );
1150 * Converting Gregorian dates to Hijri dates.
1152 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1154 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1156 private static function tsToHijri( $ts ) {
1157 $year = substr( $ts, 0, 4 );
1158 $month = substr( $ts, 4, 2 );
1159 $day = substr( $ts, 6, 2 );
1167 ( $zy > 1582 ) ||
( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1168 ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1171 $zjd = (int)( ( 1461 * ( $zy +
4800 +
(int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1172 (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1173 (int)( ( 3 * (int)( ( ( $zy +
4900 +
(int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1176 $zjd = 367 * $zy - (int)( ( 7 * ( $zy +
5001 +
(int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1177 (int)( ( 275 * $zm ) / 9 ) +
$zd +
1729777;
1180 $zl = $zjd -1948440 +
10632;
1181 $zn = (int)( ( $zl - 1 ) / 10631 );
1182 $zl = $zl - 10631 * $zn +
354;
1183 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) +
( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1184 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) +
29;
1185 $zm = (int)( ( 24 * $zl ) / 709 );
1186 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1187 $zy = 30 * $zn +
$zj - 30;
1189 return array( $zy, $zm, $zd );
1193 * Converting Gregorian dates to Hebrew dates.
1195 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1196 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1197 * to translate the relevant functions into PHP and release them under
1200 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1201 * and Adar II is 14. In a non-leap year, Adar is 6.
1203 private static function tsToHebrew( $ts ) {
1205 $year = substr( $ts, 0, 4 );
1206 $month = substr( $ts, 4, 2 );
1207 $day = substr( $ts, 6, 2 );
1209 # Calculate Hebrew year
1210 $hebrewYear = $year +
3760;
1212 # Month number when September = 1, August = 12
1214 if ( $month > 12 ) {
1221 # Calculate day of year from 1 September
1223 for ( $i = 1; $i < $month; $i++
) {
1227 # Check if the year is leap
1228 if ( $year %
400 == 0 ||
( $year %
4 == 0 && $year %
100 > 0 ) ) {
1231 } elseif ( $i == 8 ||
$i == 10 ||
$i == 1 ||
$i == 3 ) {
1238 # Calculate the start of the Hebrew year
1239 $start = self
::hebrewYearStart( $hebrewYear );
1241 # Calculate next year's start
1242 if ( $dayOfYear <= $start ) {
1243 # Day is before the start of the year - it is the previous year
1245 $nextStart = $start;
1249 # Add days since previous year's 1 September
1251 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1255 # Start of the new (previous) year
1256 $start = self
::hebrewYearStart( $hebrewYear );
1259 $nextStart = self
::hebrewYearStart( $hebrewYear +
1 );
1262 # Calculate Hebrew day of year
1263 $hebrewDayOfYear = $dayOfYear - $start;
1265 # Difference between year's days
1266 $diff = $nextStart - $start;
1267 # Add 12 (or 13 for leap years) days to ignore the difference between
1268 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1269 # difference is only about the year type
1270 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1276 # Check the year pattern, and is leap year
1277 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1278 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1279 # and non-leap years
1280 $yearPattern = $diff %
30;
1281 # Check if leap year
1282 $isLeap = $diff >= 30;
1284 # Calculate day in the month from number of day in the Hebrew year
1285 # Don't check Adar - if the day is not in Adar, we will stop before;
1286 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1287 $hebrewDay = $hebrewDayOfYear;
1290 while ( $hebrewMonth <= 12 ) {
1291 # Calculate days in this month
1292 if ( $isLeap && $hebrewMonth == 6 ) {
1293 # Adar in a leap year
1295 # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1297 if ( $hebrewDay <= $days ) {
1301 # Subtract the days of Adar I
1302 $hebrewDay -= $days;
1305 if ( $hebrewDay <= $days ) {
1311 } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1312 # Cheshvan in a complete year (otherwise as the rule below)
1314 } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1315 # Kislev in an incomplete year (otherwise as the rule below)
1318 # Odd months have 30 days, even have 29
1319 $days = 30 - ( $hebrewMonth - 1 ) %
2;
1321 if ( $hebrewDay <= $days ) {
1322 # In the current month
1325 # Subtract the days of the current month
1326 $hebrewDay -= $days;
1327 # Try in the next month
1332 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1336 * This calculates the Hebrew year start, as days since 1 September.
1337 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1338 * Used for Hebrew date.
1340 private static function hebrewYearStart( $year ) {
1341 $a = intval( ( 12 * ( $year - 1 ) +
17 ) %
19 );
1342 $b = intval( ( $year - 1 ) %
4 );
1343 $m = 32.044093161144 +
1.5542417966212 * $a +
$b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1347 $Mar = intval( $m );
1353 $c = intval( ( $Mar +
3 * ( $year - 1 ) +
5 * $b +
5 ) %
7 );
1354 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1356 } else if ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1358 } else if ( $c == 2 ||
$c == 4 ||
$c == 6 ) {
1362 $Mar +
= intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1367 * Algorithm to convert Gregorian dates to Thai solar dates,
1368 * Minguo dates, Juche dates or Japanese nengō dates.
1370 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1371 * http://en.wikipedia.org/wiki/Minguo_calendar
1372 * http://en.wikipedia.org/wiki/Japanese_era_name
1374 * @param $ts String: 14-character timestamp
1375 * @param $cName String: calendar name
1376 * @return Array: converted year, month, day
1378 private static function tsToYear( $ts, $cName ) {
1379 $gy = substr( $ts, 0, 4 );
1380 $gm = substr( $ts, 4, 2 );
1381 $gd = substr( $ts, 6, 2 );
1383 if ( !strcmp( $cName, 'thai' ) ) {
1385 # Add 543 years to the Gregorian calendar
1386 # Months and days are identical
1387 $gy_offset = $gy +
543;
1388 } else if ( ( !strcmp( $cName, 'minguo' ) ) ||
!strcmp( $cName, 'juche' ) ) {
1390 # Deduct 1911 years from the Gregorian calendar
1391 # Months and days are identical
1392 $gy_offset = $gy - 1911;
1393 } else if ( !strcmp( $cName, 'tenno' ) ) {
1394 # Nengō dates up to Meiji period
1395 # Deduct years from the Gregorian calendar
1396 # depending on the nengo periods
1397 # Months and days are identical
1398 if ( ( $gy < 1912 ) ||
( ( $gy == 1912 ) && ( $gm < 7 ) ) ||
( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1400 $gy_gannen = $gy - 1868 +
1;
1401 $gy_offset = $gy_gannen;
1402 if ( $gy_gannen == 1 ) {
1405 $gy_offset = '明治' . $gy_offset;
1407 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1408 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1409 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1410 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1411 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1415 $gy_gannen = $gy - 1912 +
1;
1416 $gy_offset = $gy_gannen;
1417 if ( $gy_gannen == 1 ) {
1420 $gy_offset = '大正' . $gy_offset;
1422 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1423 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1424 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1428 $gy_gannen = $gy - 1926 +
1;
1429 $gy_offset = $gy_gannen;
1430 if ( $gy_gannen == 1 ) {
1433 $gy_offset = '昭和' . $gy_offset;
1436 $gy_gannen = $gy - 1989 +
1;
1437 $gy_offset = $gy_gannen;
1438 if ( $gy_gannen == 1 ) {
1441 $gy_offset = '平成' . $gy_offset;
1447 return array( $gy_offset, $gm, $gd );
1451 * Roman number formatting up to 3000
1453 static function romanNumeral( $num ) {
1454 static $table = array(
1455 array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
1456 array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
1457 array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
1458 array( '', 'M', 'MM', 'MMM' )
1461 $num = intval( $num );
1462 if ( $num > 3000 ||
$num <= 0 ) {
1467 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1468 if ( $num >= $pow10 ) {
1469 $s .= $table[$i][floor( $num / $pow10 )];
1471 $num = $num %
$pow10;
1477 * Hebrew Gematria number formatting up to 9999
1479 static function hebrewNumeral( $num ) {
1480 static $table = array(
1481 array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
1482 array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
1483 array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
1484 array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
1487 $num = intval( $num );
1488 if ( $num > 9999 ||
$num <= 0 ) {
1493 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1494 if ( $num >= $pow10 ) {
1495 if ( $num == 15 ||
$num == 16 ) {
1496 $s .= $table[0][9] . $table[0][$num - 9];
1499 $s .= $table[$i][intval( ( $num / $pow10 ) )];
1500 if ( $pow10 == 1000 ) {
1505 $num = $num %
$pow10;
1507 if ( strlen( $s ) == 2 ) {
1510 $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
1511 $str .= substr( $s, strlen( $s ) - 2, 2 );
1513 $start = substr( $str, 0, strlen( $str ) - 2 );
1514 $end = substr( $str, strlen( $str ) - 2 );
1517 $str = $start . 'ך';
1520 $str = $start . 'ם';
1523 $str = $start . 'ן';
1526 $str = $start . 'ף';
1529 $str = $start . 'ץ';
1536 * This is meant to be used by time(), date(), and timeanddate() to get
1537 * the date preference they're supposed to use, it should be used in
1541 * function timeanddate([...], $format = true) {
1542 * $datePreference = $this->dateFormat($format);
1547 * @param $usePrefs Mixed: if true, the user's preference is used
1548 * if false, the site/language default is used
1549 * if int/string, assumed to be a format.
1552 function dateFormat( $usePrefs = true ) {
1555 if ( is_bool( $usePrefs ) ) {
1557 $datePreference = $wgUser->getDatePreference();
1559 $datePreference = (string)User
::getDefaultOption( 'date' );
1562 $datePreference = (string)$usePrefs;
1566 if ( $datePreference == '' ) {
1570 return $datePreference;
/**
 * Look up, and remember in $this->dateFormatStrings, the format string
 * for a given type and preference.
 *
 * The 'default' preference is resolved through getDefaultDateFormat().
 * Any other preference that has no entry in the localisation cache also
 * falls back to getDefaultDateFormat(). The result is stored under the
 * preference name that was finally used.
 *
 * @param $type String: may be date, time or both
 * @param $pref String: the format name as it appears in Messages*.php
 * @return the 'dateFormats' sub-item for "$pref $type" (null if none exists)
 */
function getDateFormatString( $type, $pref ) {
	if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
		return $this->dateFormatStrings[$type][$pref];
	}

	$wantsDefault = ( $pref == 'default' );
	if ( $wantsDefault ) {
		$pref = $this->getDefaultDateFormat();
	}
	$formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );

	if ( !$wantsDefault && is_null( $formatString ) ) {
		// Unknown preference name: retry with the language default
		$pref = $this->getDefaultDateFormat();
		$formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}

	$this->dateFormatStrings[$type][$pref] = $formatString;
	return $formatString;
}
/**
 * Format a timestamp with this language's 'date' format string.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the chosen format
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'date', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Format a timestamp with this language's 'time' format string.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the chosen format
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'time', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Format a timestamp with this language's 'both' (time and date) format string.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return the result of sprintfDate() for the chosen format
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'both', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Fetch one entry of this language's 'messages' item from the localisation cache.
 *
 * @param $key String: message key
 * @return whatever the cache holds for that key
 */
function getMessage( $key ) {
	$cache = self::$dataCache;
	return $cache->getSubitem( $this->mCode, 'messages', $key );
}
/**
 * Fetch the complete 'messages' item for this language from the localisation cache.
 *
 * @return whatever the cache holds for 'messages'
 */
function getAllMessages() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'messages' );
}
/**
 * Wrapper around PHP's iconv() that always appends //IGNORE to the target
 * encoding and silences warnings for the duration of the call.
 *
 * This is a wrapper for iconv in all languages except esperanto,
 * which does some nasty x-conversions beforehand.
 *
 * @param $in String: source encoding
 * @param $out String: target encoding
 * @param $string String: text to convert
 * @return the value returned by iconv()
 */
function iconv( $in, $out, $string ) {
	$target = $out . '//IGNORE';

	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	wfSuppressWarnings();
	$text = iconv( $in, $target, $string );
	wfRestoreWarnings();

	return $text;
}
1675 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
/**
 * preg_replace_callback() helper: upper-case the first character of
 * capture group 1 via ucfirst().
 *
 * @param $matches Array: regex match data
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
/**
 * preg_replace_callback() helper: upper-case the whole match with mb_strtoupper().
 *
 * @param $matches Array: regex match data
 */
function ucwordbreaksCallbackMB( $matches ) {
	$wholeMatch = $matches[0];
	return mb_strtoupper( $wholeMatch );
}
/**
 * preg_replace_callback() helper: translate capture group 1 with the
 * first table returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$upperTable = $caseMaps[0];
	return strtr( $matches[1], $upperTable );
}
/**
 * preg_replace_callback() helper: translate capture group 1 with the
 * second table returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$lowerTable = $caseMaps[1];
	return strtr( $matches[1], $lowerTable );
}
/**
 * preg_replace_callback() helper: upper-case the whole match with mb_strtoupper().
 *
 * @param $matches Array: regex match data
 */
function ucwordsCallbackMB( $matches ) {
	$wholeMatch = $matches[0];
	return mb_strtoupper( $wholeMatch );
}
/**
 * preg_replace_callback() helper: translate the whole match with the
 * first table returned by getCaseMaps().
 *
 * @param $matches Array: regex match data
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	$upperTable = $caseMaps[0];
	return strtr( $matches[0], $upperTable );
}
1704 * Make a string's first character uppercase
1706 function ucfirst( $str ) {
1708 if ( $o < 96 ) { // if already uppercase...
1710 } elseif ( $o < 128 ) {
1711 return ucfirst( $str ); // use PHP's ucfirst()
1713 // fall back to more complex logic in case of multibyte strings
1714 return $this->uc( $str, true );
/**
 * Convert a string to uppercase
 *
 * Uses the mbstring functions when they exist; otherwise multibyte input
 * goes through a regex and ucCallback(), and plain input through PHP's
 * byte-wise functions.
 *
 * @param $str String: text to convert
 * @param $first Bool: when true, only the first character is converted
 */
function uc( $str, $first = false ) {
	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return $this->isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
		}
		if ( $this->isMultibyte( $str ) ) {
			return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return ucfirst( $str );
	}

	// No mbstring available
	if ( !$this->isMultibyte( $str ) ) {
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}
	// Anchor the pattern at the start when only the first character is wanted
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
1746 function lcfirst( $str ) {
1749 return strval( $str );
1750 } elseif ( $o >= 128 ) {
1751 return $this->lc( $str, true );
1752 } elseif ( $o > 96 ) {
1755 $str[0] = strtolower( $str[0] );
/**
 * Convert a string to lowercase
 *
 * Uses the mbstring functions when they exist; otherwise multibyte input
 * goes through a regex and lcCallback(), and plain input through PHP's
 * byte-wise functions.
 *
 * @param $str String: text to convert
 * @param $first Bool: when true, only the first character is converted
 */
function lc( $str, $first = false ) {
	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return $this->isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
		}
		if ( $this->isMultibyte( $str ) ) {
			return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
	}

	// No mbstring available
	if ( !$this->isMultibyte( $str ) ) {
		return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
	}
	// Anchor the pattern at the start when only the first character is wanted
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
/**
 * Tell whether a string contains any byte in the range 0x80-0xff.
 *
 * @param $str String
 * @return Bool
 */
function isMultibyte( $str ) {
	$hasHighByte = preg_match( '/[\x80-\xff]/', $str );
	return (bool)$hasHighByte;
}
/**
 * Upper-case the first letter of every word.
 *
 * Strings without high bytes are handed to PHP's ucwords() after
 * lower-casing. Multibyte strings are lower-cased with lc() and then
 * re-capitalised through a regex callback.
 *
 * @param $str String
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$callbackName = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callbackName ), $str );
}
/**
 * Capitalize words at word breaks.
 *
 * Strings without high bytes use a \b-based pattern with
 * ucwordbreaksCallbackAscii(). Multibyte strings are lower-cased first and
 * then matched against an explicit list of break characters.
 *
 * @param $str String
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$callbackName = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callbackName ), $str );
}
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * The current implementation simply upper-cases the string with uc().
 *
 * @param $s String
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
1863 function checkTitleEncoding( $s ) {
1864 if ( is_array( $s ) ) {
1865 wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
1867 # Check for non-UTF-8 URLs
1868 $ishigh = preg_match( '/[\x80-\xff]/', $s );
1873 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1874 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
1879 return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
/**
 * Fetch this language's 'fallback8bitEncoding' item from the localisation cache.
 */
function fallback8bitEncoding() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'fallback8bitEncoding' );
}
1887 * Most writing systems use whitespace to break up words.
1888 * Some languages such as Chinese don't conventionally do this,
1889 * which requires special handling when breaking up words for
1892 function hasWordBreaks() {
1897 * Some languages such as Chinese require word segmentation,
1898 * Specify such segmentation when overridden in derived class.
1900 * @param $string String
1903 function segmentByWord( $string ) {
/**
 * Some languages have special punctuation that needs to be normalized.
 * Make such changes here.
 *
 * This base implementation only applies convertDoubleWidth().
 *
 * @param $string String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
/**
 * convert double-width roman characters to single-width.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only the digits and the upper/lower-case Latin letters are mapped.
 * Each full-width form sits exactly 0xfee0 above its ASCII counterpart.
 * The 3-byte UTF-8 sequences are built arithmetically instead of being
 * typed as a literal, so the table cannot be damaged by a re-encoding or
 * width-normalisation of this source file.
 *
 * @param $string String
 * @return String with full-width alphanumerics replaced by ASCII
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$half = str_split( $halfWidth );
		$full = array();
		foreach ( $half as $ascii ) {
			// U+FF10..U+FF5A, encoded by hand as 3-byte UTF-8
			$code = ord( $ascii ) + 0xfee0;
			$full[] = chr( 0xe0 | ( $code >> 12 ) )
				. chr( 0x80 | ( ( $code >> 6 ) & 0x3f ) )
				. chr( 0x80 | ( $code & 0x3f ) );
		}
	}

	$string = str_replace( $full, $half, $string );
	return $string;
}
/**
 * Surround every match of $pattern (its first capture group) with single
 * spaces, then collapse runs of spaces into one.
 *
 * @param $string String
 * @param $pattern String: regex containing at least one capture group
 */
protected static function insertSpace( $string, $pattern ) {
	$padded = preg_replace( $pattern, " $1 ", $string );
	$collapsed = preg_replace( '/ +/', ' ', $padded );
	return $collapsed;
}
1943 function convertForSearchResult( $termsArray ) {
1944 # some languages, e.g. Chinese, need to do a conversion
1945 # in order for search results to be displayed correctly
1950 * Get the first character of a string.
1955 function firstChar( $s ) {
1958 '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1959 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
1964 if ( isset( $matches[1] ) ) {
1965 if ( strlen( $matches[1] ) != 3 ) {
1969 // Break down Hangul syllables to grab the first jamo
1970 $code = utf8ToCodepoint( $matches[1] );
1971 if ( $code < 0xac00 ||
0xd7a4 <= $code ) {
1973 } elseif ( $code < 0xb098 ) {
1974 return "\xe3\x84\xb1";
1975 } elseif ( $code < 0xb2e4 ) {
1976 return "\xe3\x84\xb4";
1977 } elseif ( $code < 0xb77c ) {
1978 return "\xe3\x84\xb7";
1979 } elseif ( $code < 0xb9c8 ) {
1980 return "\xe3\x84\xb9";
1981 } elseif ( $code < 0xbc14 ) {
1982 return "\xe3\x85\x81";
1983 } elseif ( $code < 0xc0ac ) {
1984 return "\xe3\x85\x82";
1985 } elseif ( $code < 0xc544 ) {
1986 return "\xe3\x85\x85";
1987 } elseif ( $code < 0xc790 ) {
1988 return "\xe3\x85\x87";
1989 } elseif ( $code < 0xcc28 ) {
1990 return "\xe3\x85\x88";
1991 } elseif ( $code < 0xce74 ) {
1992 return "\xe3\x85\x8a";
1993 } elseif ( $code < 0xd0c0 ) {
1994 return "\xe3\x85\x8b";
1995 } elseif ( $code < 0xd30c ) {
1996 return "\xe3\x85\x8c";
1997 } elseif ( $code < 0xd558 ) {
1998 return "\xe3\x85\x8d";
2000 return "\xe3\x85\x8e";
/**
 * Startup hook for languages with an alternate character encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 *
 * If this language is used as the primary content language, an override
 * to the defaults can be set here on startup. The base implementation
 * does nothing.
 */
function initEncoding() {
	# Intentionally empty
}
/**
 * Recode UTF-8 text into $wgEditEncoding for display in the edit box.
 *
 * For some languages we'll want to explicitly specify which characters
 * make it into the edit box raw or are converted in some way or another.
 * Note that if wgOutputEncoding is different from wgInputEncoding, this
 * text will be further converted to wgOutputEncoding.
 *
 * @param $s String
 * @return $s unchanged when $wgEditEncoding is empty or 'UTF-8',
 *         otherwise the result of $this->iconv()
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$needsRecoding = ( $wgEditEncoding != '' && $wgEditEncoding != 'UTF-8' );
	if ( $needsRecoding ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}
	return $s;
}
2029 function recodeInput( $s ) {
2030 # Take the previous into account.
2031 global $wgEditEncoding;
2032 if ( $wgEditEncoding != '' ) {
2033 $enc = $wgEditEncoding;
2037 if ( $enc == 'UTF-8' ) {
2040 return $this->iconv( $enc, 'UTF-8', $s );
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only. In this base
 * class the Arabic and Malayalam pair files are applied only when
 * $wgAllUnicodeFixes is set.
 *
 * @param $s String
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( $wgAllUnicodeFixes ) {
		// Order matters: Arabic first, then Malayalam
		foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
			$s = $this->transformUsingPairFile( $pairFile, $s );
		}
	}
	return $s;
}
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory ($this->transformData, keyed by
 * file name). This will go faster if you have the FastStringSearch
 * extension.
 *
 * @param $file String: name of the precompiled data file
 * @param $string String: text to transform
 * @throws MWException when wfGetPrecompiledData() returns false
 */
function transformUsingPairFile( $file, $string ) {
	if ( !isset( $this->transformData[$file] ) ) {
		$pairs = wfGetPrecompiledData( $file );
		if ( $pairs === false ) {
			throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
		}
		$this->transformData[$file] = new ReplacementArray( $pairs );
	}

	$replacer = $this->transformData[$file];
	return $replacer->replace( $string );
}
2082 * For right-to-left language support
2087 return self
::$dataCache->getItem( $this->mCode
, 'rtl' );
2091 * Return the correct HTML 'dir' attribute value for this language.
2095 return $this->isRTL() ?
'rtl' : 'ltr';
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String: 'right' when isRTL() is true, otherwise 'left'
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String: 'left' when isRTL() is true, otherwise 'right'
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction
 *
 * @return String: UTF-8 bytes of U+200F when isRTL() is true, otherwise U+200E
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
/**
 * Fetch this language's 'capitalizeAllNouns' item from the localisation cache.
 */
function capitalizeAllNouns() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'capitalizeAllNouns' );
}
/**
 * An arrow, depending on the language direction
 *
 * @return String: '←' when isRTL() is true, otherwise '→'
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
 *
 * @return this language's 'linkPrefixExtension' item from the localisation cache
 */
function linkPrefixExtension() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'linkPrefixExtension' );
}
/**
 * Fetch this language's 'magicWords' item from the localisation cache.
 */
function getMagicWords() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'magicWords' );
}
/**
 * Run the 'LanguageGetMagic' hook once per object, letting handlers fill
 * $this->mMagicExtensions. Later calls are no-ops.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		// Flag first, so the hook is not run again on a later call
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( 'LanguageGetMagic' );
	}
}
2167 # Fill a MagicWord object with data from here
2168 function getMagic( $mw ) {
2169 $this->doMagicHook();
2171 if ( isset( $this->mMagicExtensions
[$mw->mId
] ) ) {
2172 $rawEntry = $this->mMagicExtensions
[$mw->mId
];
2174 $magicWords = $this->getMagicWords();
2175 if ( isset( $magicWords[$mw->mId
] ) ) {
2176 $rawEntry = $magicWords[$mw->mId
];
2182 if ( !is_array( $rawEntry ) ) {
2183 error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
2185 $mw->mCaseSensitive
= $rawEntry[0];
2186 $mw->mSynonyms
= array_slice( $rawEntry, 1 );
/**
 * Add magic words to the extension array
 *
 * Walks this language's fallback chain (ending with 'en') from the most
 * general language to the most specific one, so that entries for more
 * specific languages take precedence over their fallbacks.
 *
 * @param $newWords Array: magic word definitions keyed by language code
 */
function addMagicWordsByLang( $newWords ) {
	// Collect the fallback chain, guarding against cycles
	$chain = array();
	for (
		$code = $this->getCode();
		$code && !in_array( $code, $chain );
		$code = self::getFallbackFor( $code )
	) {
		$chain[] = $code;
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	// Apply from least to most specific; array union keeps left-hand keys
	foreach ( array_reverse( $chain ) as $langCode ) {
		if ( isset( $newWords[$langCode] ) ) {
			$this->mMagicExtensions = $newWords[$langCode] + $this->mMagicExtensions;
		}
	}
}
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * The result is loaded from the localisation cache on first use, passed
 * through the 'LanguageGetSpecialPageAliases' hook, and then kept in
 * $this->mExtendedSpecialPageAliases.
 */
function getSpecialPageAliases() {
	// Cache aliases because it may be slow to load them
	if ( !is_null( $this->mExtendedSpecialPageAliases ) ) {
		return $this->mExtendedSpecialPageAliases;
	}

	$this->mExtendedSpecialPageAliases =
		self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
	wfRunHooks(
		'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
	);

	return $this->mExtendedSpecialPageAliases;
}
/**
 * Italic is unsuitable for some languages
 *
 * This base implementation wraps the text in an <em> element.
 *
 * @param $text String: the text to be emphasized.
 * @return String
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
2239 * Normally we output all numbers in plain en_US style, that is
2240 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
2241 * point twohundredthirtyfive. However this is not suitable for all
2242 * languages, some such as Punjabi want ੨੯੩,੨੯੧.੨੩੫ and others such as
2243 * Icelandic just want to use commas instead of dots, and dots instead
2244 * of commas like "293.291,235".
2246 * An example of this function being called:
2248 * wfMsg( 'message', $wgLang->formatNum( $num ) )
2251 * See LanguageGu.php for the Gujarati implementation and
2252 * $separatorTransformTable in MessagesIs.php for
2253 * the , => . and . => , implementation.
2255 * @todo check if it's viable to use localeconv() for the decimal
2257 * @param $number Mixed: the string to be formatted, should be an integer
2258 * or a floating point number.
2259 * @param $nocommafy Bool: set to true for special numbers like dates
2262 function formatNum( $number, $nocommafy = false ) {
2263 global $wgTranslateNumerals;
2264 if ( !$nocommafy ) {
2265 $number = $this->commafy( $number );
2266 $s = $this->separatorTransformTable();
2268 $number = strtr( $number, $s );
2272 if ( $wgTranslateNumerals ) {
2273 $s = $this->digitTransformTable();
2275 $number = strtr( $number, $s );
2282 function parseFormattedNumber( $number ) {
2283 $s = $this->digitTransformTable();
2285 $number = strtr( $number, array_flip( $s ) );
2288 $s = $this->separatorTransformTable();
2290 $number = strtr( $number, array_flip( $s ) );
2293 $number = strtr( $number, array( ',' => '' ) );
/**
 * Adds commas to a given number
 *
 * Works on the reversed string so that digits are grouped in threes from
 * the right. Digits that come after a '.' are left ungrouped.
 *
 * @param $_ Mixed: the number to format
 * @return String
 */
function commafy( $_ ) {
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
/**
 * Fetch this language's 'digitTransformTable' item from the localisation cache.
 */
function digitTransformTable() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'digitTransformTable' );
}
/**
 * Fetch this language's 'separatorTransformTable' item from the localisation cache.
 */
function separatorTransformTable() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'separatorTransformTable' );
}
2316 * Take a list of strings and build a locale-friendly comma-separated
2317 * list, using the local comma-separator message.
2318 * The last two strings are chained with an "and".
2323 function listToText( $l ) {
2325 $m = count( $l ) - 1;
2327 return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
2329 for ( $i = $m; $i >= 0; $i-- ) {
2332 } else if ( $i == $m - 1 ) {
2333 $s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s;
2335 $s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s;
2343 * Take a list of strings and build a locale-friendly comma-separated
2344 * list, using the local comma-separator message.
2345 * @param $list array of strings to put in a comma list
2348 function commaList( $list ) {
2353 array( 'parsemag', 'escapenoentities', 'language' => $this )
2359 * Take a list of strings and build a locale-friendly semicolon-separated
2360 * list, using the local semicolon-separator message.
2361 * @param $list array of strings to put in a semicolon list
2364 function semicolonList( $list ) {
2368 'semicolon-separator',
2369 array( 'parsemag', 'escapenoentities', 'language' => $this )
2375 * Same as commaList, but separate it with the pipe instead.
2376 * @param $list array of strings to put in a pipe list
2379 function pipeList( $list ) {
2384 array( 'escapenoentities', 'language' => $this )
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (including ellipses)
 * @param $ellipsis String to append to the truncated text
 * @param $adjustLength Boolean: Subtract length of ellipsis from $length.
 *	$adjustLength was introduced in 1.18, before that behaved as if false.
 * @return String: the ellipsis alone when $length is 0; the original
 *	string when it already fits or when truncating would not shorten it;
 *	otherwise the truncated string with the ellipsis attached
 */
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Check if there is no need to truncate
	if ( $length == 0 ) {
		return $ellipsis; // convention
	}
	$limit = abs( $length );
	if ( strlen( $string ) <= $limit ) {
		return $string; // no need to truncate
	}

	$ellipsisLen = strlen( $ellipsis );
	if ( $adjustLength && $ellipsisLen >= $limit ) {
		# The ellipsis alone fills the budget, so $adjustLength can't be applied
		$shortened = $ellipsis; // this can be slightly unexpected
	} else {
		$reserved = $adjustLength ? $ellipsisLen : 0;
		if ( $length > 0 ) {
			$kept = substr( $string, 0, $length - $reserved ); // xyz...
			$shortened = $this->removeBadCharLast( $kept ) . $ellipsis;
		} else {
			$kept = substr( $string, $length + $reserved ); // ...xyz
			$shortened = $ellipsis . $this->removeBadCharFirst( $kept );
		}
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
	# This check is *not* redundant if $adjustLength, due to the single case where
	# LEN($ellipsis) > ABS($limit arg); the original could be shorter than the result.
	return ( strlen( $shortened ) < strlen( $string ) ) ? $shortened : $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * Two cases are handled: a lone lead byte (>= 0xc0) as the final byte, and
 * a trailing 3- or 4-byte sequence that is missing its last continuation
 * byte(s). A complete trailing character is left alone.
 *
 * @param $string String
 * @return String
 */
protected function removeBadCharLast( $string ) {
	if ( $string != '' ) {
		$char = ord( $string[strlen( $string ) - 1] );
		$m = array();
		if ( $char >= 0xc0 ) {
			# We got the first byte only of a multibyte char; remove it.
			$string = substr( $string, 0, -1 );
		} elseif ( $char >= 0x80 &&
			# The /s modifier lets (.*) span newlines. Without it a
			# multi-line string never matches and keeps its broken tail.
			preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
				'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m )
		) {
			# We chopped in the middle of a character; remove it
			$string = $m[1];
		}
	}
	return $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * If the first byte is a continuation byte (0x80-0xbf), the whole leading
 * run of continuation bytes is dropped.
 *
 * @param $string String
 * @return String
 */
protected function removeBadCharFirst( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$firstByte = ord( $string[0] );
	$startsMidCharacter = ( $firstByte >= 0x80 && $firstByte < 0xc0 );
	if ( $startsMidCharacter ) {
		# We chopped in the middle of a character; remove the whole thing
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
2490 * Truncate a string of valid HTML to a specified length in bytes,
2491 * appending an optional string (e.g. for ellipses), and return valid HTML
2493 * This is only intended for styled/linked text, such as HTML with
2494 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
2495 * Also, this will not detect things like "display:none" CSS.
2497 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
2499 * @param string $text HTML string to truncate
2500 * @param int $length (zero/positive) Maximum length (including ellipses)
2501 * @param string $ellipsis String to append to the truncated text
2504 function truncateHtml( $text, $length, $ellipsis = '...' ) {
# Walks $text byte by byte, counting only displayed characters (tag markup
# is not counted and an entity counts as one), and keeps a stack of open
# tags so that they can be closed again after the cut.
2505 # Use the localized ellipsis character
2506 if ( $ellipsis == '...' ) {
2507 $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
2509 # Check if there is clearly no need to truncate
2510 if ( $length <= 0 ) {
2511 return $ellipsis; // no text shown, nothing to format (convention)
2512 } elseif ( strlen( $text ) <= $length ) {
2513 return $text; // string short enough even *with* HTML (short-circuit)
2516 $displayLen = 0; // innerHTML length so far
2517 $testingEllipsis = false; // checking if ellipses will make string longer/equal?
2518 $tagType = 0; // 0-open, 1-close
2519 $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
2520 $entityState = 0; // 0-not entity, 1-entity
2521 $tag = $ret = $pRet = ''; // accumulated tag name, accumulated result string
2522 $openTags = array(); // open tag stack
2523 $pOpenTags = array();
2525 $textLen = strlen( $text );
2526 $neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
2527 for ( $pos = 0; true; ++
$pos ) {
2528 # Consider truncation once the display length has reached the maximum.
2529 # Check that we're not in the middle of a bracket/entity...
2530 if ( $displayLen >= $neLength && $bracketState == 0 && $entityState == 0 ) {
2531 if ( !$testingEllipsis ) {
2532 $testingEllipsis = true;
2533 # Save where we are; we will truncate here unless there turn out to
2534 # be so few remaining characters that truncation is not necessary.
2535 $pOpenTags = $openTags; // save state
2536 $pRet = $ret; // save state
2537 } elseif ( $displayLen > $length && $displayLen > strlen( $ellipsis ) ) {
2538 # String in fact does need truncation, the truncation point was OK.
2539 $openTags = $pOpenTags; // reload state
2540 $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
2541 $ret .= $ellipsis; // add ellipsis
2545 if ( $pos >= $textLen ) break; // extra iteration just for above checks
2547 # Read the next char...
2549 $lastCh = $pos ?
$text[$pos - 1] : '';
2550 $ret .= $ch; // add to result string
2552 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
2553 $entityState = 0; // for bad HTML
2554 $bracketState = 1; // tag started (checking for backslash)
2555 } elseif ( $ch == '>' ) {
2556 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
2557 $entityState = 0; // for bad HTML
2558 $bracketState = 0; // out of brackets
2559 } elseif ( $bracketState == 1 ) {
2561 $tagType = 1; // close tag (e.g. "</span>")
2563 $tagType = 0; // open tag (e.g. "<span>")
2566 $bracketState = 2; // building tag name
2567 } elseif ( $bracketState == 2 ) {
2571 // Name found (e.g. "<a href=..."), add on tag attributes...
2572 $pos +
= $this->truncate_skip( $ret, $text, "<>", $pos +
1 );
2574 } elseif ( $bracketState == 0 ) {
2575 if ( $entityState ) {
2578 $displayLen++
; // entity is one displayed char
2582 $entityState = 1; // entity found, (e.g. " ")
2584 $displayLen++
; // this char is displayed
2585 // Add the next $max display text chars after this in one swoop...
2586 $max = ( $testingEllipsis ?
$length : $neLength ) - $displayLen;
2587 $skipped = $this->truncate_skip( $ret, $text, "<>&", $pos +
1, $max );
2588 $displayLen +
= $skipped;
2594 if ( $displayLen == 0 ) {
2595 return ''; // no text shown, nothing to format
2597 // Close the last tag if left unclosed by bad HTML
# NOTE(review): the two calls inside the loop pass
# ( $tag, $tagType, $lastCh, $openTags ). Here the 2nd and 3rd arguments
# are in the opposite order (last character first, then $tagType). This
# looks like a swapped-argument bug - confirm against the signature of
# truncate_endBracket(), which is not visible in this part of the file.
2598 $this->truncate_endBracket( $tag, $text[$textLen - 1], $tagType, $openTags );
2599 while ( count( $openTags ) > 0 ) {
2600 $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
/**
 * truncateHtml() helper function.
 * Like strcspn(), but also appends the skipped characters to $ret.
 *
 * @param $ret String: result string, passed by reference; skipped chars are appended
 * @param $text String: text being scanned
 * @param $search String: characters that stop the scan
 * @param $start Integer: offset in $text at which to start scanning
 * @param $len Integer|null: maximum number of characters to scan; null means
 *   "to the end of $text", negative values are treated as 0
 * @return Integer: number of characters skipped (and appended to $ret)
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
    if ( $start >= strlen( $text ) ) {
        return 0; // nothing left to scan
    }
    if ( $len === null ) {
        // Omit the length argument entirely: a negative length makes strcspn()
        // stop that many characters before the end instead of meaning "no limit".
        $skipCount = strcspn( $text, $search, $start );
    } else {
        $skipCount = strcspn( $text, $search, $start, max( 0, $len ) ); // sanity
    }
    $ret .= substr( $text, $start, $skipCount );
    return $skipCount;
}
/**
 * truncateHtml() helper function.
 * (a) pushes $tag onto, or pops it from, $openTags as needed
 * (b) clears the $tag value
 *
 * @param $tag String: current HTML tag name we are looking at (by reference)
 * @param $tagType Integer: 0 for an open tag, 1 for a close tag
 * @param $lastCh String: character before the '>' that ended this tag
 * @param $openTags Array: open tag stack, not accounting for $tag (by reference)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
    $tag = ltrim( $tag );
    if ( $tag == '' ) {
        return; // no tag name accumulated, nothing to record
    }

    $isOpening = ( $tagType == 0 && $lastCh != '/' );
    if ( $isOpening ) {
        // Tag opened and did not close itself
        $openTags[] = $tag;
    } elseif ( $tagType == 1 ) {
        // Close tag: only pop when it matches the innermost open tag
        $depth = count( $openTags );
        if ( $openTags && $tag == $openTags[$depth - 1] ) {
            array_pop( $openTags );
        }
    }
    $tag = '';
}
/**
 * Grammatical transformations, needed for inflected languages.
 * Invoked by putting {{grammar:case|word}} in a message.
 *
 * Looks the word up in $wgGrammarForms for this language's code and the
 * given case; the word is returned unchanged when no entry is set.
 *
 * @param $word string
 * @param $case string
 * @return string
 */
function convertGrammar( $word, $case ) {
    global $wgGrammarForms;
    $langCode = $this->getCode();
    if ( !isset( $wgGrammarForms[$langCode][$case][$word] ) ) {
        return $word;
    }
    return $wgGrammarForms[$langCode][$case][$word];
}
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If second or third parameter are not specified, masculine is used.
 * These details may be overridden per language.
 *
 * @param $gender string: 'male' and 'female' select forms 0 and 1; anything
 *   else selects form 2 if set, otherwise form 0
 * @param $forms array: the alternative texts
 * @return string: the selected form, or '' when $forms is empty
 */
function gender( $gender, $forms ) {
    if ( count( $forms ) === 0 ) {
        return '';
    }
    $padded = $this->preConvertPlural( $forms, 2 );
    if ( $gender === 'male' ) {
        $chosen = $padded[0];
    } elseif ( $gender === 'female' ) {
        $chosen = $padded[1];
    } elseif ( isset( $padded[2] ) ) {
        $chosen = $padded[2];
    } else {
        $chosen = $padded[0];
    }
    return $chosen;
}
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 forms of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]].
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language,
 *   or '' when $forms is empty
 */
function convertPlural( $count, $forms ) {
    if ( count( $forms ) === 0 ) {
        return '';
    }
    $padded = $this->preConvertPlural( $forms, 2 );

    if ( $count == 1 ) {
        return $padded[0];
    }
    return $padded[1];
}
/**
 * Pads the list of forms given to convertPlural to the requested
 * amount of forms by copying the last one.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
    for ( $missing = $count - count( $forms ); $missing > 0; $missing-- ) {
        // Repeat whatever is currently the last form
        $forms[] = $forms[count( $forms ) - 1];
    }
    return $forms;
}
/**
 * Maybe translate block durations. Note that this function is somewhat misnamed: it
 * deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
 * (which is an absolute timestamp).
 *
 * The duration is matched against the values of
 * SpecialBlock::getSuggestedDurations(); on a match the corresponding key is
 * returned trimmed and HTML-escaped, otherwise $str is returned as given.
 *
 * @param $str String: the validated block duration in English
 * @return String: somehow translated block duration
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
    $suggested = SpecialBlock::getSuggestedDurations( $this );
    foreach ( $suggested as $label => $duration ) {
        if ( 0 == strcmp( $str, $duration ) ) {
            return htmlspecialchars( trim( $label ) );
        }
    }
    return $str;
}
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation returns the text unchanged.
 *
 * @param $text String
 * @return String
 */
function segmentForDiff( $text ) {
    // No segmentation is done here
    return $text;
}
/**
 * Counterpart of segmentForDiff(): unsegment the text to show the result.
 * This base implementation returns the text unchanged.
 *
 * @param $text String
 * @return String
 */
function unsegmentForDiff( $text ) {
    // Nothing was segmented, so nothing to undo
    return $text;
}
/**
 * Convert text to all supported variants.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return mixed whatever the converter's autoConvertToAllVariants() returns
 */
function autoConvertToAllVariants( $text ) {
    $converter = $this->mConverter;
    return $converter->autoConvertToAllVariants( $text );
}
/**
 * Convert text to different variants of a language.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return mixed whatever the converter's convert() returns
 */
function convert( $text ) {
    $converter = $this->mConverter;
    return $converter->convert( $text );
}
/**
 * Convert a Title object to a string in the preferred variant.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $title Title object
 * @return mixed whatever the converter's convertTitle() returns
 */
function convertTitle( $title ) {
    $converter = $this->mConverter;
    return $converter->convertTitle( $title );
}
/**
 * Check if this is a language with variants, i.e. getVariants()
 * lists more than one entry.
 *
 * @return bool
 */
function hasVariants() {
    $variants = $this->getVariants();
    return count( $variants ) > 1;
}
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return mixed whatever the converter's armourMath() returns
 */
function armourMath( $text ) {
    $converter = $this->mConverter;
    return $converter->armourMath( $text );
}
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title;
 *   forwarded to convert() as a second argument.
 *   NOTE(review): convert() as declared in this class only takes $text, so
 *   the flag has no effect unless an override accepts it - confirm.
 * @return string
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
    $converted = $this->convert( $text, $isTitle );
    return htmlspecialchars( $converted );
}
/**
 * Convert a category key.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $key String
 * @return mixed whatever the converter's convertCategoryKey() returns
 */
function convertCategoryKey( $key ) {
    $converter = $this->mConverter;
    return $converter->convertCategoryKey( $key );
}
/**
 * Get the list of variants supported by this language,
 * as reported by the converter object in $this->mConverter.
 * See the sample implementation in LanguageZh.php.
 *
 * @return array an array of language codes
 */
function getVariants() {
    $converter = $this->mConverter;
    return $converter->getVariants();
}
/**
 * Get the preferred variant, as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed whatever the converter's getPreferredVariant() returns
 */
function getPreferredVariant() {
    $converter = $this->mConverter;
    return $converter->getPreferredVariant();
}
/**
 * Get the default variant, as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed whatever the converter's getDefaultVariant() returns
 */
function getDefaultVariant() {
    $converter = $this->mConverter;
    return $converter->getDefaultVariant();
}
/**
 * Get the URL variant, as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed whatever the converter's getURLVariant() returns
 */
function getURLVariant() {
    $converter = $this->mConverter;
    return $converter->getURLVariant();
}
/**
 * If a language supports multiple variants, it is
 * possible that a non-existing link in one variant
 * actually exists in another variant. This function
 * tries to find it. See e.g. LanguageZh.php
 *
 * The work is done by the converter object in $this->mConverter.
 *
 * @param $link String: the name of the link (by reference)
 * @param $nt Mixed: the title object of the link (by reference)
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *   we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
    $converter = $this->mConverter;
    $converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *   'variant' => text in that variant
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @deprecated Use autoConvertToAllVariants()
 * @param $text String
 * @return mixed whatever the converter's convertLinkToAllVariants() returns
 */
function convertLinkToAllVariants( $text ) {
    $converter = $this->mConverter;
    return $converter->convertLinkToAllVariants( $text );
}
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example, the preferred language variant.
 * The value comes from the converter object in $this->mConverter.
 *
 * @return mixed whatever the converter's getExtraHashOptions() returns
 */
function getExtraHashOptions() {
    $converter = $this->mConverter;
    return $converter->getExtraHashOptions();
}
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the article.
 * The value comes from the converter object in $this->mConverter.
 *
 * @return mixed whatever the converter's getParsedTitle() returns
 */
function getParsedTitle() {
    $converter = $this->mConverter;
    return $converter->getParsedTitle();
}
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse Boolean: passed through to the converter unchanged
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
    $converter = $this->mConverter;
    return $converter->markNoConversion( $text, $noParse );
}
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 * Read from the 'linkTrail' item of self::$dataCache for this language's code.
 *
 * @return mixed the cached 'linkTrail' item
 */
function linkTrail() {
    $cache = self::$dataCache;
    return $cache->getItem( $this->mCode, 'linkTrail' );
}
/**
 * Get the language object, which is this instance itself.
 *
 * @return Language
 */
function getLangObj() {
    // This object is its own language object
    return $this;
}
/**
 * Get the RFC 3066 code for this language object,
 * as stored in $this->mCode.
 *
 * @return string
 */
function getCode() {
    $code = $this->mCode;
    return $code;
}
/**
 * Set the language code of this object (stored in $this->mCode).
 *
 * @param $code string
 */
function setCode( $code ) {
    // No validation is done here; the value is stored as given
    $this->mCode = $code;
}
/**
 * Get the name of a file for a certain language code.
 * The code has its first letter upper-cased and every '-' replaced by '_'.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws MWException when the code fails Language::isValidCode() or
 *   contains ':', '/', a backslash or a NUL byte
 */
static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
    // Protect against path traversal
    $isSafe = Language::isValidCode( $code )
        && strcspn( $code, ":/\\\000" ) === strlen( $code );
    if ( !$isSafe ) {
        throw new MWException( "Invalid language code \"$code\"" );
    }

    $mangledCode = str_replace( '-', '_', ucfirst( $code ) );
    return $prefix . $mangledCode . $suffix;
}
/**
 * Get the language code from a file name. Inverse of getFileName().
 * The code part must be one upper-case letter followed by one or more
 * lower-case letters or underscores.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return string|false Language code (lower-cased, '_' turned into '-'),
 *   or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
    $pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
        preg_quote( $suffix, '/' ) . '/';
    $matches = null;
    preg_match( $pattern, $filename, $matches );
    if ( !count( $matches ) ) {
        return false;
    }
    $mangledCode = strtolower( $matches[1] );
    return str_replace( '_', '-', $mangledCode );
}
/**
 * Get the name of the messages file for a language code,
 * located under $IP/languages/messages.
 *
 * @param $code string Language code
 * @return string
 */
static function getMessagesFileName( $code ) {
    global $IP;
    $prefix = "$IP/languages/messages/Messages";
    return self::getFileName( $prefix, $code, '.php' );
}
/**
 * Get the name of the language class file for a language code,
 * located under $IP/languages/classes.
 *
 * @param $code string Language code
 * @return string
 */
static function getClassFileName( $code ) {
    global $IP;
    $prefix = "$IP/languages/classes/Language";
    return self::getFileName( $prefix, $code, '.php' );
}
/**
 * Get the fallback for a given language.
 *
 * @param $code string Language code
 * @return mixed false for 'en'; otherwise the 'fallback' item of the
 *   localisation cache for $code
 */
static function getFallbackFor( $code ) {
    // Shortcut: 'en' has no fallback
    if ( $code === 'en' ) {
        return false;
    }
    $cache = self::getLocalisationCache();
    return $cache->getItem( $code, 'fallback' );
}
/**
 * Get all messages for a given language, read from the 'messages'
 * item of the localisation cache.
 * WARNING: this may take a long time.
 *
 * @param $code string Language code
 * @return mixed the cached 'messages' item
 */
static function getMessagesFor( $code ) {
    $cache = self::getLocalisationCache();
    return $cache->getItem( $code, 'messages' );
}
/**
 * Get a message for a given language, read from the 'messages'
 * item of the localisation cache.
 *
 * @param $key string Message key
 * @param $code string Language code
 * @return mixed the cached sub-item for $key
 */
static function getMessageFor( $key, $code ) {
    $cache = self::getLocalisationCache();
    return $cache->getSubitem( $code, 'messages', $key );
}
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * '$1' is replaced by $wgMetaNamespace, any {{grammar:case|word}} constructs
 * are resolved through replaceGrammarInNamespace(), and spaces are turned
 * into underscores. Names without '$1' are returned untouched.
 *
 * @param $talk string Namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
    if ( strpos( $talk, '$1' ) === false ) {
        return $talk;
    }

    global $wgMetaNamespace;
    $talk = str_replace( '$1', $wgMetaNamespace, $talk );

    # Allow grammar transformations
    # Allowing full message-style parsing would make simple requests
    # such as action=raw much more expensive than they need to be.
    # This will hopefully cover most cases.
    # The callback uses plain $this: a reference to $this is not needed
    # for an object callback, and newer PHP versions restrict it.
    $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
        array( $this, 'replaceGrammarInNamespace' ), $talk );
    return str_replace( ' ', '_', $talk );
}
/**
 * preg_replace_callback() handler used by fixVariableInNamespace().
 *
 * @param $m array Regex match: $m[1] is the grammar case, $m[2] the word
 * @return string Result of convertGrammar() for the trimmed word and case
 */
function replaceGrammarInNamespace( $m ) {
    $case = trim( $m[1] );
    $word = trim( $m[2] );
    return $this->convertGrammar( $word, $case );
}
/**
 * Get the upper- and lower-case character maps loaded from the
 * precompiled 'Utf8Case.ser' data. The maps are kept in static
 * variables, so the data file is only read on the first call.
 *
 * @return array array( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException when the precompiled data cannot be loaded
 */
static function getCaseMaps() {
    static $wikiUpperChars, $wikiLowerChars;
    if ( isset( $wikiUpperChars ) ) {
        return array( $wikiUpperChars, $wikiLowerChars );
    }

    wfProfileIn( __METHOD__ );
    $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
    if ( $arr === false ) {
        // Close the profiling section before bailing out so that
        // wfProfileIn()/wfProfileOut() stay balanced on the error path.
        wfProfileOut( __METHOD__ );
        throw new MWException(
            "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
    }
    $wikiUpperChars = $arr['wikiUpperChars'];
    $wikiLowerChars = $arr['wikiLowerChars'];
    wfProfileOut( __METHOD__ );
    return array( $wikiUpperChars, $wikiLowerChars );
}
/**
 * Decode an expiry (block, protection, etc) which has come from the DB.
 *
 * An empty expiry, or one equal to the database's infinity value, is
 * treated as infinite.
 *
 * @param $expiry String: Database expiry String
 * @param $format Bool|Int true to process using language functions, or TS_ constant
 *   to return the expiry in a given timestamp
 * @return mixed for an infinite expiry: the 'infiniteblock' message when
 *   $format is true, else the database infinity value; otherwise
 *   timeanddate( $expiry ) when $format is true, else wfTimestamp( $format, $expiry )
 */
public function formatExpiry( $expiry, $format = true ) {
    // Looked up once and shared by all later calls
    static $infinity, $infinityMsg;
    if ( $infinity === null ) {
        $infinityMsg = wfMessage( 'infiniteblock' );
        $infinity = wfGetDB( DB_SLAVE )->getInfinity();
    }

    $useLanguage = ( $format === true );
    if ( $expiry == '' || $expiry == $infinity ) {
        if ( $useLanguage ) {
            return $infinityMsg;
        }
        return $infinity;
    }

    if ( $useLanguage ) {
        return $this->timeanddate( $expiry );
    }
    return wfTimestamp( $format, $expiry );
}
/**
 * Format a time period given in seconds as localised text.
 *
 * Under 10 seconds one decimal place is shown; under a minute whole
 * seconds; under an hour minutes and seconds; otherwise hours, minutes
 * and seconds. Seconds or minutes that round up to 60 carry into the
 * next larger unit.
 *
 * @param $seconds String
 * @return String
 */
function formatTimePeriod( $seconds ) {
    $tenths = round( $seconds * 10 );
    if ( $tenths < 100 ) {
        return $this->formatNum( sprintf( "%.1f", $tenths / 10 ) ) .
            $this->getMessageFromDB( 'seconds-abbrev' );
    }

    $whole = round( $seconds );
    if ( $whole < 60 ) {
        return $this->formatNum( $whole ) . $this->getMessageFromDB( 'seconds-abbrev' );
    }

    if ( $whole < 3600 ) {
        $minutes = floor( $seconds / 60 );
        $secondsPart = round( fmod( $seconds, 60 ) );
        if ( $secondsPart == 60 ) {
            // Rounding pushed the seconds up to a full minute
            $secondsPart = 0;
            $minutes++;
        }
        $out = $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' );
        $out .= ' ';
        $out .= $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
        return $out;
    }

    $hours = floor( $seconds / 3600 );
    $minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
    $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
    if ( $secondsPart == 60 ) {
        // Carry rounded-up seconds into the minutes
        $secondsPart = 0;
        $minutes++;
    }
    if ( $minutes == 60 ) {
        // Carry a full hour of minutes into the hours
        $minutes = 0;
        $hours++;
    }
    $out = $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' );
    $out .= ' ';
    $out .= $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' );
    $out .= ' ';
    $out .= $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
    return $out;
}
/**
 * Format a bitrate for output, using an appropriate
 * unit (bps, kbps, Mbps or Gbps) according to the magnitude in question.
 *
 * Mantissas below 10 keep one decimal place, larger ones are rounded to
 * whole numbers. Rates beyond the largest unit are expressed in Gbps and
 * rates below 1 in bps, so a unit suffix is always present.
 *
 * @param $bps Number: bits per second
 * @return string
 */
function formatBitrate( $bps ) {
    $units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
    if ( $bps <= 0 ) {
        return $this->formatNum( $bps ) . $units[0];
    }
    $unitIndex = (int)floor( log10( $bps ) / 3 );
    // Clamp to the known units: log10() is negative for rates below 1 and
    // exceeds the table for rates of 1e12 and above.
    $unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );
    $mantissa = $bps / pow( 1000, $unitIndex );
    if ( $mantissa < 10 ) {
        $mantissa = round( $mantissa, 1 );
    } else {
        $mantissa = round( $mantissa );
    }
    return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question.
 *
 * The size is divided by 1024 for as long as it exceeds 1024 and a larger
 * unit is available. The number replaces '$1' in the unit's message.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
    $unitMessages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );
    $lastUnit = count( $unitMessages ) - 1;

    $unit = 0;
    while ( $unit < $lastUnit && $size > 1024 ) {
        $size = $size / 1024;
        $unit++;
    }

    // For small sizes no decimal places necessary;
    // for MB and bigger two decimal places are smarter
    $round = ( $unit >= 2 ) ? 2 : 0;

    $size = round( $size, $round );
    $text = $this->getMessageFromDB( $unitMessages[$unit] );
    return str_replace( '$1', $this->formatNum( $size ), $text );
}
3129 * Get the conversion rule title, if any.
3131 function getConvRuleTitle() {
3132 return $this->mConverter
->getConvRuleTitle();