languages/Language.php

   1 <?php
   2 /**
   3  * Internationalisation code.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup Language
  22  */
  23
  24 /**
  25  * @defgroup Language Language
  26  */
  27
  28 if ( !defined( 'MEDIAWIKI' ) ) {
  29         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
  30         exit( 1 );
  31 }
  32
  33 # Read language names
  34 global $wgLanguageNames;
  35 require_once( __DIR__ . '/Names.php' );
  36
  37 if ( function_exists( 'mb_strtoupper' ) ) {
  38         mb_internal_encoding( 'UTF-8' );
  39 }
  40
  41 /**
  42  * a fake language converter
  43  *
  44  * @ingroup Language
  45  */
  46 class FakeConverter {
  47
  48         /**
  49          * @var Language
  50          */
  51         public $mLang;
  52         function __construct( $langobj ) { $this->mLang = $langobj; }
  53         function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
  54         function convert( $t ) { return $t; }
  55         function convertTo( $text, $variant ) { return $text; }
  56         function convertTitle( $t ) { return $t->getPrefixedText(); }
  57         function convertNamespace( $ns ) { return $this->mLang->getFormattedNsText( $ns ); }
  58         function getVariants() { return array( $this->mLang->getCode() ); }
  59         function getPreferredVariant() { return $this->mLang->getCode(); }
  60         function getDefaultVariant() { return $this->mLang->getCode(); }
  61         function getURLVariant() { return ''; }
  62         function getConvRuleTitle() { return false; }
  63         function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) { }
  64         function getExtraHashOptions() { return ''; }
  65         function getParsedTitle() { return ''; }
  66         function markNoConversion( $text, $noParse = false ) { return $text; }
  67         function convertCategoryKey( $key ) { return $key; }
  68         function convertLinkToAllVariants( $text ) { return $this->autoConvertToAllVariants( $text ); }
  69         function armourMath( $text ) { return $text; }
  70 }
  71
  72 /**
  73  * Internationalisation code
  74  * @ingroup Language
  75  */
  76 class Language {
  77
  78         /**
  79          * @var LanguageConverter
  80          */
  81         public $mConverter;
  82
  83         public $mVariants, $mCode, $mLoaded = false;
  84         public $mMagicExtensions = array(), $mMagicHookDone = false;
  85         private $mHtmlCode = null;
  86
  87         public $dateFormatStrings = array();
  88         public $mExtendedSpecialPageAliases;
  89
  90         protected $namespaceNames, $mNamespaceIds, $namespaceAliases;
  91
  92         /**
  93          * ReplacementArray object caches
  94          */
  95         public $transformData = array();
  96
  97         /**
  98          * @var LocalisationCache
  99          */
 100         static public $dataCache;
 101
 102         static public $mLangObjCache = array();
 103
 104         static public $mWeekdayMsgs = array(
 105                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
 106                 'friday', 'saturday'
 107         );
 108
 109         static public $mWeekdayAbbrevMsgs = array(
 110                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
 111         );
 112
 113         static public $mMonthMsgs = array(
 114                 'january', 'february', 'march', 'april', 'may_long', 'june',
 115                 'july', 'august', 'september', 'october', 'november',
 116                 'december'
 117         );
 118         static public $mMonthGenMsgs = array(
 119                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
 120                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
 121                 'december-gen'
 122         );
 123         static public $mMonthAbbrevMsgs = array(
 124                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
 125                 'sep', 'oct', 'nov', 'dec'
 126         );
 127
 128         static public $mIranianCalendarMonthMsgs = array(
 129                 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
 130                 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
 131                 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
 132                 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
 133         );
 134
 135         static public $mHebrewCalendarMonthMsgs = array(
 136                 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
 137                 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
 138                 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
 139                 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
 140                 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
 141         );
 142
 143         static public $mHebrewCalendarMonthGenMsgs = array(
 144                 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
 145                 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
 146                 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
 147                 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
 148                 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
 149         );
 150
 151         static public $mHijriCalendarMonthMsgs = array(
 152                 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
 153                 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
 154                 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
 155                 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
 156         );
 157
 158         /**
 159          * @since 1.20
 160          * @var array
 161          */
 162         static public $durationIntervals = array(
 163                 'millennia' => 31556952000,
 164                 'centuries' => 3155695200,
 165                 'decades' => 315569520,
 166                 'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 )
 167                 'weeks' => 604800,
 168                 'days' => 86400,
 169                 'hours' => 3600,
 170                 'minutes' => 60,
 171                 'seconds' => 1,
 172         );
 173
 174         /**
 175          * Get a cached or new language object for a given language code
 176          * @param $code String
 177          * @return Language
 178          */
 179         static function factory( $code ) {
 180                 global $wgDummyLanguageCodes, $wgLangObjCacheSize;
 181
 182                 if ( isset( $wgDummyLanguageCodes[$code] ) ) {
 183                         $code = $wgDummyLanguageCodes[$code];
 184                 }
 185
 186                 // get the language object to process
 187                 $langObj = isset( self::$mLangObjCache[$code] )
 188                         ? self::$mLangObjCache[$code]
 189                         : self::newFromCode( $code );
 190
 191                 // merge the language object in to get it up front in the cache
 192                 self::$mLangObjCache = array_merge( array( $code => $langObj ), self::$mLangObjCache );
 193                 // get rid of the oldest ones in case we have an overflow
 194                 self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true );
 195
 196                 return $langObj;
 197         }
 198
 199         /**
 200          * Create a language object for a given language code
 201          * @param $code String
 202          * @throws MWException
 203          * @return Language
 204          */
 205         protected static function newFromCode( $code ) {
 206                 // Protect against path traversal below
 207                 if ( !Language::isValidCode( $code )
 208                         || strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
 209                 {
 210                         throw new MWException( "Invalid language code \"$code\"" );
 211                 }
 212
 213                 if ( !Language::isValidBuiltInCode( $code ) ) {
 214                         // It's not possible to customise this code with class files, so
 215                         // just return a Language object. This is to support uselang= hacks.
 216                         $lang = new Language;
 217                         $lang->setCode( $code );
 218                         return $lang;
 219                 }
 220
 221                 // Check if there is a language class for the code
 222                 $class = self::classFromCode( $code );
 223                 self::preloadLanguageClass( $class );
 224                 if ( MWInit::classExists( $class ) ) {
 225                         $lang = new $class;
 226                         return $lang;
 227                 }
 228
 229                 // Keep trying the fallback list until we find an existing class
 230                 $fallbacks = Language::getFallbacksFor( $code );
 231                 foreach ( $fallbacks as $fallbackCode ) {
 232                         if ( !Language::isValidBuiltInCode( $fallbackCode ) ) {
 233                                 throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" );
 234                         }
 235
 236                         $class = self::classFromCode( $fallbackCode );
 237                         self::preloadLanguageClass( $class );
 238                         if ( MWInit::classExists( $class ) ) {
 239                                 $lang = Language::newFromCode( $fallbackCode );
 240                                 $lang->setCode( $code );
 241                                 return $lang;
 242                         }
 243                 }
 244
 245                 throw new MWException( "Invalid fallback sequence for language '$code'" );
 246         }
 247
 248         /**
 249          * Checks whether any localisation is available for that language tag
 250          * in MediaWiki (MessagesXx.php exists).
 251          *
 252          * @param string $code Language tag (in lower case)
 253          * @return bool Whether language is supported
 254          * @since 1.21
 255          */
 256         public static function isSupportedLanguage( $code ) {
 257                 return is_readable( self::getMessagesFileName( $code ) );
 258         }
 259
 260         /**
 261          * Returns true if a language code string is a well-formed language tag
 262          * according to RFC 5646.
 263          * This function only checks well-formedness; it doesn't check that
 264          * language, script or variant codes actually exist in the repositories.
 265          *
 266          * Based on regexes by Mark Davis of the Unicode Consortium:
 267          * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
 268          *
 269          * @param $code string
 270          * @param $lenient boolean Whether to allow '_' as separator. The default is only '-'.
 271          *
 272          * @return bool
 273          * @since 1.21
 274          */
 275         public static function isWellFormedLanguageTag( $code, $lenient = false ) {
 276                 $alpha = '[a-z]';
 277                 $digit = '[0-9]';
 278                 $alphanum = '[a-z0-9]';
 279                 $x = 'x' ; # private use singleton
 280                 $singleton = '[a-wy-z]'; # other singleton
 281                 $s = $lenient ? '[-_]' : '-';
 282
 283                 $language = "$alpha{2,8}|$alpha{2,3}$s$alpha{3}";
 284                 $script = "$alpha{4}"; # ISO 15924
 285                 $region = "(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49
 286                 $variant = "(?:$alphanum{5,8}|$digit$alphanum{3})";
 287                 $extension = "$singleton(?:$s$alphanum{2,8})+";
 288                 $privateUse = "$x(?:$s$alphanum{1,8})+";
 289
 290                 # Define certain grandfathered codes, since otherwise the regex is pretty useless.
 291                 # Since these are limited, this is safe even later changes to the registry --
 292                 # the only oddity is that it might change the type of the tag, and thus
 293                 # the results from the capturing groups.
 294                 # http://www.iana.org/assignments/language-subtag-registry
 295
 296                 $grandfathered = "en{$s}GB{$s}oed"
 297                         . "|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
 298                         . "|no{$s}(?:bok|nyn)"
 299                         . "|sgn{$s}(?:BE{$s}(?:fr|nl)|CH{$s}de)"
 300                         . "|zh{$s}min{$s}nan";
 301
 302                 $variantList = "$variant(?:$s$variant)*";
 303                 $extensionList = "$extension(?:$s$extension)*";
 304
 305                 $langtag = "(?:($language)"
 306                         . "(?:$s$script)?"
 307                         . "(?:$s$region)?"
 308                         . "(?:$s$variantList)?"
 309                         . "(?:$s$extensionList)?"
 310                         . "(?:$s$privateUse)?)";
 311
 312                 # The final breakdown, with capturing groups for each of these components
 313                 # The variants, extensions, grandfathered, and private-use may have interior '-'
 314
 315                 $root = "^(?:$langtag|$privateUse|$grandfathered)$";
 316
 317                 return (bool)preg_match( "/$root/", strtolower( $code ) );
 318         }
 319
 320         /**
 321          * Returns true if a language code string is of a valid form, whether or
 322          * not it exists. This includes codes which are used solely for
 323          * customisation via the MediaWiki namespace.
 324          *
 325          * @param $code string
 326          *
 327          * @return bool
 328          */
 329         public static function isValidCode( $code ) {
 330                 return
 331                         // People think language codes are html safe, so enforce it.
 332                         // Ideally we should only allow a-zA-Z0-9-
 333                         // but, .+ and other chars are often used for {{int:}} hacks
 334                         // see bugs 37564, 37587, 36938
 335                         strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code )
 336                         && !preg_match( Title::getTitleInvalidRegex(), $code );
 337         }
 338
 339         /**
 340          * Returns true if a language code is of a valid form for the purposes of
 341          * internal customisation of MediaWiki, via Messages*.php.
 342          *
 343          * @param $code string
 344          *
 345          * @throws MWException
 346          * @since 1.18
 347          * @return bool
 348          */
 349         public static function isValidBuiltInCode( $code ) {
 350
 351                 if ( !is_string( $code ) ) {
 352                         $type = gettype( $code );
 353                         if ( $type === 'object' ) {
 354                                 $addmsg = " of class " . get_class( $code );
 355                         } else {
 356                                 $addmsg = '';
 357                         }
 358                         throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" );
 359                 }
 360
 361                 return (bool)preg_match( '/^[a-z0-9-]+$/i', $code );
 362         }
 363
 364         /**
 365          * @param $code
 366          * @return String Name of the language class
 367          */
 368         public static function classFromCode( $code ) {
 369                 if ( $code == 'en' ) {
 370                         return 'Language';
 371                 } else {
 372                         return 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 373                 }
 374         }
 375
 376         /**
 377          * Includes language class files
 378          *
 379          * @param $class string Name of the language class
 380          */
 381         public static function preloadLanguageClass( $class ) {
 382                 global $IP;
 383
 384                 if ( $class === 'Language' ) {
 385                         return;
 386                 }
 387
 388                 if ( !defined( 'MW_COMPILED' ) ) {
 389                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 390                                 include_once( "$IP/languages/classes/$class.php" );
 391                         }
 392                 }
 393         }
 394
 395         /**
 396          * Get the LocalisationCache instance
 397          *
 398          * @return LocalisationCache
 399          */
 400         public static function getLocalisationCache() {
 401                 if ( is_null( self::$dataCache ) ) {
 402                         global $wgLocalisationCacheConf;
 403                         $class = $wgLocalisationCacheConf['class'];
 404                         self::$dataCache = new $class( $wgLocalisationCacheConf );
 405                 }
 406                 return self::$dataCache;
 407         }
 408
 409         function __construct() {
 410                 $this->mConverter = new FakeConverter( $this );
 411                 // Set the code to the name of the descendant
 412                 if ( get_class( $this ) == 'Language' ) {
 413                         $this->mCode = 'en';
 414                 } else {
 415                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 416                 }
 417                 self::getLocalisationCache();
 418         }
 419
 420         /**
 421          * Reduce memory usage
 422          */
 423         function __destruct() {
 424                 foreach ( $this as $name => $value ) {
 425                         unset( $this->$name );
 426                 }
 427         }
 428
 429         /**
 430          * Hook which will be called if this is the content language.
 431          * Descendants can use this to register hook functions or modify globals
 432          */
 433         function initContLang() { }
 434
 435         /**
 436          * Same as getFallbacksFor for current language.
 437          * @return array|bool
 438          * @deprecated in 1.19
 439          */
 440         function getFallbackLanguageCode() {
 441                 wfDeprecated( __METHOD__, '1.19' );
 442                 return self::getFallbackFor( $this->mCode );
 443         }
 444
 445         /**
 446          * @return array
 447          * @since 1.19
 448          */
 449         function getFallbackLanguages() {
 450                 return self::getFallbacksFor( $this->mCode );
 451         }
 452
 453         /**
 454          * Exports $wgBookstoreListEn
 455          * @return array
 456          */
 457         function getBookstoreList() {
 458                 return self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
 459         }
 460
 461         /**
 462          * @return array
 463          */
 464         public function getNamespaces() {
 465                 if ( is_null( $this->namespaceNames ) ) {
 466                         global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;
 467
 468                         $this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
 469                         $validNamespaces = MWNamespace::getCanonicalNamespaces();
 470
 471                         $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;
 472
 473                         $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
 474                         if ( $wgMetaNamespaceTalk ) {
 475                                 $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
 476                         } else {
 477                                 $talk = $this->namespaceNames[NS_PROJECT_TALK];
 478                                 $this->namespaceNames[NS_PROJECT_TALK] =
 479                                         $this->fixVariableInNamespace( $talk );
 480                         }
 481
 482                         # Sometimes a language will be localised but not actually exist on this wiki.
 483                         foreach ( $this->namespaceNames as $key => $text ) {
 484                                 if ( !isset( $validNamespaces[$key] ) ) {
 485                                         unset( $this->namespaceNames[$key] );
 486                                 }
 487                         }
 488
 489                         # The above mixing may leave namespaces out of canonical order.
 490                         # Re-order by namespace ID number...
 491                         ksort( $this->namespaceNames );
 492
 493                         wfRunHooks( 'LanguageGetNamespaces', array( &$this->namespaceNames ) );
 494                 }
 495                 return $this->namespaceNames;
 496         }
 497
 498         /**
 499          * Arbitrarily set all of the namespace names at once. Mainly used for testing
 500          * @param $namespaces Array of namespaces (id => name)
 501          */
 502         public function setNamespaces( array $namespaces ) {
 503                 $this->namespaceNames = $namespaces;
 504                 $this->mNamespaceIds = null;
 505         }
 506
 507         /**
 508          * Resets all of the namespace caches. Mainly used for testing
 509          */
 510         public function resetNamespaces( ) {
 511                 $this->namespaceNames = null;
 512                 $this->mNamespaceIds = null;
 513                 $this->namespaceAliases = null;
 514         }
 515
 516         /**
 517          * A convenience function that returns the same thing as
 518          * getNamespaces() except with the array values changed to ' '
 519          * where it found '_', useful for producing output to be displayed
 520          * e.g. in <select> forms.
 521          *
 522          * @return array
 523          */
 524         function getFormattedNamespaces() {
 525                 $ns = $this->getNamespaces();
 526                 foreach ( $ns as $k => $v ) {
 527                         $ns[$k] = strtr( $v, '_', ' ' );
 528                 }
 529                 return $ns;
 530         }
 531
 532         /**
 533          * Get a namespace value by key
 534          * <code>
 535          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 536          * echo $mw_ns; // prints 'MediaWiki'
 537          * </code>
 538          *
 539          * @param $index Int: the array key of the namespace to return
 540          * @return mixed, string if the namespace value exists, otherwise false
 541          */
 542         function getNsText( $index ) {
 543                 $ns = $this->getNamespaces();
 544                 return isset( $ns[$index] ) ? $ns[$index] : false;
 545         }
 546
 547         /**
 548          * A convenience function that returns the same thing as
 549          * getNsText() except with '_' changed to ' ', useful for
 550          * producing output.
 551          *
 552          * <code>
 553          * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK );
 554          * echo $mw_ns; // prints 'MediaWiki talk'
 555          * </code>
 556          *
 557          * @param int $index The array key of the namespace to return
 558          * @return string Namespace name without underscores (empty string if namespace does not exist)
 559          */
 560         function getFormattedNsText( $index ) {
 561                 $ns = $this->getNsText( $index );
 562                 return strtr( $ns, '_', ' ' );
 563         }
 564
 565         /**
 566          * Returns gender-dependent namespace alias if available.
 567          * @param $index Int: namespace index
 568          * @param $gender String: gender key (male, female... )
 569          * @return String
 570          * @since 1.18
 571          */
 572         function getGenderNsText( $index, $gender ) {
 573                 global $wgExtraGenderNamespaces;
 574
 575                 $ns = $wgExtraGenderNamespaces + self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 576                 return isset( $ns[$index][$gender] ) ? $ns[$index][$gender] : $this->getNsText( $index );
 577         }
 578
 579         /**
 580          * Whether this language makes distinguishes genders for example in
 581          * namespaces.
 582          * @return bool
 583          * @since 1.18
 584          */
 585         function needsGenderDistinction() {
 586                 global $wgExtraGenderNamespaces, $wgExtraNamespaces;
 587                 if ( count( $wgExtraGenderNamespaces ) > 0 ) {
 588                         // $wgExtraGenderNamespaces overrides everything
 589                         return true;
 590                 } elseif ( isset( $wgExtraNamespaces[NS_USER] ) && isset( $wgExtraNamespaces[NS_USER_TALK] ) ) {
 591                         /// @todo There may be other gender namespace than NS_USER & NS_USER_TALK in the future
 592                         // $wgExtraNamespaces overrides any gender aliases specified in i18n files
 593                         return false;
 594                 } else {
 595                         // Check what is in i18n files
 596                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 597                         return count( $aliases ) > 0;
 598                 }
 599         }
 600
 601         /**
 602          * Get a namespace key by value, case insensitive.
 603          * Only matches namespace names for the current language, not the
 604          * canonical ones defined in Namespace.php.
 605          *
 606          * @param $text String
 607          * @return mixed An integer if $text is a valid value otherwise false
 608          */
 609         function getLocalNsIndex( $text ) {
 610                 $lctext = $this->lc( $text );
 611                 $ids = $this->getNamespaceIds();
 612                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 613         }
 614
 615         /**
 616          * @return array
 617          */
 618         function getNamespaceAliases() {
 619                 if ( is_null( $this->namespaceAliases ) ) {
 620                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
 621                         if ( !$aliases ) {
 622                                 $aliases = array();
 623                         } else {
 624                                 foreach ( $aliases as $name => $index ) {
 625                                         if ( $index === NS_PROJECT_TALK ) {
 626                                                 unset( $aliases[$name] );
 627                                                 $name = $this->fixVariableInNamespace( $name );
 628                                                 $aliases[$name] = $index;
 629                                         }
 630                                 }
 631                         }
 632
 633                         global $wgExtraGenderNamespaces;
 634                         $genders = $wgExtraGenderNamespaces + (array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 635                         foreach ( $genders as $index => $forms ) {
 636                                 foreach ( $forms as $alias ) {
 637                                         $aliases[$alias] = $index;
 638                                 }
 639                         }
 640
 641                         $this->namespaceAliases = $aliases;
 642                 }
 643                 return $this->namespaceAliases;
 644         }
 645
 646         /**
 647          * @return array
 648          */
 649         function getNamespaceIds() {
 650                 if ( is_null( $this->mNamespaceIds ) ) {
 651                         global $wgNamespaceAliases;
 652                         # Put namespace names and aliases into a hashtable.
 653                         # If this is too slow, then we should arrange it so that it is done
 654                         # before caching. The catch is that at pre-cache time, the above
 655                         # class-specific fixup hasn't been done.
 656                         $this->mNamespaceIds = array();
 657                         foreach ( $this->getNamespaces() as $index => $name ) {
 658                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 659                         }
 660                         foreach ( $this->getNamespaceAliases() as $name => $index ) {
 661                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 662                         }
 663                         if ( $wgNamespaceAliases ) {
 664                                 foreach ( $wgNamespaceAliases as $name => $index ) {
 665                                         $this->mNamespaceIds[$this->lc( $name )] = $index;
 666                                 }
 667                         }
 668                 }
 669                 return $this->mNamespaceIds;
 670         }
 671
 672         /**
 673          * Get a namespace key by value, case insensitive.  Canonical namespace
 674          * names override custom ones defined for the current language.
 675          *
 676          * @param $text String
 677          * @return mixed An integer if $text is a valid value otherwise false
 678          */
 679         function getNsIndex( $text ) {
 680                 $lctext = $this->lc( $text );
 681                 $ns = MWNamespace::getCanonicalIndex( $lctext );
 682                 if ( $ns !== null ) {
 683                         return $ns;
 684                 }
 685                 $ids = $this->getNamespaceIds();
 686                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 687         }
 688
 689         /**
 690          * short names for language variants used for language conversion links.
 691          *
 692          * @param $code String
 693          * @param $usemsg bool Use the "variantname-xyz" message if it exists
 694          * @return string
 695          */
 696         function getVariantname( $code, $usemsg = true ) {
 697                 $msg = "variantname-$code";
 698                 if ( $usemsg && wfMessage( $msg )->exists() ) {
 699                         return $this->getMessageFromDB( $msg );
 700                 }
 701                 $name = self::fetchLanguageName( $code );
 702                 if ( $name ) {
 703                         return $name; # if it's defined as a language name, show that
 704                 } else {
 705                         # otherwise, output the language code
 706                         return $code;
 707                 }
 708         }
 709
 710         /**
 711          * @param $name string
 712          * @return string
 713          */
 714         function specialPage( $name ) {
 715                 $aliases = $this->getSpecialPageAliases();
 716                 if ( isset( $aliases[$name][0] ) ) {
 717                         $name = $aliases[$name][0];
 718                 }
 719                 return $this->getNsText( NS_SPECIAL ) . ':' . $name;
 720         }
 721
 722         /**
 723          * @return array
 724          */
 725         function getQuickbarSettings() {
 726                 return array(
 727                         $this->getMessage( 'qbsettings-none' ),
 728                         $this->getMessage( 'qbsettings-fixedleft' ),
 729                         $this->getMessage( 'qbsettings-fixedright' ),
 730                         $this->getMessage( 'qbsettings-floatingleft' ),
 731                         $this->getMessage( 'qbsettings-floatingright' ),
 732                         $this->getMessage( 'qbsettings-directionality' )
 733                 );
 734         }
 735
 736         /**
 737          * @return array
 738          */
 739         function getDatePreferences() {
 740                 return self::$dataCache->getItem( $this->mCode, 'datePreferences' );
 741         }
 742
 743         /**
 744          * @return array
 745          */
 746         function getDateFormats() {
 747                 return self::$dataCache->getItem( $this->mCode, 'dateFormats' );
 748         }
 749
 750         /**
 751          * @return array|string
 752          */
 753         function getDefaultDateFormat() {
 754                 $df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
 755                 if ( $df === 'dmy or mdy' ) {
 756                         global $wgAmericanDates;
 757                         return $wgAmericanDates ? 'mdy' : 'dmy';
 758                 } else {
 759                         return $df;
 760                 }
 761         }
 762
 763         /**
 764          * @return array
 765          */
 766         function getDatePreferenceMigrationMap() {
 767                 return self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
 768         }
 769
 770         /**
 771          * @param  $image
 772          * @return array|null
 773          */
 774         function getImageFile( $image ) {
 775                 return self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
 776         }
 777
 778         /**
 779          * @return array
 780          */
 781         function getExtraUserToggles() {
 782                 return (array)self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
 783         }
 784
 785         /**
 786          * @param  $tog
 787          * @return string
 788          */
 789         function getUserToggle( $tog ) {
 790                 return $this->getMessageFromDB( "tog-$tog" );
 791         }
 792
 793         /**
 794          * Get native language names, indexed by code.
 795          * Only those defined in MediaWiki, no other data like CLDR.
 796          * If $customisedOnly is true, only returns codes with a messages file
 797          *
 798          * @param $customisedOnly bool
 799          *
 800          * @return array
 801          * @deprecated in 1.20, use fetchLanguageNames()
 802          */
 803         public static function getLanguageNames( $customisedOnly = false ) {
 804                 return self::fetchLanguageNames( null, $customisedOnly ? 'mwfile' : 'mw' );
 805         }
 806
 807         /**
 808          * Get translated language names. This is done on best effort and
 809          * by default this is exactly the same as Language::getLanguageNames.
 810          * The CLDR extension provides translated names.
 811          * @param $code String Language code.
 812          * @return Array language code => language name
 813          * @since 1.18.0
 814          * @deprecated in 1.20, use fetchLanguageNames()
 815          */
 816         public static function getTranslatedLanguageNames( $code ) {
 817                 return self::fetchLanguageNames( $code, 'all' );
 818         }
 819
 820         /**
 821          * Get an array of language names, indexed by code.
 822          * @param $inLanguage null|string: Code of language in which to return the names
 823          *              Use null for autonyms (native names)
 824          * @param $include string:
 825          *              'all' all available languages
 826          *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
 827          *              'mwfile' only if the language is in 'mw' *and* has a message file
 828          * @return array: language code => language name
 829          * @since 1.20
 830          */
 831         public static function fetchLanguageNames( $inLanguage = null, $include = 'mw' ) {
 832                 global $wgExtraLanguageNames;
 833                 static $coreLanguageNames;
 834
 835                 if ( $coreLanguageNames === null ) {
 836                         include( MWInit::compiledPath( 'languages/Names.php' ) );
 837                 }
 838
 839                 $names = array();
 840
 841                 if ( $inLanguage ) {
 842                         # TODO: also include when $inLanguage is null, when this code is more efficient
 843                         wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $inLanguage ) );
 844                 }
 845
 846                 $mwNames = $wgExtraLanguageNames + $coreLanguageNames;
 847                 foreach ( $mwNames as $mwCode => $mwName ) {
 848                         # - Prefer own MediaWiki native name when not using the hook
 849                         # - For other names just add if not added through the hook
 850                         if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
 851                                 $names[$mwCode] = $mwName;
 852                         }
 853                 }
 854
 855                 if ( $include === 'all' ) {
 856                         return $names;
 857                 }
 858
 859                 $returnMw = array();
 860                 $coreCodes = array_keys( $mwNames );
 861                 foreach ( $coreCodes as $coreCode ) {
 862                         $returnMw[$coreCode] = $names[$coreCode];
 863                 }
 864
 865                 if ( $include === 'mwfile' ) {
 866                         $namesMwFile = array();
 867                         # We do this using a foreach over the codes instead of a directory
 868                         # loop so that messages files in extensions will work correctly.
 869                         foreach ( $returnMw as $code => $value ) {
 870                                 if ( is_readable( self::getMessagesFileName( $code ) ) ) {
 871                                         $namesMwFile[$code] = $names[$code];
 872                                 }
 873                         }
 874                         return $namesMwFile;
 875                 }
 876                 # 'mw' option; default if it's not one of the other two options (all/mwfile)
 877                 return $returnMw;
 878         }
 879
 880         /**
 881          * @param $code string: The code of the language for which to get the name
 882          * @param $inLanguage null|string: Code of language in which to return the name (null for autonyms)
 883          * @param $include string: 'all', 'mw' or 'mwfile'; see fetchLanguageNames()
 884          * @return string: Language name or empty
 885          * @since 1.20
 886          */
 887         public static function fetchLanguageName( $code, $inLanguage = null, $include = 'all' ) {
 888                 $array = self::fetchLanguageNames( $inLanguage, $include );
 889                 return !array_key_exists( $code, $array ) ? '' : $array[$code];
 890         }
 891
 892         /**
 893          * Get a message from the MediaWiki namespace.
 894          *
 895          * @param $msg String: message name
 896          * @return string
 897          */
 898         function getMessageFromDB( $msg ) {
 899                 return wfMessage( $msg )->inLanguage( $this )->text();
 900         }
 901
 902         /**
 903          * Get the native language name of $code.
 904          * Only if defined in MediaWiki, no other data like CLDR.
 905          * @param $code string
 906          * @return string
 907          * @deprecated in 1.20, use fetchLanguageName()
 908          */
 909         function getLanguageName( $code ) {
 910                 return self::fetchLanguageName( $code );
 911         }
 912
 913         /**
 914          * @param $key string
 915          * @return string
 916          */
 917         function getMonthName( $key ) {
 918                 return $this->getMessageFromDB( self::$mMonthMsgs[$key - 1] );
 919         }
 920
 921         /**
 922          * @return array
 923          */
 924         function getMonthNamesArray() {
 925                 $monthNames = array( '' );
 926                 for ( $i = 1; $i < 13; $i++ ) {
 927                         $monthNames[] = $this->getMonthName( $i );
 928                 }
 929                 return $monthNames;
 930         }
 931
 932         /**
 933          * @param $key string
 934          * @return string
 935          */
 936         function getMonthNameGen( $key ) {
 937                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key - 1] );
 938         }
 939
 940         /**
 941          * @param $key string
 942          * @return string
 943          */
 944         function getMonthAbbreviation( $key ) {
 945                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key - 1] );
 946         }
 947
 948         /**
 949          * @return array
 950          */
 951         function getMonthAbbreviationsArray() {
 952                 $monthNames = array( '' );
 953                 for ( $i = 1; $i < 13; $i++ ) {
 954                         $monthNames[] = $this->getMonthAbbreviation( $i );
 955                 }
 956                 return $monthNames;
 957         }
 958
 959         /**
 960          * @param $key string
 961          * @return string
 962          */
 963         function getWeekdayName( $key ) {
 964                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key - 1] );
 965         }
 966
 967         /**
 968          * @param $key string
 969          * @return string
 970          */
 971         function getWeekdayAbbreviation( $key ) {
 972                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key - 1] );
 973         }
 974
 975         /**
 976          * @param $key string
 977          * @return string
 978          */
 979         function getIranianCalendarMonthName( $key ) {
 980                 return $this->getMessageFromDB( self::$mIranianCalendarMonthMsgs[$key - 1] );
 981         }
 982
 983         /**
 984          * @param $key string
 985          * @return string
 986          */
 987         function getHebrewCalendarMonthName( $key ) {
 988                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthMsgs[$key - 1] );
 989         }
 990
 991         /**
 992          * @param $key string
 993          * @return string
 994          */
 995         function getHebrewCalendarMonthNameGen( $key ) {
 996                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthGenMsgs[$key - 1] );
 997         }
 998
 999         /**
1000          * @param $key string
1001          * @return string
1002          */
1003         function getHijriCalendarMonthName( $key ) {
1004                 return $this->getMessageFromDB( self::$mHijriCalendarMonthMsgs[$key - 1] );
1005         }
1006
1007         /**
1008          * This is a workalike of PHP's date() function, but with better
1009          * internationalisation, a reduced set of format characters, and a better
1010          * escaping format.
1011          *
1012          * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
1013          * PHP manual for definitions. There are a number of extensions, which
1014          * start with "x":
1015          *
1016          *    xn   Do not translate digits of the next numeric format character
1017          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
1018          *    xr   Use roman numerals for the next numeric format character
1019          *    xh   Use hebrew numerals for the next numeric format character
1020          *    xx   Literal x
1021          *    xg   Genitive month name
1022          *
1023          *    xij  j (day number) in Iranian calendar
1024          *    xiF  F (month name) in Iranian calendar
1025          *    xin  n (month number) in Iranian calendar
1026          *    xiy  y (two digit year) in Iranian calendar
1027          *    xiY  Y (full year) in Iranian calendar
1028          *
1029          *    xjj  j (day number) in Hebrew calendar
1030          *    xjF  F (month name) in Hebrew calendar
1031          *    xjt  t (days in month) in Hebrew calendar
1032          *    xjx  xg (genitive month name) in Hebrew calendar
1033          *    xjn  n (month number) in Hebrew calendar
1034          *    xjY  Y (full year) in Hebrew calendar
1035          *
1036          *    xmj  j (day number) in Hijri calendar
1037          *    xmF  F (month name) in Hijri calendar
1038          *    xmn  n (month number) in Hijri calendar
1039          *    xmY  Y (full year) in Hijri calendar
1040          *
1041          *    xkY  Y (full year) in Thai solar calendar. Months and days are
1042          *                       identical to the Gregorian calendar
1043          *    xoY  Y (full year) in Minguo calendar or Juche year.
1044          *                       Months and days are identical to the
1045          *                       Gregorian calendar
1046          *    xtY  Y (full year) in Japanese nengo. Months and days are
1047          *                       identical to the Gregorian calendar
1048          *
1049          * Characters enclosed in double quotes will be considered literal (with
1050          * the quotes themselves removed). Unmatched quotes will be considered
1051          * literal quotes. Example:
1052          *
1053          * "The month is" F       => The month is January
1054          * i's"                   => 20'11"
1055          *
1056          * Backslash escaping is also supported.
1057          *
1058          * Input timestamp is assumed to be pre-normalized to the desired local
1059          * time zone, if any.
1060          *
1061          * @param $format String
1062          * @param $ts String: 14-character timestamp
1063          *      YYYYMMDDHHMMSS
1064          *      01234567890123
1065          * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
1066          *
1067          * @return string
1068          */
1069         function sprintfDate( $format, $ts ) {
1070                 $s = '';
1071                 $raw = false;
1072                 $roman = false;
1073                 $hebrewNum = false;
1074                 $unix = false;
1075                 $rawToggle = false;
1076                 $iranian = false;
1077                 $hebrew = false;
1078                 $hijri = false;
1079                 $thai = false;
1080                 $minguo = false;
1081                 $tenno = false;
1082                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
1083                         $num = false;
1084                         $code = $format[$p];
1085                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
1086                                 $code .= $format[++$p];
1087                         }
1088
1089                         if ( ( $code === 'xi' || $code == 'xj' || $code == 'xk' || $code == 'xm' || $code == 'xo' || $code == 'xt' ) && $p < strlen( $format ) - 1 ) {
1090                                 $code .= $format[++$p];
1091                         }
1092
1093                         switch ( $code ) {
1094                                 case 'xx':
1095                                         $s .= 'x';
1096                                         break;
1097                                 case 'xn':
1098                                         $raw = true;
1099                                         break;
1100                                 case 'xN':
1101                                         $rawToggle = !$rawToggle;
1102                                         break;
1103                                 case 'xr':
1104                                         $roman = true;
1105                                         break;
1106                                 case 'xh':
1107                                         $hebrewNum = true;
1108                                         break;
1109                                 case 'xg':
1110                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
1111                                         break;
1112                                 case 'xjx':
1113                                         if ( !$hebrew ) $hebrew = self::tsToHebrew( $ts );
1114                                         $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
1115                                         break;
1116                                 case 'd':
1117                                         $num = substr( $ts, 6, 2 );
1118                                         break;
1119                                 case 'D':
1120                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
1121                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
1122                                         break;
1123                                 case 'j':
1124                                         $num = intval( substr( $ts, 6, 2 ) );
1125                                         break;
1126                                 case 'xij':
1127                                         if ( !$iranian ) {
1128                                                 $iranian = self::tsToIranian( $ts );
1129                                         }
1130                                         $num = $iranian[2];
1131                                         break;
1132                                 case 'xmj':
1133                                         if ( !$hijri ) {
1134                                                 $hijri = self::tsToHijri( $ts );
1135                                         }
1136                                         $num = $hijri[2];
1137                                         break;
1138                                 case 'xjj':
1139                                         if ( !$hebrew ) {
1140                                                 $hebrew = self::tsToHebrew( $ts );
1141                                         }
1142                                         $num = $hebrew[2];
1143                                         break;
1144                                 case 'l':
1145                                         if ( !$unix ) {
1146                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1147                                         }
1148                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
1149                                         break;
1150                                 case 'N':
1151                                         if ( !$unix ) {
1152                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1153                                         }
1154                                         $w = gmdate( 'w', $unix );
1155                                         $num = $w ? $w : 7;
1156                                         break;
1157                                 case 'w':
1158                                         if ( !$unix ) {
1159                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1160                                         }
1161                                         $num = gmdate( 'w', $unix );
1162                                         break;
1163                                 case 'z':
1164                                         if ( !$unix ) {
1165                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1166                                         }
1167                                         $num = gmdate( 'z', $unix );
1168                                         break;
1169                                 case 'W':
1170                                         if ( !$unix ) {
1171                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1172                                         }
1173                                         $num = gmdate( 'W', $unix );
1174                                         break;
1175                                 case 'F':
1176                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
1177                                         break;
1178                                 case 'xiF':
1179                                         if ( !$iranian ) {
1180                                                 $iranian = self::tsToIranian( $ts );
1181                                         }
1182                                         $s .= $this->getIranianCalendarMonthName( $iranian[1] );
1183                                         break;
1184                                 case 'xmF':
1185                                         if ( !$hijri ) {
1186                                                 $hijri = self::tsToHijri( $ts );
1187                                         }
1188                                         $s .= $this->getHijriCalendarMonthName( $hijri[1] );
1189                                         break;
1190                                 case 'xjF':
1191                                         if ( !$hebrew ) {
1192                                                 $hebrew = self::tsToHebrew( $ts );
1193                                         }
1194                                         $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
1195                                         break;
1196                                 case 'm':
1197                                         $num = substr( $ts, 4, 2 );
1198                                         break;
1199                                 case 'M':
1200                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
1201                                         break;
1202                                 case 'n':
1203                                         $num = intval( substr( $ts, 4, 2 ) );
1204                                         break;
1205                                 case 'xin':
1206                                         if ( !$iranian ) {
1207                                                 $iranian = self::tsToIranian( $ts );
1208                                         }
1209                                         $num = $iranian[1];
1210                                         break;
1211                                 case 'xmn':
1212                                         if ( !$hijri ) {
1213                                                 $hijri = self::tsToHijri ( $ts );
1214                                         }
1215                                         $num = $hijri[1];
1216                                         break;
1217                                 case 'xjn':
1218                                         if ( !$hebrew ) {
1219                                                 $hebrew = self::tsToHebrew( $ts );
1220                                         }
1221                                         $num = $hebrew[1];
1222                                         break;
1223                                 case 't':
1224                                         if ( !$unix ) {
1225                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1226                                         }
1227                                         $num = gmdate( 't', $unix );
1228                                         break;
1229                                 case 'xjt':
1230                                         if ( !$hebrew ) {
1231                                                 $hebrew = self::tsToHebrew( $ts );
1232                                         }
1233                                         $num = $hebrew[3];
1234                                         break;
1235                                 case 'L':
1236                                         if ( !$unix ) {
1237                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1238                                         }
1239                                         $num = gmdate( 'L', $unix );
1240                                         break;
1241                                 case 'o':
1242                                         if ( !$unix ) {
1243                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1244                                         }
1245                                         $num = gmdate( 'o', $unix );
1246                                         break;
1247                                 case 'Y':
1248                                         $num = substr( $ts, 0, 4 );
1249                                         break;
1250                                 case 'xiY':
1251                                         if ( !$iranian ) {
1252                                                 $iranian = self::tsToIranian( $ts );
1253                                         }
1254                                         $num = $iranian[0];
1255                                         break;
1256                                 case 'xmY':
1257                                         if ( !$hijri ) {
1258                                                 $hijri = self::tsToHijri( $ts );
1259                                         }
1260                                         $num = $hijri[0];
1261                                         break;
1262                                 case 'xjY':
1263                                         if ( !$hebrew ) {
1264                                                 $hebrew = self::tsToHebrew( $ts );
1265                                         }
1266                                         $num = $hebrew[0];
1267                                         break;
1268                                 case 'xkY':
1269                                         if ( !$thai ) {
1270                                                 $thai = self::tsToYear( $ts, 'thai' );
1271                                         }
1272                                         $num = $thai[0];
1273                                         break;
1274                                 case 'xoY':
1275                                         if ( !$minguo ) {
1276                                                 $minguo = self::tsToYear( $ts, 'minguo' );
1277                                         }
1278                                         $num = $minguo[0];
1279                                         break;
1280                                 case 'xtY':
1281                                         if ( !$tenno ) {
1282                                                 $tenno = self::tsToYear( $ts, 'tenno' );
1283                                         }
1284                                         $num = $tenno[0];
1285                                         break;
1286                                 case 'y':
1287                                         $num = substr( $ts, 2, 2 );
1288                                         break;
1289                                 case 'xiy':
1290                                         if ( !$iranian ) {
1291                                                 $iranian = self::tsToIranian( $ts );
1292                                         }
1293                                         $num = substr( $iranian[0], -2 );
1294                                         break;
1295                                 case 'a':
1296                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
1297                                         break;
1298                                 case 'A':
1299                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
1300                                         break;
1301                                 case 'g':
1302                                         $h = substr( $ts, 8, 2 );
1303                                         $num = $h % 12 ? $h % 12 : 12;
1304                                         break;
1305                                 case 'G':
1306                                         $num = intval( substr( $ts, 8, 2 ) );
1307                                         break;
1308                                 case 'h':
1309                                         $h = substr( $ts, 8, 2 );
1310                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
1311                                         break;
1312                                 case 'H':
1313                                         $num = substr( $ts, 8, 2 );
1314                                         break;
1315                                 case 'i':
1316                                         $num = substr( $ts, 10, 2 );
1317                                         break;
1318                                 case 's':
1319                                         $num = substr( $ts, 12, 2 );
1320                                         break;
1321                                 case 'c':
1322                                         if ( !$unix ) {
1323                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1324                                         }
1325                                         $s .= gmdate( 'c', $unix );
1326                                         break;
1327                                 case 'r':
1328                                         if ( !$unix ) {
1329                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1330                                         }
1331                                         $s .= gmdate( 'r', $unix );
1332                                         break;
1333                                 case 'U':
1334                                         if ( !$unix ) {
1335                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1336                                         }
1337                                         $num = $unix;
1338                                         break;
1339                                 case '\\':
1340                                         # Backslash escaping
1341                                         if ( $p < strlen( $format ) - 1 ) {
1342                                                 $s .= $format[++$p];
1343                                         } else {
1344                                                 $s .= '\\';
1345                                         }
1346                                         break;
1347                                 case '"':
1348                                         # Quoted literal
1349                                         if ( $p < strlen( $format ) - 1 ) {
1350                                                 $endQuote = strpos( $format, '"', $p + 1 );
1351                                                 if ( $endQuote === false ) {
1352                                                         # No terminating quote, assume literal "
1353                                                         $s .= '"';
1354                                                 } else {
1355                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
1356                                                         $p = $endQuote;
1357                                                 }
1358                                         } else {
1359                                                 # Quote at end of string, assume literal "
1360                                                 $s .= '"';
1361                                         }
1362                                         break;
1363                                 default:
1364                                         $s .= $format[$p];
1365                         }
1366                         if ( $num !== false ) {
1367                                 if ( $rawToggle || $raw ) {
1368                                         $s .= $num;
1369                                         $raw = false;
1370                                 } elseif ( $roman ) {
1371                                         $s .= Language::romanNumeral( $num );
1372                                         $roman = false;
1373                                 } elseif ( $hebrewNum ) {
1374                                         $s .= self::hebrewNumeral( $num );
1375                                         $hebrewNum = false;
1376                                 } else {
1377                                         $s .= $this->formatNum( $num, true );
1378                                 }
1379                         }
1380                 }
1381                 return $s;
1382         }
1383
        /**
         * Month lengths used by tsToIranian() when counting the days of the
         * Gregorian months that have already passed. February is listed with
         * 28 days; tsToIranian() adds the leap day separately.
         */
        private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
        /**
         * Month lengths used by tsToIranian() to turn a day-of-year count into
         * an Iranian month and day. Only the first eleven entries are ever
         * compared against; whatever remains is the day within the last month.
         */
        private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1386
1387         /**
1388          * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1389          * Gregorian dates to Iranian dates. Originally written in C, it
1390          * is released under the terms of GNU Lesser General Public
1391          * License. Conversion to PHP was performed by Niklas Laxström.
1392          *
1393          * Link: http://www.farsiweb.info/jalali/jalali.c
1394          *
1395          * @param $ts string
1396          *
1397          * @return string
1398          */
1399         private static function tsToIranian( $ts ) {
1400                 $gy = substr( $ts, 0, 4 ) -1600;
1401                 $gm = substr( $ts, 4, 2 ) -1;
1402                 $gd = substr( $ts, 6, 2 ) -1;
1403
1404                 # Days passed from the beginning (including leap years)
1405                 $gDayNo = 365 * $gy
1406                         + floor( ( $gy + 3 ) / 4 )
1407                         - floor( ( $gy + 99 ) / 100 )
1408                         + floor( ( $gy + 399 ) / 400 );
1409
1410                 // Add days of the past months of this year
1411                 for ( $i = 0; $i < $gm; $i++ ) {
1412                         $gDayNo += self::$GREG_DAYS[$i];
1413                 }
1414
1415                 // Leap years
1416                 if ( $gm > 1 && ( ( $gy % 4 === 0 && $gy % 100 !== 0 || ( $gy % 400 == 0 ) ) ) ) {
1417                         $gDayNo++;
1418                 }
1419
1420                 // Days passed in current month
1421                 $gDayNo += (int)$gd;
1422
1423                 $jDayNo = $gDayNo - 79;
1424
1425                 $jNp = floor( $jDayNo / 12053 );
1426                 $jDayNo %= 12053;
1427
1428                 $jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
1429                 $jDayNo %= 1461;
1430
1431                 if ( $jDayNo >= 366 ) {
1432                         $jy += floor( ( $jDayNo - 1 ) / 365 );
1433                         $jDayNo = floor( ( $jDayNo - 1 ) % 365 );
1434                 }
1435
1436                 for ( $i = 0; $i < 11 && $jDayNo >= self::$IRANIAN_DAYS[$i]; $i++ ) {
1437                         $jDayNo -= self::$IRANIAN_DAYS[$i];
1438                 }
1439
1440                 $jm = $i + 1;
1441                 $jd = $jDayNo + 1;
1442
1443                 return array( $jy, $jm, $jd );
1444         }
1445
1446         /**
1447          * Converting Gregorian dates to Hijri dates.
1448          *
1449          * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1450          *
1451          * @see http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1452          *
1453          * @param $ts string
1454          *
1455          * @return string
1456          */
1457         private static function tsToHijri( $ts ) {
1458                 $year = substr( $ts, 0, 4 );
1459                 $month = substr( $ts, 4, 2 );
1460                 $day = substr( $ts, 6, 2 );
1461
1462                 $zyr = $year;
1463                 $zd = $day;
1464                 $zm = $month;
1465                 $zy = $zyr;
1466
1467                 if (
1468                         ( $zy > 1582 ) || ( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1469                         ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1470                 )
1471                 {
1472                         $zjd = (int)( ( 1461 * ( $zy + 4800 + (int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1473                                         (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1474                                         (int)( ( 3 * (int)( ( ( $zy + 4900 + (int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1475                                         $zd - 32075;
1476                 } else {
1477                         $zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1478                                                                 (int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
1479                 }
1480
1481                 $zl = $zjd -1948440 + 10632;
1482                 $zn = (int)( ( $zl - 1 ) / 10631 );
1483                 $zl = $zl - 10631 * $zn + 354;
1484                 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) + ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1485                 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;
1486                 $zm = (int)( ( 24 * $zl ) / 709 );
1487                 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1488                 $zy = 30 * $zn + $zj - 30;
1489
1490                 return array( $zy, $zm, $zd );
1491         }
1492
1493         /**
1494          * Converting Gregorian dates to Hebrew dates.
1495          *
1496          * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1497          * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1498          * to translate the relevant functions into PHP and release them under
1499          * GNU GPL.
1500          *
1501          * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1502          * and Adar II is 14. In a non-leap year, Adar is 6.
1503          *
1504          * @param $ts string
1505          *
1506          * @return string
1507          */
1508         private static function tsToHebrew( $ts ) {
1509                 # Parse date
1510                 $year = substr( $ts, 0, 4 );
1511                 $month = substr( $ts, 4, 2 );
1512                 $day = substr( $ts, 6, 2 );
1513
1514                 # Calculate Hebrew year
1515                 $hebrewYear = $year + 3760;
1516
1517                 # Month number when September = 1, August = 12
1518                 $month += 4;
1519                 if ( $month > 12 ) {
1520                         # Next year
1521                         $month -= 12;
1522                         $year++;
1523                         $hebrewYear++;
1524                 }
1525
1526                 # Calculate day of year from 1 September
1527                 $dayOfYear = $day;
1528                 for ( $i = 1; $i < $month; $i++ ) {
1529                         if ( $i == 6 ) {
1530                                 # February
1531                                 $dayOfYear += 28;
1532                                 # Check if the year is leap
1533                                 if ( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
1534                                         $dayOfYear++;
1535                                 }
1536                         } elseif ( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
1537                                 $dayOfYear += 30;
1538                         } else {
1539                                 $dayOfYear += 31;
1540                         }
1541                 }
1542
1543                 # Calculate the start of the Hebrew year
1544                 $start = self::hebrewYearStart( $hebrewYear );
1545
1546                 # Calculate next year's start
1547                 if ( $dayOfYear <= $start ) {
1548                         # Day is before the start of the year - it is the previous year
1549                         # Next year's start
1550                         $nextStart = $start;
1551                         # Previous year
1552                         $year--;
1553                         $hebrewYear--;
1554                         # Add days since previous year's 1 September
1555                         $dayOfYear += 365;
1556                         if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1557                                 # Leap year
1558                                 $dayOfYear++;
1559                         }
1560                         # Start of the new (previous) year
1561                         $start = self::hebrewYearStart( $hebrewYear );
1562                 } else {
1563                         # Next year's start
1564                         $nextStart = self::hebrewYearStart( $hebrewYear + 1 );
1565                 }
1566
1567                 # Calculate Hebrew day of year
1568                 $hebrewDayOfYear = $dayOfYear - $start;
1569
1570                 # Difference between year's days
1571                 $diff = $nextStart - $start;
1572                 # Add 12 (or 13 for leap years) days to ignore the difference between
1573                 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1574                 # difference is only about the year type
1575                 if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1576                         $diff += 13;
1577                 } else {
1578                         $diff += 12;
1579                 }
1580
1581                 # Check the year pattern, and is leap year
1582                 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1583                 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1584                 # and non-leap years
1585                 $yearPattern = $diff % 30;
1586                 # Check if leap year
1587                 $isLeap = $diff >= 30;
1588
1589                 # Calculate day in the month from number of day in the Hebrew year
1590                 # Don't check Adar - if the day is not in Adar, we will stop before;
1591                 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1592                 $hebrewDay = $hebrewDayOfYear;
1593                 $hebrewMonth = 1;
1594                 $days = 0;
1595                 while ( $hebrewMonth <= 12 ) {
1596                         # Calculate days in this month
1597                         if ( $isLeap && $hebrewMonth == 6 ) {
1598                                 # Adar in a leap year
1599                                 if ( $isLeap ) {
1600                                         # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1601                                         $days = 30;
1602                                         if ( $hebrewDay <= $days ) {
1603                                                 # Day in Adar I
1604                                                 $hebrewMonth = 13;
1605                                         } else {
1606                                                 # Subtract the days of Adar I
1607                                                 $hebrewDay -= $days;
1608                                                 # Try Adar II
1609                                                 $days = 29;
1610                                                 if ( $hebrewDay <= $days ) {
1611                                                         # Day in Adar II
1612                                                         $hebrewMonth = 14;
1613                                                 }
1614                                         }
1615                                 }
1616                         } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1617                                 # Cheshvan in a complete year (otherwise as the rule below)
1618                                 $days = 30;
1619                         } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1620                                 # Kislev in an incomplete year (otherwise as the rule below)
1621                                 $days = 29;
1622                         } else {
1623                                 # Odd months have 30 days, even have 29
1624                                 $days = 30 - ( $hebrewMonth - 1 ) % 2;
1625                         }
1626                         if ( $hebrewDay <= $days ) {
1627                                 # In the current month
1628                                 break;
1629                         } else {
1630                                 # Subtract the days of the current month
1631                                 $hebrewDay -= $days;
1632                                 # Try in the next month
1633                                 $hebrewMonth++;
1634                         }
1635                 }
1636
1637                 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1638         }
1639
1640         /**
1641          * This calculates the Hebrew year start, as days since 1 September.
1642          * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1643          * Used for Hebrew date.
1644          *
1645          * @param $year int
1646          *
1647          * @return string
1648          */
1649         private static function hebrewYearStart( $year ) {
1650                 $a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
1651                 $b = intval( ( $year - 1 ) % 4 );
1652                 $m = 32.044093161144 + 1.5542417966212 * $a +  $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1653                 if ( $m < 0 ) {
1654                         $m--;
1655                 }
1656                 $Mar = intval( $m );
1657                 if ( $m < 0 ) {
1658                         $m++;
1659                 }
1660                 $m -= $Mar;
1661
1662                 $c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7 );
1663                 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1664                         $Mar++;
1665                 } elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1666                         $Mar += 2;
1667                 } elseif ( $c == 2 || $c == 4 || $c == 6 ) {
1668                         $Mar++;
1669                 }
1670
1671                 $Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1672                 return $Mar;
1673         }
1674
1675         /**
1676          * Algorithm to convert Gregorian dates to Thai solar dates,
1677          * Minguo dates or Minguo dates.
1678          *
1679          * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1680          *       http://en.wikipedia.org/wiki/Minguo_calendar
1681          *       http://en.wikipedia.org/wiki/Japanese_era_name
1682          *
1683          * @param $ts String: 14-character timestamp
1684          * @param $cName String: calender name
1685          * @return Array: converted year, month, day
1686          */
1687         private static function tsToYear( $ts, $cName ) {
1688                 $gy = substr( $ts, 0, 4 );
1689                 $gm = substr( $ts, 4, 2 );
1690                 $gd = substr( $ts, 6, 2 );
1691
1692                 if ( !strcmp( $cName, 'thai' ) ) {
1693                         # Thai solar dates
1694                         # Add 543 years to the Gregorian calendar
1695                         # Months and days are identical
1696                         $gy_offset = $gy + 543;
1697                 } elseif ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
1698                         # Minguo dates
1699                         # Deduct 1911 years from the Gregorian calendar
1700                         # Months and days are identical
1701                         $gy_offset = $gy - 1911;
1702                 } elseif ( !strcmp( $cName, 'tenno' ) ) {
1703                         # Nengō dates up to Meiji period
1704                         # Deduct years from the Gregorian calendar
1705                         # depending on the nengo periods
1706                         # Months and days are identical
1707                         if ( ( $gy < 1912 ) || ( ( $gy == 1912 ) && ( $gm < 7 ) ) || ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1708                                 # Meiji period
1709                                 $gy_gannen = $gy - 1868 + 1;
1710                                 $gy_offset = $gy_gannen;
1711                                 if ( $gy_gannen == 1 ) {
1712                                         $gy_offset = '元';
1713                                 }
1714                                 $gy_offset = '明治' . $gy_offset;
1715                         } elseif (
1716                                 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1717                                 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1718                                 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1719                                 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1720                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1721                         )
1722                         {
1723                                 # Taishō period
1724                                 $gy_gannen = $gy - 1912 + 1;
1725                                 $gy_offset = $gy_gannen;
1726                                 if ( $gy_gannen == 1 ) {
1727                                         $gy_offset = '元';
1728                                 }
1729                                 $gy_offset = '大正' . $gy_offset;
1730                         } elseif (
1731                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1732                                 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1733                                 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1734                         )
1735                         {
1736                                 # Shōwa period
1737                                 $gy_gannen = $gy - 1926 + 1;
1738                                 $gy_offset = $gy_gannen;
1739                                 if ( $gy_gannen == 1 ) {
1740                                         $gy_offset = '元';
1741                                 }
1742                                 $gy_offset = '昭和' . $gy_offset;
1743                         } else {
1744                                 # Heisei period
1745                                 $gy_gannen = $gy - 1989 + 1;
1746                                 $gy_offset = $gy_gannen;
1747                                 if ( $gy_gannen == 1 ) {
1748                                         $gy_offset = '元';
1749                                 }
1750                                 $gy_offset = '平成' . $gy_offset;
1751                         }
1752                 } else {
1753                         $gy_offset = $gy;
1754                 }
1755
1756                 return array( $gy_offset, $gm, $gd );
1757         }
1758
1759         /**
1760          * Roman number formatting up to 10000
1761          *
1762          * @param $num int
1763          *
1764          * @return string
1765          */
1766         static function romanNumeral( $num ) {
1767                 static $table = array(
1768                         array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
1769                         array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
1770                         array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
1771                         array( '', 'M', 'MM', 'MMM', 'MMMM', 'MMMMM', 'MMMMMM', 'MMMMMMM', 'MMMMMMMM', 'MMMMMMMMM', 'MMMMMMMMMM' )
1772                 );
1773
1774                 $num = intval( $num );
1775                 if ( $num > 10000 || $num <= 0 ) {
1776                         return $num;
1777                 }
1778
1779                 $s = '';
1780                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1781                         if ( $num >= $pow10 ) {
1782                                 $s .= $table[$i][(int)floor( $num / $pow10 )];
1783                         }
1784                         $num = $num % $pow10;
1785                 }
1786                 return $s;
1787         }
1788
1789         /**
1790          * Hebrew Gematria number formatting up to 9999
1791          *
1792          * @param $num int
1793          *
1794          * @return string
1795          */
1796         static function hebrewNumeral( $num ) {
1797                 static $table = array(
1798                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
1799                         array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
1800                         array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
1801                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
1802                 );
1803
1804                 $num = intval( $num );
1805                 if ( $num > 9999 || $num <= 0 ) {
1806                         return $num;
1807                 }
1808
1809                 $s = '';
1810                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1811                         if ( $num >= $pow10 ) {
1812                                 if ( $num == 15 || $num == 16 ) {
1813                                         $s .= $table[0][9] . $table[0][$num - 9];
1814                                         $num = 0;
1815                                 } else {
1816                                         $s .= $table[$i][intval( ( $num / $pow10 ) )];
1817                                         if ( $pow10 == 1000 ) {
1818                                                 $s .= "'";
1819                                         }
1820                                 }
1821                         }
1822                         $num = $num % $pow10;
1823                 }
1824                 if ( strlen( $s ) == 2 ) {
1825                         $str = $s . "'";
1826                 } else  {
1827                         $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
1828                         $str .= substr( $s, strlen( $s ) - 2, 2 );
1829                 }
1830                 $start = substr( $str, 0, strlen( $str ) - 2 );
1831                 $end = substr( $str, strlen( $str ) - 2 );
1832                 switch( $end ) {
1833                         case 'כ':
1834                                 $str = $start . 'ך';
1835                                 break;
1836                         case 'מ':
1837                                 $str = $start . 'ם';
1838                                 break;
1839                         case 'נ':
1840                                 $str = $start . 'ן';
1841                                 break;
1842                         case 'פ':
1843                                 $str = $start . 'ף';
1844                                 break;
1845                         case 'צ':
1846                                 $str = $start . 'ץ';
1847                                 break;
1848                 }
1849                 return $str;
1850         }
1851
1852         /**
1853          * Used by date() and time() to adjust the time output.
1854          *
1855          * @param $ts Int the time in date('YmdHis') format
1856          * @param $tz Mixed: adjust the time by this amount (default false, mean we
1857          *            get user timecorrection setting)
1858          * @return int
1859          */
1860         function userAdjust( $ts, $tz = false ) {
1861                 global $wgUser, $wgLocalTZoffset;
1862
1863                 if ( $tz === false ) {
1864                         $tz = $wgUser->getOption( 'timecorrection' );
1865                 }
1866
1867                 $data = explode( '|', $tz, 3 );
1868
1869                 if ( $data[0] == 'ZoneInfo' ) {
1870                         wfSuppressWarnings();
1871                         $userTZ = timezone_open( $data[2] );
1872                         wfRestoreWarnings();
1873                         if ( $userTZ !== false ) {
1874                                 $date = date_create( $ts, timezone_open( 'UTC' ) );
1875                                 date_timezone_set( $date, $userTZ );
1876                                 $date = date_format( $date, 'YmdHis' );
1877                                 return $date;
1878                         }
1879                         # Unrecognized timezone, default to 'Offset' with the stored offset.
1880                         $data[0] = 'Offset';
1881                 }
1882
1883                 $minDiff = 0;
1884                 if ( $data[0] == 'System' || $tz == '' ) {
1885                         #  Global offset in minutes.
1886                         if ( isset( $wgLocalTZoffset ) ) {
1887                                 $minDiff = $wgLocalTZoffset;
1888                         }
1889                 } elseif ( $data[0] == 'Offset' ) {
1890                         $minDiff = intval( $data[1] );
1891                 } else {
1892                         $data = explode( ':', $tz );
1893                         if ( count( $data ) == 2 ) {
1894                                 $data[0] = intval( $data[0] );
1895                                 $data[1] = intval( $data[1] );
1896                                 $minDiff = abs( $data[0] ) * 60 + $data[1];
1897                                 if ( $data[0] < 0 ) {
1898                                         $minDiff = -$minDiff;
1899                                 }
1900                         } else {
1901                                 $minDiff = intval( $data[0] ) * 60;
1902                         }
1903                 }
1904
1905                 # No difference ? Return time unchanged
1906                 if ( 0 == $minDiff ) {
1907                         return $ts;
1908                 }
1909
1910                 wfSuppressWarnings(); // E_STRICT system time bitching
1911                 # Generate an adjusted date; take advantage of the fact that mktime
1912                 # will normalize out-of-range values so we don't have to split $minDiff
1913                 # into hours and minutes.
1914                 $t = mktime( (
1915                   (int)substr( $ts, 8, 2 ) ), # Hours
1916                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
1917                   (int)substr( $ts, 12, 2 ), # Seconds
1918                   (int)substr( $ts, 4, 2 ), # Month
1919                   (int)substr( $ts, 6, 2 ), # Day
1920                   (int)substr( $ts, 0, 4 ) ); # Year
1921
1922                 $date = date( 'YmdHis', $t );
1923                 wfRestoreWarnings();
1924
1925                 return $date;
1926         }
1927
1928         /**
1929          * This is meant to be used by time(), date(), and timeanddate() to get
1930          * the date preference they're supposed to use, it should be used in
1931          * all children.
1932          *
1933          *<code>
1934          * function timeanddate([...], $format = true) {
1935          *      $datePreference = $this->dateFormat($format);
1936          * [...]
1937          * }
1938          *</code>
1939          *
1940          * @param $usePrefs Mixed: if true, the user's preference is used
1941          *                         if false, the site/language default is used
1942          *                         if int/string, assumed to be a format.
1943          * @return string
1944          */
1945         function dateFormat( $usePrefs = true ) {
1946                 global $wgUser;
1947
1948                 if ( is_bool( $usePrefs ) ) {
1949                         if ( $usePrefs ) {
1950                                 $datePreference = $wgUser->getDatePreference();
1951                         } else {
1952                                 $datePreference = (string)User::getDefaultOption( 'date' );
1953                         }
1954                 } else {
1955                         $datePreference = (string)$usePrefs;
1956                 }
1957
1958                 // return int
1959                 if ( $datePreference == '' ) {
1960                         return 'default';
1961                 }
1962
1963                 return $datePreference;
1964         }
1965
1966         /**
1967          * Get a format string for a given type and preference
1968          * @param $type string May be date, time or both
1969          * @param $pref string The format name as it appears in Messages*.php
1970          *
1971          * @return string
1972          */
function getDateFormatString( $type, $pref ) {
        // Look up (and memoise per object) the date format string for the
        // "$pref $type" entry of this language's 'dateFormats' data.
        //
        // $type: 'date', 'time' or 'both'.
        // $pref: format name, or 'default' for the language's default format.
        // Returns whatever the data cache holds for the resolved entry.

        // Resolve the 'default' alias *before* consulting the memo. Entries are
        // stored under the resolved name, so checking the memo under the literal
        // key 'default' could never hit and forced a data-cache lookup every call.
        $wasDefault = ( $pref == 'default' );
        if ( $wasDefault ) {
                $pref = $this->getDefaultDateFormat();
        }

        if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
                $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
                if ( !$wasDefault && is_null( $df ) ) {
                        // The requested format has no entry for this language:
                        // fall back to the language's default format instead.
                        $pref = $this->getDefaultDateFormat();
                        $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
                }
                $this->dateFormatStrings[$type][$pref] = $df;
        }
        return $this->dateFormatStrings[$type][$pref];
}
1989
1990         /**
1991          * @param $ts Mixed: the time format which needs to be turned into a
1992          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1993          * @param $adj Bool: whether to adjust the time output according to the
1994          *             user configured offset ($timecorrection)
1995          * @param $format Mixed: true to use user's date format preference
1996          * @param $timecorrection String|bool the time offset as returned by
1997          *                        validateTimeZone() in Special:Preferences
1998          * @return string
1999          */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
        // Render only the date part of $ts using this language's 'date' format.
        // The timestamp is first converted to TS_MW form and, when $adj is set,
        // shifted through userAdjust() with the supplied $timecorrection.
        $stamp = wfTimestamp( TS_MW, $ts );
        $stamp = $adj ? $this->userAdjust( $stamp, $timecorrection ) : $stamp;

        $pattern = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );
        return $this->sprintfDate( $pattern, $stamp );
}
2008
2009         /**
2010          * @param $ts Mixed: the time format which needs to be turned into a
2011          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2012          * @param $adj Bool: whether to adjust the time output according to the
2013          *             user configured offset ($timecorrection)
2014          * @param $format Mixed: true to use user's date format preference
2015          * @param $timecorrection String|bool the time offset as returned by
2016          *                        validateTimeZone() in Special:Preferences
2017          * @return string
2018          */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
        // Render only the time part of $ts using this language's 'time' format.
        // The timestamp is first converted to TS_MW form and, when $adj is set,
        // shifted through userAdjust() with the supplied $timecorrection.
        $stamp = wfTimestamp( TS_MW, $ts );
        $stamp = $adj ? $this->userAdjust( $stamp, $timecorrection ) : $stamp;

        $pattern = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );
        return $this->sprintfDate( $pattern, $stamp );
}
2027
2028         /**
2029          * @param $ts Mixed: the time format which needs to be turned into a
2030          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2031          * @param $adj Bool: whether to adjust the time output according to the
2032          *             user configured offset ($timecorrection)
2033          * @param $format Mixed: what format to return, if it's false output the
2034          *                default one (default true)
2035          * @param $timecorrection String|bool the time offset as returned by
2036          *                        validateTimeZone() in Special:Preferences
2037          * @return string
2038          */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
        // Render time and date together using this language's 'both' format.
        // The timestamp is first converted to TS_MW form and, when $adj is set,
        // shifted through userAdjust() with the supplied $timecorrection.
        $stamp = wfTimestamp( TS_MW, $ts );
        $stamp = $adj ? $this->userAdjust( $stamp, $timecorrection ) : $stamp;

        $pattern = $this->getDateFormatString( 'both', $this->dateFormat( $format ) );
        return $this->sprintfDate( $pattern, $stamp );
}
2047
2048         /**
2049          * Takes a number of seconds and turns it into a text using values such as hours and minutes.
2050          *
2051          * @since 1.20
2052          *
2053          * @param integer $seconds The amount of seconds.
2054          * @param array $chosenIntervals The intervals to enable.
2055          *
2056          * @return string
2057          */
public function formatDuration( $seconds, array $chosenIntervals = array() ) {
        // Split $seconds into intervals, render each one through its
        // 'duration-<interval>' message in this language, and join the
        // rendered pieces with listToText().
        $parts = array();

        foreach ( $this->getDurationIntervals( $seconds, $chosenIntervals ) as $unit => $count ) {
                $parts[] = wfMessage( 'duration-' . $unit )
                        ->numParams( $count )
                        ->inLanguage( $this )
                        ->escaped();
        }

        return $this->listToText( $parts );
}
2070
2071         /**
2072          * Takes a number of seconds and returns an array with a set of corresponding intervals.
2073          * For example 65 will be turned into array( minutes => 1, seconds => 5 ).
2074          *
2075          * @since 1.20
2076          *
2077          * @param integer $seconds The amount of seconds.
2078          * @param array $chosenIntervals The intervals to enable.
2079          *
2080          * @return array
2081          */
public function getDurationIntervals( $seconds, array $chosenIntervals = array() ) {
        // Break $seconds down into counts per enabled interval, keyed by
        // interval name. An empty selection enables every interval listed below.
        if ( !$chosenIntervals ) {
                $chosenIntervals = array( 'millennia', 'centuries', 'decades', 'years', 'days', 'hours', 'minutes', 'seconds' );
        }

        // Keep only the enabled intervals, in the order self::$durationIntervals lists them.
        $enabled = array_intersect_key( self::$durationIntervals, array_flip( $chosenIntervals ) );
        $unitNames = array_keys( $enabled );
        $lastUnit = array_pop( $unitNames );

        $result = array();

        foreach ( $enabled as $unit => $unitLength ) {
                $count = floor( $seconds / $unitLength );

                // The last enabled interval is reported even when it is zero if
                // nothing else was, so a non-empty selection never yields an empty result.
                $forceLast = ( $unit == $lastUnit ) && !$result;
                if ( !( $count > 0 ) && !$forceLast ) {
                        continue;
                }

                $seconds -= $count * $unitLength;
                $result[$unit] = $count;
        }

        return $result;
}
2104
2105         /**
2106          * Internal helper function for userDate(), userTime() and userTimeAndDate()
2107          *
2108          * @param $type String: can be 'date', 'time' or 'both'
2109          * @param $ts Mixed: the time format which needs to be turned into a
2110          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2111          * @param $user User object used to get preferences for timezone and format
2112          * @param $options Array, can contain the following keys:
2113          *        - 'timecorrection': time correction, can have the following values:
2114          *             - true: use user's preference
2115          *             - false: don't use time correction
2116          *             - integer: value of time correction in minutes
2117          *        - 'format': format to use, can have the following values:
2118          *             - true: use user's preference
2119          *             - false: use default preference
2120          *             - string: format to use
2121          * @since 1.19
2122          * @return String
2123          */
private function internalUserTimeAndDate( $type, $ts, User $user, array $options ) {
        // Shared implementation behind userDate(), userTime() and userTimeAndDate().
        // Missing option keys default to true, i.e. "take it from $user".
        $options += array( 'timecorrection' => true, 'format' => true );
        $stamp = wfTimestamp( TS_MW, $ts );

        // false disables the adjustment, true reads the user's own option,
        // anything else is passed to userAdjust() as the offset.
        $correction = $options['timecorrection'];
        if ( $correction !== false ) {
                if ( $correction === true ) {
                        $correction = $user->getOption( 'timecorrection' );
                }
                $stamp = $this->userAdjust( $stamp, $correction );
        }

        // true selects the user's date preference; other values go to dateFormat() as given.
        $formatChoice = $options['format'];
        if ( $formatChoice === true ) {
                $formatChoice = $user->getDatePreference();
        }

        $pattern = $this->getDateFormatString( $type, $this->dateFormat( $formatChoice ) );
        return $this->sprintfDate( $pattern, $stamp );
}
2143
2144         /**
2145          * Get the formatted date for the given timestamp and formatted for
2146          * the given user.
2147          *
2148          * @param $ts Mixed: the time format which needs to be turned into a
2149          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2150          * @param $user User object used to get preferences for timezone and format
2151          * @param $options Array, can contain the following keys:
2152          *        - 'timecorrection': time correction, can have the following values:
2153          *             - true: use user's preference
2154          *             - false: don't use time correction
2155          *             - integer: value of time correction in minutes
2156          *        - 'format': format to use, can have the following values:
2157          *             - true: use user's preference
2158          *             - false: use default preference
2159          *             - string: format to use
2160          * @since 1.19
2161          * @return String
2162          */
public function userDate( $ts, User $user, array $options = array() ) {
        // Delegate to the shared helper, asking for the 'date' format.
        $formatted = $this->internalUserTimeAndDate( 'date', $ts, $user, $options );
        return $formatted;
}
2166
2167         /**
2168          * Get the formatted time for the given timestamp and formatted for
2169          * the given user.
2170          *
2171          * @param $ts Mixed: the time format which needs to be turned into a
2172          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2173          * @param $user User object used to get preferences for timezone and format
2174          * @param $options Array, can contain the following keys:
2175          *        - 'timecorrection': time correction, can have the following values:
2176          *             - true: use user's preference
2177          *             - false: don't use time correction
2178          *             - integer: value of time correction in minutes
2179          *        - 'format': format to use, can have the following values:
2180          *             - true: use user's preference
2181          *             - false: use default preference
2182          *             - string: format to use
2183          * @since 1.19
2184          * @return String
2185          */
public function userTime( $ts, User $user, array $options = array() ) {
        // Delegate to the shared helper, asking for the 'time' format.
        $formatted = $this->internalUserTimeAndDate( 'time', $ts, $user, $options );
        return $formatted;
}
2189
2190         /**
2191          * Get the formatted date and time for the given timestamp and formatted for
2192          * the given user.
2193          *
2194          * @param $ts Mixed: the time format which needs to be turned into a
2195          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2196          * @param $user User object used to get preferences for timezone and format
2197          * @param $options Array, can contain the following keys:
2198          *        - 'timecorrection': time correction, can have the following values:
2199          *             - true: use user's preference
2200          *             - false: don't use time correction
2201          *             - integer: value of time correction in minutes
2202          *        - 'format': format to use, can have the following values:
2203          *             - true: use user's preference
2204          *             - false: use default preference
2205          *             - string: format to use
2206          * @since 1.19
2207          * @return String
2208          */
public function userTimeAndDate( $ts, User $user, array $options = array() ) {
        // Delegate to the shared helper, asking for the combined 'both' format.
        $formatted = $this->internalUserTimeAndDate( 'both', $ts, $user, $options );
        return $formatted;
}
2212
2213         /**
2214          * @param $key string
2215          * @return array|null
2216          */
function getMessage( $key ) {
        // Fetch the single 'messages' entry stored under $key for this language code.
        $entry = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
        return $entry;
}
2220
2221         /**
2222          * @return array
2223          */
function getAllMessages() {
        // Fetch the whole 'messages' item for this language code from the data cache.
        $allMessages = self::$dataCache->getItem( $this->mCode, 'messages' );
        return $allMessages;
}
2227
2228         /**
2229          * @param $in
2230          * @param $out
2231          * @param $string
2232          * @return string
2233          */
function iconv( $in, $out, $string ) {
        # Plain wrapper around PHP's iconv(). It exists so that languages needing
        # extra work (Esperanto performs x-conversions first) can override it.
        $target = $out . '//IGNORE';

        # iconv may still emit warnings about illegal characters in the *input*
        # string even with //IGNORE, so warnings are suppressed around the call.
        # REF: http://bugs.php.net/bug.php?id=37166
        # REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
        wfSuppressWarnings();
        $converted = iconv( $in, $target, $string );
        wfRestoreWarnings();

        return $converted;
}
2247
2248         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
2249
2250         /**
2251          * @param $matches array
2252          * @return mixed|string
2253          */
function ucwordbreaksCallbackAscii( $matches ) {
        // Regex callback: upper-case the first character of the captured word (group 1).
        $word = $matches[1];
        return $this->ucfirst( $word );
}
2257
2258         /**
2259          * @param $matches array
2260          * @return string
2261          */
function ucwordbreaksCallbackMB( $matches ) {
        // Regex callback: upper-case the entire match (group 0) with mbstring.
        $matched = $matches[0];
        return mb_strtoupper( $matched );
}
2265
2266         /**
2267          * @param $matches array
2268          * @return string
2269          */
function ucCallback( $matches ) {
        // Regex callback: translate captured group 1 through the first
        // (upper-casing) map returned by getCaseMaps().
        $caseMaps = self::getCaseMaps();
        $upperMap = $caseMaps[0];
        return strtr( $matches[1], $upperMap );
}
2274
2275         /**
2276          * @param $matches array
2277          * @return string
2278          */
function lcCallback( $matches ) {
        // Regex callback: translate captured group 1 through the second
        // (lower-casing) map returned by getCaseMaps().
        $caseMaps = self::getCaseMaps();
        $lowerMap = $caseMaps[1];
        return strtr( $matches[1], $lowerMap );
}
2283
2284         /**
2285          * @param $matches array
2286          * @return string
2287          */
function ucwordsCallbackMB( $matches ) {
        // Regex callback: upper-case the entire match (group 0) with mbstring.
        $matched = $matches[0];
        return mb_strtoupper( $matched );
}
2291
2292         /**
2293          * @param $matches array
2294          * @return string
2295          */
function ucwordsCallbackWiki( $matches ) {
        // Regex callback: translate the entire match (group 0) through the first
        // (upper-casing) map returned by getCaseMaps().
        $caseMaps = self::getCaseMaps();
        $upperMap = $caseMaps[0];
        return strtr( $matches[0], $upperMap );
}
2300
2301         /**
2302          * Make a string's first character uppercase
2303          *
2304          * @param $str string
2305          *
2306          * @return string
2307          */
function ucfirst( $str ) {
        // Dispatch on the first byte of the string.
        $firstByte = ord( $str );

        if ( $firstByte >= 128 ) {
                // Non-ASCII lead byte: use the multibyte-aware logic in uc().
                return $this->uc( $str, true );
        }

        if ( $firstByte >= 96 ) {
                // Possibly a lowercase ASCII letter: PHP's own ucfirst() handles it.
                return ucfirst( $str );
        }

        // Bytes below 96 have nothing to upper-case; return the input untouched.
        return $str;
}
2319
2320         /**
2321          * Convert a string to uppercase
2322          *
2323          * @param $str string
2324          * @param $first bool
2325          *
2326          * @return string
2327          */
function uc( $str, $first = false ) {
        // Upper-case $str, or only its first character when $first is true.
        $hasMultibyte = $this->isMultibyte( $str );

        if ( !$hasMultibyte ) {
                // Pure single-byte input: the native byte-wise functions are enough.
                return $first ? ucfirst( $str ) : strtoupper( $str );
        }

        if ( function_exists( 'mb_strtoupper' ) ) {
                if ( !$first ) {
                        return mb_strtoupper( $str );
                }
                return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
        }

        // No mbstring available: translate each matched character through
        // ucCallback(); the pattern is anchored when only the first one is wanted.
        $anchor = $first ? '^' : '';
        return preg_replace_callback(
                "/{$anchor}([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
                array( $this, 'ucCallback' ),
                $str
        );
}
2352
2353         /**
2354          * @param $str string
2355          * @return mixed|string
2356          */
function lcfirst( $str ) {
        // Lower-case the first character of $str, dispatching on its first byte.
        $firstByte = ord( $str );

        if ( $firstByte === 0 ) {
                // ord() gave 0 (e.g. empty input): hand back the string form unchanged.
                return strval( $str );
        }

        if ( $firstByte >= 128 ) {
                // Non-ASCII lead byte: use the multibyte-aware logic in lc().
                return $this->lc( $str, true );
        }

        if ( $firstByte <= 96 ) {
                // Bytes above 96 are left as they are; anything else gets its
                // first byte lower-cased in place.
                $str[0] = strtolower( $str[0] );
        }

        return $str;
}
2370
2371         /**
2372          * @param $str string
2373          * @param $first bool
2374          * @return mixed|string
2375          */
function lc( $str, $first = false ) {
        // Lower-case $str, or only its first character when $first is true.
        $hasMultibyte = $this->isMultibyte( $str );

        if ( !$hasMultibyte ) {
                // Pure single-byte input: the native byte-wise functions are enough.
                if ( $first ) {
                        return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
                }
                return strtolower( $str );
        }

        if ( function_exists( 'mb_strtolower' ) ) {
                if ( !$first ) {
                        return mb_strtolower( $str );
                }
                return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
        }

        // No mbstring available: translate each matched character through
        // lcCallback(); the pattern is anchored when only the first one is wanted.
        $anchor = $first ? '^' : '';
        return preg_replace_callback(
                "/{$anchor}([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
                array( $this, 'lcCallback' ),
                $str
        );
}
2400
2401         /**
2402          * @param $str string
2403          * @return bool
2404          */
function isMultibyte( $str ) {
        // True when $str contains at least one byte in the range 0x80-0xFF.
        $found = preg_match( '/[\x80-\xff]/', $str );
        return $found === 1;
}
2408
2409         /**
2410          * @param $str string
2411          * @return mixed|string
2412          */
function ucwords( $str ) {
        // Lower-case the whole string, then capitalise the first letter of each
        // space-separated word.
        if ( !$this->isMultibyte( $str ) ) {
                return ucwords( strtolower( $str ) );
        }

        $lowered = $this->lc( $str );

        // Matches the first letter of the string and the first letter after each space.
        $firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

        // Prefer mbstring for capitalising a match; otherwise use the wiki case maps.
        $callbackName = function_exists( 'mb_strtoupper' )
                ? 'ucwordsCallbackMB'
                : 'ucwordsCallbackWiki';

        return preg_replace_callback(
                $firstLetters,
                array( $this, $callbackName ),
                $lowered
        );
}
2438
2439         /**
2440          * capitalize words at word breaks
2441          *
2442          * @param $str string
2443          * @return mixed
2444          */
function ucwordbreaks( $str ) {
        // Capitalise the first letter following each word break.
        if ( !$this->isMultibyte( $str ) ) {
                // Single-byte input: \b works, so capitalise every \b-delimited word.
                return preg_replace_callback(
                        '/\b([\w\x80-\xff]+)\b/',
                        array( $this, 'ucwordbreaksCallbackAscii' ),
                        $str
                );
        }

        $lowered = $this->lc( $str );

        // \b does not work for UTF-8, so the word break characters are listed explicitly.
        $breakChars = "[ \-\(\)\}\{\.,\?!]";

        // Matches the first letter of the string and the first letter after a break character.
        $afterBreak = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|{$breakChars}([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

        // Prefer mbstring for capitalising a match; otherwise use the wiki case maps.
        $callbackName = function_exists( 'mb_strtoupper' )
                ? 'ucwordbreaksCallbackMB'
                : 'ucwordsCallbackWiki';

        return preg_replace_callback(
                $afterBreak,
                array( $this, $callbackName ),
                $lowered
        );
}
2476
2477         /**
2478          * Return a case-folded representation of $s
2479          *
2480          * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
2481          * and $s2 are the same except for the case of their characters. It is not
2482          * necessary for the value returned to make sense when displayed.
2483          *
2484          * Do *not* perform any other normalisation in this function. If a caller
2485          * uses this function when it should be using a more general normalisation
2486          * function, then fix the caller.
2487          *
2488          * @param $s string
2489          *
2490          * @return string
2491          */
function caseFold( $s ) {
        // Case folding is implemented as plain upper-casing of the whole string.
        $folded = $this->uc( $s );
        return $folded;
}
2495
2496         /**
2497          * @param $s string
2498          * @return string
2499          */
function checkTitleEncoding( $s ) {
        // Return $s unchanged when it is already UTF-8; otherwise convert it
        // from this language's fallback 8-bit encoding to UTF-8.
        if ( is_array( $s ) ) {
                wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
        }

        return StringUtils::isUtf8( $s )
                ? $s
                : $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
}
2510
2511         /**
2512          * @return string
2513          */
function fallback8bitEncoding() {
        // Fetch the 'fallback8bitEncoding' item for this language code from the data cache.
        $encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
        return $encoding;
}
2517
2518         /**
2519          * Most writing systems use whitespace to break up words.
2520          * Some languages such as Chinese don't conventionally do this,
2521          * which requires special handling when breaking up words for
2522          * searching etc.
2523          *
2524          * @return bool
2525          */
function hasWordBreaks() {
        // Base implementation: words are taken to be separated by whitespace.
        $usesWordBreaks = true;
        return $usesWordBreaks;
}
2529
2530         /**
2531          * Some languages, such as Chinese, require word segmentation.
2532          * Specify such segmentation when overriding this in a derived class.
2533          *
2534          * @param $string String
2535          * @return String
2536          */
function segmentByWord( $string ) {
        // Base implementation performs no segmentation and returns the input as is.
        $segmented = $string;
        return $segmented;
}
2540
2541         /**
2542          * Some languages have special punctuation that needs to be normalized.
2543          * Make such changes here.
2544          *
2545          * @param $string String
2546          * @return String
2547          */
function normalizeForSearch( $string ) {
        // The base normalisation only folds double-width characters to single width.
        $normalized = self::convertDoubleWidth( $string );
        return $normalized;
}
2551
2552         /**
2553          * convert double-width roman characters to single-width.
2554          * range: ff00-ff5f ~= 0020-007f
2555          *
2556          * @param $string string
2557          *
2558          * @return string
2559          */
protected static function convertDoubleWidth( $string ) {
        // Search/replace tables are built once and kept for the rest of the request.
        static $wide = null;
        static $narrow = null;

        if ( $wide === null ) {
                // The double-width string is cut into 3-byte pieces and the
                // single-width string into 1-byte pieces, so the two lists pair up.
                $wide = str_split(
                        "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ",
                        3
                );
                $narrow = str_split(
                        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
                );
        }

        return str_replace( $wide, $narrow, $string );
}
2574
2575         /**
2576          * @param $string string
2577          * @param $pattern string
2578          * @return string
2579          */
protected static function insertSpace( $string, $pattern ) {
        // Surround the first capture group of every $pattern match with spaces,
        // then collapse each run of spaces into a single space.
        $spaced = preg_replace( $pattern, " $1 ", $string );
        return preg_replace( '/ +/', ' ', $spaced );
}
2585
2586         /**
2587          * @param $termsArray array
2588          * @return array
2589          */
function convertForSearchResult( $termsArray ) {
        # Base implementation returns the terms untouched. Some languages
        # (e.g. Chinese) need a conversion here so that search results are
        # displayed correctly.
        $converted = $termsArray;
        return $converted;
}
2595
2596         /**
2597          * Get the first character of a string.
2598          *
2599          * @param $s string
2600          * @return string
2601          */
function firstChar( $s ) {
        // Grab the leading UTF-8 sequence (1 to 4 bytes) of $s; an empty string
        // is returned when $s does not start with one.
        $found = array();
        preg_match(
                '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
                        '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
                $s,
                $found
        );

        if ( !isset( $found[1] ) ) {
                return '';
        }

        $char = $found[1];
        if ( strlen( $char ) != 3 ) {
                return $char;
        }

        // Only 3-byte sequences are decoded; code points outside 0xAC00-0xD7A3
        // are returned as they are.
        $code = utf8ToCodepoint( $char );
        if ( $code < 0xac00 || 0xd7a4 <= $code ) {
                return $char;
        }

        // Break down Hangul syllables to grab the first jamo: each entry maps an
        // exclusive upper bound of a syllable range to the jamo returned for it.
        $jamoByUpperBound = array(
                0xb098 => "\xe3\x84\xb1",
                0xb2e4 => "\xe3\x84\xb4",
                0xb77c => "\xe3\x84\xb7",
                0xb9c8 => "\xe3\x84\xb9",
                0xbc14 => "\xe3\x85\x81",
                0xc0ac => "\xe3\x85\x82",
                0xc544 => "\xe3\x85\x85",
                0xc790 => "\xe3\x85\x87",
                0xcc28 => "\xe3\x85\x88",
                0xce74 => "\xe3\x85\x8a",
                0xd0c0 => "\xe3\x85\x8b",
                0xd30c => "\xe3\x85\x8c",
                0xd558 => "\xe3\x85\x8d",
        );

        foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
                if ( $code < $upperBound ) {
                        return $jamo;
                }
        }

        // Everything from 0xD558 up to the end of the handled range.
        return "\xe3\x85\x8e";
}
2653
function initEncoding() {
        # Intentionally empty in the base class.
        # A language with an alternate character encoding option (Esperanto
        # X-coding, Japanese furigana conversion, etc.) can override this to
        # change the defaults at startup when it is the primary content language.
}
2660
2661         /**
2662          * @param $s string
2663          * @return string
2664          */
function recodeForEdit( $s ) {
        # Some languages want explicit control over which characters reach the
        # edit box raw and which are converted. Here the text is converted from
        # UTF-8 to $wgEditEncoding, unless that setting is empty or already UTF-8.
        global $wgEditEncoding;

        $needsRecoding = !( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );

        return $needsRecoding
                ? $this->iconv( 'UTF-8', $wgEditEncoding, $s )
                : $s;
}
2676
2677         /**
2678          * @param $s string
2679          * @return string
2680          */
function recodeInput( $s ) {
        # Counterpart of recodeForEdit(): convert submitted text from
        # $wgEditEncoding back to UTF-8. An empty setting counts as UTF-8.
        global $wgEditEncoding;

        $sourceEncoding = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

        return ( $sourceEncoding == 'UTF-8' )
                ? $s
                : $this->iconv( $sourceEncoding, 'UTF-8', $s );
}
2695
2696         /**
2697          * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
2698          * also cleans up certain backwards-compatible sequences, converting them
2699          * to the modern Unicode equivalent.
2700          *
2701          * This is language-specific for performance reasons only.
2702          *
2703          * @param $s string
2704          *
2705          * @return string
2706          */
2707         function normalize( $s ) {
2708                 global $wgAllUnicodeFixes;
2709                 $s = UtfNormal::cleanUp( $s );
2710                 if ( $wgAllUnicodeFixes ) {
2711                         $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s );
2712                         $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s );
2713                 }
2714
2715                 return $s;
2716         }
2717
2718         /**
2719          * Transform a string using serialized data stored in the given file (which
2720          * must be in the serialized subdirectory of $IP). The file contains pairs
2721          * mapping source characters to destination characters.
2722          *
2723          * The data is cached in process memory. This will go faster if you have the
2724          * FastStringSearch extension.
2725          *
2726          * @param $file string
2727          * @param $string string
2728          *
2729          * @throws MWException
2730          * @return string
2731          */
2732         function transformUsingPairFile( $file, $string ) {
2733                 if ( !isset( $this->transformData[$file] ) ) {
2734                         $data = wfGetPrecompiledData( $file );
2735                         if ( $data === false ) {
2736                                 throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
2737                         }
2738                         $this->transformData[$file] = new ReplacementArray( $data );
2739                 }
2740                 return $this->transformData[$file]->replace( $string );
2741         }
2742
2743         /**
2744          * For right-to-left language support
2745          *
2746          * @return bool
2747          */
2748         function isRTL() {
2749                 return self::$dataCache->getItem( $this->mCode, 'rtl' );
2750         }
2751
2752         /**
2753          * Return the correct HTML 'dir' attribute value for this language.
2754          * @return String
2755          */
2756         function getDir() {
2757                 return $this->isRTL() ? 'rtl' : 'ltr';
2758         }
2759
2760         /**
2761          * Return 'left' or 'right' as appropriate alignment for line-start
2762          * for this language's text direction.
2763          *
2764          * Should be equivalent to CSS3 'start' text-align value....
2765          *
2766          * @return String
2767          */
2768         function alignStart() {
2769                 return $this->isRTL() ? 'right' : 'left';
2770         }
2771
2772         /**
2773          * Return 'right' or 'left' as appropriate alignment for line-end
2774          * for this language's text direction.
2775          *
2776          * Should be equivalent to CSS3 'end' text-align value....
2777          *
2778          * @return String
2779          */
2780         function alignEnd() {
2781                 return $this->isRTL() ? 'left' : 'right';
2782         }
2783
2784         /**
2785          * A hidden direction mark (LRM or RLM), depending on the language direction.
2786          * Unlike getDirMark(), this function returns the character as an HTML entity.
2787          * This function should be used when the output is guaranteed to be HTML,
2788          * because it makes the output HTML source code more readable. When
2789          * the output is plain text or can be escaped, getDirMark() should be used.
2790          *
2791          * @param $opposite Boolean Get the direction mark opposite to your language
2792          * @return string
2793          * @since 1.20
2794          */
2795         function getDirMarkEntity( $opposite = false ) {
2796                 if ( $opposite ) { return $this->isRTL() ? '&lrm;' : '&rlm;'; }
2797                 return $this->isRTL() ? '&rlm;' : '&lrm;';
2798         }
2799
2800         /**
2801          * A hidden direction mark (LRM or RLM), depending on the language direction.
2802          * This function produces them as invisible Unicode characters and
2803          * the output may be hard to read and debug, so it should only be used
2804          * when the output is plain text or can be escaped. When the output is
2805          * HTML, use getDirMarkEntity() instead.
2806          *
2807          * @param $opposite Boolean Get the direction mark opposite to your language
2808          * @return string
2809          */
2810         function getDirMark( $opposite = false ) {
2811                 $lrm = "\xE2\x80\x8E"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
2812                 $rlm = "\xE2\x80\x8F"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM
2813                 if ( $opposite ) { return $this->isRTL() ? $lrm : $rlm; }
2814                 return $this->isRTL() ? $rlm : $lrm;
2815         }
2816
2817         /**
2818          * @return array
2819          */
2820         function capitalizeAllNouns() {
2821                 return self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
2822         }
2823
2824         /**
2825          * An arrow, depending on the language direction.
2826          *
2827          * @param $direction String: the direction of the arrow: forwards (default), backwards, left, right, up, down.
2828          * @return string
2829          */
2830         function getArrow( $direction = 'forwards' ) {
2831                 switch ( $direction ) {
2832                 case 'forwards':
2833                         return $this->isRTL() ? '←' : '→';
2834                 case 'backwards':
2835                         return $this->isRTL() ? '→' : '←';
2836                 case 'left':
2837                         return '←';
2838                 case 'right':
2839                         return '→';
2840                 case 'up':
2841                         return '↑';
2842                 case 'down':
2843                         return '↓';
2844                 }
2845         }
2846
2847         /**
2848          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
2849          *
2850          * @return bool
2851          */
2852         function linkPrefixExtension() {
2853                 return self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
2854         }
2855
2856         /**
2857          * @return array
2858          */
2859         function getMagicWords() {
2860                 return self::$dataCache->getItem( $this->mCode, 'magicWords' );
2861         }
2862
2863         protected function doMagicHook() {
2864                 if ( $this->mMagicHookDone ) {
2865                         return;
2866                 }
2867                 $this->mMagicHookDone = true;
2868                 wfProfileIn( 'LanguageGetMagic' );
2869                 wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
2870                 wfProfileOut( 'LanguageGetMagic' );
2871         }
2872
2873         /**
2874          * Fill a MagicWord object with data from here
2875          *
2876          * @param $mw
2877          */
2878         function getMagic( $mw ) {
2879                 $this->doMagicHook();
2880
2881                 if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
2882                         $rawEntry = $this->mMagicExtensions[$mw->mId];
2883                 } else {
2884                         $magicWords = $this->getMagicWords();
2885                         if ( isset( $magicWords[$mw->mId] ) ) {
2886                                 $rawEntry = $magicWords[$mw->mId];
2887                         } else {
2888                                 $rawEntry = false;
2889                         }
2890                 }
2891
2892                 if ( !is_array( $rawEntry ) ) {
2893                         error_log( "\"$rawEntry\" is not a valid magic word for \"$mw->mId\"" );
2894                 } else {
2895                         $mw->mCaseSensitive = $rawEntry[0];
2896                         $mw->mSynonyms = array_slice( $rawEntry, 1 );
2897                 }
2898         }
2899
2900         /**
2901          * Add magic words to the extension array
2902          *
2903          * @param $newWords array
2904          */
2905         function addMagicWordsByLang( $newWords ) {
2906                 $fallbackChain = $this->getFallbackLanguages();
2907                 $fallbackChain = array_reverse( $fallbackChain );
2908                 foreach ( $fallbackChain as $code ) {
2909                         if ( isset( $newWords[$code] ) ) {
2910                                 $this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
2911                         }
2912                 }
2913         }
2914
2915         /**
2916          * Get special page names, as an associative array
2917          *   case folded alias => real name
2918          */
2919         function getSpecialPageAliases() {
2920                 // Cache aliases because it may be slow to load them
2921                 if ( is_null( $this->mExtendedSpecialPageAliases ) ) {
2922                         // Initialise array
2923                         $this->mExtendedSpecialPageAliases =
2924                                 self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
2925                         wfRunHooks( 'LanguageGetSpecialPageAliases',
2926                                 array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );
2927                 }
2928
2929                 return $this->mExtendedSpecialPageAliases;
2930         }
2931
2932         /**
2933          * Italic is unsuitable for some languages
2934          *
2935          * @param $text String: the text to be emphasized.
2936          * @return string
2937          */
2938         function emphasize( $text ) {
2939                 return "<em>$text</em>";
2940         }
2941
2942         /**
2943          * Normally we output all numbers in plain en_US style, that is
2944          * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
2945          * point twohundredthirtyfive. However this is not suitable for all
2946          * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
2947          * Icelandic just want to use commas instead of dots, and dots instead
2948          * of commas like "293.291,235".
2949          *
2950          * An example of this function being called:
2951          * <code>
2952          * wfMessage( 'message' )->numParams( $num )->text()
2953          * </code>
2954          *
2955          * See LanguageGu.php for the Gujarati implementation and
2956          * $separatorTransformTable on MessageIs.php for
2957          * the , => . and . => , implementation.
2958          *
2959          * @todo check if it's viable to use localeconv() for the decimal
2960          *       separator thing.
2961          * @param $number Mixed: the string to be formatted, should be an integer
2962          *        or a floating point number.
2963          * @param $nocommafy Bool: set to true for special numbers like dates
2964          * @return string
2965          */
2966         public function formatNum( $number, $nocommafy = false ) {
2967                 global $wgTranslateNumerals;
2968                 if ( !$nocommafy ) {
2969                         $number = $this->commafy( $number );
2970                         $s = $this->separatorTransformTable();
2971                         if ( $s ) {
2972                                 $number = strtr( $number, $s );
2973                         }
2974                 }
2975
2976                 if ( $wgTranslateNumerals ) {
2977                         $s = $this->digitTransformTable();
2978                         if ( $s ) {
2979                                 $number = strtr( $number, $s );
2980                         }
2981                 }
2982
2983                 return $number;
2984         }
2985
2986         /**
2987          * @param $number string
2988          * @return string
2989          */
2990         function parseFormattedNumber( $number ) {
2991                 $s = $this->digitTransformTable();
2992                 if ( $s ) {
2993                         $number = strtr( $number, array_flip( $s ) );
2994                 }
2995
2996                 $s = $this->separatorTransformTable();
2997                 if ( $s ) {
2998                         $number = strtr( $number, array_flip( $s ) );
2999                 }
3000
3001                 $number = strtr( $number, array( ',' => '' ) );
3002                 return $number;
3003         }
3004
3005         /**
3006          * Adds commas to a given number
3007          * @since 1.19
3008          * @param $number mixed
3009          * @return string
3010          */
3011         function commafy( $number ) {
3012                 $digitGroupingPattern = $this->digitGroupingPattern();
3013                 if ( $number === null ) {
3014                         return '';
3015                 }
3016
3017                 if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
3018                         // default grouping is at thousands,  use the same for ###,###,### pattern too.
3019                         return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
3020                 } else {
3021                         // Ref: http://cldr.unicode.org/translation/number-patterns
3022                         $sign = "";
3023                         if ( intval( $number ) < 0 ) {
3024                                 // For negative numbers apply the algorithm like positive number and add sign.
3025                                 $sign =  "-";
3026                                 $number = substr( $number, 1 );
3027                         }
3028                         $integerPart = array();
3029                         $decimalPart = array();
3030                         $numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches );
3031                         preg_match( "/\d+/", $number, $integerPart );
3032                         preg_match( "/\.\d*/", $number, $decimalPart );
3033                         $groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0]:"";
3034                         if ( $groupedNumber  === $number ) {
3035                                 // the string does not have any number part. Eg: .12345
3036                                 return $sign . $groupedNumber;
3037                         }
3038                         $start = $end = strlen( $integerPart[0] );
3039                         while ( $start > 0 ) {
3040                                 $match = $matches[0][$numMatches -1] ;
3041                                 $matchLen = strlen( $match );
3042                                 $start = $end - $matchLen;
3043                                 if ( $start < 0 ) {
3044                                         $start = 0;
3045                                 }
3046                                 $groupedNumber = substr( $number , $start, $end -$start ) . $groupedNumber ;
3047                                 $end = $start;
3048                                 if ( $numMatches > 1 ) {
3049                                         // use the last pattern for the rest of the number
3050                                         $numMatches--;
3051                                 }
3052                                 if ( $start > 0 ) {
3053                                         $groupedNumber = "," . $groupedNumber;
3054                                 }
3055                         }
3056                         return $sign . $groupedNumber;
3057                 }
3058         }
3059
3060         /**
3061          * @return String
3062          */
3063         function digitGroupingPattern() {
3064                 return self::$dataCache->getItem( $this->mCode, 'digitGroupingPattern' );
3065         }
3066
3067         /**
3068          * @return array
3069          */
3070         function digitTransformTable() {
3071                 return self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
3072         }
3073
3074         /**
3075          * @return array
3076          */
3077         function separatorTransformTable() {
3078                 return self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
3079         }
3080
3081         /**
3082          * Take a list of strings and build a locale-friendly comma-separated
3083          * list, using the local comma-separator message.
3084          * The last two strings are chained with an "and".
3085          * NOTE: This function will only work with standard numeric array keys (0, 1, 2…)
3086          *
3087          * @param $l Array
3088          * @return string
3089          */
3090         function listToText( array $l ) {
3091                 $m = count( $l ) - 1;
3092                 if ( $m < 0 ) {
3093                         return '';
3094                 }
3095                 if ( $m > 0 ) {
3096                         $and = $this->getMessageFromDB( 'and' );
3097                         $space = $this->getMessageFromDB( 'word-separator' );
3098                         if ( $m > 1 ) {
3099                                 $comma = $this->getMessageFromDB( 'comma-separator' );
3100                         }
3101                 }
3102                 $s = $l[$m];
3103                 for ( $i = $m - 1; $i >= 0; $i-- ) {
3104                         if ( $i == $m - 1 ) {
3105                                 $s = $l[$i] . $and . $space . $s;
3106                         } else {
3107                                 $s = $l[$i] . $comma . $s;
3108                         }
3109                 }
3110                 return $s;
3111         }
3112
3113         /**
3114          * Take a list of strings and build a locale-friendly comma-separated
3115          * list, using the local comma-separator message.
3116          * @param $list array of strings to put in a comma list
3117          * @return string
3118          */
3119         function commaList( array $list ) {
3120                 return implode(
3121                         wfMessage( 'comma-separator' )->inLanguage( $this )->escaped(),
3122                         $list
3123                 );
3124         }
3125
3126         /**
3127          * Take a list of strings and build a locale-friendly semicolon-separated
3128          * list, using the local semicolon-separator message.
3129          * @param $list array of strings to put in a semicolon list
3130          * @return string
3131          */
3132         function semicolonList( array $list ) {
3133                 return implode(
3134                         wfMessage( 'semicolon-separator' )->inLanguage( $this )->escaped(),
3135                         $list
3136                 );
3137         }
3138
3139         /**
3140          * Same as commaList, but separate it with the pipe instead.
3141          * @param $list array of strings to put in a pipe list
3142          * @return string
3143          */
3144         function pipeList( array $list ) {
3145                 return implode(
3146                         wfMessage( 'pipe-separator' )->inLanguage( $this )->escaped(),
3147                         $list
3148                 );
3149         }
3150
3151         /**
3152          * Truncate a string to a specified length in bytes, appending an optional
3153          * string (e.g. for ellipses)
3154          *
3155          * The database offers limited byte lengths for some columns in the database;
3156          * multi-byte character sets mean we need to ensure that only whole characters
3157          * are included, otherwise broken characters can be passed to the user
3158          *
3159          * If $length is negative, the string will be truncated from the beginning
3160          *
3161          * @param $string String to truncate
3162          * @param $length Int: maximum length (including ellipses)
3163          * @param $ellipsis String to append to the truncated text
3164          * @param $adjustLength Boolean: Subtract length of ellipsis from $length.
3165          *      $adjustLength was introduced in 1.18, before that behaved as if false.
3166          * @return string
3167          */
3168         function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
3169                 # Use the localized ellipsis character
3170                 if ( $ellipsis == '...' ) {
3171                         $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
3172                 }
3173                 # Check if there is no need to truncate
3174                 if ( $length == 0 ) {
3175                         return $ellipsis; // convention
3176                 } elseif ( strlen( $string ) <= abs( $length ) ) {
3177                         return $string; // no need to truncate
3178                 }
3179                 $stringOriginal = $string;
3180                 # If ellipsis length is >= $length then we can't apply $adjustLength
3181                 if ( $adjustLength && strlen( $ellipsis ) >= abs( $length ) ) {
3182                         $string = $ellipsis; // this can be slightly unexpected
3183                 # Otherwise, truncate and add ellipsis...
3184                 } else {
3185                         $eLength = $adjustLength ? strlen( $ellipsis ) : 0;
3186                         if ( $length > 0 ) {
3187                                 $length -= $eLength;
3188                                 $string = substr( $string, 0, $length ); // xyz...
3189                                 $string = $this->removeBadCharLast( $string );
3190                                 $string = $string . $ellipsis;
3191                         } else {
3192                                 $length += $eLength;
3193                                 $string = substr( $string, $length ); // ...xyz
3194                                 $string = $this->removeBadCharFirst( $string );
3195                                 $string = $ellipsis . $string;
3196                         }
3197                 }
3198                 # Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
3199                 # This check is *not* redundant if $adjustLength, due to the single case where
3200                 # LEN($ellipsis) > ABS($limit arg); $stringOriginal could be shorter than $string.
3201                 if ( strlen( $string ) < strlen( $stringOriginal ) ) {
3202                         return $string;
3203                 } else {
3204                         return $stringOriginal;
3205                 }
3206         }
3207
3208         /**
3209          * Remove bytes that represent an incomplete Unicode character
3210          * at the end of string (e.g. bytes of the char are missing)
3211          *
3212          * @param $string String
3213          * @return string
3214          */
3215         protected function removeBadCharLast( $string ) {
3216                 if ( $string != '' ) {
3217                         $char = ord( $string[strlen( $string ) - 1] );
3218                         $m = array();
3219                         if ( $char >= 0xc0 ) {
3220                                 # We got the first byte only of a multibyte char; remove it.
3221                                 $string = substr( $string, 0, -1 );
3222                         } elseif ( $char >= 0x80 &&
3223                                   preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
3224                                                           '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
3225                         {
3226                                 # We chopped in the middle of a character; remove it
3227                                 $string = $m[1];
3228                         }
3229                 }
3230                 return $string;
3231         }
3232
3233         /**
3234          * Remove bytes that represent an incomplete Unicode character
3235          * at the start of string (e.g. bytes of the char are missing)
3236          *
3237          * @param $string String
3238          * @return string
3239          */
3240         protected function removeBadCharFirst( $string ) {
3241                 if ( $string != '' ) {
3242                         $char = ord( $string[0] );
3243                         if ( $char >= 0x80 && $char < 0xc0 ) {
3244                                 # We chopped in the middle of a character; remove the whole thing
3245                                 $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
3246                         }
3247                 }
3248                 return $string;
3249         }
3250
3251         /**
3252          * Truncate a string of valid HTML to a specified length in bytes,
3253          * appending an optional string (e.g. for ellipses), and return valid HTML
3254          *
3255          * This is only intended for styled/linked text, such as HTML with
3256          * tags like <span> and <a>, were the tags are self-contained (valid HTML).
3257          * Also, this will not detect things like "display:none" CSS.
3258          *
3259          * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
3260          *
3261          * @param string $text HTML string to truncate
3262          * @param int $length (zero/positive) Maximum length (including ellipses)
3263          * @param string $ellipsis String to append to the truncated text
3264          * @return string
3265          */
3266         function truncateHtml( $text, $length, $ellipsis = '...' ) {
3267                 # Use the localized ellipsis character
3268                 if ( $ellipsis == '...' ) {
3269                         $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
3270                 }
3271                 # Check if there is clearly no need to truncate
3272                 if ( $length <= 0 ) {
3273                         return $ellipsis; // no text shown, nothing to format (convention)
3274                 } elseif ( strlen( $text ) <= $length ) {
3275                         return $text; // string short enough even *with* HTML (short-circuit)
3276                 }
3277
3278                 $dispLen = 0; // innerHTML legth so far
3279                 $testingEllipsis = false; // checking if ellipses will make string longer/equal?
3280                 $tagType = 0; // 0-open, 1-close
3281                 $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
3282                 $entityState = 0; // 0-not entity, 1-entity
3283                 $tag = $ret = ''; // accumulated tag name, accumulated result string
3284                 $openTags = array(); // open tag stack
3285                 $maybeState = null; // possible truncation state
3286
3287                 $textLen = strlen( $text );
3288                 $neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
3289                 for ( $pos = 0; true; ++$pos ) {
3290                         # Consider truncation once the display length has reached the maximim.
3291                         # We check if $dispLen > 0 to grab tags for the $neLength = 0 case.
3292                         # Check that we're not in the middle of a bracket/entity...
3293                         if ( $dispLen && $dispLen >= $neLength && $bracketState == 0 && !$entityState ) {
3294                                 if ( !$testingEllipsis ) {
3295                                         $testingEllipsis = true;
3296                                         # Save where we are; we will truncate here unless there turn out to
3297                                         # be so few remaining characters that truncation is not necessary.
3298                                         if ( !$maybeState ) { // already saved? ($neLength = 0 case)
3299                                                 $maybeState = array( $ret, $openTags ); // save state
3300                                         }
3301                                 } elseif ( $dispLen > $length && $dispLen > strlen( $ellipsis ) ) {
3302                                         # String in fact does need truncation, the truncation point was OK.
3303                                         list( $ret, $openTags ) = $maybeState; // reload state
3304                                         $ret = $this->removeBadCharLast( $ret ); // multi-byte char fix
3305                                         $ret .= $ellipsis; // add ellipsis
3306                                         break;
3307                                 }
3308                         }
3309                         if ( $pos >= $textLen ) break; // extra iteration just for above checks
3310
3311                         # Read the next char...
3312                         $ch = $text[$pos];
3313                         $lastCh = $pos ? $text[$pos - 1] : '';
3314                         $ret .= $ch; // add to result string
3315                         if ( $ch == '<' ) {
3316                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
3317                                 $entityState = 0; // for bad HTML
3318                                 $bracketState = 1; // tag started (checking for backslash)
3319                         } elseif ( $ch == '>' ) {
3320                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
3321                                 $entityState = 0; // for bad HTML
3322                                 $bracketState = 0; // out of brackets
3323                         } elseif ( $bracketState == 1 ) {
3324                                 if ( $ch == '/' ) {
3325                                         $tagType = 1; // close tag (e.g. "</span>")
3326                                 } else {
3327                                         $tagType = 0; // open tag (e.g. "<span>")
3328                                         $tag .= $ch;
3329                                 }
3330                                 $bracketState = 2; // building tag name
3331                         } elseif ( $bracketState == 2 ) {
3332                                 if ( $ch != ' ' ) {
3333                                         $tag .= $ch;
3334                                 } else {
3335                                         // Name found (e.g. "<a href=..."), add on tag attributes...
3336                                         $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
3337                                 }
3338                         } elseif ( $bracketState == 0 ) {
3339                                 if ( $entityState ) {
3340                                         if ( $ch == ';' ) {
3341                                                 $entityState = 0;
3342                                                 $dispLen++; // entity is one displayed char
3343                                         }
3344                                 } else {
3345                                         if ( $neLength == 0 && !$maybeState ) {
3346                                                 // Save state without $ch. We want to *hit* the first
3347                                                 // display char (to get tags) but not *use* it if truncating.
3348                                                 $maybeState = array( substr( $ret, 0, -1 ), $openTags );
3349                                         }
3350                                         if ( $ch == '&' ) {
3351                                                 $entityState = 1; // entity found, (e.g. "&#160;")
3352                                         } else {
3353                                                 $dispLen++; // this char is displayed
3354                                                 // Add the next $max display text chars after this in one swoop...
3355                                                 $max = ( $testingEllipsis ? $length : $neLength ) - $dispLen;
3356                                                 $skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
3357                                                 $dispLen += $skipped;
3358                                                 $pos += $skipped;
3359                                         }
3360                                 }
3361                         }
3362                 }
3363                 // Close the last tag if left unclosed by bad HTML
3364                 $this->truncate_endBracket( $tag, $text[$textLen - 1], $tagType, $openTags );
3365                 while ( count( $openTags ) > 0 ) {
3366                         $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
3367                 }
3368                 return $ret;
3369         }
3370
3371         /**
3372          * truncateHtml() helper function
3373          * like strcspn() but adds the skipped chars to $ret
3374          *
3375          * @param $ret
3376          * @param $text
3377          * @param $search
3378          * @param $start
3379          * @param $len
3380          * @return int
3381          */
3382         private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
3383                 if ( $len === null ) {
3384                         $len = -1; // -1 means "no limit" for strcspn
3385                 } elseif ( $len < 0 ) {
3386                         $len = 0; // sanity
3387                 }
3388                 $skipCount = 0;
3389                 if ( $start < strlen( $text ) ) {
3390                         $skipCount = strcspn( $text, $search, $start, $len );
3391                         $ret .= substr( $text, $start, $skipCount );
3392                 }
3393                 return $skipCount;
3394         }
3395
3396         /**
3397          * truncateHtml() helper function
3398          * (a) push or pop $tag from $openTags as needed
3399          * (b) clear $tag value
3400          * @param &$tag string Current HTML tag name we are looking at
3401          * @param $tagType int (0-open tag, 1-close tag)
3402          * @param $lastCh string Character before the '>' that ended this tag
3403          * @param &$openTags array Open tag stack (not accounting for $tag)
3404          */
3405         private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
3406                 $tag = ltrim( $tag );
3407                 if ( $tag != '' ) {
3408                         if ( $tagType == 0 && $lastCh != '/' ) {
3409                                 $openTags[] = $tag; // tag opened (didn't close itself)
3410                         } elseif ( $tagType == 1 ) {
3411                                 if ( $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
3412                                         array_pop( $openTags ); // tag closed
3413                                 }
3414                         }
3415                         $tag = '';
3416                 }
3417         }
3418
3419         /**
3420          * Grammatical transformations, needed for inflected languages
3421          * Invoked by putting {{grammar:case|word}} in a message
3422          *
3423          * @param $word string
3424          * @param $case string
3425          * @return string
3426          */
3427         function convertGrammar( $word, $case ) {
3428                 global $wgGrammarForms;
3429                 if ( isset( $wgGrammarForms[$this->getCode()][$case][$word] ) ) {
3430                         return $wgGrammarForms[$this->getCode()][$case][$word];
3431                 }
3432                 return $word;
3433         }
3434         /**
3435          * Get the grammar forms for the content language
3436          * @return array of grammar forms
3437          * @since 1.20
3438          */
3439         function getGrammarForms() {
3440                 global $wgGrammarForms;
3441                 if ( isset( $wgGrammarForms[$this->getCode()] ) && is_array( $wgGrammarForms[$this->getCode()] ) ) {
3442                          return $wgGrammarForms[$this->getCode()];
3443                 }
3444                 return array();
3445         }
3446         /**
3447          * Provides an alternative text depending on specified gender.
3448          * Usage {{gender:username|masculine|feminine|neutral}}.
3449          * username is optional, in which case the gender of current user is used,
3450          * but only in (some) interface messages; otherwise default gender is used.
3451          *
3452          * If no forms are given, an empty string is returned. If only one form is
3453          * given, it will be returned unconditionally. These details are implied by
3454          * the caller and cannot be overridden in subclasses.
3455          *
3456          * If more than one form is given, the default is to use the neutral one
3457          * if it is specified, and to use the masculine one otherwise. These
3458          * details can be overridden in subclasses.
3459          *
3460          * @param $gender string
3461          * @param $forms array
3462          *
3463          * @return string
3464          */
3465         function gender( $gender, $forms ) {
3466                 if ( !count( $forms ) ) {
3467                         return '';
3468                 }
3469                 $forms = $this->preConvertPlural( $forms, 2 );
3470                 if ( $gender === 'male' ) {
3471                         return $forms[0];
3472                 }
3473                 if ( $gender === 'female' ) {
3474                         return $forms[1];
3475                 }
3476                 return isset( $forms[2] ) ? $forms[2] : $forms[0];
3477         }
3478
3479         /**
3480          * Plural form transformations, needed for some languages.
3481          * For example, there are 3 form of plural in Russian and Polish,
3482          * depending on "count mod 10". See [[w:Plural]]
3483          * For English it is pretty simple.
3484          *
3485          * Invoked by putting {{plural:count|wordform1|wordform2}}
3486          * or {{plural:count|wordform1|wordform2|wordform3}}
3487          *
3488          * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
3489          *
3490          * @param $count Integer: non-localized number
3491          * @param $forms Array: different plural forms
3492          * @return string Correct form of plural for $count in this language
3493          */
3494         function convertPlural( $count, $forms ) {
3495                 if ( !count( $forms ) ) {
3496                         return '';
3497                 }
3498
3499                 // Handle explicit 0= and 1= forms
3500                 foreach ( $forms as $index => $form ) {
3501                         if ( isset( $form[1] ) && $form[1] === '=' ) {
3502                                 if ( $form[0] === (string) $count ) {
3503                                         return substr( $form, 2 );
3504                                 }
3505                                 unset( $forms[$index] );
3506                         }
3507                 }
3508                 $forms = array_values( $forms );
3509
3510                 $pluralForm = $this->getPluralForm( $count );
3511                 $pluralForm = min( $pluralForm, count( $forms ) - 1 );
3512                 return $forms[$pluralForm];
3513         }
3514
3515         /**
3516          * Checks that convertPlural was given an array and pads it to requested
3517          * amount of forms by copying the last one.
3518          *
3519          * @param $count Integer: How many forms should there be at least
3520          * @param $forms Array of forms given to convertPlural
3521          * @return array Padded array of forms or an exception if not an array
3522          */
3523         protected function preConvertPlural( /* Array */ $forms, $count ) {
3524                 while ( count( $forms ) < $count ) {
3525                         $forms[] = $forms[count( $forms ) - 1];
3526                 }
3527                 return $forms;
3528         }
3529
3530         /**
3531          * @todo Maybe translate block durations.  Note that this function is somewhat misnamed: it
3532          * deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
3533          * (which is an absolute timestamp). Please note: do NOT add this blindly, as it is used
3534          * on old expiry lengths recorded in log entries. You'd need to provide the start date to
3535          * match up with it.
3536          *
3537          * @param $str String: the validated block duration in English
3538          * @return string Somehow translated block duration
3539          * @see LanguageFi.php for example implementation
3540          */
3541         function translateBlockExpiry( $str ) {
3542                 $duration = SpecialBlock::getSuggestedDurations( $this );
3543                 foreach ( $duration as $show => $value ) {
3544                         if ( strcmp( $str, $value ) == 0 ) {
3545                                 return htmlspecialchars( trim( $show ) );
3546                         }
3547                 }
3548
3549                 // Since usually only infinite or indefinite is only on list, so try
3550                 // equivalents if still here.
3551                 $indefs = array( 'infinite', 'infinity', 'indefinite' );
3552                 if ( in_array( $str, $indefs ) ) {
3553                         foreach ( $indefs as $val ) {
3554                                 $show = array_search( $val, $duration, true );
3555                                 if ( $show !== false ) {
3556                                         return htmlspecialchars( trim( $show ) );
3557                                 }
3558                         }
3559                 }
3560
3561                 // If all else fails, return a standard duration or timestamp description.
3562                 $time = strtotime( $str, 0 );
3563                 if ( $time === false ) { // Unknown format. Return it as-is in case.
3564                         return $str;
3565                 } elseif ( $time !== strtotime( $str, 1 ) ) { // It's a relative timestamp.
3566                         // $time is relative to 0 so it's a duration length.
3567                         return $this->formatDuration( $time );
3568                 } else { // It's an absolute timestamp.
3569                         if ( $time === 0 ) {
3570                                 // wfTimestamp() handles 0 as current time instead of epoch.
3571                                 return $this->timeanddate( '19700101000000' );
3572                         } else {
3573                                 return $this->timeanddate( $time );
3574                         }
3575                 }
3576         }
3577
        /**
         * Segment text before it is diffed. Languages like Chinese need to be
         * segmented in order for the diff to be of any use.
         *
         * This implementation returns the text unchanged.
         *
         * @param $text String
         * @return String
         */
        public function segmentForDiff( $text ) {
                return $text;
        }
3588
        /**
         * Undo segmentForDiff() so that the diff result can be shown.
         *
         * This implementation returns the text unchanged.
         *
         * @param $text String
         * @return String
         */
        public function unsegmentForDiff( $text ) {
                return $text;
        }
3598
        /**
         * Return the LanguageConverter used in the Language
         *
         * @since 1.19
         * @return LanguageConverter The object held in $this->mConverter
         */
        public function getConverter() {
                return $this->mConverter;
        }
3608
        /**
         * Convert text to all supported variants.
         * Delegates to the converter's autoConvertToAllVariants().
         *
         * @param $text string
         * @return array
         */
        public function autoConvertToAllVariants( $text ) {
                return $this->mConverter->autoConvertToAllVariants( $text );
        }
3618
        /**
         * Convert text to different variants of a language.
         * Delegates to the converter's convert().
         *
         * @param $text string
         * @return string
         */
        public function convert( $text ) {
                return $this->mConverter->convert( $text );
        }
3628
        /**
         * Convert a Title object to a string in the preferred variant.
         * Delegates to the converter's convertTitle().
         *
         * @param $title Title
         * @return string
         */
        public function convertTitle( $title ) {
                return $this->mConverter->convertTitle( $title );
        }
3638
        /**
         * Convert a namespace index to a string in the preferred variant.
         * Delegates to the converter's convertNamespace().
         *
         * @param $ns int
         * @return string
         */
        public function convertNamespace( $ns ) {
                return $this->mConverter->convertNamespace( $ns );
        }
3648
3649         /**
3650          * Check if this is a language with variants
3651          *
3652          * @return bool
3653          */
3654         public function hasVariants() {
3655                 return sizeof( $this->getVariants() ) > 1;
3656         }
3657
        /**
         * Check if the language has the specific variant.
         * The result of the converter's validateVariant() is cast to bool.
         *
         * @since 1.19
         * @param $variant string
         * @return bool
         */
        public function hasVariant( $variant ) {
                return (bool)$this->mConverter->validateVariant( $variant );
        }
3668
        /**
         * Put custom tags (e.g. -{ }-) around math to prevent conversion.
         * Delegates to the converter's armourMath().
         *
         * @param $text string
         * @return string
         */
        public function armourMath( $text ) {
                return $this->mConverter->armourMath( $text );
        }
3678
        /**
         * Perform output conversion on a string, and encode for safe HTML output.
         * The text goes through convert() and then through htmlspecialchars().
         *
         * @param $text String text to be converted
         * @param $isTitle Bool whether this conversion is for the article title.
         *   NOTE(review): the flag is forwarded to convert(), but convert() as
         *   declared in this class only takes $text, so it looks unused here;
         *   confirm whether any override relies on it.
         * @return string
         * @todo this should get integrated somewhere sane
         */
        public function convertHtml( $text, $isTitle = false ) {
                return htmlspecialchars( $this->convert( $text, $isTitle ) );
        }
3689
        /**
         * Convert a category key.
         * Delegates to the converter's convertCategoryKey().
         *
         * @param $key string
         * @return string
         */
        public function convertCategoryKey( $key ) {
                return $this->mConverter->convertCategoryKey( $key );
        }
3697
        /**
         * Get the list of variants supported by this language, as reported by
         * the converter's getVariants().
         * See sample implementation in LanguageZh.php
         *
         * @return array an array of language codes
         */
        public function getVariants() {
                return $this->mConverter->getVariants();
        }
3707
        /**
         * Get the preferred variant, as reported by the converter's
         * getPreferredVariant().
         *
         * @return string
         */
        public function getPreferredVariant() {
                return $this->mConverter->getPreferredVariant();
        }
3714
        /**
         * Get the default variant, as reported by the converter's
         * getDefaultVariant().
         *
         * @return string
         */
        public function getDefaultVariant() {
                return $this->mConverter->getDefaultVariant();
        }
3721
        /**
         * Get the URL variant, as reported by the converter's getURLVariant().
         *
         * @return string
         */
        public function getURLVariant() {
                return $this->mConverter->getURLVariant();
        }
3728
        /**
         * If a language supports multiple variants, it is
         * possible that non-existing link in one variant
         * actually exists in another variant. This function
         * tries to find it. See e.g. LanguageZh.php
         *
         * The work is delegated to the converter's findVariantLink(); this
         * method itself returns nothing.
         *
         * @param &$link String: the name of the link
         * @param &$nt Mixed: the title object of the link
         * @param $ignoreOtherCond Boolean: to disable other conditions when
         *      we need to transclude a template or update a category's link
         * @return null the input parameters may be modified upon return
         */
        public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
                $this->mConverter->findVariantLink( $link, $nt, $ignoreOtherCond );
        }
3744
        /**
         * If a language supports multiple variants, converts text
         * into an array of all possible variants of the text:
         *  'variant' => text in that variant
         *
         * Delegates to the converter's convertLinkToAllVariants().
         *
         * @deprecated since 1.17 Use autoConvertToAllVariants()
         *
         * @param $text string
         *
         * @return string NOTE(review): the description above speaks of an
         *   array ('variant' => text); confirm the actual type against the converter.
         */
        public function convertLinkToAllVariants( $text ) {
                return $this->mConverter->convertLinkToAllVariants( $text );
        }
3759
        /**
         * Returns language specific options used by User::getPageRenderHash()
         * for example, the preferred language variant.
         * Delegates to the converter's getExtraHashOptions().
         *
         * @return string
         */
        function getExtraHashOptions() {
                return $this->mConverter->getExtraHashOptions();
        }
3769
        /**
         * For languages that support multiple variants, the title of an
         * article may be displayed differently in different variants. This
         * function returns the appropriate title defined in the body of the
         * article, as reported by the converter's getParsedTitle().
         *
         * @return string
         */
        public function getParsedTitle() {
                return $this->mConverter->getParsedTitle();
        }
3780
3781         /**
3782          * Prepare external link text for conversion. When the text is
3783          * a URL, it shouldn't be converted, and it'll be wrapped in
3784          * the "raw" tag (-{R| }-) to prevent conversion.
3785          *
3786          * This function is called "markNoConversion" for historical
3787          * reasons.
3788          *
3789          * @param $text String: text to be used for external link
3790          * @param $noParse bool: wrap it without confirming it's a real URL first
3791          * @return string the tagged text
3792          */
3793         public function markNoConversion( $text, $noParse = false ) {
3794                 // Excluding protocal-relative URLs may avoid many false positives.
3795                 if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
3796                         return $this->mConverter->markNoConversion( $text );
3797                 } else {
3798                         return $text;
3799                 }
3800         }
3801
        /**
         * A regular expression to match legal word-trailing characters
         * which should be merged onto a link of the form [[foo]]bar.
         *
         * Read from the localisation data cache ('linkTrail' item) for this
         * language's code.
         *
         * @return string
         */
        public function linkTrail() {
                return self::$dataCache->getItem( $this->mCode, 'linkTrail' );
        }
3811
        /**
         * Get the language object, which is this object itself.
         *
         * @return Language
         */
        function getLangObj() {
                return $this;
        }
3818
        /**
         * Get the RFC 3066 code for this language object
         *
         * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
         * htmlspecialchars() or similar
         *
         * @return string The code stored in $this->mCode
         */
        public function getCode() {
                return $this->mCode;
        }
3830
3831         /**
3832          * Get the code in Bcp47 format which we can use
3833          * inside of html lang="" tags.
3834          *
3835          * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
3836          * htmlspecialchars() or similar.
3837          *
3838          * @since 1.19
3839          * @return string
3840          */
3841         public function getHtmlCode() {
3842                 if ( is_null( $this->mHtmlCode ) ) {
3843                         $this->mHtmlCode = wfBCP47( $this->getCode() );
3844                 }
3845                 return $this->mHtmlCode;
3846         }
3847
        /**
         * Set the language code of this object.
         * Also clears the cached HTML code so that getHtmlCode() recomputes it.
         *
         * @param $code string
         */
        public function setCode( $code ) {
                $this->mCode = $code;
                // Ensure we don't leave an incorrect html code lying around
                $this->mHtmlCode = null;
        }
3856
3857         /**
3858          * Get the name of a file for a certain language code
3859          * @param $prefix string Prepend this to the filename
3860          * @param $code string Language code
3861          * @param $suffix string Append this to the filename
3862          * @throws MWException
3863          * @return string $prefix . $mangledCode . $suffix
3864          */
3865         public static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
3866                 // Protect against path traversal
3867                 if ( !Language::isValidCode( $code )
3868                         || strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
3869                 {
3870                         throw new MWException( "Invalid language code \"$code\"" );
3871                 }
3872
3873                 return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
3874         }
3875
3876         /**
3877          * Get the language code from a file name. Inverse of getFileName()
3878          * @param $filename string $prefix . $languageCode . $suffix
3879          * @param $prefix string Prefix before the language code
3880          * @param $suffix string Suffix after the language code
3881          * @return string Language code, or false if $prefix or $suffix isn't found
3882          */
3883         public static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
3884                 $m = null;
3885                 preg_match( '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
3886                         preg_quote( $suffix, '/' ) . '/', $filename, $m );
3887                 if ( !count( $m ) ) {
3888                         return false;
3889                 }
3890                 return str_replace( '_', '-', strtolower( $m[1] ) );
3891         }
3892
        /**
         * Get the path of the messages file for a language code.
         *
         * The default path is built by getFileName() from
         * "$IP/languages/messages/Messages", the code and ".php". The
         * 'Language::getMessagesFileName' hook then receives $file by
         * reference, so a handler can replace it.
         *
         * @param $code string
         * @return string
         */
        public static function getMessagesFileName( $code ) {
                global $IP;
                $file = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
                wfRunHooks( 'Language::getMessagesFileName', array( $code, &$file ) );
                return $file;
        }
3903
        /**
         * Get the path of the class file for a language code, built by
         * getFileName() from "$IP/languages/classes/Language", the code and ".php".
         *
         * @param $code string
         * @return string
         */
        public static function getClassFileName( $code ) {
                global $IP;
                return self::getFileName( "$IP/languages/classes/Language", $code, '.php' );
        }
3912
3913         /**
3914          * Get the first fallback for a given language.
3915          *
3916          * @param $code string
3917          *
3918          * @return bool|string
3919          */
3920         public static function getFallbackFor( $code ) {
3921                 if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
3922                         return false;
3923                 } else {
3924                         $fallbacks = self::getFallbacksFor( $code );
3925                         $first = array_shift( $fallbacks );
3926                         return $first;
3927                 }
3928         }
3929
3930         /**
3931          * Get the ordered list of fallback languages.
3932          *
3933          * @since 1.19
3934          * @param $code string Language code
3935          * @return array
3936          */
3937         public static function getFallbacksFor( $code ) {
3938                 if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
3939                         return array();
3940                 } else {
3941                         $v = self::getLocalisationCache()->getItem( $code, 'fallback' );
3942                         $v = array_map( 'trim', explode( ',', $v ) );
3943                         if ( $v[count( $v ) - 1] !== 'en' ) {
3944                                 $v[] = 'en';
3945                         }
3946                         return $v;
3947                 }
3948         }
3949
        /**
         * Get all messages for a given language, read from the 'messages'
         * item of the localisation cache.
         * WARNING: this may take a long time. If you just need all message *keys*
         * but need the *contents* of only a few messages, consider using getMessageKeysFor().
         *
         * @param $code string
         *
         * @return array
         */
        public static function getMessagesFor( $code ) {
                return self::getLocalisationCache()->getItem( $code, 'messages' );
        }
3962
        /**
         * Get a message for a given language, read as a subitem of the
         * 'messages' item of the localisation cache.
         *
         * @param $key string
         * @param $code string
         *
         * @return string
         */
        public static function getMessageFor( $key, $code ) {
                return self::getLocalisationCache()->getSubitem( $code, 'messages', $key );
        }
3974
        /**
         * Get all message keys for a given language. This is a faster alternative to
         * array_keys( Language::getMessagesFor( $code ) )
         *
         * Uses the localisation cache's getSubItemList() on the 'messages' item.
         *
         * @since 1.19
         * @param $code string Language code
         * @return array of message keys (strings)
         */
        public static function getMessageKeysFor( $code ) {
                return self::getLocalisationCache()->getSubItemList( $code, 'messages' );
        }
3986
3987         /**
3988          * @param $talk
3989          * @return mixed
3990          */
3991         function fixVariableInNamespace( $talk ) {
3992                 if ( strpos( $talk, '$1' ) === false ) {
3993                         return $talk;
3994                 }
3995
3996                 global $wgMetaNamespace;
3997                 $talk = str_replace( '$1', $wgMetaNamespace, $talk );
3998
3999                 # Allow grammar transformations
4000                 # Allowing full message-style parsing would make simple requests
4001                 # such as action=raw much more expensive than they need to be.
4002                 # This will hopefully cover most cases.
4003                 $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
4004                         array( &$this, 'replaceGrammarInNamespace' ), $talk );
4005                 return str_replace( ' ', '_', $talk );
4006         }
4007
        /**
         * Callback used by fixVariableInNamespace() for each
         * {{grammar:case|word}} match.
         *
         * @param $m array Regex match: $m[1] is the grammar case, $m[2] the
         *   word; both are trimmed before use
         * @return string Result of convertGrammar() for that word and case
         */
        function replaceGrammarInNamespace( $m ) {
                return $this->convertGrammar( trim( $m[2] ), trim( $m[1] ) );
        }
4015
4016         /**
4017          * @throws MWException
4018          * @return array
4019          */
4020         static function getCaseMaps() {
4021                 static $wikiUpperChars, $wikiLowerChars;
4022                 if ( isset( $wikiUpperChars ) ) {
4023                         return array( $wikiUpperChars, $wikiLowerChars );
4024                 }
4025
4026                 wfProfileIn( __METHOD__ );
4027                 $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
4028                 if ( $arr === false ) {
4029                         throw new MWException(
4030                                 "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
4031                 }
4032                 $wikiUpperChars = $arr['wikiUpperChars'];
4033                 $wikiLowerChars = $arr['wikiLowerChars'];
4034                 wfProfileOut( __METHOD__ );
4035                 return array( $wikiUpperChars, $wikiLowerChars );
4036         }
4037
4038         /**
4039          * Decode an expiry (block, protection, etc) which has come from the DB
4040          *
4041          * @todo FIXME: why are we returnings DBMS-dependent strings???
4042          *
4043          * @param $expiry String: Database expiry String
4044          * @param $format Bool|Int true to process using language functions, or TS_ constant
4045          *     to return the expiry in a given timestamp
4046          * @return String
4047          * @since 1.18
4048          */
4049         public function formatExpiry( $expiry, $format = true ) {
4050                 static $infinity, $infinityMsg;
4051                 if ( $infinity === null ) {
4052                         $infinityMsg = wfMessage( 'infiniteblock' );
4053                         $infinity = wfGetDB( DB_SLAVE )->getInfinity();
4054                 }
4055
4056                 if ( $expiry == '' || $expiry == $infinity ) {
4057                         return $format === true
4058                                 ? $infinityMsg
4059                                 : $infinity;
4060                 } else {
4061                         return $format === true
4062                                 ? $this->timeanddate( $expiry, /* User preference timezone */ true )
4063                                 : wfTimestamp( $format, $expiry );
4064                 }
4065         }
4066
4067         /**
4068          * @todo Document
4069          * @param $seconds int|float
4070          * @param $format Array Optional
4071          *              If $format['avoid'] == 'avoidseconds' - don't mention seconds if $seconds >= 1 hour
4072          *              If $format['avoid'] == 'avoidminutes' - don't mention seconds/minutes if $seconds > 48 hours
4073          *              If $format['noabbrevs'] is true - use 'seconds' and friends instead of 'seconds-abbrev' and friends
4074          *              For backwards compatibility, $format may also be one of the strings 'avoidseconds' or 'avoidminutes'
4075          * @return string
4076          */
4077         function formatTimePeriod( $seconds, $format = array() ) {
4078                 if ( !is_array( $format ) ) {
4079                         $format = array( 'avoid' => $format ); // For backwards compatibility
4080                 }
4081                 if ( !isset( $format['avoid'] ) ) {
4082                         $format['avoid'] = false;
4083                 }
4084                 if ( !isset( $format['noabbrevs' ] ) ) {
4085                         $format['noabbrevs'] = false;
4086                 }
4087                 $secondsMsg = wfMessage(
4088                         $format['noabbrevs'] ? 'seconds' : 'seconds-abbrev' )->inLanguage( $this );
4089                 $minutesMsg = wfMessage(
4090                         $format['noabbrevs'] ? 'minutes' : 'minutes-abbrev' )->inLanguage( $this );
4091                 $hoursMsg = wfMessage(
4092                         $format['noabbrevs'] ? 'hours' : 'hours-abbrev' )->inLanguage( $this );
4093                 $daysMsg = wfMessage(
4094                         $format['noabbrevs'] ? 'days' : 'days-abbrev' )->inLanguage( $this );
4095
4096                 if ( round( $seconds * 10 ) < 100 ) {
4097                         $s = $this->formatNum( sprintf( "%.1f", round( $seconds * 10 ) / 10 ) );
4098                         $s = $secondsMsg->params( $s )->text();
4099                 } elseif ( round( $seconds ) < 60 ) {
4100                         $s = $this->formatNum( round( $seconds ) );
4101                         $s = $secondsMsg->params( $s )->text();
4102                 } elseif ( round( $seconds ) < 3600 ) {
4103                         $minutes = floor( $seconds / 60 );
4104                         $secondsPart = round( fmod( $seconds, 60 ) );
4105                         if ( $secondsPart == 60 ) {
4106                                 $secondsPart = 0;
4107                                 $minutes++;
4108                         }
4109                         $s = $minutesMsg->params( $this->formatNum( $minutes ) )->text();
4110                         $s .= ' ';
4111                         $s .= $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
4112                 } elseif ( round( $seconds ) <= 2 * 86400 ) {
4113                         $hours = floor( $seconds / 3600 );
4114                         $minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
4115                         $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
4116                         if ( $secondsPart == 60 ) {
4117                                 $secondsPart = 0;
4118                                 $minutes++;
4119                         }
4120                         if ( $minutes == 60 ) {
4121                                 $minutes = 0;
4122                                 $hours++;
4123                         }
4124                         $s = $hoursMsg->params( $this->formatNum( $hours ) )->text();
4125                         $s .= ' ';
4126                         $s .= $minutesMsg->params( $this->formatNum( $minutes ) )->text();
4127                         if ( !in_array( $format['avoid'], array( 'avoidseconds', 'avoidminutes' ) ) ) {
4128                                 $s .= ' ' . $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
4129                         }
4130                 } else {
4131                         $days = floor( $seconds / 86400 );
4132                         if ( $format['avoid'] === 'avoidminutes' ) {
4133                                 $hours = round( ( $seconds - $days * 86400 ) / 3600 );
4134                                 if ( $hours == 24 ) {
4135                                         $hours = 0;
4136                                         $days++;
4137                                 }
4138                                 $s = $daysMsg->params( $this->formatNum( $days ) )->text();
4139                                 $s .= ' ';
4140                                 $s .= $hoursMsg->params( $this->formatNum( $hours ) )->text();
4141                         } elseif ( $format['avoid'] === 'avoidseconds' ) {
4142                                 $hours = floor( ( $seconds - $days * 86400 ) / 3600 );
4143                                 $minutes = round( ( $seconds - $days * 86400 - $hours * 3600 ) / 60 );
4144                                 if ( $minutes == 60 ) {
4145                                         $minutes = 0;
4146                                         $hours++;
4147                                 }
4148                                 if ( $hours == 24 ) {
4149                                         $hours = 0;
4150                                         $days++;
4151                                 }
4152                                 $s = $daysMsg->params( $this->formatNum( $days ) )->text();
4153                                 $s .= ' ';
4154                                 $s .= $hoursMsg->params( $this->formatNum( $hours ) )->text();
4155                                 $s .= ' ';
4156                                 $s .= $minutesMsg->params( $this->formatNum( $minutes ) )->text();
4157                         } else {
4158                                 $s = $daysMsg->params( $this->formatNum( $days ) )->text();
4159                                 $s .= ' ';
4160                                 $s .= $this->formatTimePeriod( $seconds - $days * 86400, $format );
4161                         }
4162                 }
4163                 return $s;
4164         }
4165
4166         /**
4167          * Format a bitrate for output, using an appropriate
4168          * unit (bps, kbps, Mbps, Gbps, Tbps, Pbps, Ebps, Zbps or Ybps) according to the magnitude in question
4169          *
         * This uses base 1000. For base 1024 use formatSize(); for another base
         * see formatComputingNumbers().
4172          *
4173          * @param $bps int
4174          * @return string
4175          */
4176         function formatBitrate( $bps ) {
4177                 return $this->formatComputingNumbers( $bps, 1000, "bitrate-$1bits" );
4178         }
4179
4180         /**
4181          * @param $size int Size of the unit
4182          * @param $boundary int Size boundary (1000, or 1024 in most cases)
         * @param $messageKey string Message key to be used
4184          * @return string
4185          */
4186         function formatComputingNumbers( $size, $boundary, $messageKey ) {
4187                 if ( $size <= 0 ) {
4188                         return str_replace( '$1', $this->formatNum( $size ),
4189                                 $this->getMessageFromDB( str_replace( '$1', '', $messageKey ) )
4190                         );
4191                 }
4192                 $sizes = array( '', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zeta', 'yotta' );
4193                 $index = 0;
4194
4195                 $maxIndex = count( $sizes ) - 1;
4196                 while ( $size >= $boundary && $index < $maxIndex ) {
4197                         $index++;
4198                         $size /= $boundary;
4199                 }
4200
4201                 // For small sizes no decimal places necessary
4202                 $round = 0;
4203                 if ( $index > 1 ) {
4204                         // For MB and bigger two decimal places are smarter
4205                         $round = 2;
4206                 }
4207                 $msg = str_replace( '$1', $sizes[$index], $messageKey );
4208
4209                 $size = round( $size, $round );
4210                 $text = $this->getMessageFromDB( $msg );
4211                 return str_replace( '$1', $this->formatNum( $size ), $text );
4212         }
4213
4214         /**
4215          * Format a size in bytes for output, using an appropriate
4216          * unit (B, KB, MB, GB, TB, PB, EB, ZB or YB) according to the magnitude in question
4217          *
         * This method uses base 1024. For base 1000 use formatBitrate(); for
         * another base see formatComputingNumbers().
4220          *
4221          * @param $size int Size to format
4222          * @return string Plain text (not HTML)
4223          */
4224         function formatSize( $size ) {
4225                 return $this->formatComputingNumbers( $size, 1024, "size-$1bytes" );
4226         }
4227
4228         /**
4229          * Make a list item, used by various special pages
4230          *
4231          * @param $page String Page link
4232          * @param $details String Text between brackets
4233          * @param $oppositedm Boolean Add the direction mark opposite to your
4234          *                    language, to display text properly
4235          * @return String
4236          */
4237         function specialList( $page, $details, $oppositedm = true ) {
4238                 $dirmark = ( $oppositedm ? $this->getDirMark( true ) : '' ) .
4239                         $this->getDirMark();
4240                 $details = $details ? $dirmark . $this->getMessageFromDB( 'word-separator' ) .
4241                         wfMessage( 'parentheses' )->rawParams( $details )->inLanguage( $this )->escaped() : '';
4242                 return $page . $details;
4243         }
4244
4245         /**
4246          * Generate (prev x| next x) (20|50|100...) type links for paging
4247          *
4248          * @param $title Title object to link
4249          * @param $offset Integer offset parameter
4250          * @param $limit Integer limit parameter
         * @param $query Array optional extra URL query parameters
         * @param $atend Bool optional; true if this is the last page
4253          * @return String
4254          */
4255         public function viewPrevNext( Title $title, $offset, $limit, array $query = array(), $atend = false ) {
4256                 // @todo FIXME: Why on earth this needs one message for the text and another one for tooltip?
4257
4258                 # Make 'previous' link
4259                 $prev = wfMessage( 'prevn' )->inLanguage( $this )->title( $title )->numParams( $limit )->text();
4260                 if ( $offset > 0 ) {
4261                         $plink = $this->numLink( $title, max( $offset - $limit, 0 ), $limit,
4262                                 $query, $prev, 'prevn-title', 'mw-prevlink' );
4263                 } else {
4264                         $plink = htmlspecialchars( $prev );
4265                 }
4266
4267                 # Make 'next' link
4268                 $next = wfMessage( 'nextn' )->inLanguage( $this )->title( $title )->numParams( $limit )->text();
4269                 if ( $atend ) {
4270                         $nlink = htmlspecialchars( $next );
4271                 } else {
4272                         $nlink = $this->numLink( $title, $offset + $limit, $limit,
4273                                 $query, $next, 'prevn-title', 'mw-nextlink' );
4274                 }
4275
4276                 # Make links to set number of items per page
4277                 $numLinks = array();
4278                 foreach ( array( 20, 50, 100, 250, 500 ) as $num ) {
4279                         $numLinks[] = $this->numLink( $title, $offset, $num,
4280                                 $query, $this->formatNum( $num ), 'shown-title', 'mw-numlink' );
4281                 }
4282
4283                 return wfMessage( 'viewprevnext' )->inLanguage( $this )->title( $title
4284                         )->rawParams( $plink, $nlink, $this->pipeList( $numLinks ) )->escaped();
4285         }
4286
4287         /**
4288          * Helper function for viewPrevNext() that generates links
4289          *
4290          * @param $title Title object to link
4291          * @param $offset Integer offset parameter
4292          * @param $limit Integer limit parameter
4293          * @param $query Array extra query parameters
4294          * @param $link String text to use for the link; will be escaped
4295          * @param $tooltipMsg String name of the message to use as tooltip
4296          * @param $class String value of the "class" attribute of the link
4297          * @return String HTML fragment
4298          */
4299         private function numLink( Title $title, $offset, $limit, array $query, $link, $tooltipMsg, $class ) {
4300                 $query = array( 'limit' => $limit, 'offset' => $offset ) + $query;
4301                 $tooltip = wfMessage( $tooltipMsg )->inLanguage( $this )->title( $title )->numParams( $limit )->text();
4302                 return Html::element( 'a', array( 'href' => $title->getLocalURL( $query ),
4303                         'title' => $tooltip, 'class' => $class ), $link );
4304         }
4305
4306         /**
4307          * Get the conversion rule title, if any.
4308          *
4309          * @return string
4310          */
4311         public function getConvRuleTitle() {
4312                 return $this->mConverter->getConvRuleTitle();
4313         }
4314
4315         /**
4316          * Get the compiled plural rules for the language
4317          * @since 1.20
4318          * @return array Associative array with plural form, and plural rule as key-value pairs
4319          */
4320         public function getCompiledPluralRules() {
4321                 $pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'compiledPluralRules' );
4322                 $fallbacks = Language::getFallbacksFor( $this->mCode );
4323                 if ( !$pluralRules ) {
4324                         foreach ( $fallbacks as $fallbackCode ) {
4325                                 $pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'compiledPluralRules' );
4326                                 if ( $pluralRules ) {
4327                                         break;
4328                                 }
4329                         }
4330                 }
4331                 return $pluralRules;
4332         }
4333
4334         /**
4335          * Get the plural rules for the language
4336          * @since 1.20
4337          * @return array Associative array with plural form, and plural rule as key-value pairs
4338          */
4339         public function getPluralRules() {
4340                 $pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRules' );
4341                 $fallbacks = Language::getFallbacksFor( $this->mCode );
4342                 if ( !$pluralRules ) {
4343                         foreach ( $fallbacks as $fallbackCode ) {
4344                                 $pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRules' );
4345                                 if ( $pluralRules ) {
4346                                         break;
4347                                 }
4348                         }
4349                 }
4350                 return $pluralRules;
4351         }
4352
4353         /**
         * Find the plural form matching the given number.
         * Returns the form index.
4356          * @return int The index of the plural form
4357          */
4358         private function getPluralForm( $number ) {
4359                 $pluralRules = $this->getCompiledPluralRules();
4360                 $form = CLDRPluralRuleEvaluator::evaluateCompiled( $number, $pluralRules );
4361                 return $form;
4362         }
4363 }