languages/Language.php

   1 <?php
   2 /**
   3  * Internationalisation code
   4  *
   5  * @file
   6  * @ingroup Language
   7  */
   8
   9 /**
  10  * @defgroup Language Language
  11  */
  12
  13 if ( !defined( 'MEDIAWIKI' ) ) {
  14         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
  15         exit( 1 );
  16 }
  17
  18 # Read language names
  19 global $wgLanguageNames;
  20 require_once( dirname( __FILE__ ) . '/Names.php' );
  21
  22 global $wgInputEncoding, $wgOutputEncoding;
  23
  24 /**
  25  * These are always UTF-8, they exist only for backwards compatibility
  26  */
  27 $wgInputEncoding    = 'UTF-8';
  28 $wgOutputEncoding       = 'UTF-8';
  29
  30 if ( function_exists( 'mb_strtoupper' ) ) {
  31         mb_internal_encoding( 'UTF-8' );
  32 }
  33
  34 /**
  35  * a fake language converter
  36  *
  37  * @ingroup Language
  38  */
  39 class FakeConverter {
  40         var $mLang;
  41         function __construct( $langobj ) { $this->mLang = $langobj; }
  42         function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
  43         function convert( $t ) { return $t; }
  44         function convertTitle( $t ) { return $t->getPrefixedText(); }
  45         function getVariants() { return array( $this->mLang->getCode() ); }
  46         function getPreferredVariant() { return $this->mLang->getCode(); }
  47         function getDefaultVariant() { return $this->mLang->getCode(); }
  48         function getURLVariant() { return ''; }
  49         function getConvRuleTitle() { return false; }
  50         function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) { }
  51         function getExtraHashOptions() { return ''; }
  52         function getParsedTitle() { return ''; }
  53         function markNoConversion( $text, $noParse = false ) { return $text; }
  54         function convertCategoryKey( $key ) { return $key; }
  55         function convertLinkToAllVariants( $text ) { return $this->autoConvertToAllVariants( $text ); }
  56         function armourMath( $text ) { return $text; }
  57 }
  58
  59 /**
  60  * Internationalisation code
  61  * @ingroup Language
  62  */
  63 class Language {
  64         var $mConverter, $mVariants, $mCode, $mLoaded = false;
  65         var $mMagicExtensions = array(), $mMagicHookDone = false;
  66
  67         var $mNamespaceIds, $namespaceNames, $namespaceAliases;
  68         var $dateFormatStrings = array();
  69         var $mExtendedSpecialPageAliases;
  70
  71         /**
  72          * ReplacementArray object caches
  73          */
  74         var $transformData = array();
  75
  76         static public $dataCache;
  77         static public $mLangObjCache = array();
  78
  79         static public $mWeekdayMsgs = array(
  80                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
  81                 'friday', 'saturday'
  82         );
  83
  84         static public $mWeekdayAbbrevMsgs = array(
  85                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
  86         );
  87
  88         static public $mMonthMsgs = array(
  89                 'january', 'february', 'march', 'april', 'may_long', 'june',
  90                 'july', 'august', 'september', 'october', 'november',
  91                 'december'
  92         );
  93         static public $mMonthGenMsgs = array(
  94                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
  95                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
  96                 'december-gen'
  97         );
  98         static public $mMonthAbbrevMsgs = array(
  99                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
 100                 'sep', 'oct', 'nov', 'dec'
 101         );
 102
 103         static public $mIranianCalendarMonthMsgs = array(
 104                 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
 105                 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
 106                 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
 107                 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
 108         );
 109
 110         static public $mHebrewCalendarMonthMsgs = array(
 111                 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
 112                 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
 113                 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
 114                 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
 115                 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
 116         );
 117
 118         static public $mHebrewCalendarMonthGenMsgs = array(
 119                 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
 120                 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
 121                 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
 122                 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
 123                 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
 124         );
 125
 126         static public $mHijriCalendarMonthMsgs = array(
 127                 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
 128                 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
 129                 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
 130                 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
 131         );
 132
 133         /**
 134          * Get a cached language object for a given language code
 135          * @param $code String
 136          * @return Language
 137          */
 138         static function factory( $code ) {
 139                 if ( !isset( self::$mLangObjCache[$code] ) ) {
 140                         if ( count( self::$mLangObjCache ) > 10 ) {
 141                                 // Don't keep a billion objects around, that's stupid.
 142                                 self::$mLangObjCache = array();
 143                         }
 144                         self::$mLangObjCache[$code] = self::newFromCode( $code );
 145                 }
 146                 return self::$mLangObjCache[$code];
 147         }
 148
 149         /**
 150          * Create a language object for a given language code
 151          * @param $code String
 152          * @return Language
 153          */
 154         protected static function newFromCode( $code ) {
 155                 global $IP;
 156                 static $recursionLevel = 0;
 157
 158                 // Protect against path traversal below
 159                 if ( !Language::isValidCode( $code ) ) {
 160                         throw new MWException( "Invalid language code \"$code\"" );
 161                 }
 162
 163                 if ( $code == 'en' ) {
 164                         $class = 'Language';
 165                 } else {
 166                         $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 167                         // Preload base classes to work around APC/PHP5 bug
 168                         if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
 169                                 include_once( "$IP/languages/classes/$class.deps.php" );
 170                         }
 171                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 172                                 include_once( "$IP/languages/classes/$class.php" );
 173                         }
 174                 }
 175
 176                 if ( $recursionLevel > 5 ) {
 177                         throw new MWException( "Language fallback loop detected when creating class $class\n" );
 178                 }
 179
 180                 if ( !class_exists( $class ) ) {
 181                         $fallback = Language::getFallbackFor( $code );
 182                         ++$recursionLevel;
 183                         $lang = Language::newFromCode( $fallback );
 184                         --$recursionLevel;
 185                         $lang->setCode( $code );
 186                 } else {
 187                         $lang = new $class;
 188                 }
 189                 return $lang;
 190         }
 191
 192         /**
 193          * Returns true if a language code string is of a valid form, whether or
 194          * not it exists.
 195          */
 196         public static function isValidCode( $code ) {
 197                 return strcspn( $code, "/\\\000" ) === strlen( $code );
 198         }
 199
 200         /**
 201          * Get the LocalisationCache instance
 202          */
 203         public static function getLocalisationCache() {
 204                 if ( is_null( self::$dataCache ) ) {
 205                         global $wgLocalisationCacheConf;
 206                         $class = $wgLocalisationCacheConf['class'];
 207                         self::$dataCache = new $class( $wgLocalisationCacheConf );
 208                 }
 209                 return self::$dataCache;
 210         }
 211
 212         function __construct() {
 213                 $this->mConverter = new FakeConverter( $this );
 214                 // Set the code to the name of the descendant
 215                 if ( get_class( $this ) == 'Language' ) {
 216                         $this->mCode = 'en';
 217                 } else {
 218                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 219                 }
 220                 self::getLocalisationCache();
 221         }
 222
 223         /**
 224          * Reduce memory usage
 225          */
 226         function __destruct() {
 227                 foreach ( $this as $name => $value ) {
 228                         unset( $this->$name );
 229                 }
 230         }
 231
 232         /**
 233          * Hook which will be called if this is the content language.
 234          * Descendants can use this to register hook functions or modify globals
 235          */
 236         function initContLang() { }
 237
 238         /**
 239          * @deprecated Use User::getDefaultOptions()
 240          * @return array
 241          */
 242         function getDefaultUserOptions() {
 243                 wfDeprecated( __METHOD__ );
 244                 return User::getDefaultOptions();
 245         }
 246
 247         function getFallbackLanguageCode() {
 248                 if ( $this->mCode === 'en' ) {
 249                         return false;
 250                 } else {
 251                         return self::$dataCache->getItem( $this->mCode, 'fallback' );
 252                 }
 253         }
 254
 255         /**
 256          * Exports $wgBookstoreListEn
 257          * @return array
 258          */
 259         function getBookstoreList() {
 260                 return self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
 261         }
 262
 263         /**
 264          * @return array
 265          */
 266         function getNamespaces() {
 267                 if ( is_null( $this->namespaceNames ) ) {
 268                         global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;
 269
 270                         $this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
 271                         $validNamespaces = MWNamespace::getCanonicalNamespaces();
 272
 273                         $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;
 274
 275                         $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
 276                         if ( $wgMetaNamespaceTalk ) {
 277                                 $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
 278                         } else {
 279                                 $talk = $this->namespaceNames[NS_PROJECT_TALK];
 280                                 $this->namespaceNames[NS_PROJECT_TALK] =
 281                                         $this->fixVariableInNamespace( $talk );
 282                         }
 283
 284                         # Sometimes a language will be localised but not actually exist on this wiki.
 285                         foreach( $this->namespaceNames as $key => $text ) {
 286                                 if ( !isset( $validNamespaces[$key] ) ) {
 287                                         unset( $this->namespaceNames[$key] );
 288                                 }
 289                         }
 290
 291                         # The above mixing may leave namespaces out of canonical order.
 292                         # Re-order by namespace ID number...
 293                         ksort( $this->namespaceNames );
 294                 }
 295                 return $this->namespaceNames;
 296         }
 297
 298         /**
 299          * A convenience function that returns the same thing as
 300          * getNamespaces() except with the array values changed to ' '
 301          * where it found '_', useful for producing output to be displayed
 302          * e.g. in <select> forms.
 303          *
 304          * @return array
 305          */
 306         function getFormattedNamespaces() {
 307                 $ns = $this->getNamespaces();
 308                 foreach ( $ns as $k => $v ) {
 309                         $ns[$k] = strtr( $v, '_', ' ' );
 310                 }
 311                 return $ns;
 312         }
 313
 314         /**
 315          * Get a namespace value by key
 316          * <code>
 317          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 318          * echo $mw_ns; // prints 'MediaWiki'
 319          * </code>
 320          *
 321          * @param $index Int: the array key of the namespace to return
 322          * @return mixed, string if the namespace value exists, otherwise false
 323          */
 324         function getNsText( $index ) {
 325                 $ns = $this->getNamespaces();
 326                 return isset( $ns[$index] ) ? $ns[$index] : false;
 327         }
 328
 329         /**
 330          * A convenience function that returns the same thing as
 331          * getNsText() except with '_' changed to ' ', useful for
 332          * producing output.
 333          *
 334          * @return array
 335          */
 336         function getFormattedNsText( $index ) {
 337                 $ns = $this->getNsText( $index );
 338                 return strtr( $ns, '_', ' ' );
 339         }
 340
 341         /**
 342          * Returns gender-dependent namespace alias if available.
 343          * @param $index Int: namespace index
 344          * @param $gender String: gender key (male, female... )
 345          * @return String
 346          * @since 1.18
 347          */
 348         function getGenderNsText( $index, $gender ) {
 349                 $ns = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 350                 return isset( $ns[$index][$gender] ) ? $ns[$index][$gender] : $this->getNsText( $index );
 351         }
 352
 353         /**
 354          * Whether this language makes distinguishes genders for example in
 355          * namespaces.
 356          * @return bool
 357          * @since 1.18
 358          */
 359         function needsGenderDistinction() {
 360                 $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 361                 return count( $aliases ) > 0;
 362         }
 363
 364         /**
 365          * Get a namespace key by value, case insensitive.
 366          * Only matches namespace names for the current language, not the
 367          * canonical ones defined in Namespace.php.
 368          *
 369          * @param $text String
 370          * @return mixed An integer if $text is a valid value otherwise false
 371          */
 372         function getLocalNsIndex( $text ) {
 373                 $lctext = $this->lc( $text );
 374                 $ids = $this->getNamespaceIds();
 375                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 376         }
 377
 378         function getNamespaceAliases() {
 379                 if ( is_null( $this->namespaceAliases ) ) {
 380                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
 381                         if ( !$aliases ) {
 382                                 $aliases = array();
 383                         } else {
 384                                 foreach ( $aliases as $name => $index ) {
 385                                         if ( $index === NS_PROJECT_TALK ) {
 386                                                 unset( $aliases[$name] );
 387                                                 $name = $this->fixVariableInNamespace( $name );
 388                                                 $aliases[$name] = $index;
 389                                         }
 390                                 }
 391                         }
 392
 393                         $genders = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 394                         foreach ( $genders as $index => $forms ) {
 395                                 foreach ( $forms as $alias ) {
 396                                         $aliases[$alias] = $index;
 397                                 }
 398                         }
 399
 400                         $this->namespaceAliases = $aliases;
 401                 }
 402                 return $this->namespaceAliases;
 403         }
 404
 405         function getNamespaceIds() {
 406                 if ( is_null( $this->mNamespaceIds ) ) {
 407                         global $wgNamespaceAliases;
 408                         # Put namespace names and aliases into a hashtable.
 409                         # If this is too slow, then we should arrange it so that it is done
 410                         # before caching. The catch is that at pre-cache time, the above
 411                         # class-specific fixup hasn't been done.
 412                         $this->mNamespaceIds = array();
 413                         foreach ( $this->getNamespaces() as $index => $name ) {
 414                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 415                         }
 416                         foreach ( $this->getNamespaceAliases() as $name => $index ) {
 417                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 418                         }
 419                         if ( $wgNamespaceAliases ) {
 420                                 foreach ( $wgNamespaceAliases as $name => $index ) {
 421                                         $this->mNamespaceIds[$this->lc( $name )] = $index;
 422                                 }
 423                         }
 424                 }
 425                 return $this->mNamespaceIds;
 426         }
 427
 428
 429         /**
 430          * Get a namespace key by value, case insensitive.  Canonical namespace
 431          * names override custom ones defined for the current language.
 432          *
 433          * @param $text String
 434          * @return mixed An integer if $text is a valid value otherwise false
 435          */
 436         function getNsIndex( $text ) {
 437                 $lctext = $this->lc( $text );
 438                 if ( ( $ns = MWNamespace::getCanonicalIndex( $lctext ) ) !== null ) {
 439                         return $ns;
 440                 }
 441                 $ids = $this->getNamespaceIds();
 442                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 443         }
 444
 445         /**
 446          * short names for language variants used for language conversion links.
 447          *
 448          * @param $code String
 449          * @return string
 450          */
 451         function getVariantname( $code ) {
 452                 return $this->getMessageFromDB( "variantname-$code" );
 453         }
 454
 455         function specialPage( $name ) {
 456                 $aliases = $this->getSpecialPageAliases();
 457                 if ( isset( $aliases[$name][0] ) ) {
 458                         $name = $aliases[$name][0];
 459                 }
 460                 return $this->getNsText( NS_SPECIAL ) . ':' . $name;
 461         }
 462
 463         function getQuickbarSettings() {
 464                 return array(
 465                         $this->getMessage( 'qbsettings-none' ),
 466                         $this->getMessage( 'qbsettings-fixedleft' ),
 467                         $this->getMessage( 'qbsettings-fixedright' ),
 468                         $this->getMessage( 'qbsettings-floatingleft' ),
 469                         $this->getMessage( 'qbsettings-floatingright' )
 470                 );
 471         }
 472
 473         function getMathNames() {
 474                 return self::$dataCache->getItem( $this->mCode, 'mathNames' );
 475         }
 476
 477         function getDatePreferences() {
 478                 return self::$dataCache->getItem( $this->mCode, 'datePreferences' );
 479         }
 480
 481         function getDateFormats() {
 482                 return self::$dataCache->getItem( $this->mCode, 'dateFormats' );
 483         }
 484
 485         function getDefaultDateFormat() {
 486                 $df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
 487                 if ( $df === 'dmy or mdy' ) {
 488                         global $wgAmericanDates;
 489                         return $wgAmericanDates ? 'mdy' : 'dmy';
 490                 } else {
 491                         return $df;
 492                 }
 493         }
 494
 495         function getDatePreferenceMigrationMap() {
 496                 return self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
 497         }
 498
 499         function getImageFile( $image ) {
 500                 return self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
 501         }
 502
 503         function getDefaultUserOptionOverrides() {
 504                 return self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
 505         }
 506
 507         function getExtraUserToggles() {
 508                 return self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
 509         }
 510
 511         function getUserToggle( $tog ) {
 512                 return $this->getMessageFromDB( "tog-$tog" );
 513         }
 514
 515         /**
 516          * Get language names, indexed by code.
 517          * If $customisedOnly is true, only returns codes with a messages file
 518          */
 519         public static function getLanguageNames( $customisedOnly = false ) {
 520                 global $wgLanguageNames, $wgExtraLanguageNames;
 521                 $allNames = $wgExtraLanguageNames + $wgLanguageNames;
 522                 if ( !$customisedOnly ) {
 523                         return $allNames;
 524                 }
 525
 526                 global $IP;
 527                 $names = array();
 528                 $dir = opendir( "$IP/languages/messages" );
 529                 while ( false !== ( $file = readdir( $dir ) ) ) {
 530                         $code = self::getCodeFromFileName( $file, 'Messages' );
 531                         if ( $code && isset( $allNames[$code] ) ) {
 532                                 $names[$code] = $allNames[$code];
 533                         }
 534                 }
 535                 closedir( $dir );
 536                 return $names;
 537         }
 538
 539         /**
 540          * Get translated language names. This is done on best effort and
 541          * by default this is exactly the same as Language::getLanguageNames.
 542          * The CLDR extension provides translated names.
 543          * @param $code String Language code.
 544          * @return Array language code => language name
 545          * @since 1.18.0
 546          */
 547         public static function getTranslatedLanguageNames( $code ) {
 548                 $names = array();
 549                 wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );
 550
 551                 foreach ( self::getLanguageNames() as $code => $name ) {
 552                         if ( !isset( $names[$code] ) ) $names[$code] = $name;
 553                 }
 554
 555                 return $names;
 556         }
 557
 558         /**
 559          * Get a message from the MediaWiki namespace.
 560          *
 561          * @param $msg String: message name
 562          * @return string
 563          */
 564         function getMessageFromDB( $msg ) {
 565                 return wfMsgExt( $msg, array( 'parsemag', 'language' => $this ) );
 566         }
 567
 568         function getLanguageName( $code ) {
 569                 $names = self::getLanguageNames();
 570                 if ( !array_key_exists( $code, $names ) ) {
 571                         return '';
 572                 }
 573                 return $names[$code];
 574         }
 575
 576         function getMonthName( $key ) {
 577                 return $this->getMessageFromDB( self::$mMonthMsgs[$key - 1] );
 578         }
 579
 580         function getMonthNameGen( $key ) {
 581                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key - 1] );
 582         }
 583
 584         function getMonthAbbreviation( $key ) {
 585                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key - 1] );
 586         }
 587
 588         function getWeekdayName( $key ) {
 589                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key - 1] );
 590         }
 591
 592         function getWeekdayAbbreviation( $key ) {
 593                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key - 1] );
 594         }
 595
 596         function getIranianCalendarMonthName( $key ) {
 597                 return $this->getMessageFromDB( self::$mIranianCalendarMonthMsgs[$key - 1] );
 598         }
 599
 600         function getHebrewCalendarMonthName( $key ) {
 601                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthMsgs[$key - 1] );
 602         }
 603
 604         function getHebrewCalendarMonthNameGen( $key ) {
 605                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthGenMsgs[$key - 1] );
 606         }
 607
 608         function getHijriCalendarMonthName( $key ) {
 609                 return $this->getMessageFromDB( self::$mHijriCalendarMonthMsgs[$key - 1] );
 610         }
 611
 612         /**
 613          * Used by date() and time() to adjust the time output.
 614          *
 615          * @param $ts Int the time in date('YmdHis') format
 616          * @param $tz Mixed: adjust the time by this amount (default false, mean we
 617          *            get user timecorrection setting)
 618          * @return int
 619          */
 620         function userAdjust( $ts, $tz = false ) {
 621                 global $wgUser, $wgLocalTZoffset;
 622
 623                 if ( $tz === false ) {
 624                         $tz = $wgUser->getOption( 'timecorrection' );
 625                 }
 626
 627                 $data = explode( '|', $tz, 3 );
 628
 629                 if ( $data[0] == 'ZoneInfo' ) {
 630                         if ( function_exists( 'timezone_open' ) && @timezone_open( $data[2] ) !== false ) {
 631                                 $date = date_create( $ts, timezone_open( 'UTC' ) );
 632                                 date_timezone_set( $date, timezone_open( $data[2] ) );
 633                                 $date = date_format( $date, 'YmdHis' );
 634                                 return $date;
 635                         }
 636                         # Unrecognized timezone, default to 'Offset' with the stored offset.
 637                         $data[0] = 'Offset';
 638                 }
 639
 640                 $minDiff = 0;
 641                 if ( $data[0] == 'System' || $tz == '' ) {
 642                         #  Global offset in minutes.
 643                         if ( isset( $wgLocalTZoffset ) ) {
 644                                 $minDiff = $wgLocalTZoffset;
 645                         }
 646                 } else if ( $data[0] == 'Offset' ) {
 647                         $minDiff = intval( $data[1] );
 648                 } else {
 649                         $data = explode( ':', $tz );
 650                         if ( count( $data ) == 2 ) {
 651                                 $data[0] = intval( $data[0] );
 652                                 $data[1] = intval( $data[1] );
 653                                 $minDiff = abs( $data[0] ) * 60 + $data[1];
 654                                 if ( $data[0] < 0 ) {
 655                                         $minDiff = -$minDiff;
 656                                 }
 657                         } else {
 658                                 $minDiff = intval( $data[0] ) * 60;
 659                         }
 660                 }
 661
 662                 # No difference ? Return time unchanged
 663                 if ( 0 == $minDiff ) {
 664                         return $ts;
 665                 }
 666
 667                 wfSuppressWarnings(); // E_STRICT system time bitching
 668                 # Generate an adjusted date; take advantage of the fact that mktime
 669                 # will normalize out-of-range values so we don't have to split $minDiff
 670                 # into hours and minutes.
 671                 $t = mktime( (
 672                   (int)substr( $ts, 8, 2 ) ), # Hours
 673                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
 674                   (int)substr( $ts, 12, 2 ), # Seconds
 675                   (int)substr( $ts, 4, 2 ), # Month
 676                   (int)substr( $ts, 6, 2 ), # Day
 677                   (int)substr( $ts, 0, 4 ) ); # Year
 678
 679                 $date = date( 'YmdHis', $t );
 680                 wfRestoreWarnings();
 681
 682                 return $date;
 683         }
 684
 685         /**
 686          * This is a workalike of PHP's date() function, but with better
 687          * internationalisation, a reduced set of format characters, and a better
 688          * escaping format.
 689          *
 690          * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
 691          * PHP manual for definitions. There are a number of extensions, which
 692          * start with "x":
 693          *
 694          *    xn   Do not translate digits of the next numeric format character
 695          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 696          *    xr   Use roman numerals for the next numeric format character
 697          *    xh   Use hebrew numerals for the next numeric format character
 698          *    xx   Literal x
 699          *    xg   Genitive month name
 700          *
 701          *    xij  j (day number) in Iranian calendar
 702          *    xiF  F (month name) in Iranian calendar
 703          *    xin  n (month number) in Iranian calendar
 704          *    xiY  Y (full year) in Iranian calendar
 705          *
 706          *    xjj  j (day number) in Hebrew calendar
 707          *    xjF  F (month name) in Hebrew calendar
 708          *    xjt  t (days in month) in Hebrew calendar
 709          *    xjx  xg (genitive month name) in Hebrew calendar
 710          *    xjn  n (month number) in Hebrew calendar
 711          *    xjY  Y (full year) in Hebrew calendar
 712          *
 713          *    xmj  j (day number) in Hijri calendar
 714          *    xmF  F (month name) in Hijri calendar
 715          *    xmn  n (month number) in Hijri calendar
 716          *    xmY  Y (full year) in Hijri calendar
 717          *
 718          *    xkY  Y (full year) in Thai solar calendar. Months and days are
 719          *                       identical to the Gregorian calendar
 720          *    xoY  Y (full year) in Minguo calendar or Juche year.
 721          *                       Months and days are identical to the
 722          *                       Gregorian calendar
 723          *    xtY  Y (full year) in Japanese nengo. Months and days are
 724          *                       identical to the Gregorian calendar
 725          *
 726          * Characters enclosed in double quotes will be considered literal (with
 727          * the quotes themselves removed). Unmatched quotes will be considered
 728          * literal quotes. Example:
 729          *
 730          * "The month is" F       => The month is January
 731          * i's"                   => 20'11"
 732          *
 733          * Backslash escaping is also supported.
 734          *
 735          * Input timestamp is assumed to be pre-normalized to the desired local
 736          * time zone, if any.
 737          *
 738          * @param $format String
 739          * @param $ts String: 14-character timestamp
 740          *      YYYYMMDDHHMMSS
 741          *      01234567890123
 742          * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 743          */
 744         function sprintfDate( $format, $ts ) {
 745                 $s = '';
 746                 $raw = false;
 747                 $roman = false;
 748                 $hebrewNum = false;
 749                 $unix = false;
 750                 $rawToggle = false;
 751                 $iranian = false;
 752                 $hebrew = false;
 753                 $hijri = false;
 754                 $thai = false;
 755                 $minguo = false;
 756                 $tenno = false;
 757                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
 758                         $num = false;
 759                         $code = $format[$p];
 760                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
 761                                 $code .= $format[++$p];
 762                         }
 763
 764                         if ( ( $code === 'xi' || $code == 'xj' || $code == 'xk' || $code == 'xm' || $code == 'xo' || $code == 'xt' ) && $p < strlen( $format ) - 1 ) {
 765                                 $code .= $format[++$p];
 766                         }
 767
 768                         switch ( $code ) {
 769                                 case 'xx':
 770                                         $s .= 'x';
 771                                         break;
 772                                 case 'xn':
 773                                         $raw = true;
 774                                         break;
 775                                 case 'xN':
 776                                         $rawToggle = !$rawToggle;
 777                                         break;
 778                                 case 'xr':
 779                                         $roman = true;
 780                                         break;
 781                                 case 'xh':
 782                                         $hebrewNum = true;
 783                                         break;
 784                                 case 'xg':
 785                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
 786                                         break;
 787                                 case 'xjx':
 788                                         if ( !$hebrew ) $hebrew = self::tsToHebrew( $ts );
 789                                         $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
 790                                         break;
 791                                 case 'd':
 792                                         $num = substr( $ts, 6, 2 );
 793                                         break;
 794                                 case 'D':
 795                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 796                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
 797                                         break;
 798                                 case 'j':
 799                                         $num = intval( substr( $ts, 6, 2 ) );
 800                                         break;
 801                                 case 'xij':
 802                                         if ( !$iranian ) {
 803                                                 $iranian = self::tsToIranian( $ts );
 804                                         }
 805                                         $num = $iranian[2];
 806                                         break;
 807                                 case 'xmj':
 808                                         if ( !$hijri ) {
 809                                                 $hijri = self::tsToHijri( $ts );
 810                                         }
 811                                         $num = $hijri[2];
 812                                         break;
 813                                 case 'xjj':
 814                                         if ( !$hebrew ) {
 815                                                 $hebrew = self::tsToHebrew( $ts );
 816                                         }
 817                                         $num = $hebrew[2];
 818                                         break;
 819                                 case 'l':
 820                                         if ( !$unix ) {
 821                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 822                                         }
 823                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
 824                                         break;
 825                                 case 'N':
 826                                         if ( !$unix ) {
 827                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 828                                         }
 829                                         $w = gmdate( 'w', $unix );
 830                                         $num = $w ? $w : 7;
 831                                         break;
 832                                 case 'w':
 833                                         if ( !$unix ) {
 834                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 835                                         }
 836                                         $num = gmdate( 'w', $unix );
 837                                         break;
 838                                 case 'z':
 839                                         if ( !$unix ) {
 840                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 841                                         }
 842                                         $num = gmdate( 'z', $unix );
 843                                         break;
 844                                 case 'W':
 845                                         if ( !$unix ) {
 846                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 847                                         }
 848                                         $num = gmdate( 'W', $unix );
 849                                         break;
 850                                 case 'F':
 851                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
 852                                         break;
 853                                 case 'xiF':
 854                                         if ( !$iranian ) {
 855                                                 $iranian = self::tsToIranian( $ts );
 856                                         }
 857                                         $s .= $this->getIranianCalendarMonthName( $iranian[1] );
 858                                         break;
 859                                 case 'xmF':
 860                                         if ( !$hijri ) {
 861                                                 $hijri = self::tsToHijri( $ts );
 862                                         }
 863                                         $s .= $this->getHijriCalendarMonthName( $hijri[1] );
 864                                         break;
 865                                 case 'xjF':
 866                                         if ( !$hebrew ) {
 867                                                 $hebrew = self::tsToHebrew( $ts );
 868                                         }
 869                                         $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
 870                                         break;
 871                                 case 'm':
 872                                         $num = substr( $ts, 4, 2 );
 873                                         break;
 874                                 case 'M':
 875                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
 876                                         break;
 877                                 case 'n':
 878                                         $num = intval( substr( $ts, 4, 2 ) );
 879                                         break;
 880                                 case 'xin':
 881                                         if ( !$iranian ) {
 882                                                 $iranian = self::tsToIranian( $ts );
 883                                         }
 884                                         $num = $iranian[1];
 885                                         break;
 886                                 case 'xmn':
 887                                         if ( !$hijri ) {
 888                                                 $hijri = self::tsToHijri ( $ts );
 889                                         }
 890                                         $num = $hijri[1];
 891                                         break;
 892                                 case 'xjn':
 893                                         if ( !$hebrew ) {
 894                                                 $hebrew = self::tsToHebrew( $ts );
 895                                         }
 896                                         $num = $hebrew[1];
 897                                         break;
 898                                 case 't':
 899                                         if ( !$unix ) {
 900                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 901                                         }
 902                                         $num = gmdate( 't', $unix );
 903                                         break;
 904                                 case 'xjt':
 905                                         if ( !$hebrew ) {
 906                                                 $hebrew = self::tsToHebrew( $ts );
 907                                         }
 908                                         $num = $hebrew[3];
 909                                         break;
 910                                 case 'L':
 911                                         if ( !$unix ) {
 912                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 913                                         }
 914                                         $num = gmdate( 'L', $unix );
 915                                         break;
 916                                 case 'o':
 917                                         if ( !$unix ) {
 918                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 919                                         }
 920                                         $num = date( 'o', $unix );
 921                                         break;
 922                                 case 'Y':
 923                                         $num = substr( $ts, 0, 4 );
 924                                         break;
 925                                 case 'xiY':
 926                                         if ( !$iranian ) {
 927                                                 $iranian = self::tsToIranian( $ts );
 928                                         }
 929                                         $num = $iranian[0];
 930                                         break;
 931                                 case 'xmY':
 932                                         if ( !$hijri ) {
 933                                                 $hijri = self::tsToHijri( $ts );
 934                                         }
 935                                         $num = $hijri[0];
 936                                         break;
 937                                 case 'xjY':
 938                                         if ( !$hebrew ) {
 939                                                 $hebrew = self::tsToHebrew( $ts );
 940                                         }
 941                                         $num = $hebrew[0];
 942                                         break;
 943                                 case 'xkY':
 944                                         if ( !$thai ) {
 945                                                 $thai = self::tsToYear( $ts, 'thai' );
 946                                         }
 947                                         $num = $thai[0];
 948                                         break;
 949                                 case 'xoY':
 950                                         if ( !$minguo ) {
 951                                                 $minguo = self::tsToYear( $ts, 'minguo' );
 952                                         }
 953                                         $num = $minguo[0];
 954                                         break;
 955                                 case 'xtY':
 956                                         if ( !$tenno ) {
 957                                                 $tenno = self::tsToYear( $ts, 'tenno' );
 958                                         }
 959                                         $num = $tenno[0];
 960                                         break;
 961                                 case 'y':
 962                                         $num = substr( $ts, 2, 2 );
 963                                         break;
 964                                 case 'a':
 965                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
 966                                         break;
 967                                 case 'A':
 968                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
 969                                         break;
 970                                 case 'g':
 971                                         $h = substr( $ts, 8, 2 );
 972                                         $num = $h % 12 ? $h % 12 : 12;
 973                                         break;
 974                                 case 'G':
 975                                         $num = intval( substr( $ts, 8, 2 ) );
 976                                         break;
 977                                 case 'h':
 978                                         $h = substr( $ts, 8, 2 );
 979                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
 980                                         break;
 981                                 case 'H':
 982                                         $num = substr( $ts, 8, 2 );
 983                                         break;
 984                                 case 'i':
 985                                         $num = substr( $ts, 10, 2 );
 986                                         break;
 987                                 case 's':
 988                                         $num = substr( $ts, 12, 2 );
 989                                         break;
 990                                 case 'c':
 991                                         if ( !$unix ) {
 992                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 993                                         }
 994                                         $s .= gmdate( 'c', $unix );
 995                                         break;
 996                                 case 'r':
 997                                         if ( !$unix ) {
 998                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 999                                         }
1000                                         $s .= gmdate( 'r', $unix );
1001                                         break;
1002                                 case 'U':
1003                                         if ( !$unix ) {
1004                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1005                                         }
1006                                         $num = $unix;
1007                                         break;
1008                                 case '\\':
1009                                         # Backslash escaping
1010                                         if ( $p < strlen( $format ) - 1 ) {
1011                                                 $s .= $format[++$p];
1012                                         } else {
1013                                                 $s .= '\\';
1014                                         }
1015                                         break;
1016                                 case '"':
1017                                         # Quoted literal
1018                                         if ( $p < strlen( $format ) - 1 ) {
1019                                                 $endQuote = strpos( $format, '"', $p + 1 );
1020                                                 if ( $endQuote === false ) {
1021                                                         # No terminating quote, assume literal "
1022                                                         $s .= '"';
1023                                                 } else {
1024                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
1025                                                         $p = $endQuote;
1026                                                 }
1027                                         } else {
1028                                                 # Quote at end of string, assume literal "
1029                                                 $s .= '"';
1030                                         }
1031                                         break;
1032                                 default:
1033                                         $s .= $format[$p];
1034                         }
1035                         if ( $num !== false ) {
1036                                 if ( $rawToggle || $raw ) {
1037                                         $s .= $num;
1038                                         $raw = false;
1039                                 } elseif ( $roman ) {
1040                                         $s .= self::romanNumeral( $num );
1041                                         $roman = false;
1042                                 } elseif ( $hebrewNum ) {
1043                                         $s .= self::hebrewNumeral( $num );
1044                                         $hebrewNum = false;
1045                                 } else {
1046                                         $s .= $this->formatNum( $num, true );
1047                                 }
1048                         }
1049                 }
1050                 return $s;
1051         }
1052
1053         private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1054         private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1055         /**
1056          * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1057          * Gregorian dates to Iranian dates. Originally written in C, it
1058          * is released under the terms of GNU Lesser General Public
1059          * License. Conversion to PHP was performed by Niklas Laxström.
1060          *
1061          * Link: http://www.farsiweb.info/jalali/jalali.c
1062          */
1063         private static function tsToIranian( $ts ) {
1064                 $gy = substr( $ts, 0, 4 ) -1600;
1065                 $gm = substr( $ts, 4, 2 ) -1;
1066                 $gd = substr( $ts, 6, 2 ) -1;
1067
1068                 # Days passed from the beginning (including leap years)
1069                 $gDayNo = 365 * $gy
1070                         + floor( ( $gy + 3 ) / 4 )
1071                         - floor( ( $gy + 99 ) / 100 )
1072                         + floor( ( $gy + 399 ) / 400 );
1073
1074
1075                 // Add days of the past months of this year
1076                 for ( $i = 0; $i < $gm; $i++ ) {
1077                         $gDayNo += self::$GREG_DAYS[$i];
1078                 }
1079
1080                 // Leap years
1081                 if ( $gm > 1 && ( ( $gy % 4 === 0 && $gy % 100 !== 0 || ( $gy % 400 == 0 ) ) ) ) {
1082                         $gDayNo++;
1083                 }
1084
1085                 // Days passed in current month
1086                 $gDayNo += $gd;
1087
1088                 $jDayNo = $gDayNo - 79;
1089
1090                 $jNp = floor( $jDayNo / 12053 );
1091                 $jDayNo %= 12053;
1092
1093                 $jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
1094                 $jDayNo %= 1461;
1095
1096                 if ( $jDayNo >= 366 ) {
1097                         $jy += floor( ( $jDayNo - 1 ) / 365 );
1098                         $jDayNo = floor( ( $jDayNo - 1 ) % 365 );
1099                 }
1100
1101                 for ( $i = 0; $i < 11 && $jDayNo >= self::$IRANIAN_DAYS[$i]; $i++ ) {
1102                         $jDayNo -= self::$IRANIAN_DAYS[$i];
1103                 }
1104
1105                 $jm = $i + 1;
1106                 $jd = $jDayNo + 1;
1107
1108                 return array( $jy, $jm, $jd );
1109         }
1110
1111         /**
1112          * Converting Gregorian dates to Hijri dates.
1113          *
1114          * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1115          *
1116          * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1117          */
1118         private static function tsToHijri( $ts ) {
1119                 $year = substr( $ts, 0, 4 );
1120                 $month = substr( $ts, 4, 2 );
1121                 $day = substr( $ts, 6, 2 );
1122
1123                 $zyr = $year;
1124                 $zd = $day;
1125                 $zm = $month;
1126                 $zy = $zyr;
1127
1128                 if (
1129                         ( $zy > 1582 ) || ( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1130                         ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1131                 )
1132                 {
1133                         $zjd = (int)( ( 1461 * ( $zy + 4800 + (int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1134                                         (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1135                                         (int)( ( 3 * (int)( ( ( $zy + 4900 + (int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1136                                         $zd - 32075;
1137                 } else {
1138                         $zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1139                                                                 (int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
1140                 }
1141
1142                 $zl = $zjd -1948440 + 10632;
1143                 $zn = (int)( ( $zl - 1 ) / 10631 );
1144                 $zl = $zl - 10631 * $zn + 354;
1145                 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) + ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1146                 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;
1147                 $zm = (int)( ( 24 * $zl ) / 709 );
1148                 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1149                 $zy = 30 * $zn + $zj - 30;
1150
1151                 return array( $zy, $zm, $zd );
1152         }
1153
1154         /**
1155          * Converting Gregorian dates to Hebrew dates.
1156          *
1157          * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1158          * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1159          * to translate the relevant functions into PHP and release them under
1160          * GNU GPL.
1161          *
1162          * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1163          * and Adar II is 14. In a non-leap year, Adar is 6.
1164          */
1165         private static function tsToHebrew( $ts ) {
1166                 # Parse date
1167                 $year = substr( $ts, 0, 4 );
1168                 $month = substr( $ts, 4, 2 );
1169                 $day = substr( $ts, 6, 2 );
1170
1171                 # Calculate Hebrew year
1172                 $hebrewYear = $year + 3760;
1173
1174                 # Month number when September = 1, August = 12
1175                 $month += 4;
1176                 if ( $month > 12 ) {
1177                         # Next year
1178                         $month -= 12;
1179                         $year++;
1180                         $hebrewYear++;
1181                 }
1182
1183                 # Calculate day of year from 1 September
1184                 $dayOfYear = $day;
1185                 for ( $i = 1; $i < $month; $i++ ) {
1186                         if ( $i == 6 ) {
1187                                 # February
1188                                 $dayOfYear += 28;
1189                                 # Check if the year is leap
1190                                 if ( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
1191                                         $dayOfYear++;
1192                                 }
1193                         } elseif ( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
1194                                 $dayOfYear += 30;
1195                         } else {
1196                                 $dayOfYear += 31;
1197                         }
1198                 }
1199
1200                 # Calculate the start of the Hebrew year
1201                 $start = self::hebrewYearStart( $hebrewYear );
1202
1203                 # Calculate next year's start
1204                 if ( $dayOfYear <= $start ) {
1205                         # Day is before the start of the year - it is the previous year
1206                         # Next year's start
1207                         $nextStart = $start;
1208                         # Previous year
1209                         $year--;
1210                         $hebrewYear--;
1211                         # Add days since previous year's 1 September
1212                         $dayOfYear += 365;
1213                         if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1214                                 # Leap year
1215                                 $dayOfYear++;
1216                         }
1217                         # Start of the new (previous) year
1218                         $start = self::hebrewYearStart( $hebrewYear );
1219                 } else {
1220                         # Next year's start
1221                         $nextStart = self::hebrewYearStart( $hebrewYear + 1 );
1222                 }
1223
1224                 # Calculate Hebrew day of year
1225                 $hebrewDayOfYear = $dayOfYear - $start;
1226
1227                 # Difference between year's days
1228                 $diff = $nextStart - $start;
1229                 # Add 12 (or 13 for leap years) days to ignore the difference between
1230                 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1231                 # difference is only about the year type
1232                 if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1233                         $diff += 13;
1234                 } else {
1235                         $diff += 12;
1236                 }
1237
1238                 # Check the year pattern, and is leap year
1239                 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1240                 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1241                 # and non-leap years
1242                 $yearPattern = $diff % 30;
1243                 # Check if leap year
1244                 $isLeap = $diff >= 30;
1245
1246                 # Calculate day in the month from number of day in the Hebrew year
1247                 # Don't check Adar - if the day is not in Adar, we will stop before;
1248                 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1249                 $hebrewDay = $hebrewDayOfYear;
1250                 $hebrewMonth = 1;
1251                 $days = 0;
1252                 while ( $hebrewMonth <= 12 ) {
1253                         # Calculate days in this month
1254                         if ( $isLeap && $hebrewMonth == 6 ) {
1255                                 # Adar in a leap year
1256                                 if ( $isLeap ) {
1257                                         # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1258                                         $days = 30;
1259                                         if ( $hebrewDay <= $days ) {
1260                                                 # Day in Adar I
1261                                                 $hebrewMonth = 13;
1262                                         } else {
1263                                                 # Subtract the days of Adar I
1264                                                 $hebrewDay -= $days;
1265                                                 # Try Adar II
1266                                                 $days = 29;
1267                                                 if ( $hebrewDay <= $days ) {
1268                                                         # Day in Adar II
1269                                                         $hebrewMonth = 14;
1270                                                 }
1271                                         }
1272                                 }
1273                         } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1274                                 # Cheshvan in a complete year (otherwise as the rule below)
1275                                 $days = 30;
1276                         } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1277                                 # Kislev in an incomplete year (otherwise as the rule below)
1278                                 $days = 29;
1279                         } else {
1280                                 # Odd months have 30 days, even have 29
1281                                 $days = 30 - ( $hebrewMonth - 1 ) % 2;
1282                         }
1283                         if ( $hebrewDay <= $days ) {
1284                                 # In the current month
1285                                 break;
1286                         } else {
1287                                 # Subtract the days of the current month
1288                                 $hebrewDay -= $days;
1289                                 # Try in the next month
1290                                 $hebrewMonth++;
1291                         }
1292                 }
1293
1294                 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1295         }
1296
1297         /**
1298          * This calculates the Hebrew year start, as days since 1 September.
1299          * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1300          * Used for Hebrew date.
1301          */
1302         private static function hebrewYearStart( $year ) {
1303                 $a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
1304                 $b = intval( ( $year - 1 ) % 4 );
1305                 $m = 32.044093161144 + 1.5542417966212 * $a +  $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1306                 if ( $m < 0 ) {
1307                         $m--;
1308                 }
1309                 $Mar = intval( $m );
1310                 if ( $m < 0 ) {
1311                         $m++;
1312                 }
1313                 $m -= $Mar;
1314
1315                 $c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7 );
1316                 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1317                         $Mar++;
1318                 } else if ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1319                         $Mar += 2;
1320                 } else if ( $c == 2 || $c == 4 || $c == 6 ) {
1321                         $Mar++;
1322                 }
1323
1324                 $Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1325                 return $Mar;
1326         }
1327
1328         /**
1329          * Algorithm to convert Gregorian dates to Thai solar dates,
1330          * Minguo dates or Minguo dates.
1331          *
1332          * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1333          *       http://en.wikipedia.org/wiki/Minguo_calendar
1334          *       http://en.wikipedia.org/wiki/Japanese_era_name
1335          *
1336          * @param $ts String: 14-character timestamp
1337          * @param $cName String: calender name
1338          * @return Array: converted year, month, day
1339          */
1340         private static function tsToYear( $ts, $cName ) {
1341                 $gy = substr( $ts, 0, 4 );
1342                 $gm = substr( $ts, 4, 2 );
1343                 $gd = substr( $ts, 6, 2 );
1344
1345                 if ( !strcmp( $cName, 'thai' ) ) {
1346                         # Thai solar dates
1347                         # Add 543 years to the Gregorian calendar
1348                         # Months and days are identical
1349                         $gy_offset = $gy + 543;
1350                 } else if ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
1351                         # Minguo dates
1352                         # Deduct 1911 years from the Gregorian calendar
1353                         # Months and days are identical
1354                         $gy_offset = $gy - 1911;
1355                 } else if ( !strcmp( $cName, 'tenno' ) ) {
1356                         # Nengō dates up to Meiji period
1357                         # Deduct years from the Gregorian calendar
1358                         # depending on the nengo periods
1359                         # Months and days are identical
1360                         if ( ( $gy < 1912 ) || ( ( $gy == 1912 ) && ( $gm < 7 ) ) || ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1361                                 # Meiji period
1362                                 $gy_gannen = $gy - 1868 + 1;
1363                                 $gy_offset = $gy_gannen;
1364                                 if ( $gy_gannen == 1 ) {
1365                                         $gy_offset = '元';
1366                                 }
1367                                 $gy_offset = '明治' . $gy_offset;
1368                         } else if (
1369                                 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1370                                 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1371                                 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1372                                 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1373                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1374                         )
1375                         {
1376                                 # Taishō period
1377                                 $gy_gannen = $gy - 1912 + 1;
1378                                 $gy_offset = $gy_gannen;
1379                                 if ( $gy_gannen == 1 ) {
1380                                         $gy_offset = '元';
1381                                 }
1382                                 $gy_offset = '大正' . $gy_offset;
1383                         } else if (
1384                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1385                                 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1386                                 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1387                         )
1388                         {
1389                                 # Shōwa period
1390                                 $gy_gannen = $gy - 1926 + 1;
1391                                 $gy_offset = $gy_gannen;
1392                                 if ( $gy_gannen == 1 ) {
1393                                         $gy_offset = '元';
1394                                 }
1395                                 $gy_offset = '昭和' . $gy_offset;
1396                         } else {
1397                                 # Heisei period
1398                                 $gy_gannen = $gy - 1989 + 1;
1399                                 $gy_offset = $gy_gannen;
1400                                 if ( $gy_gannen == 1 ) {
1401                                         $gy_offset = '元';
1402                                 }
1403                                 $gy_offset = '平成' . $gy_offset;
1404                         }
1405                 } else {
1406                         $gy_offset = $gy;
1407                 }
1408
1409                 return array( $gy_offset, $gm, $gd );
1410         }
1411
1412         /**
1413          * Roman number formatting up to 3000
1414          */
1415         static function romanNumeral( $num ) {
1416                 static $table = array(
1417                         array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
1418                         array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
1419                         array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
1420                         array( '', 'M', 'MM', 'MMM' )
1421                 );
1422
1423                 $num = intval( $num );
1424                 if ( $num > 3000 || $num <= 0 ) {
1425                         return $num;
1426                 }
1427
1428                 $s = '';
1429                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1430                         if ( $num >= $pow10 ) {
1431                                 $s .= $table[$i][floor( $num / $pow10 )];
1432                         }
1433                         $num = $num % $pow10;
1434                 }
1435                 return $s;
1436         }
1437
1438         /**
1439          * Hebrew Gematria number formatting up to 9999
1440          */
1441         static function hebrewNumeral( $num ) {
1442                 static $table = array(
1443                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
1444                         array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
1445                         array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
1446                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
1447                 );
1448
1449                 $num = intval( $num );
1450                 if ( $num > 9999 || $num <= 0 ) {
1451                         return $num;
1452                 }
1453
1454                 $s = '';
1455                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1456                         if ( $num >= $pow10 ) {
1457                                 if ( $num == 15 || $num == 16 ) {
1458                                         $s .= $table[0][9] . $table[0][$num - 9];
1459                                         $num = 0;
1460                                 } else {
1461                                         $s .= $table[$i][intval( ( $num / $pow10 ) )];
1462                                         if ( $pow10 == 1000 ) {
1463                                                 $s .= "'";
1464                                         }
1465                                 }
1466                         }
1467                         $num = $num % $pow10;
1468                 }
1469                 if ( strlen( $s ) == 2 ) {
1470                         $str = $s . "'";
1471                 } else  {
1472                         $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
1473                         $str .= substr( $s, strlen( $s ) - 2, 2 );
1474                 }
1475                 $start = substr( $str, 0, strlen( $str ) - 2 );
1476                 $end = substr( $str, strlen( $str ) - 2 );
1477                 switch( $end ) {
1478                         case 'כ':
1479                                 $str = $start . 'ך';
1480                                 break;
1481                         case 'מ':
1482                                 $str = $start . 'ם';
1483                                 break;
1484                         case 'נ':
1485                                 $str = $start . 'ן';
1486                                 break;
1487                         case 'פ':
1488                                 $str = $start . 'ף';
1489                                 break;
1490                         case 'צ':
1491                                 $str = $start . 'ץ';
1492                                 break;
1493                 }
1494                 return $str;
1495         }
1496
1497         /**
1498          * This is meant to be used by time(), date(), and timeanddate() to get
1499          * the date preference they're supposed to use, it should be used in
1500          * all children.
1501          *
1502          *<code>
1503          * function timeanddate([...], $format = true) {
1504          *      $datePreference = $this->dateFormat($format);
1505          * [...]
1506          * }
1507          *</code>
1508          *
1509          * @param $usePrefs Mixed: if true, the user's preference is used
1510          *                         if false, the site/language default is used
1511          *                         if int/string, assumed to be a format.
1512          * @return string
1513          */
1514         function dateFormat( $usePrefs = true ) {
1515                 global $wgUser;
1516
1517                 if ( is_bool( $usePrefs ) ) {
1518                         if ( $usePrefs ) {
1519                                 $datePreference = $wgUser->getDatePreference();
1520                         } else {
1521                                 $datePreference = (string)User::getDefaultOption( 'date' );
1522                         }
1523                 } else {
1524                         $datePreference = (string)$usePrefs;
1525                 }
1526
1527                 // return int
1528                 if ( $datePreference == '' ) {
1529                         return 'default';
1530                 }
1531
1532                 return $datePreference;
1533         }
1534
1535         /**
1536          * Get a format string for a given type and preference
1537          * @param $type May be date, time or both
1538          * @param $pref The format name as it appears in Messages*.php
1539          */
1540         function getDateFormatString( $type, $pref ) {
1541                 if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
1542                         if ( $pref == 'default' ) {
1543                                 $pref = $this->getDefaultDateFormat();
1544                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1545                         } else {
1546                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1547                                 if ( is_null( $df ) ) {
1548                                         $pref = $this->getDefaultDateFormat();
1549                                         $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1550                                 }
1551                         }
1552                         $this->dateFormatStrings[$type][$pref] = $df;
1553                 }
1554                 return $this->dateFormatStrings[$type][$pref];
1555         }
1556
1557         /**
1558          * @param $ts Mixed: the time format which needs to be turned into a
1559          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1560          * @param $adj Bool: whether to adjust the time output according to the
1561          *             user configured offset ($timecorrection)
1562          * @param $format Mixed: true to use user's date format preference
1563          * @param $timecorrection String: the time offset as returned by
1564          *                        validateTimeZone() in Special:Preferences
1565          * @return string
1566          */
1567         function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
1568                 $ts = wfTimestamp( TS_MW, $ts );
1569                 if ( $adj ) {
1570                         $ts = $this->userAdjust( $ts, $timecorrection );
1571                 }
1572                 $df = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );
1573                 return $this->sprintfDate( $df, $ts );
1574         }
1575
1576         /**
1577          * @param $ts Mixed: the time format which needs to be turned into a
1578          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1579          * @param $adj Bool: whether to adjust the time output according to the
1580          *             user configured offset ($timecorrection)
1581          * @param $format Mixed: true to use user's date format preference
1582          * @param $timecorrection String: the time offset as returned by
1583          *                        validateTimeZone() in Special:Preferences
1584          * @return string
1585          */
1586         function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
1587                 $ts = wfTimestamp( TS_MW, $ts );
1588                 if ( $adj ) {
1589                         $ts = $this->userAdjust( $ts, $timecorrection );
1590                 }
1591                 $df = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );
1592                 return $this->sprintfDate( $df, $ts );
1593         }
1594
1595         /**
1596          * @param $ts Mixed: the time format which needs to be turned into a
1597          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1598          * @param $adj Bool: whether to adjust the time output according to the
1599          *             user configured offset ($timecorrection)
1600          * @param $format Mixed: what format to return, if it's false output the
1601          *                default one (default true)
1602          * @param $timecorrection String: the time offset as returned by
1603          *                        validateTimeZone() in Special:Preferences
1604          * @return string
1605          */
1606         function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
1607                 $ts = wfTimestamp( TS_MW, $ts );
1608                 if ( $adj ) {
1609                         $ts = $this->userAdjust( $ts, $timecorrection );
1610                 }
1611                 $df = $this->getDateFormatString( 'both', $this->dateFormat( $format ) );
1612                 return $this->sprintfDate( $df, $ts );
1613         }
1614
1615         function getMessage( $key ) {
1616                 // Don't change getPreferredVariant() to getCode() / mCode, because:
1617
1618                 // 1. Some language like Chinese has multiple variant languages. Only
1619                 //    getPreferredVariant() (in LanguageConverter) could return a
1620                 //    sub-language which would be more suitable for the user.
1621                 // 2. To languages without multiple variants, getPreferredVariant()
1622                 //    (in FakeConverter) functions exactly same as getCode() / mCode,
1623                 //    it won't break anything.
1624
1625                 // The same below.
1626                 return self::$dataCache->getSubitem( $this->getPreferredVariant(), 'messages', $key );
1627         }
1628
1629         function getAllMessages() {
1630                 return self::$dataCache->getItem( $this->getPreferredVariant(), 'messages' );
1631         }
1632
1633         function iconv( $in, $out, $string ) {
1634                 # This is a wrapper for iconv in all languages except esperanto,
1635                 # which does some nasty x-conversions beforehand
1636
1637                 # Even with //IGNORE iconv can whine about illegal characters in
1638                 # *input* string. We just ignore those too.
1639                 # REF: http://bugs.php.net/bug.php?id=37166
1640                 # REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
1641                 wfSuppressWarnings();
1642                 $text = iconv( $in, $out . '//IGNORE', $string );
1643                 wfRestoreWarnings();
1644                 return $text;
1645         }
1646
1647         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
1648         function ucwordbreaksCallbackAscii( $matches ) {
1649                 return $this->ucfirst( $matches[1] );
1650         }
1651
1652         function ucwordbreaksCallbackMB( $matches ) {
1653                 return mb_strtoupper( $matches[0] );
1654         }
1655
1656         function ucCallback( $matches ) {
1657                 list( $wikiUpperChars ) = self::getCaseMaps();
1658                 return strtr( $matches[1], $wikiUpperChars );
1659         }
1660
1661         function lcCallback( $matches ) {
1662                 list( , $wikiLowerChars ) = self::getCaseMaps();
1663                 return strtr( $matches[1], $wikiLowerChars );
1664         }
1665
1666         function ucwordsCallbackMB( $matches ) {
1667                 return mb_strtoupper( $matches[0] );
1668         }
1669
1670         function ucwordsCallbackWiki( $matches ) {
1671                 list( $wikiUpperChars ) = self::getCaseMaps();
1672                 return strtr( $matches[0], $wikiUpperChars );
1673         }
1674
1675         /**
1676          * Make a string's first character uppercase
1677          */
1678         function ucfirst( $str ) {
1679                 $o = ord( $str );
1680                 if ( $o < 96 ) { // if already uppercase...
1681                         return $str;
1682                 } elseif ( $o < 128 ) {
1683                         return ucfirst( $str ); // use PHP's ucfirst()
1684                 } else {
1685                         // fall back to more complex logic in case of multibyte strings
1686                         return $this->uc( $str, true );
1687                 }
1688         }
1689
1690         /**
1691          * Convert a string to uppercase
1692          */
1693         function uc( $str, $first = false ) {
1694                 if ( function_exists( 'mb_strtoupper' ) ) {
1695                         if ( $first ) {
1696                                 if ( $this->isMultibyte( $str ) ) {
1697                                         return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
1698                                 } else {
1699                                         return ucfirst( $str );
1700                                 }
1701                         } else {
1702                                 return $this->isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
1703                         }
1704                 } else {
1705                         if ( $this->isMultibyte( $str ) ) {
1706                                 $x = $first ? '^' : '';
1707                                 return preg_replace_callback(
1708                                         "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
1709                                         array( $this, 'ucCallback' ),
1710                                         $str
1711                                 );
1712                         } else {
1713                                 return $first ? ucfirst( $str ) : strtoupper( $str );
1714                         }
1715                 }
1716         }
1717
1718         function lcfirst( $str ) {
1719                 $o = ord( $str );
1720                 if ( !$o ) {
1721                         return strval( $str );
1722                 } elseif ( $o >= 128 ) {
1723                         return $this->lc( $str, true );
1724                 } elseif ( $o > 96 ) {
1725                         return $str;
1726                 } else {
1727                         $str[0] = strtolower( $str[0] );
1728                         return $str;
1729                 }
1730         }
1731
1732         function lc( $str, $first = false ) {
1733                 if ( function_exists( 'mb_strtolower' ) ) {
1734                         if ( $first ) {
1735                                 if ( $this->isMultibyte( $str ) ) {
1736                                         return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
1737                                 } else {
1738                                         return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
1739                                 }
1740                         } else {
1741                                 return $this->isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
1742                         }
1743                 } else {
1744                         if ( $this->isMultibyte( $str ) ) {
1745                                 $x = $first ? '^' : '';
1746                                 return preg_replace_callback(
1747                                         "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
1748                                         array( $this, 'lcCallback' ),
1749                                         $str
1750                                 );
1751                         } else {
1752                                 return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
1753                         }
1754                 }
1755         }
1756
1757         function isMultibyte( $str ) {
1758                 return (bool)preg_match( '/[\x80-\xff]/', $str );
1759         }
1760
1761         function ucwords( $str ) {
1762                 if ( $this->isMultibyte( $str ) ) {
1763                         $str = $this->lc( $str );
1764
1765                         // regexp to find first letter in each word (i.e. after each space)
1766                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
1767
1768                         // function to use to capitalize a single char
1769                         if ( function_exists( 'mb_strtoupper' ) ) {
1770                                 return preg_replace_callback(
1771                                         $replaceRegexp,
1772                                         array( $this, 'ucwordsCallbackMB' ),
1773                                         $str
1774                                 );
1775                         } else {
1776                                 return preg_replace_callback(
1777                                         $replaceRegexp,
1778                                         array( $this, 'ucwordsCallbackWiki' ),
1779                                         $str
1780                                 );
1781                         }
1782                 } else {
1783                         return ucwords( strtolower( $str ) );
1784                 }
1785         }
1786
1787         # capitalize words at word breaks
1788         function ucwordbreaks( $str ) {
1789                 if ( $this->isMultibyte( $str ) ) {
1790                         $str = $this->lc( $str );
1791
1792                         // since \b doesn't work for UTF-8, we explicitely define word break chars
1793                         $breaks = "[ \-\(\)\}\{\.,\?!]";
1794
1795                         // find first letter after word break
1796                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
1797
1798                         if ( function_exists( 'mb_strtoupper' ) ) {
1799                                 return preg_replace_callback(
1800                                         $replaceRegexp,
1801                                         array( $this, 'ucwordbreaksCallbackMB' ),
1802                                         $str
1803                                 );
1804                         } else {
1805                                 return preg_replace_callback(
1806                                         $replaceRegexp,
1807                                         array( $this, 'ucwordsCallbackWiki' ),
1808                                         $str
1809                                 );
1810                         }
1811                 } else {
1812                         return preg_replace_callback(
1813                                 '/\b([\w\x80-\xff]+)\b/',
1814                                 array( $this, 'ucwordbreaksCallbackAscii' ),
1815                                 $str
1816                         );
1817                 }
1818         }
1819
1820         /**
1821          * Return a case-folded representation of $s
1822          *
1823          * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
1824          * and $s2 are the same except for the case of their characters. It is not
1825          * necessary for the value returned to make sense when displayed.
1826          *
1827          * Do *not* perform any other normalisation in this function. If a caller
1828          * uses this function when it should be using a more general normalisation
1829          * function, then fix the caller.
1830          */
1831         function caseFold( $s ) {
1832                 return $this->uc( $s );
1833         }
1834
1835         function checkTitleEncoding( $s ) {
1836                 if ( is_array( $s ) ) {
1837                         wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
1838                 }
1839                 # Check for non-UTF-8 URLs
1840                 $ishigh = preg_match( '/[\x80-\xff]/', $s );
1841                 if ( !$ishigh ) {
1842                         return $s;
1843                 }
1844
1845                 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1846                                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
1847                 if ( $isutf8 ) {
1848                         return $s;
1849                 }
1850
1851                 return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
1852         }
1853
1854         function fallback8bitEncoding() {
1855                 return self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
1856         }
1857
1858         /**
1859          * Most writing systems use whitespace to break up words.
1860          * Some languages such as Chinese don't conventionally do this,
1861          * which requires special handling when breaking up words for
1862          * searching etc.
1863          */
1864         function hasWordBreaks() {
1865                 return true;
1866         }
1867
1868         /**
1869          * Some languages such as Chinese require word segmentation,
1870          * Specify such segmentation when overridden in derived class.
1871          *
1872          * @param $string String
1873          * @return String
1874          */
1875         function segmentByWord( $string ) {
1876                 return $string;
1877         }
1878
1879         /**
1880          * Some languages have special punctuation need to be normalized.
1881          * Make such changes here.
1882          *
1883          * @param $string String
1884          * @return String
1885          */
1886         function normalizeForSearch( $string ) {
1887                 return self::convertDoubleWidth( $string );
1888         }
1889
1890         /**
1891          * convert double-width roman characters to single-width.
1892          * range: ff00-ff5f ~= 0020-007f
1893          */
1894         protected static function convertDoubleWidth( $string ) {
1895                 static $full = null;
1896                 static $half = null;
1897
1898                 if ( $full === null ) {
1899                         $fullWidth = "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ";
1900                         $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
1901                         $full = str_split( $fullWidth, 3 );
1902                         $half = str_split( $halfWidth );
1903                 }
1904
1905                 $string = str_replace( $full, $half, $string );
1906                 return $string;
1907         }
1908
1909         protected static function insertSpace( $string, $pattern ) {
1910                 $string = preg_replace( $pattern, " $1 ", $string );
1911                 $string = preg_replace( '/ +/', ' ', $string );
1912                 return $string;
1913         }
1914
1915         function convertForSearchResult( $termsArray ) {
1916                 # some languages, e.g. Chinese, need to do a conversion
1917                 # in order for search results to be displayed correctly
1918                 return $termsArray;
1919         }
1920
1921         /**
1922          * Get the first character of a string.
1923          *
1924          * @param $s string
1925          * @return string
1926          */
1927         function firstChar( $s ) {
1928                 $matches = array();
1929                 preg_match(
1930                         '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1931                                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
1932                         $s,
1933                         $matches
1934                 );
1935
1936                 if ( isset( $matches[1] ) ) {
1937                         if ( strlen( $matches[1] ) != 3 ) {
1938                                 return $matches[1];
1939                         }
1940
1941                         // Break down Hangul syllables to grab the first jamo
1942                         $code = utf8ToCodepoint( $matches[1] );
1943                         if ( $code < 0xac00 || 0xd7a4 <= $code ) {
1944                                 return $matches[1];
1945                         } elseif ( $code < 0xb098 ) {
1946                                 return "\xe3\x84\xb1";
1947                         } elseif ( $code < 0xb2e4 ) {
1948                                 return "\xe3\x84\xb4";
1949                         } elseif ( $code < 0xb77c ) {
1950                                 return "\xe3\x84\xb7";
1951                         } elseif ( $code < 0xb9c8 ) {
1952                                 return "\xe3\x84\xb9";
1953                         } elseif ( $code < 0xbc14 ) {
1954                                 return "\xe3\x85\x81";
1955                         } elseif ( $code < 0xc0ac ) {
1956                                 return "\xe3\x85\x82";
1957                         } elseif ( $code < 0xc544 ) {
1958                                 return "\xe3\x85\x85";
1959                         } elseif ( $code < 0xc790 ) {
1960                                 return "\xe3\x85\x87";
1961                         } elseif ( $code < 0xcc28 ) {
1962                                 return "\xe3\x85\x88";
1963                         } elseif ( $code < 0xce74 ) {
1964                                 return "\xe3\x85\x8a";
1965                         } elseif ( $code < 0xd0c0 ) {
1966                                 return "\xe3\x85\x8b";
1967                         } elseif ( $code < 0xd30c ) {
1968                                 return "\xe3\x85\x8c";
1969                         } elseif ( $code < 0xd558 ) {
1970                                 return "\xe3\x85\x8d";
1971                         } else {
1972                                 return "\xe3\x85\x8e";
1973                         }
1974                 } else {
1975                         return '';
1976                 }
1977         }
1978
1979         function initEncoding() {
1980                 # Some languages may have an alternate char encoding option
1981                 # (Esperanto X-coding, Japanese furigana conversion, etc)
1982                 # If this language is used as the primary content language,
1983                 # an override to the defaults can be set here on startup.
1984         }
1985
1986         function recodeForEdit( $s ) {
1987                 # For some languages we'll want to explicitly specify
1988                 # which characters make it into the edit box raw
1989                 # or are converted in some way or another.
1990                 # Note that if wgOutputEncoding is different from
1991                 # wgInputEncoding, this text will be further converted
1992                 # to wgOutputEncoding.
1993                 global $wgEditEncoding;
1994                 if ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' ) {
1995                         return $s;
1996                 } else {
1997                         return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
1998                 }
1999         }
2000
2001         function recodeInput( $s ) {
2002                 # Take the previous into account.
2003                 global $wgEditEncoding;
2004                 if ( $wgEditEncoding != '' ) {
2005                         $enc = $wgEditEncoding;
2006                 } else {
2007                         $enc = 'UTF-8';
2008                 }
2009                 if ( $enc == 'UTF-8' ) {
2010                         return $s;
2011                 } else {
2012                         return $this->iconv( $enc, 'UTF-8', $s );
2013                 }
2014         }
2015
2016         /**
2017          * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
2018          * also cleans up certain backwards-compatible sequences, converting them
2019          * to the modern Unicode equivalent.
2020          *
2021          * This is language-specific for performance reasons only.
2022          */
2023         function normalize( $s ) {
2024                 global $wgAllUnicodeFixes;
2025                 $s = UtfNormal::cleanUp( $s );
2026                 if ( $wgAllUnicodeFixes ) {
2027                         $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s );
2028                         $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s );
2029                 }
2030
2031                 return $s;
2032         }
2033
2034         /**
2035          * Transform a string using serialized data stored in the given file (which
2036          * must be in the serialized subdirectory of $IP). The file contains pairs
2037          * mapping source characters to destination characters.
2038          *
2039          * The data is cached in process memory. This will go faster if you have the
2040          * FastStringSearch extension.
2041          */
2042         function transformUsingPairFile( $file, $string ) {
2043                 if ( !isset( $this->transformData[$file] ) ) {
2044                         $data = wfGetPrecompiledData( $file );
2045                         if ( $data === false ) {
2046                                 throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
2047                         }
2048                         $this->transformData[$file] = new ReplacementArray( $data );
2049                 }
2050                 return $this->transformData[$file]->replace( $string );
2051         }
2052
2053         /**
2054          * For right-to-left language support
2055          *
2056          * @return bool
2057          */
2058         function isRTL() {
2059                 return self::$dataCache->getItem( $this->mCode, 'rtl' );
2060         }
2061
2062         /**
2063          * Return the correct HTML 'dir' attribute value for this language.
2064          * @return String
2065          */
2066         function getDir() {
2067                 return $this->isRTL() ? 'rtl' : 'ltr';
2068         }
2069
2070         /**
2071          * Return 'left' or 'right' as appropriate alignment for line-start
2072          * for this language's text direction.
2073          *
2074          * Should be equivalent to CSS3 'start' text-align value....
2075          *
2076          * @return String
2077          */
2078         function alignStart() {
2079                 return $this->isRTL() ? 'right' : 'left';
2080         }
2081
2082         /**
2083          * Return 'right' or 'left' as appropriate alignment for line-end
2084          * for this language's text direction.
2085          *
2086          * Should be equivalent to CSS3 'end' text-align value....
2087          *
2088          * @return String
2089          */
2090         function alignEnd() {
2091                 return $this->isRTL() ? 'left' : 'right';
2092         }
2093
2094         /**
2095          * A hidden direction mark (LRM or RLM), depending on the language direction
2096          *
2097          * @return string
2098          */
2099         function getDirMark() {
2100                 return $this->isRTL() ? "\xE2\x80\x8F" : "\xE2\x80\x8E";
2101         }
2102
2103         function capitalizeAllNouns() {
2104                 return self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
2105         }
2106
2107         /**
2108          * An arrow, depending on the language direction
2109          *
2110          * @return string
2111          */
2112         function getArrow() {
2113                 return $this->isRTL() ? '←' : '→';
2114         }
2115
2116         /**
2117          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
2118          *
2119          * @return bool
2120          */
2121         function linkPrefixExtension() {
2122                 return self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
2123         }
2124
2125         function getMagicWords() {
2126                 return self::$dataCache->getItem( $this->mCode, 'magicWords' );
2127         }
2128
2129         protected function doMagicHook() {
2130                 if ( $this->mMagicHookDone ) {
2131                         return;
2132                 }
2133                 $this->mMagicHookDone = true;
2134                 wfProfileIn( 'LanguageGetMagic' );
2135                 wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
2136                 wfProfileOut( 'LanguageGetMagic' );
2137         }
2138
2139         # Fill a MagicWord object with data from here
2140         function getMagic( $mw ) {
2141                 $this->doMagicHook();
2142
2143                 if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
2144                         $rawEntry = $this->mMagicExtensions[$mw->mId];
2145                 } else {
2146                         $magicWords = $this->getMagicWords();
2147                         if ( isset( $magicWords[$mw->mId] ) ) {
2148                                 $rawEntry = $magicWords[$mw->mId];
2149                         } else {
2150                                 $rawEntry = false;
2151                         }
2152                 }
2153
2154                 if ( !is_array( $rawEntry ) ) {
2155                         error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
2156                 } else {
2157                         $mw->mCaseSensitive = $rawEntry[0];
2158                         $mw->mSynonyms = array_slice( $rawEntry, 1 );
2159                 }
2160         }
2161
2162         /**
2163          * Add magic words to the extension array
2164          */
2165         function addMagicWordsByLang( $newWords ) {
2166                 $code = $this->getCode();
2167                 $fallbackChain = array();
2168                 while ( $code && !in_array( $code, $fallbackChain ) ) {
2169                         $fallbackChain[] = $code;
2170                         $code = self::getFallbackFor( $code );
2171                 }
2172                 if ( !in_array( 'en', $fallbackChain ) ) {
2173                         $fallbackChain[] = 'en';
2174                 }
2175                 $fallbackChain = array_reverse( $fallbackChain );
2176                 foreach ( $fallbackChain as $code ) {
2177                         if ( isset( $newWords[$code] ) ) {
2178                                 $this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
2179                         }
2180                 }
2181         }
2182
2183         /**
2184          * Get special page names, as an associative array
2185          *   case folded alias => real name
2186          */
2187         function getSpecialPageAliases() {
2188                 // Cache aliases because it may be slow to load them
2189                 if ( is_null( $this->mExtendedSpecialPageAliases ) ) {
2190                         // Initialise array
2191                         $this->mExtendedSpecialPageAliases =
2192                                 self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
2193                         wfRunHooks( 'LanguageGetSpecialPageAliases',
2194                                 array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );
2195                 }
2196
2197                 return $this->mExtendedSpecialPageAliases;
2198         }
2199
2200         /**
2201          * Italic is unsuitable for some languages
2202          *
2203          * @param $text String: the text to be emphasized.
2204          * @return string
2205          */
2206         function emphasize( $text ) {
2207                 return "<em>$text</em>";
2208         }
2209
2210          /**
2211           * Normally we output all numbers in plain en_US style, that is
2212           * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
2213           * point twohundredthirtyfive. However this is not sutable for all
2214           * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
2215           * Icelandic just want to use commas instead of dots, and dots instead
2216           * of commas like "293.291,235".
2217           *
2218           * An example of this function being called:
2219           * <code>
2220           * wfMsg( 'message', $wgLang->formatNum( $num ) )
2221           * </code>
2222           *
2223           * See LanguageGu.php for the Gujarati implementation and
2224           * $separatorTransformTable on MessageIs.php for
2225           * the , => . and . => , implementation.
2226           *
2227           * @todo check if it's viable to use localeconv() for the decimal
2228           *       separator thing.
2229           * @param $number Mixed: the string to be formatted, should be an integer
2230           *        or a floating point number.
2231           * @param $nocommafy Bool: set to true for special numbers like dates
2232           * @return string
2233           */
2234         function formatNum( $number, $nocommafy = false ) {
2235                 global $wgTranslateNumerals;
2236                 if ( !$nocommafy ) {
2237                         $number = $this->commafy( $number );
2238                         $s = $this->separatorTransformTable();
2239                         if ( $s ) {
2240                                 $number = strtr( $number, $s );
2241                         }
2242                 }
2243
2244                 if ( $wgTranslateNumerals ) {
2245                         $s = $this->digitTransformTable();
2246                         if ( $s ) {
2247                                 $number = strtr( $number, $s );
2248                         }
2249                 }
2250
2251                 return $number;
2252         }
2253
2254         function parseFormattedNumber( $number ) {
2255                 $s = $this->digitTransformTable();
2256                 if ( $s ) {
2257                         $number = strtr( $number, array_flip( $s ) );
2258                 }
2259
2260                 $s = $this->separatorTransformTable();
2261                 if ( $s ) {
2262                         $number = strtr( $number, array_flip( $s ) );
2263                 }
2264
2265                 $number = strtr( $number, array( ',' => '' ) );
2266                 return $number;
2267         }
2268
2269         /**
2270          * Adds commas to a given number
2271          *
2272          * @param $_ mixed
2273          * @return string
2274          */
2275         function commafy( $_ ) {
2276                 return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
2277         }
2278
2279         function digitTransformTable() {
2280                 return self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
2281         }
2282
2283         function separatorTransformTable() {
2284                 return self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
2285         }
2286
2287         /**
2288          * Take a list of strings and build a locale-friendly comma-separated
2289          * list, using the local comma-separator message.
2290          * The last two strings are chained with an "and".
2291          *
2292          * @param $l Array
2293          * @return string
2294          */
2295         function listToText( $l ) {
2296                 $s = '';
2297                 $m = count( $l ) - 1;
2298                 if ( $m == 1 ) {
2299                         return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
2300                 } else {
2301                         for ( $i = $m; $i >= 0; $i-- ) {
2302                                 if ( $i == $m ) {
2303                                         $s = $l[$i];
2304                                 } else if ( $i == $m - 1 ) {
2305                                         $s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s;
2306                                 } else {
2307                                         $s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s;
2308                                 }
2309                         }
2310                         return $s;
2311                 }
2312         }
2313
2314         /**
2315          * Take a list of strings and build a locale-friendly comma-separated
2316          * list, using the local comma-separator message.
2317          * @param $list array of strings to put in a comma list
2318          * @return string
2319          */
2320         function commaList( $list ) {
2321                 return implode(
2322                         $list,
2323                         wfMsgExt(
2324                                 'comma-separator',
2325                                 array( 'parsemag', 'escapenoentities', 'language' => $this )
2326                         )
2327                 );
2328         }
2329
2330         /**
2331          * Take a list of strings and build a locale-friendly semicolon-separated
2332          * list, using the local semicolon-separator message.
2333          * @param $list array of strings to put in a semicolon list
2334          * @return string
2335          */
2336         function semicolonList( $list ) {
2337                 return implode(
2338                         $list,
2339                         wfMsgExt(
2340                                 'semicolon-separator',
2341                                 array( 'parsemag', 'escapenoentities', 'language' => $this )
2342                         )
2343                 );
2344         }
2345
2346         /**
2347          * Same as commaList, but separate it with the pipe instead.
2348          * @param $list array of strings to put in a pipe list
2349          * @return string
2350          */
2351         function pipeList( $list ) {
2352                 return implode(
2353                         $list,
2354                         wfMsgExt(
2355                                 'pipe-separator',
2356                                 array( 'escapenoentities', 'language' => $this )
2357                         )
2358                 );
2359         }
2360
2361         /**
2362          * Truncate a string to a specified length in bytes, appending an optional
2363          * string (e.g. for ellipses)
2364          *
2365          * The database offers limited byte lengths for some columns in the database;
2366          * multi-byte character sets mean we need to ensure that only whole characters
2367          * are included, otherwise broken characters can be passed to the user
2368          *
2369          * If $length is negative, the string will be truncated from the beginning
2370          *
2371          * @param $string String to truncate
2372          * @param $length Int: maximum length (excluding ellipses)
2373          * @param $ellipsis String to append to the truncated text
2374          * @return string
2375          */
2376         function truncate( $string, $length, $ellipsis = '...' ) {
2377                 # Use the localized ellipsis character
2378                 if ( $ellipsis == '...' ) {
2379                         $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
2380                 }
2381                 # Check if there is no need to truncate
2382                 if ( $length == 0 ) {
2383                         return $ellipsis;
2384                 } elseif ( strlen( $string ) <= abs( $length ) ) {
2385                         return $string;
2386                 }
2387                 $stringOriginal = $string;
2388                 if ( $length > 0 ) {
2389                         $string = substr( $string, 0, $length ); // xyz...
2390                         $string = $this->removeBadCharLast( $string );
2391                         $string = $string . $ellipsis;
2392                 } else {
2393                         $string = substr( $string, $length ); // ...xyz
2394                         $string = $this->removeBadCharFirst( $string );
2395                         $string = $ellipsis . $string;
2396                 }
2397                 # Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
2398                 if ( strlen( $string ) < strlen( $stringOriginal ) ) {
2399                         return $string;
2400                 } else {
2401                         return $stringOriginal;
2402                 }
2403         }
2404
2405         /**
2406          * Remove bytes that represent an incomplete Unicode character
2407          * at the end of string (e.g. bytes of the char are missing)
2408          *
2409          * @param $string String
2410          * @return string
2411          */
2412         protected function removeBadCharLast( $string ) {
2413                 $char = ord( $string[strlen( $string ) - 1] );
2414                 $m = array();
2415                 if ( $char >= 0xc0 ) {
2416                         # We got the first byte only of a multibyte char; remove it.
2417                         $string = substr( $string, 0, -1 );
2418                 } elseif ( $char >= 0x80 &&
2419                           preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
2420                                                   '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
2421                 {
2422                         # We chopped in the middle of a character; remove it
2423                         $string = $m[1];
2424                 }
2425                 return $string;
2426         }
2427
2428         /**
2429          * Remove bytes that represent an incomplete Unicode character
2430          * at the start of string (e.g. bytes of the char are missing)
2431          *
2432          * @param $string String
2433          * @return string
2434          */
2435         protected function removeBadCharFirst( $string ) {
2436                 $char = ord( $string[0] );
2437                 if ( $char >= 0x80 && $char < 0xc0 ) {
2438                         # We chopped in the middle of a character; remove the whole thing
2439                         $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
2440                 }
2441                 return $string;
2442         }
2443
2444         /*
2445          * Truncate a string of valid HTML to a specified length in bytes,
2446          * appending an optional string (e.g. for ellipses), and return valid HTML
2447          *
2448          * This is only intended for styled/linked text, such as HTML with
2449          * tags like <span> and <a>, were the tags are self-contained (valid HTML)
2450          *
2451          * Note: tries to fix broken HTML with MWTidy
2452          *
2453          * @param string $text HTML string to truncate
2454          * @param int $length (zero/positive) Maximum length (excluding ellipses)
2455          * @param string $ellipsis String to append to the truncated text
2456          * @returns string
2457          */
2458         function truncateHtml( $text, $length, $ellipsis = '...' ) {
2459                 # Use the localized ellipsis character
2460                 if ( $ellipsis == '...' ) {
2461                         $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
2462                 }
2463                 # Check if there is no need to truncate
2464                 if ( $length <= 0 ) {
2465                         return $ellipsis; // no text shown, nothing to format
2466                 } elseif ( strlen( $text ) <= $length ) {
2467                         return $text; // string short enough even *with* HTML
2468                 }
2469                 $text = MWTidy::tidy( $text ); // fix tags
2470                 $displayLen = 0; // innerHTML legth so far
2471                 $testingEllipsis = false; // checking if ellipses will make string longer/equal?
2472                 $tagType = 0; // 0-open, 1-close
2473                 $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
2474                 $entityState = 0; // 0-not entity, 1-entity
2475                 $tag = $ret = '';
2476                 $openTags = array(); // open tag stack
2477                 $textLen = strlen( $text );
2478                 for ( $pos = 0; $pos < $textLen; ++$pos ) {
2479                         $ch = $text[$pos];
2480                         $lastCh = $pos ? $text[$pos - 1] : '';
2481                         $ret .= $ch; // add to result string
2482                         if ( $ch == '<' ) {
2483                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
2484                                 $entityState = 0; // for bad HTML
2485                                 $bracketState = 1; // tag started (checking for backslash)
2486                         } elseif ( $ch == '>' ) {
2487                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
2488                                 $entityState = 0; // for bad HTML
2489                                 $bracketState = 0; // out of brackets
2490                         } elseif ( $bracketState == 1 ) {
2491                                 if ( $ch == '/' ) {
2492                                         $tagType = 1; // close tag (e.g. "</span>")
2493                                 } else {
2494                                         $tagType = 0; // open tag (e.g. "<span>")
2495                                         $tag .= $ch;
2496                                 }
2497                                 $bracketState = 2; // building tag name
2498                         } elseif ( $bracketState == 2 ) {
2499                                 if ( $ch != ' ' ) {
2500                                         $tag .= $ch;
2501                                 } else {
2502                                         // Name found (e.g. "<a href=..."), add on tag attributes...
2503                                         $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
2504                                 }
2505                         } elseif ( $bracketState == 0 ) {
2506                                 if ( $entityState ) {
2507                                         if ( $ch == ';' ) {
2508                                                 $entityState = 0;
2509                                                 $displayLen++; // entity is one displayed char
2510                                         }
2511                                 } else {
2512                                         if ( $ch == '&' ) {
2513                                                 $entityState = 1; // entity found, (e.g. "&#160;")
2514                                         } else {
2515                                                 $displayLen++; // this char is displayed
2516                                                 // Add on the other display text after this...
2517                                                 $skipped = $this->truncate_skip(
2518                                                         $ret, $text, "<>&", $pos + 1, $length - $displayLen );
2519                                                 $displayLen += $skipped;
2520                                                 $pos += $skipped;
2521                                         }
2522                                 }
2523                         }
2524                         # Consider truncation once the display length has reached the maximim.
2525                         # Double-check that we're not in the middle of a bracket/entity...
2526                         if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
2527                                 if ( !$testingEllipsis ) {
2528                                         $testingEllipsis = true;
2529                                         # Save where we are; we will truncate here unless
2530                                         # the ellipsis actually makes the string longer.
2531                                         $pOpenTags = $openTags; // save state
2532                                         $pRet = $ret; // save state
2533                                 } elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
2534                                         # Ellipsis won't make string longer/equal, the truncation point was OK.
2535                                         $openTags = $pOpenTags; // reload state
2536                                         $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
2537                                         $ret .= $ellipsis; // add ellipsis
2538                                         break;
2539                                 }
2540                         }
2541                 }
2542                 if ( $displayLen == 0 ) {
2543                         return ''; // no text shown, nothing to format
2544                 }
2545                 // Close the last tag if left unclosed by bad HTML
2546                 $this->truncate_endBracket( $tag, $text[$textLen - 1], $tagType, $openTags );
2547                 while ( count( $openTags ) > 0 ) {
2548                         $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
2549                 }
2550                 return $ret;
2551         }
2552
2553         // truncateHtml() helper function
2554         // like strcspn() but adds the skipped chars to $ret
2555         private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
2556                 $skipCount = 0;
2557                 if ( $start < strlen( $text ) ) {
2558                         $skipCount = strcspn( $text, $search, $start, $len );
2559                         $ret .= substr( $text, $start, $skipCount );
2560                 }
2561                 return $skipCount;
2562         }
2563
2564         /*
2565          * truncateHtml() helper function
2566          * (a) push or pop $tag from $openTags as needed
2567          * (b) clear $tag value
2568          * @param String &$tag Current HTML tag name we are looking at
2569          * @param int $tagType (0-open tag, 1-close tag)
2570          * @param char $lastCh Character before the '>' that ended this tag
2571          * @param array &$openTags Open tag stack (not accounting for $tag)
2572          */
2573         private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
2574                 $tag = ltrim( $tag );
2575                 if ( $tag != '' ) {
2576                         if ( $tagType == 0 && $lastCh != '/' ) {
2577                                 $openTags[] = $tag; // tag opened (didn't close itself)
2578                         } else if ( $tagType == 1 ) {
2579                                 if ( $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
2580                                         array_pop( $openTags ); // tag closed
2581                                 }
2582                         }
2583                         $tag = '';
2584                 }
2585         }
2586
2587         /**
2588          * Grammatical transformations, needed for inflected languages
2589          * Invoked by putting {{grammar:case|word}} in a message
2590          *
2591          * @param $word string
2592          * @param $case string
2593          * @return string
2594          */
2595         function convertGrammar( $word, $case ) {
2596                 global $wgGrammarForms;
2597                 if ( isset( $wgGrammarForms[$this->getCode()][$case][$word] ) ) {
2598                         return $wgGrammarForms[$this->getCode()][$case][$word];
2599                 }
2600                 return $word;
2601         }
2602
2603         /**
2604          * Provides an alternative text depending on specified gender.
2605          * Usage {{gender:username|masculine|feminine|neutral}}.
2606          * username is optional, in which case the gender of current user is used,
2607          * but only in (some) interface messages; otherwise default gender is used.
2608          * If second or third parameter are not specified, masculine is used.
2609          * These details may be overriden per language.
2610          */
2611         function gender( $gender, $forms ) {
2612                 if ( !count( $forms ) ) {
2613                         return '';
2614                 }
2615                 $forms = $this->preConvertPlural( $forms, 2 );
2616                 if ( $gender === 'male' ) {
2617                         return $forms[0];
2618                 }
2619                 if ( $gender === 'female' ) {
2620                         return $forms[1];
2621                 }
2622                 return isset( $forms[2] ) ? $forms[2] : $forms[0];
2623         }
2624
2625         /**
2626          * Plural form transformations, needed for some languages.
2627          * For example, there are 3 form of plural in Russian and Polish,
2628          * depending on "count mod 10". See [[w:Plural]]
2629          * For English it is pretty simple.
2630          *
2631          * Invoked by putting {{plural:count|wordform1|wordform2}}
2632          * or {{plural:count|wordform1|wordform2|wordform3}}
2633          *
2634          * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
2635          *
2636          * @param $count Integer: non-localized number
2637          * @param $forms Array: different plural forms
2638          * @return string Correct form of plural for $count in this language
2639          */
2640         function convertPlural( $count, $forms ) {
2641                 if ( !count( $forms ) ) {
2642                         return '';
2643                 }
2644                 $forms = $this->preConvertPlural( $forms, 2 );
2645
2646                 return ( $count == 1 ) ? $forms[0] : $forms[1];
2647         }
2648
2649         /**
2650          * Checks that convertPlural was given an array and pads it to requested
2651          * amound of forms by copying the last one.
2652          *
2653          * @param $count Integer: How many forms should there be at least
2654          * @param $forms Array of forms given to convertPlural
2655          * @return array Padded array of forms or an exception if not an array
2656          */
2657         protected function preConvertPlural( /* Array */ $forms, $count ) {
2658                 while ( count( $forms ) < $count ) {
2659                         $forms[] = $forms[count( $forms ) - 1];
2660                 }
2661                 return $forms;
2662         }
2663
2664         /**
2665          * For translating of expiry times
2666          * @param $str String: the validated block time in English
2667          * @return Somehow translated block time
2668          * @see LanguageFi.php for example implementation
2669          */
2670         function translateBlockExpiry( $str ) {
2671                 $scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );
2672
2673                 if ( $scBlockExpiryOptions == '-' ) {
2674                         return $str;
2675                 }
2676
2677                 foreach ( explode( ',', $scBlockExpiryOptions ) as $option ) {
2678                         if ( strpos( $option, ':' ) === false ) {
2679                                 continue;
2680                         }
2681                         list( $show, $value ) = explode( ':', $option );
2682                         if ( strcmp( $str, $value ) == 0 ) {
2683                                 return htmlspecialchars( trim( $show ) );
2684                         }
2685                 }
2686
2687                 return $str;
2688         }
2689
2690         /**
2691          * languages like Chinese need to be segmented in order for the diff
2692          * to be of any use
2693          *
2694          * @param $text String
2695          * @return String
2696          */
2697         function segmentForDiff( $text ) {
2698                 return $text;
2699         }
2700
2701         /**
2702          * and unsegment to show the result
2703          *
2704          * @param $text String
2705          * @return String
2706          */
2707         function unsegmentForDiff( $text ) {
2708                 return $text;
2709         }
2710
2711         # convert text to all supported variants
2712         function autoConvertToAllVariants( $text ) {
2713                 return $this->mConverter->autoConvertToAllVariants( $text );
2714         }
2715
2716         # convert text to different variants of a language.
2717         function convert( $text ) {
2718                 return $this->mConverter->convert( $text );
2719         }
2720
2721         # Convert a Title object to a string in the preferred variant
2722         function convertTitle( $title ) {
2723                 return $this->mConverter->convertTitle( $title );
2724         }
2725
2726         # Check if this is a language with variants
2727         function hasVariants() {
2728                 return sizeof( $this->getVariants() ) > 1;
2729         }
2730
2731         # Put custom tags (e.g. -{ }-) around math to prevent conversion
2732         function armourMath( $text ) {
2733                 return $this->mConverter->armourMath( $text );
2734         }
2735
2736         /**
2737          * Perform output conversion on a string, and encode for safe HTML output.
2738          * @param $text String text to be converted
2739          * @param $isTitle Bool whether this conversion is for the article title
2740          * @return string
2741          * @todo this should get integrated somewhere sane
2742          */
2743         function convertHtml( $text, $isTitle = false ) {
2744                 return htmlspecialchars( $this->convert( $text, $isTitle ) );
2745         }
2746
2747         function convertCategoryKey( $key ) {
2748                 return $this->mConverter->convertCategoryKey( $key );
2749         }
2750
2751         /**
2752          * Get the list of variants supported by this language
2753          * see sample implementation in LanguageZh.php
2754          *
2755          * @return array an array of language codes
2756          */
2757         function getVariants() {
2758                 return $this->mConverter->getVariants();
2759         }
2760
2761         function getPreferredVariant() {
2762                 return $this->mConverter->getPreferredVariant();
2763         }
2764
2765         function getDefaultVariant() {
2766                 return $this->mConverter->getDefaultVariant();
2767         }
2768
2769         function getURLVariant() {
2770                 return $this->mConverter->getURLVariant();
2771         }
2772
2773         /**
2774          * If a language supports multiple variants, it is
2775          * possible that non-existing link in one variant
2776          * actually exists in another variant. this function
2777          * tries to find it. See e.g. LanguageZh.php
2778          *
2779          * @param $link String: the name of the link
2780          * @param $nt Mixed: the title object of the link
2781          * @param $ignoreOtherCond Boolean: to disable other conditions when
2782          *      we need to transclude a template or update a category's link
2783          * @return null the input parameters may be modified upon return
2784          */
2785         function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
2786                 $this->mConverter->findVariantLink( $link, $nt, $ignoreOtherCond );
2787         }
2788
2789         /**
2790          * If a language supports multiple variants, converts text
2791          * into an array of all possible variants of the text:
2792          *  'variant' => text in that variant
2793          *
2794          * @deprecated Use autoConvertToAllVariants()
2795          */
2796         function convertLinkToAllVariants( $text ) {
2797                 return $this->mConverter->convertLinkToAllVariants( $text );
2798         }
2799
2800         /**
2801          * returns language specific options used by User::getPageRenderHash()
2802          * for example, the preferred language variant
2803          *
2804          * @return string
2805          */
2806         function getExtraHashOptions() {
2807                 return $this->mConverter->getExtraHashOptions();
2808         }
2809
2810         /**
2811          * For languages that support multiple variants, the title of an
2812          * article may be displayed differently in different variants. this
2813          * function returns the apporiate title defined in the body of the article.
2814          *
2815          * @return string
2816          */
2817         function getParsedTitle() {
2818                 return $this->mConverter->getParsedTitle();
2819         }
2820
2821         /**
2822          * Enclose a string with the "no conversion" tag. This is used by
2823          * various functions in the Parser
2824          *
2825          * @param $text String: text to be tagged for no conversion
2826          * @param $noParse
2827          * @return string the tagged text
2828          */
2829         function markNoConversion( $text, $noParse = false ) {
2830                 return $this->mConverter->markNoConversion( $text, $noParse );
2831         }
2832
2833         /**
2834          * A regular expression to match legal word-trailing characters
2835          * which should be merged onto a link of the form [[foo]]bar.
2836          *
2837          * @return string
2838          */
2839         function linkTrail() {
2840                 return self::$dataCache->getItem( $this->mCode, 'linkTrail' );
2841         }
2842
2843         function getLangObj() {
2844                 return $this;
2845         }
2846
2847         /**
2848          * Get the RFC 3066 code for this language object
2849          */
2850         function getCode() {
2851                 return $this->mCode;
2852         }
2853
2854         function setCode( $code ) {
2855                 $this->mCode = $code;
2856         }
2857
2858         /**
2859          * Get the name of a file for a certain language code
2860          * @param $prefix string Prepend this to the filename
2861          * @param $code string Language code
2862          * @param $suffix string Append this to the filename
2863          * @return string $prefix . $mangledCode . $suffix
2864          */
2865         static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
2866                 // Protect against path traversal
2867                 if ( !Language::isValidCode( $code ) ) {
2868                         throw new MWException( "Invalid language code \"$code\"" );
2869                 }
2870
2871                 return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
2872         }
2873
2874         /**
2875          * Get the language code from a file name. Inverse of getFileName()
2876          * @param $filename string $prefix . $languageCode . $suffix
2877          * @param $prefix string Prefix before the language code
2878          * @param $suffix string Suffix after the language code
2879          * @return Language code, or false if $prefix or $suffix isn't found
2880          */
2881         static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
2882                 $m = null;
2883                 preg_match( '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
2884                         preg_quote( $suffix, '/' ) . '/', $filename, $m );
2885                 if ( !count( $m ) ) {
2886                         return false;
2887                 }
2888                 return str_replace( '_', '-', strtolower( $m[1] ) );
2889         }
2890
2891         static function getMessagesFileName( $code ) {
2892                 global $IP;
2893                 return self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
2894         }
2895
2896         static function getClassFileName( $code ) {
2897                 global $IP;
2898                 return self::getFileName( "$IP/languages/classes/Language", $code, '.php' );
2899         }
2900
2901         /**
2902          * Get the fallback for a given language
2903          */
2904         static function getFallbackFor( $code ) {
2905                 if ( $code === 'en' ) {
2906                         // Shortcut
2907                         return false;
2908                 } else {
2909                         return self::getLocalisationCache()->getItem( $code, 'fallback' );
2910                 }
2911         }
2912
2913         /**
2914          * Get all messages for a given language
2915          * WARNING: this may take a long time
2916          */
2917         static function getMessagesFor( $code ) {
2918                 return self::getLocalisationCache()->getItem( $code, 'messages' );
2919         }
2920
2921         /**
2922          * Get a message for a given language
2923          */
2924         static function getMessageFor( $key, $code ) {
2925                 return self::getLocalisationCache()->getSubitem( $code, 'messages', $key );
2926         }
2927
2928         function fixVariableInNamespace( $talk ) {
2929                 if ( strpos( $talk, '$1' ) === false ) {
2930                         return $talk;
2931                 }
2932
2933                 global $wgMetaNamespace;
2934                 $talk = str_replace( '$1', $wgMetaNamespace, $talk );
2935
2936                 # Allow grammar transformations
2937                 # Allowing full message-style parsing would make simple requests
2938                 # such as action=raw much more expensive than they need to be.
2939                 # This will hopefully cover most cases.
2940                 $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
2941                         array( &$this, 'replaceGrammarInNamespace' ), $talk );
2942                 return str_replace( ' ', '_', $talk );
2943         }
2944
2945         function replaceGrammarInNamespace( $m ) {
2946                 return $this->convertGrammar( trim( $m[2] ), trim( $m[1] ) );
2947         }
2948
2949         static function getCaseMaps() {
2950                 static $wikiUpperChars, $wikiLowerChars;
2951                 if ( isset( $wikiUpperChars ) ) {
2952                         return array( $wikiUpperChars, $wikiLowerChars );
2953                 }
2954
2955                 wfProfileIn( __METHOD__ );
2956                 $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
2957                 if ( $arr === false ) {
2958                         throw new MWException(
2959                                 "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
2960                 }
2961                 $wikiUpperChars = $arr['wikiUpperChars'];
2962                 $wikiLowerChars = $arr['wikiLowerChars'];
2963                 wfProfileOut( __METHOD__ );
2964                 return array( $wikiUpperChars, $wikiLowerChars );
2965         }
2966
2967         function formatTimePeriod( $seconds ) {
2968                 if ( round( $seconds * 10 ) < 100 ) {
2969                         return $this->formatNum( sprintf( "%.1f", round( $seconds * 10 ) / 10 ) ) . $this->getMessageFromDB( 'seconds-abbrev' );
2970                 } elseif ( round( $seconds ) < 60 ) {
2971                         return $this->formatNum( round( $seconds ) ) . $this->getMessageFromDB( 'seconds-abbrev' );
2972                 } elseif ( round( $seconds ) < 3600 ) {
2973                         $minutes = floor( $seconds / 60 );
2974                         $secondsPart = round( fmod( $seconds, 60 ) );
2975                         if ( $secondsPart == 60 ) {
2976                                 $secondsPart = 0;
2977                                 $minutes++;
2978                         }
2979                         return $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
2980                                 $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
2981                 } else {
2982                         $hours = floor( $seconds / 3600 );
2983                         $minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
2984                         $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
2985                         if ( $secondsPart == 60 ) {
2986                                 $secondsPart = 0;
2987                                 $minutes++;
2988                         }
2989                         if ( $minutes == 60 ) {
2990                                 $minutes = 0;
2991                                 $hours++;
2992                         }
2993                         return $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ' .
2994                                 $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
2995                                 $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
2996                 }
2997         }
2998
2999         function formatBitrate( $bps ) {
3000                 $units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
3001                 if ( $bps <= 0 ) {
3002                         return $this->formatNum( $bps ) . $units[0];
3003                 }
3004                 $unitIndex = floor( log10( $bps ) / 3 );
3005                 $mantissa = $bps / pow( 1000, $unitIndex );
3006                 if ( $mantissa < 10 ) {
3007                         $mantissa = round( $mantissa, 1 );
3008                 } else {
3009                         $mantissa = round( $mantissa );
3010                 }
3011                 return $this->formatNum( $mantissa ) . $units[$unitIndex];
3012         }
3013
3014         /**
3015          * Format a size in bytes for output, using an appropriate
3016          * unit (B, KB, MB or GB) according to the magnitude in question
3017          *
3018          * @param $size Size to format
3019          * @return string Plain text (not HTML)
3020          */
3021         function formatSize( $size ) {
3022                 // For small sizes no decimal places necessary
3023                 $round = 0;
3024                 if ( $size > 1024 ) {
3025                         $size = $size / 1024;
3026                         if ( $size > 1024 ) {
3027                                 $size = $size / 1024;
3028                                 // For MB and bigger two decimal places are smarter
3029                                 $round = 2;
3030                                 if ( $size > 1024 ) {
3031                                         $size = $size / 1024;
3032                                         $msg = 'size-gigabytes';
3033                                 } else {
3034                                         $msg = 'size-megabytes';
3035                                 }
3036                         } else {
3037                                 $msg = 'size-kilobytes';
3038                         }
3039                 } else {
3040                         $msg = 'size-bytes';
3041                 }
3042                 $size = round( $size, $round );
3043                 $text = $this->getMessageFromDB( $msg );
3044                 return str_replace( '$1', $this->formatNum( $size ), $text );
3045         }
3046
3047         /**
3048          * Get the conversion rule title, if any.
3049          */
3050         function getConvRuleTitle() {
3051                 return $this->mConverter->getConvRuleTitle();
3052         }
3053 }