languages/Language.php

   1 <?php
   2 /**
   3  * Internationalisation code
   4  *
   5  * @file
   6  * @ingroup Language
   7  */
   8
   9 /**
  10  * @defgroup Language Language
  11  */
  12
  13 if ( !defined( 'MEDIAWIKI' ) ) {
  14         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
  15         exit( 1 );
  16 }
  17
  18 # Read language names
  19 global $wgLanguageNames;
  20 require_once( dirname( __FILE__ ) . '/Names.php' );
  21
  22 global $wgInputEncoding, $wgOutputEncoding;
  23
  24 /**
  25  * These are always UTF-8, they exist only for backwards compatibility
  26  */
  27 $wgInputEncoding    = 'UTF-8';
  28 $wgOutputEncoding       = 'UTF-8';
  29
  30 if ( function_exists( 'mb_strtoupper' ) ) {
  31         mb_internal_encoding( 'UTF-8' );
  32 }
  33
  34 /**
  35  * a fake language converter
  36  *
  37  * @ingroup Language
  38  */
  39 class FakeConverter {
  40         var $mLang;
  41         function __construct( $langobj ) { $this->mLang = $langobj; }
  42         function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
  43         function convert( $t ) { return $t; }
  44         function convertTitle( $t ) { return $t->getPrefixedText(); }
  45         function getVariants() { return array( $this->mLang->getCode() ); }
  46         function getPreferredVariant() { return $this->mLang->getCode(); }
  47         function getDefaultVariant() { return $this->mLang->getCode(); }
  48         function getURLVariant() { return ''; }
  49         function getConvRuleTitle() { return false; }
  50         function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) { }
  51         function getExtraHashOptions() { return ''; }
  52         function getParsedTitle() { return ''; }
  53         function markNoConversion( $text, $noParse = false ) { return $text; }
  54         function convertCategoryKey( $key ) { return $key; }
  55         function convertLinkToAllVariants( $text ) { return $this->autoConvertToAllVariants( $text ); }
  56         function armourMath( $text ) { return $text; }
  57 }
  58
  59 /**
  60  * Internationalisation code
  61  * @ingroup Language
  62  */
  63 class Language {
  64         var $mConverter, $mVariants, $mCode, $mLoaded = false;
  65         var $mMagicExtensions = array(), $mMagicHookDone = false;
  66
  67         var $mNamespaceIds, $namespaceNames, $namespaceAliases;
  68         var $dateFormatStrings = array();
  69         var $mExtendedSpecialPageAliases;
  70
  71         /**
  72          * ReplacementArray object caches
  73          */
  74         var $transformData = array();
  75
  76         static public $dataCache;
  77         static public $mLangObjCache = array();
  78
  79         static public $mWeekdayMsgs = array(
  80                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
  81                 'friday', 'saturday'
  82         );
  83
  84         static public $mWeekdayAbbrevMsgs = array(
  85                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
  86         );
  87
  88         static public $mMonthMsgs = array(
  89                 'january', 'february', 'march', 'april', 'may_long', 'june',
  90                 'july', 'august', 'september', 'october', 'november',
  91                 'december'
  92         );
  93         static public $mMonthGenMsgs = array(
  94                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
  95                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
  96                 'december-gen'
  97         );
  98         static public $mMonthAbbrevMsgs = array(
  99                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
 100                 'sep', 'oct', 'nov', 'dec'
 101         );
 102
 103         static public $mIranianCalendarMonthMsgs = array(
 104                 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
 105                 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
 106                 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
 107                 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
 108         );
 109
 110         static public $mHebrewCalendarMonthMsgs = array(
 111                 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
 112                 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
 113                 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
 114                 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
 115                 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
 116         );
 117
 118         static public $mHebrewCalendarMonthGenMsgs = array(
 119                 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
 120                 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
 121                 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
 122                 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
 123                 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
 124         );
 125
 126         static public $mHijriCalendarMonthMsgs = array(
 127                 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
 128                 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
 129                 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
 130                 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
 131         );
 132
 133         /**
 134          * Get a cached language object for a given language code
 135          * @param $code String
 136          * @return Language
 137          */
 138         static function factory( $code ) {
 139                 if ( !isset( self::$mLangObjCache[$code] ) ) {
 140                         if ( count( self::$mLangObjCache ) > 10 ) {
 141                                 // Don't keep a billion objects around, that's stupid.
 142                                 self::$mLangObjCache = array();
 143                         }
 144                         self::$mLangObjCache[$code] = self::newFromCode( $code );
 145                 }
 146                 return self::$mLangObjCache[$code];
 147         }
 148
 149         /**
 150          * Create a language object for a given language code
 151          * @param $code String
 152          * @return Language
 153          */
 154         protected static function newFromCode( $code ) {
 155                 global $IP;
 156                 static $recursionLevel = 0;
 157
 158                 // Protect against path traversal below
 159                 if ( !Language::isValidCode( $code )
 160                         || strcspn( $code, "/\\\000" ) !== strlen( $code ) )
 161                 {
 162                         throw new MWException( "Invalid language code \"$code\"" );
 163                 }
 164
 165                 if ( $code == 'en' ) {
 166                         $class = 'Language';
 167                 } else {
 168                         $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 169                         // Preload base classes to work around APC/PHP5 bug
 170                         if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
 171                                 include_once( "$IP/languages/classes/$class.deps.php" );
 172                         }
 173                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 174                                 include_once( "$IP/languages/classes/$class.php" );
 175                         }
 176                 }
 177
 178                 if ( $recursionLevel > 5 ) {
 179                         throw new MWException( "Language fallback loop detected when creating class $class\n" );
 180                 }
 181
 182                 if ( !class_exists( $class ) ) {
 183                         $fallback = Language::getFallbackFor( $code );
 184                         ++$recursionLevel;
 185                         $lang = Language::newFromCode( $fallback );
 186                         --$recursionLevel;
 187                         $lang->setCode( $code );
 188                 } else {
 189                         $lang = new $class;
 190                 }
 191                 return $lang;
 192         }
 193
 194         /**
 195          * Returns true if a language code string is of a valid form, whether or
 196          * not it exists.
 197          */
 198         public static function isValidCode( $code ) {
 199                 return strcspn( $code, "/\\\000" ) === strlen( $code );
 200         }
 201
 202         /**
 203          * Get the LocalisationCache instance
 204          *
 205          * @return LocalisationCache
 206          */
 207         public static function getLocalisationCache() {
 208                 if ( is_null( self::$dataCache ) ) {
 209                         global $wgLocalisationCacheConf;
 210                         $class = $wgLocalisationCacheConf['class'];
 211                         self::$dataCache = new $class( $wgLocalisationCacheConf );
 212                 }
 213                 return self::$dataCache;
 214         }
 215
 216         function __construct() {
 217                 $this->mConverter = new FakeConverter( $this );
 218                 // Set the code to the name of the descendant
 219                 if ( get_class( $this ) == 'Language' ) {
 220                         $this->mCode = 'en';
 221                 } else {
 222                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 223                 }
 224                 self::getLocalisationCache();
 225         }
 226
 227         /**
 228          * Reduce memory usage
 229          */
 230         function __destruct() {
 231                 foreach ( $this as $name => $value ) {
 232                         unset( $this->$name );
 233                 }
 234         }
 235
 236         /**
 237          * Hook which will be called if this is the content language.
 238          * Descendants can use this to register hook functions or modify globals
 239          */
 240         function initContLang() { }
 241
 242         /**
 243          * @deprecated Use User::getDefaultOptions()
 244          * @return array
 245          */
 246         function getDefaultUserOptions() {
 247                 wfDeprecated( __METHOD__ );
 248                 return User::getDefaultOptions();
 249         }
 250
 251         function getFallbackLanguageCode() {
 252                 if ( $this->mCode === 'en' ) {
 253                         return false;
 254                 } else {
 255                         return self::$dataCache->getItem( $this->mCode, 'fallback' );
 256                 }
 257         }
 258
 259         /**
 260          * Exports $wgBookstoreListEn
 261          * @return array
 262          */
 263         function getBookstoreList() {
 264                 return self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
 265         }
 266
 267         /**
 268          * @return array
 269          */
 270         function getNamespaces() {
 271                 if ( is_null( $this->namespaceNames ) ) {
 272                         global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;
 273
 274                         $this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
 275                         $validNamespaces = MWNamespace::getCanonicalNamespaces();
 276
 277                         $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;
 278
 279                         $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
 280                         if ( $wgMetaNamespaceTalk ) {
 281                                 $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
 282                         } else {
 283                                 $talk = $this->namespaceNames[NS_PROJECT_TALK];
 284                                 $this->namespaceNames[NS_PROJECT_TALK] =
 285                                         $this->fixVariableInNamespace( $talk );
 286                         }
 287
 288                         # Sometimes a language will be localised but not actually exist on this wiki.
 289                         foreach( $this->namespaceNames as $key => $text ) {
 290                                 if ( !isset( $validNamespaces[$key] ) ) {
 291                                         unset( $this->namespaceNames[$key] );
 292                                 }
 293                         }
 294
 295                         # The above mixing may leave namespaces out of canonical order.
 296                         # Re-order by namespace ID number...
 297                         ksort( $this->namespaceNames );
 298                 }
 299                 return $this->namespaceNames;
 300         }
 301
 302         /**
 303          * A convenience function that returns the same thing as
 304          * getNamespaces() except with the array values changed to ' '
 305          * where it found '_', useful for producing output to be displayed
 306          * e.g. in <select> forms.
 307          *
 308          * @return array
 309          */
 310         function getFormattedNamespaces() {
 311                 $ns = $this->getNamespaces();
 312                 foreach ( $ns as $k => $v ) {
 313                         $ns[$k] = strtr( $v, '_', ' ' );
 314                 }
 315                 return $ns;
 316         }
 317
 318         /**
 319          * Get a namespace value by key
 320          * <code>
 321          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 322          * echo $mw_ns; // prints 'MediaWiki'
 323          * </code>
 324          *
 325          * @param $index Int: the array key of the namespace to return
 326          * @return mixed, string if the namespace value exists, otherwise false
 327          */
 328         function getNsText( $index ) {
 329                 $ns = $this->getNamespaces();
 330                 return isset( $ns[$index] ) ? $ns[$index] : false;
 331         }
 332
 333         /**
 334          * A convenience function that returns the same thing as
 335          * getNsText() except with '_' changed to ' ', useful for
 336          * producing output.
 337          *
 338          * @return array
 339          */
 340         function getFormattedNsText( $index ) {
 341                 $ns = $this->getNsText( $index );
 342                 return strtr( $ns, '_', ' ' );
 343         }
 344
 345         /**
 346          * Returns gender-dependent namespace alias if available.
 347          * @param $index Int: namespace index
 348          * @param $gender String: gender key (male, female... )
 349          * @return String
 350          * @since 1.18
 351          */
 352         function getGenderNsText( $index, $gender ) {
 353                 $ns = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 354                 return isset( $ns[$index][$gender] ) ? $ns[$index][$gender] : $this->getNsText( $index );
 355         }
 356
 357         /**
 358          * Whether this language makes distinguishes genders for example in
 359          * namespaces.
 360          * @return bool
 361          * @since 1.18
 362          */
 363         function needsGenderDistinction() {
 364                 $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 365                 return count( $aliases ) > 0;
 366         }
 367
 368         /**
 369          * Get a namespace key by value, case insensitive.
 370          * Only matches namespace names for the current language, not the
 371          * canonical ones defined in Namespace.php.
 372          *
 373          * @param $text String
 374          * @return mixed An integer if $text is a valid value otherwise false
 375          */
 376         function getLocalNsIndex( $text ) {
 377                 $lctext = $this->lc( $text );
 378                 $ids = $this->getNamespaceIds();
 379                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 380         }
 381
 382         function getNamespaceAliases() {
 383                 if ( is_null( $this->namespaceAliases ) ) {
 384                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
 385                         if ( !$aliases ) {
 386                                 $aliases = array();
 387                         } else {
 388                                 foreach ( $aliases as $name => $index ) {
 389                                         if ( $index === NS_PROJECT_TALK ) {
 390                                                 unset( $aliases[$name] );
 391                                                 $name = $this->fixVariableInNamespace( $name );
 392                                                 $aliases[$name] = $index;
 393                                         }
 394                                 }
 395                         }
 396
 397                         $genders = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 398                         foreach ( $genders as $index => $forms ) {
 399                                 foreach ( $forms as $alias ) {
 400                                         $aliases[$alias] = $index;
 401                                 }
 402                         }
 403
 404                         $this->namespaceAliases = $aliases;
 405                 }
 406                 return $this->namespaceAliases;
 407         }
 408
 409         function getNamespaceIds() {
 410                 if ( is_null( $this->mNamespaceIds ) ) {
 411                         global $wgNamespaceAliases;
 412                         # Put namespace names and aliases into a hashtable.
 413                         # If this is too slow, then we should arrange it so that it is done
 414                         # before caching. The catch is that at pre-cache time, the above
 415                         # class-specific fixup hasn't been done.
 416                         $this->mNamespaceIds = array();
 417                         foreach ( $this->getNamespaces() as $index => $name ) {
 418                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 419                         }
 420                         foreach ( $this->getNamespaceAliases() as $name => $index ) {
 421                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 422                         }
 423                         if ( $wgNamespaceAliases ) {
 424                                 foreach ( $wgNamespaceAliases as $name => $index ) {
 425                                         $this->mNamespaceIds[$this->lc( $name )] = $index;
 426                                 }
 427                         }
 428                 }
 429                 return $this->mNamespaceIds;
 430         }
 431
 432
 433         /**
 434          * Get a namespace key by value, case insensitive.  Canonical namespace
 435          * names override custom ones defined for the current language.
 436          *
 437          * @param $text String
 438          * @return mixed An integer if $text is a valid value otherwise false
 439          */
 440         function getNsIndex( $text ) {
 441                 $lctext = $this->lc( $text );
 442                 if ( ( $ns = MWNamespace::getCanonicalIndex( $lctext ) ) !== null ) {
 443                         return $ns;
 444                 }
 445                 $ids = $this->getNamespaceIds();
 446                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 447         }
 448
 449         /**
 450          * short names for language variants used for language conversion links.
 451          *
 452          * @param $code String
 453          * @return string
 454          */
 455         function getVariantname( $code ) {
 456                 return $this->getMessageFromDB( "variantname-$code" );
 457         }
 458
 459         function specialPage( $name ) {
 460                 $aliases = $this->getSpecialPageAliases();
 461                 if ( isset( $aliases[$name][0] ) ) {
 462                         $name = $aliases[$name][0];
 463                 }
 464                 return $this->getNsText( NS_SPECIAL ) . ':' . $name;
 465         }
 466
 467         function getQuickbarSettings() {
 468                 return array(
 469                         $this->getMessage( 'qbsettings-none' ),
 470                         $this->getMessage( 'qbsettings-fixedleft' ),
 471                         $this->getMessage( 'qbsettings-fixedright' ),
 472                         $this->getMessage( 'qbsettings-floatingleft' ),
 473                         $this->getMessage( 'qbsettings-floatingright' )
 474                 );
 475         }
 476
 477         function getMathNames() {
 478                 return self::$dataCache->getItem( $this->mCode, 'mathNames' );
 479         }
 480
 481         function getDatePreferences() {
 482                 return self::$dataCache->getItem( $this->mCode, 'datePreferences' );
 483         }
 484
 485         function getDateFormats() {
 486                 return self::$dataCache->getItem( $this->mCode, 'dateFormats' );
 487         }
 488
 489         function getDefaultDateFormat() {
 490                 $df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
 491                 if ( $df === 'dmy or mdy' ) {
 492                         global $wgAmericanDates;
 493                         return $wgAmericanDates ? 'mdy' : 'dmy';
 494                 } else {
 495                         return $df;
 496                 }
 497         }
 498
 499         function getDatePreferenceMigrationMap() {
 500                 return self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
 501         }
 502
 503         function getImageFile( $image ) {
 504                 return self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
 505         }
 506
 507         function getDefaultUserOptionOverrides() {
 508                 return self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
 509         }
 510
 511         function getExtraUserToggles() {
 512                 return self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
 513         }
 514
 515         function getUserToggle( $tog ) {
 516                 return $this->getMessageFromDB( "tog-$tog" );
 517         }
 518
 519         /**
 520          * Get language names, indexed by code.
 521          * If $customisedOnly is true, only returns codes with a messages file
 522          */
 523         public static function getLanguageNames( $customisedOnly = false ) {
 524                 global $wgLanguageNames, $wgExtraLanguageNames;
 525                 $allNames = $wgExtraLanguageNames + $wgLanguageNames;
 526                 if ( !$customisedOnly ) {
 527                         return $allNames;
 528                 }
 529
 530                 global $IP;
 531                 $names = array();
 532                 $dir = opendir( "$IP/languages/messages" );
 533                 while ( false !== ( $file = readdir( $dir ) ) ) {
 534                         $code = self::getCodeFromFileName( $file, 'Messages' );
 535                         if ( $code && isset( $allNames[$code] ) ) {
 536                                 $names[$code] = $allNames[$code];
 537                         }
 538                 }
 539                 closedir( $dir );
 540                 return $names;
 541         }
 542
 543         /**
 544          * Get translated language names. This is done on best effort and
 545          * by default this is exactly the same as Language::getLanguageNames.
 546          * The CLDR extension provides translated names.
 547          * @param $code String Language code.
 548          * @return Array language code => language name
 549          * @since 1.18.0
 550          */
 551         public static function getTranslatedLanguageNames( $code ) {
 552                 $names = array();
 553                 wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );
 554
 555                 foreach ( self::getLanguageNames() as $code => $name ) {
 556                         if ( !isset( $names[$code] ) ) $names[$code] = $name;
 557                 }
 558
 559                 return $names;
 560         }
 561
 562         /**
 563          * Get a message from the MediaWiki namespace.
 564          *
 565          * @param $msg String: message name
 566          * @return string
 567          */
 568         function getMessageFromDB( $msg ) {
 569                 return wfMsgExt( $msg, array( 'parsemag', 'language' => $this ) );
 570         }
 571
 572         function getLanguageName( $code ) {
 573                 $names = self::getLanguageNames();
 574                 if ( !array_key_exists( $code, $names ) ) {
 575                         return '';
 576                 }
 577                 return $names[$code];
 578         }
 579
 580         function getMonthName( $key ) {
 581                 return $this->getMessageFromDB( self::$mMonthMsgs[$key - 1] );
 582         }
 583
 584         function getMonthNameGen( $key ) {
 585                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key - 1] );
 586         }
 587
 588         function getMonthAbbreviation( $key ) {
 589                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key - 1] );
 590         }
 591
 592         function getWeekdayName( $key ) {
 593                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key - 1] );
 594         }
 595
 596         function getWeekdayAbbreviation( $key ) {
 597                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key - 1] );
 598         }
 599
 600         function getIranianCalendarMonthName( $key ) {
 601                 return $this->getMessageFromDB( self::$mIranianCalendarMonthMsgs[$key - 1] );
 602         }
 603
 604         function getHebrewCalendarMonthName( $key ) {
 605                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthMsgs[$key - 1] );
 606         }
 607
 608         function getHebrewCalendarMonthNameGen( $key ) {
 609                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthGenMsgs[$key - 1] );
 610         }
 611
 612         function getHijriCalendarMonthName( $key ) {
 613                 return $this->getMessageFromDB( self::$mHijriCalendarMonthMsgs[$key - 1] );
 614         }
 615
 616         /**
 617          * Used by date() and time() to adjust the time output.
 618          *
 619          * @param $ts Int the time in date('YmdHis') format
 620          * @param $tz Mixed: adjust the time by this amount (default false, mean we
 621          *            get user timecorrection setting)
 622          * @return int
 623          */
 624         function userAdjust( $ts, $tz = false ) {
 625                 global $wgUser, $wgLocalTZoffset;
 626
 627                 if ( $tz === false ) {
 628                         $tz = $wgUser->getOption( 'timecorrection' );
 629                 }
 630
 631                 $data = explode( '|', $tz, 3 );
 632
 633                 if ( $data[0] == 'ZoneInfo' ) {
 634                         if ( function_exists( 'timezone_open' ) && @timezone_open( $data[2] ) !== false ) {
 635                                 $date = date_create( $ts, timezone_open( 'UTC' ) );
 636                                 date_timezone_set( $date, timezone_open( $data[2] ) );
 637                                 $date = date_format( $date, 'YmdHis' );
 638                                 return $date;
 639                         }
 640                         # Unrecognized timezone, default to 'Offset' with the stored offset.
 641                         $data[0] = 'Offset';
 642                 }
 643
 644                 $minDiff = 0;
 645                 if ( $data[0] == 'System' || $tz == '' ) {
 646                         #  Global offset in minutes.
 647                         if ( isset( $wgLocalTZoffset ) ) {
 648                                 $minDiff = $wgLocalTZoffset;
 649                         }
 650                 } else if ( $data[0] == 'Offset' ) {
 651                         $minDiff = intval( $data[1] );
 652                 } else {
 653                         $data = explode( ':', $tz );
 654                         if ( count( $data ) == 2 ) {
 655                                 $data[0] = intval( $data[0] );
 656                                 $data[1] = intval( $data[1] );
 657                                 $minDiff = abs( $data[0] ) * 60 + $data[1];
 658                                 if ( $data[0] < 0 ) {
 659                                         $minDiff = -$minDiff;
 660                                 }
 661                         } else {
 662                                 $minDiff = intval( $data[0] ) * 60;
 663                         }
 664                 }
 665
 666                 # No difference ? Return time unchanged
 667                 if ( 0 == $minDiff ) {
 668                         return $ts;
 669                 }
 670
 671                 wfSuppressWarnings(); // E_STRICT system time bitching
 672                 # Generate an adjusted date; take advantage of the fact that mktime
 673                 # will normalize out-of-range values so we don't have to split $minDiff
 674                 # into hours and minutes.
 675                 $t = mktime( (
 676                   (int)substr( $ts, 8, 2 ) ), # Hours
 677                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
 678                   (int)substr( $ts, 12, 2 ), # Seconds
 679                   (int)substr( $ts, 4, 2 ), # Month
 680                   (int)substr( $ts, 6, 2 ), # Day
 681                   (int)substr( $ts, 0, 4 ) ); # Year
 682
 683                 $date = date( 'YmdHis', $t );
 684                 wfRestoreWarnings();
 685
 686                 return $date;
 687         }
 688
 689         /**
 690          * This is a workalike of PHP's date() function, but with better
 691          * internationalisation, a reduced set of format characters, and a better
 692          * escaping format.
 693          *
 694          * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
 695          * PHP manual for definitions. There are a number of extensions, which
 696          * start with "x":
 697          *
 698          *    xn   Do not translate digits of the next numeric format character
 699          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 700          *    xr   Use roman numerals for the next numeric format character
 701          *    xh   Use hebrew numerals for the next numeric format character
 702          *    xx   Literal x
 703          *    xg   Genitive month name
 704          *
 705          *    xij  j (day number) in Iranian calendar
 706          *    xiF  F (month name) in Iranian calendar
 707          *    xin  n (month number) in Iranian calendar
 708          *    xiY  Y (full year) in Iranian calendar
 709          *
 710          *    xjj  j (day number) in Hebrew calendar
 711          *    xjF  F (month name) in Hebrew calendar
 712          *    xjt  t (days in month) in Hebrew calendar
 713          *    xjx  xg (genitive month name) in Hebrew calendar
 714          *    xjn  n (month number) in Hebrew calendar
 715          *    xjY  Y (full year) in Hebrew calendar
 716          *
 717          *    xmj  j (day number) in Hijri calendar
 718          *    xmF  F (month name) in Hijri calendar
 719          *    xmn  n (month number) in Hijri calendar
 720          *    xmY  Y (full year) in Hijri calendar
 721          *
 722          *    xkY  Y (full year) in Thai solar calendar. Months and days are
 723          *                       identical to the Gregorian calendar
 724          *    xoY  Y (full year) in Minguo calendar or Juche year.
 725          *                       Months and days are identical to the
 726          *                       Gregorian calendar
 727          *    xtY  Y (full year) in Japanese nengo. Months and days are
 728          *                       identical to the Gregorian calendar
 729          *
 730          * Characters enclosed in double quotes will be considered literal (with
 731          * the quotes themselves removed). Unmatched quotes will be considered
 732          * literal quotes. Example:
 733          *
 734          * "The month is" F       => The month is January
 735          * i's"                   => 20'11"
 736          *
 737          * Backslash escaping is also supported.
 738          *
 739          * Input timestamp is assumed to be pre-normalized to the desired local
 740          * time zone, if any.
 741          *
 742          * @param $format String
 743          * @param $ts String: 14-character timestamp
 744          *      YYYYMMDDHHMMSS
 745          *      01234567890123
 746          * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 747          */
 748         function sprintfDate( $format, $ts ) {
 749                 $s = '';
 750                 $raw = false;
 751                 $roman = false;
 752                 $hebrewNum = false;
 753                 $unix = false;
 754                 $rawToggle = false;
 755                 $iranian = false;
 756                 $hebrew = false;
 757                 $hijri = false;
 758                 $thai = false;
 759                 $minguo = false;
 760                 $tenno = false;
 761                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
 762                         $num = false;
 763                         $code = $format[$p];
 764                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
 765                                 $code .= $format[++$p];
 766                         }
 767
 768                         if ( ( $code === 'xi' || $code == 'xj' || $code == 'xk' || $code == 'xm' || $code == 'xo' || $code == 'xt' ) && $p < strlen( $format ) - 1 ) {
 769                                 $code .= $format[++$p];
 770                         }
 771
 772                         switch ( $code ) {
 773                                 case 'xx':
 774                                         $s .= 'x';
 775                                         break;
 776                                 case 'xn':
 777                                         $raw = true;
 778                                         break;
 779                                 case 'xN':
 780                                         $rawToggle = !$rawToggle;
 781                                         break;
 782                                 case 'xr':
 783                                         $roman = true;
 784                                         break;
 785                                 case 'xh':
 786                                         $hebrewNum = true;
 787                                         break;
 788                                 case 'xg':
 789                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
 790                                         break;
 791                                 case 'xjx':
 792                                         if ( !$hebrew ) $hebrew = self::tsToHebrew( $ts );
 793                                         $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
 794                                         break;
 795                                 case 'd':
 796                                         $num = substr( $ts, 6, 2 );
 797                                         break;
 798                                 case 'D':
 799                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 800                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
 801                                         break;
 802                                 case 'j':
 803                                         $num = intval( substr( $ts, 6, 2 ) );
 804                                         break;
 805                                 case 'xij':
 806                                         if ( !$iranian ) {
 807                                                 $iranian = self::tsToIranian( $ts );
 808                                         }
 809                                         $num = $iranian[2];
 810                                         break;
 811                                 case 'xmj':
 812                                         if ( !$hijri ) {
 813                                                 $hijri = self::tsToHijri( $ts );
 814                                         }
 815                                         $num = $hijri[2];
 816                                         break;
 817                                 case 'xjj':
 818                                         if ( !$hebrew ) {
 819                                                 $hebrew = self::tsToHebrew( $ts );
 820                                         }
 821                                         $num = $hebrew[2];
 822                                         break;
 823                                 case 'l':
 824                                         if ( !$unix ) {
 825                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 826                                         }
 827                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
 828                                         break;
 829                                 case 'N':
 830                                         if ( !$unix ) {
 831                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 832                                         }
 833                                         $w = gmdate( 'w', $unix );
 834                                         $num = $w ? $w : 7;
 835                                         break;
 836                                 case 'w':
 837                                         if ( !$unix ) {
 838                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 839                                         }
 840                                         $num = gmdate( 'w', $unix );
 841                                         break;
 842                                 case 'z':
 843                                         if ( !$unix ) {
 844                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 845                                         }
 846                                         $num = gmdate( 'z', $unix );
 847                                         break;
 848                                 case 'W':
 849                                         if ( !$unix ) {
 850                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 851                                         }
 852                                         $num = gmdate( 'W', $unix );
 853                                         break;
 854                                 case 'F':
 855                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
 856                                         break;
 857                                 case 'xiF':
 858                                         if ( !$iranian ) {
 859                                                 $iranian = self::tsToIranian( $ts );
 860                                         }
 861                                         $s .= $this->getIranianCalendarMonthName( $iranian[1] );
 862                                         break;
 863                                 case 'xmF':
 864                                         if ( !$hijri ) {
 865                                                 $hijri = self::tsToHijri( $ts );
 866                                         }
 867                                         $s .= $this->getHijriCalendarMonthName( $hijri[1] );
 868                                         break;
 869                                 case 'xjF':
 870                                         if ( !$hebrew ) {
 871                                                 $hebrew = self::tsToHebrew( $ts );
 872                                         }
 873                                         $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
 874                                         break;
 875                                 case 'm':
 876                                         $num = substr( $ts, 4, 2 );
 877                                         break;
 878                                 case 'M':
 879                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
 880                                         break;
 881                                 case 'n':
 882                                         $num = intval( substr( $ts, 4, 2 ) );
 883                                         break;
 884                                 case 'xin':
 885                                         if ( !$iranian ) {
 886                                                 $iranian = self::tsToIranian( $ts );
 887                                         }
 888                                         $num = $iranian[1];
 889                                         break;
 890                                 case 'xmn':
 891                                         if ( !$hijri ) {
 892                                                 $hijri = self::tsToHijri ( $ts );
 893                                         }
 894                                         $num = $hijri[1];
 895                                         break;
 896                                 case 'xjn':
 897                                         if ( !$hebrew ) {
 898                                                 $hebrew = self::tsToHebrew( $ts );
 899                                         }
 900                                         $num = $hebrew[1];
 901                                         break;
 902                                 case 't':
 903                                         if ( !$unix ) {
 904                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 905                                         }
 906                                         $num = gmdate( 't', $unix );
 907                                         break;
 908                                 case 'xjt':
 909                                         if ( !$hebrew ) {
 910                                                 $hebrew = self::tsToHebrew( $ts );
 911                                         }
 912                                         $num = $hebrew[3];
 913                                         break;
 914                                 case 'L':
 915                                         if ( !$unix ) {
 916                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 917                                         }
 918                                         $num = gmdate( 'L', $unix );
 919                                         break;
 920                                 case 'o':
 921                                         if ( !$unix ) {
 922                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 923                                         }
 924                                         $num = date( 'o', $unix );
 925                                         break;
 926                                 case 'Y':
 927                                         $num = substr( $ts, 0, 4 );
 928                                         break;
 929                                 case 'xiY':
 930                                         if ( !$iranian ) {
 931                                                 $iranian = self::tsToIranian( $ts );
 932                                         }
 933                                         $num = $iranian[0];
 934                                         break;
 935                                 case 'xmY':
 936                                         if ( !$hijri ) {
 937                                                 $hijri = self::tsToHijri( $ts );
 938                                         }
 939                                         $num = $hijri[0];
 940                                         break;
 941                                 case 'xjY':
 942                                         if ( !$hebrew ) {
 943                                                 $hebrew = self::tsToHebrew( $ts );
 944                                         }
 945                                         $num = $hebrew[0];
 946                                         break;
 947                                 case 'xkY':
 948                                         if ( !$thai ) {
 949                                                 $thai = self::tsToYear( $ts, 'thai' );
 950                                         }
 951                                         $num = $thai[0];
 952                                         break;
 953                                 case 'xoY':
 954                                         if ( !$minguo ) {
 955                                                 $minguo = self::tsToYear( $ts, 'minguo' );
 956                                         }
 957                                         $num = $minguo[0];
 958                                         break;
 959                                 case 'xtY':
 960                                         if ( !$tenno ) {
 961                                                 $tenno = self::tsToYear( $ts, 'tenno' );
 962                                         }
 963                                         $num = $tenno[0];
 964                                         break;
 965                                 case 'y':
 966                                         $num = substr( $ts, 2, 2 );
 967                                         break;
 968                                 case 'a':
 969                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
 970                                         break;
 971                                 case 'A':
 972                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
 973                                         break;
 974                                 case 'g':
 975                                         $h = substr( $ts, 8, 2 );
 976                                         $num = $h % 12 ? $h % 12 : 12;
 977                                         break;
 978                                 case 'G':
 979                                         $num = intval( substr( $ts, 8, 2 ) );
 980                                         break;
 981                                 case 'h':
 982                                         $h = substr( $ts, 8, 2 );
 983                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
 984                                         break;
 985                                 case 'H':
 986                                         $num = substr( $ts, 8, 2 );
 987                                         break;
 988                                 case 'i':
 989                                         $num = substr( $ts, 10, 2 );
 990                                         break;
 991                                 case 's':
 992                                         $num = substr( $ts, 12, 2 );
 993                                         break;
 994                                 case 'c':
 995                                         if ( !$unix ) {
 996                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 997                                         }
 998                                         $s .= gmdate( 'c', $unix );
 999                                         break;
1000                                 case 'r':
1001                                         if ( !$unix ) {
1002                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1003                                         }
1004                                         $s .= gmdate( 'r', $unix );
1005                                         break;
1006                                 case 'U':
1007                                         if ( !$unix ) {
1008                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1009                                         }
1010                                         $num = $unix;
1011                                         break;
1012                                 case '\\':
1013                                         # Backslash escaping
1014                                         if ( $p < strlen( $format ) - 1 ) {
1015                                                 $s .= $format[++$p];
1016                                         } else {
1017                                                 $s .= '\\';
1018                                         }
1019                                         break;
1020                                 case '"':
1021                                         # Quoted literal
1022                                         if ( $p < strlen( $format ) - 1 ) {
1023                                                 $endQuote = strpos( $format, '"', $p + 1 );
1024                                                 if ( $endQuote === false ) {
1025                                                         # No terminating quote, assume literal "
1026                                                         $s .= '"';
1027                                                 } else {
1028                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
1029                                                         $p = $endQuote;
1030                                                 }
1031                                         } else {
1032                                                 # Quote at end of string, assume literal "
1033                                                 $s .= '"';
1034                                         }
1035                                         break;
1036                                 default:
1037                                         $s .= $format[$p];
1038                         }
1039                         if ( $num !== false ) {
1040                                 if ( $rawToggle || $raw ) {
1041                                         $s .= $num;
1042                                         $raw = false;
1043                                 } elseif ( $roman ) {
1044                                         $s .= self::romanNumeral( $num );
1045                                         $roman = false;
1046                                 } elseif ( $hebrewNum ) {
1047                                         $s .= self::hebrewNumeral( $num );
1048                                         $hebrewNum = false;
1049                                 } else {
1050                                         $s .= $this->formatNum( $num, true );
1051                                 }
1052                         }
1053                 }
1054                 return $s;
1055         }
1056
1057         private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1058         private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1059         /**
1060          * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1061          * Gregorian dates to Iranian dates. Originally written in C, it
1062          * is released under the terms of GNU Lesser General Public
1063          * License. Conversion to PHP was performed by Niklas Laxström.
1064          *
1065          * Link: http://www.farsiweb.info/jalali/jalali.c
1066          */
1067         private static function tsToIranian( $ts ) {
1068                 $gy = substr( $ts, 0, 4 ) -1600;
1069                 $gm = substr( $ts, 4, 2 ) -1;
1070                 $gd = substr( $ts, 6, 2 ) -1;
1071
1072                 # Days passed from the beginning (including leap years)
1073                 $gDayNo = 365 * $gy
1074                         + floor( ( $gy + 3 ) / 4 )
1075                         - floor( ( $gy + 99 ) / 100 )
1076                         + floor( ( $gy + 399 ) / 400 );
1077
1078
1079                 // Add days of the past months of this year
1080                 for ( $i = 0; $i < $gm; $i++ ) {
1081                         $gDayNo += self::$GREG_DAYS[$i];
1082                 }
1083
1084                 // Leap years
1085                 if ( $gm > 1 && ( ( $gy % 4 === 0 && $gy % 100 !== 0 || ( $gy % 400 == 0 ) ) ) ) {
1086                         $gDayNo++;
1087                 }
1088
1089                 // Days passed in current month
1090                 $gDayNo += $gd;
1091
1092                 $jDayNo = $gDayNo - 79;
1093
1094                 $jNp = floor( $jDayNo / 12053 );
1095                 $jDayNo %= 12053;
1096
1097                 $jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
1098                 $jDayNo %= 1461;
1099
1100                 if ( $jDayNo >= 366 ) {
1101                         $jy += floor( ( $jDayNo - 1 ) / 365 );
1102                         $jDayNo = floor( ( $jDayNo - 1 ) % 365 );
1103                 }
1104
1105                 for ( $i = 0; $i < 11 && $jDayNo >= self::$IRANIAN_DAYS[$i]; $i++ ) {
1106                         $jDayNo -= self::$IRANIAN_DAYS[$i];
1107                 }
1108
1109                 $jm = $i + 1;
1110                 $jd = $jDayNo + 1;
1111
1112                 return array( $jy, $jm, $jd );
1113         }
1114
1115         /**
1116          * Converting Gregorian dates to Hijri dates.
1117          *
1118          * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1119          *
1120          * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1121          */
1122         private static function tsToHijri( $ts ) {
1123                 $year = substr( $ts, 0, 4 );
1124                 $month = substr( $ts, 4, 2 );
1125                 $day = substr( $ts, 6, 2 );
1126
1127                 $zyr = $year;
1128                 $zd = $day;
1129                 $zm = $month;
1130                 $zy = $zyr;
1131
1132                 if (
1133                         ( $zy > 1582 ) || ( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1134                         ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1135                 )
1136                 {
1137                         $zjd = (int)( ( 1461 * ( $zy + 4800 + (int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1138                                         (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1139                                         (int)( ( 3 * (int)( ( ( $zy + 4900 + (int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1140                                         $zd - 32075;
1141                 } else {
1142                         $zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1143                                                                 (int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
1144                 }
1145
1146                 $zl = $zjd -1948440 + 10632;
1147                 $zn = (int)( ( $zl - 1 ) / 10631 );
1148                 $zl = $zl - 10631 * $zn + 354;
1149                 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) + ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1150                 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;
1151                 $zm = (int)( ( 24 * $zl ) / 709 );
1152                 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1153                 $zy = 30 * $zn + $zj - 30;
1154
1155                 return array( $zy, $zm, $zd );
1156         }
1157
1158         /**
1159          * Converting Gregorian dates to Hebrew dates.
1160          *
1161          * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1162          * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1163          * to translate the relevant functions into PHP and release them under
1164          * GNU GPL.
1165          *
1166          * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1167          * and Adar II is 14. In a non-leap year, Adar is 6.
1168          */
1169         private static function tsToHebrew( $ts ) {
1170                 # Parse date
1171                 $year = substr( $ts, 0, 4 );
1172                 $month = substr( $ts, 4, 2 );
1173                 $day = substr( $ts, 6, 2 );
1174
1175                 # Calculate Hebrew year
1176                 $hebrewYear = $year + 3760;
1177
1178                 # Month number when September = 1, August = 12
1179                 $month += 4;
1180                 if ( $month > 12 ) {
1181                         # Next year
1182                         $month -= 12;
1183                         $year++;
1184                         $hebrewYear++;
1185                 }
1186
1187                 # Calculate day of year from 1 September
1188                 $dayOfYear = $day;
1189                 for ( $i = 1; $i < $month; $i++ ) {
1190                         if ( $i == 6 ) {
1191                                 # February
1192                                 $dayOfYear += 28;
1193                                 # Check if the year is leap
1194                                 if ( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
1195                                         $dayOfYear++;
1196                                 }
1197                         } elseif ( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
1198                                 $dayOfYear += 30;
1199                         } else {
1200                                 $dayOfYear += 31;
1201                         }
1202                 }
1203
1204                 # Calculate the start of the Hebrew year
1205                 $start = self::hebrewYearStart( $hebrewYear );
1206
1207                 # Calculate next year's start
1208                 if ( $dayOfYear <= $start ) {
1209                         # Day is before the start of the year - it is the previous year
1210                         # Next year's start
1211                         $nextStart = $start;
1212                         # Previous year
1213                         $year--;
1214                         $hebrewYear--;
1215                         # Add days since previous year's 1 September
1216                         $dayOfYear += 365;
1217                         if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1218                                 # Leap year
1219                                 $dayOfYear++;
1220                         }
1221                         # Start of the new (previous) year
1222                         $start = self::hebrewYearStart( $hebrewYear );
1223                 } else {
1224                         # Next year's start
1225                         $nextStart = self::hebrewYearStart( $hebrewYear + 1 );
1226                 }
1227
1228                 # Calculate Hebrew day of year
1229                 $hebrewDayOfYear = $dayOfYear - $start;
1230
1231                 # Difference between year's days
1232                 $diff = $nextStart - $start;
1233                 # Add 12 (or 13 for leap years) days to ignore the difference between
1234                 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1235                 # difference is only about the year type
1236                 if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1237                         $diff += 13;
1238                 } else {
1239                         $diff += 12;
1240                 }
1241
1242                 # Check the year pattern, and is leap year
1243                 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1244                 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1245                 # and non-leap years
1246                 $yearPattern = $diff % 30;
1247                 # Check if leap year
1248                 $isLeap = $diff >= 30;
1249
1250                 # Calculate day in the month from number of day in the Hebrew year
1251                 # Don't check Adar - if the day is not in Adar, we will stop before;
1252                 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1253                 $hebrewDay = $hebrewDayOfYear;
1254                 $hebrewMonth = 1;
1255                 $days = 0;
1256                 while ( $hebrewMonth <= 12 ) {
1257                         # Calculate days in this month
1258                         if ( $isLeap && $hebrewMonth == 6 ) {
1259                                 # Adar in a leap year
1260                                 if ( $isLeap ) {
1261                                         # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1262                                         $days = 30;
1263                                         if ( $hebrewDay <= $days ) {
1264                                                 # Day in Adar I
1265                                                 $hebrewMonth = 13;
1266                                         } else {
1267                                                 # Subtract the days of Adar I
1268                                                 $hebrewDay -= $days;
1269                                                 # Try Adar II
1270                                                 $days = 29;
1271                                                 if ( $hebrewDay <= $days ) {
1272                                                         # Day in Adar II
1273                                                         $hebrewMonth = 14;
1274                                                 }
1275                                         }
1276                                 }
1277                         } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1278                                 # Cheshvan in a complete year (otherwise as the rule below)
1279                                 $days = 30;
1280                         } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1281                                 # Kislev in an incomplete year (otherwise as the rule below)
1282                                 $days = 29;
1283                         } else {
1284                                 # Odd months have 30 days, even have 29
1285                                 $days = 30 - ( $hebrewMonth - 1 ) % 2;
1286                         }
1287                         if ( $hebrewDay <= $days ) {
1288                                 # In the current month
1289                                 break;
1290                         } else {
1291                                 # Subtract the days of the current month
1292                                 $hebrewDay -= $days;
1293                                 # Try in the next month
1294                                 $hebrewMonth++;
1295                         }
1296                 }
1297
1298                 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1299         }
1300
1301         /**
1302          * This calculates the Hebrew year start, as days since 1 September.
1303          * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1304          * Used for Hebrew date.
1305          */
1306         private static function hebrewYearStart( $year ) {
1307                 $a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
1308                 $b = intval( ( $year - 1 ) % 4 );
1309                 $m = 32.044093161144 + 1.5542417966212 * $a +  $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1310                 if ( $m < 0 ) {
1311                         $m--;
1312                 }
1313                 $Mar = intval( $m );
1314                 if ( $m < 0 ) {
1315                         $m++;
1316                 }
1317                 $m -= $Mar;
1318
1319                 $c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7 );
1320                 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1321                         $Mar++;
1322                 } else if ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1323                         $Mar += 2;
1324                 } else if ( $c == 2 || $c == 4 || $c == 6 ) {
1325                         $Mar++;
1326                 }
1327
1328                 $Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1329                 return $Mar;
1330         }
1331
        /**
         * Algorithm to convert Gregorian dates to Thai solar dates,
         * Minguo (or Juche) dates or Japanese nengō dates.
         *
         * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
         *       http://en.wikipedia.org/wiki/Minguo_calendar
         *       http://en.wikipedia.org/wiki/Japanese_era_name
         *
         * @param $ts String: 14-character timestamp
         * @param $cName String: calendar name ('thai', 'minguo', 'juche' or 'tenno')
         * @return Array: converted year, month, day
         */
1344         private static function tsToYear( $ts, $cName ) {
1345                 $gy = substr( $ts, 0, 4 );
1346                 $gm = substr( $ts, 4, 2 );
1347                 $gd = substr( $ts, 6, 2 );
1348
1349                 if ( !strcmp( $cName, 'thai' ) ) {
1350                         # Thai solar dates
1351                         # Add 543 years to the Gregorian calendar
1352                         # Months and days are identical
1353                         $gy_offset = $gy + 543;
1354                 } else if ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
1355                         # Minguo dates
1356                         # Deduct 1911 years from the Gregorian calendar
1357                         # Months and days are identical
1358                         $gy_offset = $gy - 1911;
1359                 } else if ( !strcmp( $cName, 'tenno' ) ) {
1360                         # Nengō dates up to Meiji period
1361                         # Deduct years from the Gregorian calendar
1362                         # depending on the nengo periods
1363                         # Months and days are identical
1364                         if ( ( $gy < 1912 ) || ( ( $gy == 1912 ) && ( $gm < 7 ) ) || ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1365                                 # Meiji period
1366                                 $gy_gannen = $gy - 1868 + 1;
1367                                 $gy_offset = $gy_gannen;
1368                                 if ( $gy_gannen == 1 ) {
1369                                         $gy_offset = '元';
1370                                 }
1371                                 $gy_offset = '明治' . $gy_offset;
1372                         } else if (
1373                                 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1374                                 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1375                                 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1376                                 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1377                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1378                         )
1379                         {
1380                                 # Taishō period
1381                                 $gy_gannen = $gy - 1912 + 1;
1382                                 $gy_offset = $gy_gannen;
1383                                 if ( $gy_gannen == 1 ) {
1384                                         $gy_offset = '元';
1385                                 }
1386                                 $gy_offset = '大正' . $gy_offset;
1387                         } else if (
1388                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1389                                 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1390                                 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1391                         )
1392                         {
1393                                 # Shōwa period
1394                                 $gy_gannen = $gy - 1926 + 1;
1395                                 $gy_offset = $gy_gannen;
1396                                 if ( $gy_gannen == 1 ) {
1397                                         $gy_offset = '元';
1398                                 }
1399                                 $gy_offset = '昭和' . $gy_offset;
1400                         } else {
1401                                 # Heisei period
1402                                 $gy_gannen = $gy - 1989 + 1;
1403                                 $gy_offset = $gy_gannen;
1404                                 if ( $gy_gannen == 1 ) {
1405                                         $gy_offset = '元';
1406                                 }
1407                                 $gy_offset = '平成' . $gy_offset;
1408                         }
1409                 } else {
1410                         $gy_offset = $gy;
1411                 }
1412
1413                 return array( $gy_offset, $gm, $gd );
1414         }
1415
1416         /**
1417          * Roman number formatting up to 3000
1418          */
1419         static function romanNumeral( $num ) {
1420                 static $table = array(
1421                         array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
1422                         array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
1423                         array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
1424                         array( '', 'M', 'MM', 'MMM' )
1425                 );
1426
1427                 $num = intval( $num );
1428                 if ( $num > 3000 || $num <= 0 ) {
1429                         return $num;
1430                 }
1431
1432                 $s = '';
1433                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1434                         if ( $num >= $pow10 ) {
1435                                 $s .= $table[$i][floor( $num / $pow10 )];
1436                         }
1437                         $num = $num % $pow10;
1438                 }
1439                 return $s;
1440         }
1441
1442         /**
1443          * Hebrew Gematria number formatting up to 9999
1444          */
1445         static function hebrewNumeral( $num ) {
1446                 static $table = array(
1447                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
1448                         array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
1449                         array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
1450                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
1451                 );
1452
1453                 $num = intval( $num );
1454                 if ( $num > 9999 || $num <= 0 ) {
1455                         return $num;
1456                 }
1457
1458                 $s = '';
1459                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1460                         if ( $num >= $pow10 ) {
1461                                 if ( $num == 15 || $num == 16 ) {
1462                                         $s .= $table[0][9] . $table[0][$num - 9];
1463                                         $num = 0;
1464                                 } else {
1465                                         $s .= $table[$i][intval( ( $num / $pow10 ) )];
1466                                         if ( $pow10 == 1000 ) {
1467                                                 $s .= "'";
1468                                         }
1469                                 }
1470                         }
1471                         $num = $num % $pow10;
1472                 }
1473                 if ( strlen( $s ) == 2 ) {
1474                         $str = $s . "'";
1475                 } else  {
1476                         $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
1477                         $str .= substr( $s, strlen( $s ) - 2, 2 );
1478                 }
1479                 $start = substr( $str, 0, strlen( $str ) - 2 );
1480                 $end = substr( $str, strlen( $str ) - 2 );
1481                 switch( $end ) {
1482                         case 'כ':
1483                                 $str = $start . 'ך';
1484                                 break;
1485                         case 'מ':
1486                                 $str = $start . 'ם';
1487                                 break;
1488                         case 'נ':
1489                                 $str = $start . 'ן';
1490                                 break;
1491                         case 'פ':
1492                                 $str = $start . 'ף';
1493                                 break;
1494                         case 'צ':
1495                                 $str = $start . 'ץ';
1496                                 break;
1497                 }
1498                 return $str;
1499         }
1500
1501         /**
1502          * This is meant to be used by time(), date(), and timeanddate() to get
1503          * the date preference they're supposed to use, it should be used in
1504          * all children.
1505          *
1506          *<code>
1507          * function timeanddate([...], $format = true) {
1508          *      $datePreference = $this->dateFormat($format);
1509          * [...]
1510          * }
1511          *</code>
1512          *
1513          * @param $usePrefs Mixed: if true, the user's preference is used
1514          *                         if false, the site/language default is used
1515          *                         if int/string, assumed to be a format.
1516          * @return string
1517          */
1518         function dateFormat( $usePrefs = true ) {
1519                 global $wgUser;
1520
1521                 if ( is_bool( $usePrefs ) ) {
1522                         if ( $usePrefs ) {
1523                                 $datePreference = $wgUser->getDatePreference();
1524                         } else {
1525                                 $datePreference = (string)User::getDefaultOption( 'date' );
1526                         }
1527                 } else {
1528                         $datePreference = (string)$usePrefs;
1529                 }
1530
1531                 // return int
1532                 if ( $datePreference == '' ) {
1533                         return 'default';
1534                 }
1535
1536                 return $datePreference;
1537         }
1538
1539         /**
1540          * Get a format string for a given type and preference
1541          * @param $type May be date, time or both
1542          * @param $pref The format name as it appears in Messages*.php
1543          */
1544         function getDateFormatString( $type, $pref ) {
1545                 if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
1546                         if ( $pref == 'default' ) {
1547                                 $pref = $this->getDefaultDateFormat();
1548                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1549                         } else {
1550                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1551                                 if ( is_null( $df ) ) {
1552                                         $pref = $this->getDefaultDateFormat();
1553                                         $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1554                                 }
1555                         }
1556                         $this->dateFormatStrings[$type][$pref] = $df;
1557                 }
1558                 return $this->dateFormatStrings[$type][$pref];
1559         }
1560
1561         /**
1562          * @param $ts Mixed: the time format which needs to be turned into a
1563          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1564          * @param $adj Bool: whether to adjust the time output according to the
1565          *             user configured offset ($timecorrection)
1566          * @param $format Mixed: true to use user's date format preference
1567          * @param $timecorrection String: the time offset as returned by
1568          *                        validateTimeZone() in Special:Preferences
1569          * @return string
1570          */
1571         function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
1572                 $ts = wfTimestamp( TS_MW, $ts );
1573                 if ( $adj ) {
1574                         $ts = $this->userAdjust( $ts, $timecorrection );
1575                 }
1576                 $df = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );
1577                 return $this->sprintfDate( $df, $ts );
1578         }
1579
1580         /**
1581          * @param $ts Mixed: the time format which needs to be turned into a
1582          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1583          * @param $adj Bool: whether to adjust the time output according to the
1584          *             user configured offset ($timecorrection)
1585          * @param $format Mixed: true to use user's date format preference
1586          * @param $timecorrection String: the time offset as returned by
1587          *                        validateTimeZone() in Special:Preferences
1588          * @return string
1589          */
1590         function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
1591                 $ts = wfTimestamp( TS_MW, $ts );
1592                 if ( $adj ) {
1593                         $ts = $this->userAdjust( $ts, $timecorrection );
1594                 }
1595                 $df = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );
1596                 return $this->sprintfDate( $df, $ts );
1597         }
1598
1599         /**
1600          * @param $ts Mixed: the time format which needs to be turned into a
1601          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1602          * @param $adj Bool: whether to adjust the time output according to the
1603          *             user configured offset ($timecorrection)
1604          * @param $format Mixed: what format to return, if it's false output the
1605          *                default one (default true)
1606          * @param $timecorrection String: the time offset as returned by
1607          *                        validateTimeZone() in Special:Preferences
1608          * @return string
1609          */
1610         function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
1611                 $ts = wfTimestamp( TS_MW, $ts );
1612                 if ( $adj ) {
1613                         $ts = $this->userAdjust( $ts, $timecorrection );
1614                 }
1615                 $df = $this->getDateFormatString( 'both', $this->dateFormat( $format ) );
1616                 return $this->sprintfDate( $df, $ts );
1617         }
1618
1619         function getMessage( $key ) {
1620                 return self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
1621         }
1622
1623         function getAllMessages() {
1624                 return self::$dataCache->getItem( $this->mCode, 'messages' );
1625         }
1626
1627         function iconv( $in, $out, $string ) {
1628                 # This is a wrapper for iconv in all languages except esperanto,
1629                 # which does some nasty x-conversions beforehand
1630
1631                 # Even with //IGNORE iconv can whine about illegal characters in
1632                 # *input* string. We just ignore those too.
1633                 # REF: http://bugs.php.net/bug.php?id=37166
1634                 # REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
1635                 wfSuppressWarnings();
1636                 $text = iconv( $in, $out . '//IGNORE', $string );
1637                 wfRestoreWarnings();
1638                 return $text;
1639         }
1640
1641         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
1642         function ucwordbreaksCallbackAscii( $matches ) {
1643                 return $this->ucfirst( $matches[1] );
1644         }
1645
1646         function ucwordbreaksCallbackMB( $matches ) {
1647                 return mb_strtoupper( $matches[0] );
1648         }
1649
1650         function ucCallback( $matches ) {
1651                 list( $wikiUpperChars ) = self::getCaseMaps();
1652                 return strtr( $matches[1], $wikiUpperChars );
1653         }
1654
1655         function lcCallback( $matches ) {
1656                 list( , $wikiLowerChars ) = self::getCaseMaps();
1657                 return strtr( $matches[1], $wikiLowerChars );
1658         }
1659
1660         function ucwordsCallbackMB( $matches ) {
1661                 return mb_strtoupper( $matches[0] );
1662         }
1663
1664         function ucwordsCallbackWiki( $matches ) {
1665                 list( $wikiUpperChars ) = self::getCaseMaps();
1666                 return strtr( $matches[0], $wikiUpperChars );
1667         }
1668
1669         /**
1670          * Make a string's first character uppercase
1671          */
1672         function ucfirst( $str ) {
1673                 $o = ord( $str );
1674                 if ( $o < 96 ) { // if already uppercase...
1675                         return $str;
1676                 } elseif ( $o < 128 ) {
1677                         return ucfirst( $str ); // use PHP's ucfirst()
1678                 } else {
1679                         // fall back to more complex logic in case of multibyte strings
1680                         return $this->uc( $str, true );
1681                 }
1682         }
1683
1684         /**
1685          * Convert a string to uppercase
1686          */
1687         function uc( $str, $first = false ) {
1688                 if ( function_exists( 'mb_strtoupper' ) ) {
1689                         if ( $first ) {
1690                                 if ( $this->isMultibyte( $str ) ) {
1691                                         return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
1692                                 } else {
1693                                         return ucfirst( $str );
1694                                 }
1695                         } else {
1696                                 return $this->isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
1697                         }
1698                 } else {
1699                         if ( $this->isMultibyte( $str ) ) {
1700                                 $x = $first ? '^' : '';
1701                                 return preg_replace_callback(
1702                                         "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
1703                                         array( $this, 'ucCallback' ),
1704                                         $str
1705                                 );
1706                         } else {
1707                                 return $first ? ucfirst( $str ) : strtoupper( $str );
1708                         }
1709                 }
1710         }
1711
1712         function lcfirst( $str ) {
1713                 $o = ord( $str );
1714                 if ( !$o ) {
1715                         return strval( $str );
1716                 } elseif ( $o >= 128 ) {
1717                         return $this->lc( $str, true );
1718                 } elseif ( $o > 96 ) {
1719                         return $str;
1720                 } else {
1721                         $str[0] = strtolower( $str[0] );
1722                         return $str;
1723                 }
1724         }
1725
1726         function lc( $str, $first = false ) {
1727                 if ( function_exists( 'mb_strtolower' ) ) {
1728                         if ( $first ) {
1729                                 if ( $this->isMultibyte( $str ) ) {
1730                                         return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
1731                                 } else {
1732                                         return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
1733                                 }
1734                         } else {
1735                                 return $this->isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
1736                         }
1737                 } else {
1738                         if ( $this->isMultibyte( $str ) ) {
1739                                 $x = $first ? '^' : '';
1740                                 return preg_replace_callback(
1741                                         "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
1742                                         array( $this, 'lcCallback' ),
1743                                         $str
1744                                 );
1745                         } else {
1746                                 return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
1747                         }
1748                 }
1749         }
1750
1751         function isMultibyte( $str ) {
1752                 return (bool)preg_match( '/[\x80-\xff]/', $str );
1753         }
1754
1755         function ucwords( $str ) {
1756                 if ( $this->isMultibyte( $str ) ) {
1757                         $str = $this->lc( $str );
1758
1759                         // regexp to find first letter in each word (i.e. after each space)
1760                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
1761
1762                         // function to use to capitalize a single char
1763                         if ( function_exists( 'mb_strtoupper' ) ) {
1764                                 return preg_replace_callback(
1765                                         $replaceRegexp,
1766                                         array( $this, 'ucwordsCallbackMB' ),
1767                                         $str
1768                                 );
1769                         } else {
1770                                 return preg_replace_callback(
1771                                         $replaceRegexp,
1772                                         array( $this, 'ucwordsCallbackWiki' ),
1773                                         $str
1774                                 );
1775                         }
1776                 } else {
1777                         return ucwords( strtolower( $str ) );
1778                 }
1779         }
1780
1781         # capitalize words at word breaks
1782         function ucwordbreaks( $str ) {
1783                 if ( $this->isMultibyte( $str ) ) {
1784                         $str = $this->lc( $str );
1785
1786                         // since \b doesn't work for UTF-8, we explicitely define word break chars
1787                         $breaks = "[ \-\(\)\}\{\.,\?!]";
1788
1789                         // find first letter after word break
1790                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
1791
1792                         if ( function_exists( 'mb_strtoupper' ) ) {
1793                                 return preg_replace_callback(
1794                                         $replaceRegexp,
1795                                         array( $this, 'ucwordbreaksCallbackMB' ),
1796                                         $str
1797                                 );
1798                         } else {
1799                                 return preg_replace_callback(
1800                                         $replaceRegexp,
1801                                         array( $this, 'ucwordsCallbackWiki' ),
1802                                         $str
1803                                 );
1804                         }
1805                 } else {
1806                         return preg_replace_callback(
1807                                 '/\b([\w\x80-\xff]+)\b/',
1808                                 array( $this, 'ucwordbreaksCallbackAscii' ),
1809                                 $str
1810                         );
1811                 }
1812         }
1813
1814         /**
1815          * Return a case-folded representation of $s
1816          *
1817          * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
1818          * and $s2 are the same except for the case of their characters. It is not
1819          * necessary for the value returned to make sense when displayed.
1820          *
1821          * Do *not* perform any other normalisation in this function. If a caller
1822          * uses this function when it should be using a more general normalisation
1823          * function, then fix the caller.
1824          */
1825         function caseFold( $s ) {
1826                 return $this->uc( $s );
1827         }
1828
1829         function checkTitleEncoding( $s ) {
1830                 if ( is_array( $s ) ) {
1831                         wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
1832                 }
1833                 # Check for non-UTF-8 URLs
1834                 $ishigh = preg_match( '/[\x80-\xff]/', $s );
1835                 if ( !$ishigh ) {
1836                         return $s;
1837                 }
1838
1839                 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1840                                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
1841                 if ( $isutf8 ) {
1842                         return $s;
1843                 }
1844
1845                 return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
1846         }
1847
1848         function fallback8bitEncoding() {
1849                 return self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
1850         }
1851
1852         /**
1853          * Most writing systems use whitespace to break up words.
1854          * Some languages such as Chinese don't conventionally do this,
1855          * which requires special handling when breaking up words for
1856          * searching etc.
1857          */
1858         function hasWordBreaks() {
1859                 return true;
1860         }
1861
1862         /**
1863          * Some languages such as Chinese require word segmentation,
1864          * Specify such segmentation when overridden in derived class.
1865          *
1866          * @param $string String
1867          * @return String
1868          */
1869         function segmentByWord( $string ) {
1870                 return $string;
1871         }
1872
1873         /**
1874          * Some languages have special punctuation need to be normalized.
1875          * Make such changes here.
1876          *
1877          * @param $string String
1878          * @return String
1879          */
1880         function normalizeForSearch( $string ) {
1881                 return self::convertDoubleWidth( $string );
1882         }
1883
1884         /**
1885          * convert double-width roman characters to single-width.
1886          * range: ff00-ff5f ~= 0020-007f
1887          */
1888         protected static function convertDoubleWidth( $string ) {
1889                 static $full = null;
1890                 static $half = null;
1891
1892                 if ( $full === null ) {
1893                         $fullWidth = "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ";
1894                         $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
1895                         $full = str_split( $fullWidth, 3 );
1896                         $half = str_split( $halfWidth );
1897                 }
1898
1899                 $string = str_replace( $full, $half, $string );
1900                 return $string;
1901         }
1902
1903         protected static function insertSpace( $string, $pattern ) {
1904                 $string = preg_replace( $pattern, " $1 ", $string );
1905                 $string = preg_replace( '/ +/', ' ', $string );
1906                 return $string;
1907         }
1908
1909         function convertForSearchResult( $termsArray ) {
1910                 # some languages, e.g. Chinese, need to do a conversion
1911                 # in order for search results to be displayed correctly
1912                 return $termsArray;
1913         }
1914
1915         /**
1916          * Get the first character of a string.
1917          *
1918          * @param $s string
1919          * @return string
1920          */
1921         function firstChar( $s ) {
1922                 $matches = array();
1923                 preg_match(
1924                         '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1925                                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
1926                         $s,
1927                         $matches
1928                 );
1929
1930                 if ( isset( $matches[1] ) ) {
1931                         if ( strlen( $matches[1] ) != 3 ) {
1932                                 return $matches[1];
1933                         }
1934
1935                         // Break down Hangul syllables to grab the first jamo
1936                         $code = utf8ToCodepoint( $matches[1] );
1937                         if ( $code < 0xac00 || 0xd7a4 <= $code ) {
1938                                 return $matches[1];
1939                         } elseif ( $code < 0xb098 ) {
1940                                 return "\xe3\x84\xb1";
1941                         } elseif ( $code < 0xb2e4 ) {
1942                                 return "\xe3\x84\xb4";
1943                         } elseif ( $code < 0xb77c ) {
1944                                 return "\xe3\x84\xb7";
1945                         } elseif ( $code < 0xb9c8 ) {
1946                                 return "\xe3\x84\xb9";
1947                         } elseif ( $code < 0xbc14 ) {
1948                                 return "\xe3\x85\x81";
1949                         } elseif ( $code < 0xc0ac ) {
1950                                 return "\xe3\x85\x82";
1951                         } elseif ( $code < 0xc544 ) {
1952                                 return "\xe3\x85\x85";
1953                         } elseif ( $code < 0xc790 ) {
1954                                 return "\xe3\x85\x87";
1955                         } elseif ( $code < 0xcc28 ) {
1956                                 return "\xe3\x85\x88";
1957                         } elseif ( $code < 0xce74 ) {
1958                                 return "\xe3\x85\x8a";
1959                         } elseif ( $code < 0xd0c0 ) {
1960                                 return "\xe3\x85\x8b";
1961                         } elseif ( $code < 0xd30c ) {
1962                                 return "\xe3\x85\x8c";
1963                         } elseif ( $code < 0xd558 ) {
1964                                 return "\xe3\x85\x8d";
1965                         } else {
1966                                 return "\xe3\x85\x8e";
1967                         }
1968                 } else {
1969                         return '';
1970                 }
1971         }
1972
1973         function initEncoding() {
1974                 # Some languages may have an alternate char encoding option
1975                 # (Esperanto X-coding, Japanese furigana conversion, etc)
1976                 # If this language is used as the primary content language,
1977                 # an override to the defaults can be set here on startup.
1978         }
1979
1980         function recodeForEdit( $s ) {
1981                 # For some languages we'll want to explicitly specify
1982                 # which characters make it into the edit box raw
1983                 # or are converted in some way or another.
1984                 # Note that if wgOutputEncoding is different from
1985                 # wgInputEncoding, this text will be further converted
1986                 # to wgOutputEncoding.
1987                 global $wgEditEncoding;
1988                 if ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' ) {
1989                         return $s;
1990                 } else {
1991                         return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
1992                 }
1993         }
1994
1995         function recodeInput( $s ) {
1996                 # Take the previous into account.
1997                 global $wgEditEncoding;
1998                 if ( $wgEditEncoding != '' ) {
1999                         $enc = $wgEditEncoding;
2000                 } else {
2001                         $enc = 'UTF-8';
2002                 }
2003                 if ( $enc == 'UTF-8' ) {
2004                         return $s;
2005                 } else {
2006                         return $this->iconv( $enc, 'UTF-8', $s );
2007                 }
2008         }
2009
2010         /**
2011          * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
2012          * also cleans up certain backwards-compatible sequences, converting them
2013          * to the modern Unicode equivalent.
2014          *
2015          * This is language-specific for performance reasons only.
2016          */
2017         function normalize( $s ) {
2018                 global $wgAllUnicodeFixes;
2019                 $s = UtfNormal::cleanUp( $s );
2020                 if ( $wgAllUnicodeFixes ) {
2021                         $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s );
2022                         $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s );
2023                 }
2024
2025                 return $s;
2026         }
2027
2028         /**
2029          * Transform a string using serialized data stored in the given file (which
2030          * must be in the serialized subdirectory of $IP). The file contains pairs
2031          * mapping source characters to destination characters.
2032          *
2033          * The data is cached in process memory. This will go faster if you have the
2034          * FastStringSearch extension.
2035          */
2036         function transformUsingPairFile( $file, $string ) {
2037                 if ( !isset( $this->transformData[$file] ) ) {
2038                         $data = wfGetPrecompiledData( $file );
2039                         if ( $data === false ) {
2040                                 throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
2041                         }
2042                         $this->transformData[$file] = new ReplacementArray( $data );
2043                 }
2044                 return $this->transformData[$file]->replace( $string );
2045         }
2046
2047         /**
2048          * For right-to-left language support
2049          *
2050          * @return bool
2051          */
2052         function isRTL() {
2053                 return self::$dataCache->getItem( $this->mCode, 'rtl' );
2054         }
2055
2056         /**
2057          * Return the correct HTML 'dir' attribute value for this language.
2058          * @return String
2059          */
2060         function getDir() {
2061                 return $this->isRTL() ? 'rtl' : 'ltr';
2062         }
2063
2064         /**
2065          * Return 'left' or 'right' as appropriate alignment for line-start
2066          * for this language's text direction.
2067          *
2068          * Should be equivalent to CSS3 'start' text-align value....
2069          *
2070          * @return String
2071          */
2072         function alignStart() {
2073                 return $this->isRTL() ? 'right' : 'left';
2074         }
2075
2076         /**
2077          * Return 'right' or 'left' as appropriate alignment for line-end
2078          * for this language's text direction.
2079          *
2080          * Should be equivalent to CSS3 'end' text-align value....
2081          *
2082          * @return String
2083          */
2084         function alignEnd() {
2085                 return $this->isRTL() ? 'left' : 'right';
2086         }
2087
2088         /**
2089          * A hidden direction mark (LRM or RLM), depending on the language direction
2090          *
2091          * @return string
2092          */
2093         function getDirMark() {
2094                 return $this->isRTL() ? "\xE2\x80\x8F" : "\xE2\x80\x8E";
2095         }
2096
2097         function capitalizeAllNouns() {
2098                 return self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
2099         }
2100
2101         /**
2102          * An arrow, depending on the language direction
2103          *
2104          * @return string
2105          */
2106         function getArrow() {
2107                 return $this->isRTL() ? '←' : '→';
2108         }
2109
2110         /**
2111          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
2112          *
2113          * @return bool
2114          */
2115         function linkPrefixExtension() {
2116                 return self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
2117         }
2118
2119         function getMagicWords() {
2120                 return self::$dataCache->getItem( $this->mCode, 'magicWords' );
2121         }
2122
2123         protected function doMagicHook() {
2124                 if ( $this->mMagicHookDone ) {
2125                         return;
2126                 }
2127                 $this->mMagicHookDone = true;
2128                 wfProfileIn( 'LanguageGetMagic' );
2129                 wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
2130                 wfProfileOut( 'LanguageGetMagic' );
2131         }
2132
2133         # Fill a MagicWord object with data from here
2134         function getMagic( $mw ) {
2135                 $this->doMagicHook();
2136
2137                 if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
2138                         $rawEntry = $this->mMagicExtensions[$mw->mId];
2139                 } else {
2140                         $magicWords = $this->getMagicWords();
2141                         if ( isset( $magicWords[$mw->mId] ) ) {
2142                                 $rawEntry = $magicWords[$mw->mId];
2143                         } else {
2144                                 $rawEntry = false;
2145                         }
2146                 }
2147
2148                 if ( !is_array( $rawEntry ) ) {
2149                         error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
2150                 } else {
2151                         $mw->mCaseSensitive = $rawEntry[0];
2152                         $mw->mSynonyms = array_slice( $rawEntry, 1 );
2153                 }
2154         }
2155
2156         /**
2157          * Add magic words to the extension array
2158          */
2159         function addMagicWordsByLang( $newWords ) {
2160                 $code = $this->getCode();
2161                 $fallbackChain = array();
2162                 while ( $code && !in_array( $code, $fallbackChain ) ) {
2163                         $fallbackChain[] = $code;
2164                         $code = self::getFallbackFor( $code );
2165                 }
2166                 if ( !in_array( 'en', $fallbackChain ) ) {
2167                         $fallbackChain[] = 'en';
2168                 }
2169                 $fallbackChain = array_reverse( $fallbackChain );
2170                 foreach ( $fallbackChain as $code ) {
2171                         if ( isset( $newWords[$code] ) ) {
2172                                 $this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
2173                         }
2174                 }
2175         }
2176
2177         /**
2178          * Get special page names, as an associative array
2179          *   case folded alias => real name
2180          */
2181         function getSpecialPageAliases() {
2182                 // Cache aliases because it may be slow to load them
2183                 if ( is_null( $this->mExtendedSpecialPageAliases ) ) {
2184                         // Initialise array
2185                         $this->mExtendedSpecialPageAliases =
2186                                 self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
2187                         wfRunHooks( 'LanguageGetSpecialPageAliases',
2188                                 array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );
2189                 }
2190
2191                 return $this->mExtendedSpecialPageAliases;
2192         }
2193
2194         /**
2195          * Italic is unsuitable for some languages
2196          *
2197          * @param $text String: the text to be emphasized.
2198          * @return string
2199          */
2200         function emphasize( $text ) {
2201                 return "<em>$text</em>";
2202         }
2203
2204          /**
2205           * Normally we output all numbers in plain en_US style, that is
2206           * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
2207           * point twohundredthirtyfive. However this is not sutable for all
2208           * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
2209           * Icelandic just want to use commas instead of dots, and dots instead
2210           * of commas like "293.291,235".
2211           *
2212           * An example of this function being called:
2213           * <code>
2214           * wfMsg( 'message', $wgLang->formatNum( $num ) )
2215           * </code>
2216           *
2217           * See LanguageGu.php for the Gujarati implementation and
2218           * $separatorTransformTable on MessageIs.php for
2219           * the , => . and . => , implementation.
2220           *
2221           * @todo check if it's viable to use localeconv() for the decimal
2222           *       separator thing.
2223           * @param $number Mixed: the string to be formatted, should be an integer
2224           *        or a floating point number.
2225           * @param $nocommafy Bool: set to true for special numbers like dates
2226           * @return string
2227           */
2228         function formatNum( $number, $nocommafy = false ) {
2229                 global $wgTranslateNumerals;
2230                 if ( !$nocommafy ) {
2231                         $number = $this->commafy( $number );
2232                         $s = $this->separatorTransformTable();
2233                         if ( $s ) {
2234                                 $number = strtr( $number, $s );
2235                         }
2236                 }
2237
2238                 if ( $wgTranslateNumerals ) {
2239                         $s = $this->digitTransformTable();
2240                         if ( $s ) {
2241                                 $number = strtr( $number, $s );
2242                         }
2243                 }
2244
2245                 return $number;
2246         }
2247
2248         function parseFormattedNumber( $number ) {
2249                 $s = $this->digitTransformTable();
2250                 if ( $s ) {
2251                         $number = strtr( $number, array_flip( $s ) );
2252                 }
2253
2254                 $s = $this->separatorTransformTable();
2255                 if ( $s ) {
2256                         $number = strtr( $number, array_flip( $s ) );
2257                 }
2258
2259                 $number = strtr( $number, array( ',' => '' ) );
2260                 return $number;
2261         }
2262
2263         /**
2264          * Adds commas to a given number
2265          *
2266          * @param $_ mixed
2267          * @return string
2268          */
2269         function commafy( $_ ) {
2270                 return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
2271         }
2272
2273         function digitTransformTable() {
2274                 return self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
2275         }
2276
2277         function separatorTransformTable() {
2278                 return self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
2279         }
2280
2281         /**
2282          * Take a list of strings and build a locale-friendly comma-separated
2283          * list, using the local comma-separator message.
2284          * The last two strings are chained with an "and".
2285          *
2286          * @param $l Array
2287          * @return string
2288          */
2289         function listToText( $l ) {
2290                 $s = '';
2291                 $m = count( $l ) - 1;
2292                 if ( $m == 1 ) {
2293                         return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
2294                 } else {
2295                         for ( $i = $m; $i >= 0; $i-- ) {
2296                                 if ( $i == $m ) {
2297                                         $s = $l[$i];
2298                                 } else if ( $i == $m - 1 ) {
2299                                         $s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s;
2300                                 } else {
2301                                         $s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s;
2302                                 }
2303                         }
2304                         return $s;
2305                 }
2306         }
2307
2308         /**
2309          * Take a list of strings and build a locale-friendly comma-separated
2310          * list, using the local comma-separator message.
2311          * @param $list array of strings to put in a comma list
2312          * @return string
2313          */
2314         function commaList( $list ) {
2315                 return implode(
2316                         $list,
2317                         wfMsgExt(
2318                                 'comma-separator',
2319                                 array( 'parsemag', 'escapenoentities', 'language' => $this )
2320                         )
2321                 );
2322         }
2323
2324         /**
2325          * Take a list of strings and build a locale-friendly semicolon-separated
2326          * list, using the local semicolon-separator message.
2327          * @param $list array of strings to put in a semicolon list
2328          * @return string
2329          */
2330         function semicolonList( $list ) {
2331                 return implode(
2332                         $list,
2333                         wfMsgExt(
2334                                 'semicolon-separator',
2335                                 array( 'parsemag', 'escapenoentities', 'language' => $this )
2336                         )
2337                 );
2338         }
2339
2340         /**
2341          * Same as commaList, but separate it with the pipe instead.
2342          * @param $list array of strings to put in a pipe list
2343          * @return string
2344          */
2345         function pipeList( $list ) {
2346                 return implode(
2347                         $list,
2348                         wfMsgExt(
2349                                 'pipe-separator',
2350                                 array( 'escapenoentities', 'language' => $this )
2351                         )
2352                 );
2353         }
2354
2355         /**
2356          * Truncate a string to a specified length in bytes, appending an optional
2357          * string (e.g. for ellipses)
2358          *
2359          * The database offers limited byte lengths for some columns in the database;
2360          * multi-byte character sets mean we need to ensure that only whole characters
2361          * are included, otherwise broken characters can be passed to the user
2362          *
2363          * If $length is negative, the string will be truncated from the beginning
2364          *
2365          * @param $string String to truncate
2366          * @param $length Int: maximum length (excluding ellipses)
2367          * @param $ellipsis String to append to the truncated text
2368          * @return string
2369          */
2370         function truncate( $string, $length, $ellipsis = '...' ) {
2371                 # Use the localized ellipsis character
2372                 if ( $ellipsis == '...' ) {
2373                         $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
2374                 }
2375                 # Check if there is no need to truncate
2376                 if ( $length == 0 ) {
2377                         return $ellipsis;
2378                 } elseif ( strlen( $string ) <= abs( $length ) ) {
2379                         return $string;
2380                 }
2381                 $stringOriginal = $string;
2382                 if ( $length > 0 ) {
2383                         $string = substr( $string, 0, $length ); // xyz...
2384                         $string = $this->removeBadCharLast( $string );
2385                         $string = $string . $ellipsis;
2386                 } else {
2387                         $string = substr( $string, $length ); // ...xyz
2388                         $string = $this->removeBadCharFirst( $string );
2389                         $string = $ellipsis . $string;
2390                 }
2391                 # Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
2392                 if ( strlen( $string ) < strlen( $stringOriginal ) ) {
2393                         return $string;
2394                 } else {
2395                         return $stringOriginal;
2396                 }
2397         }
2398
2399         /**
2400          * Remove bytes that represent an incomplete Unicode character
2401          * at the end of string (e.g. bytes of the char are missing)
2402          *
2403          * @param $string String
2404          * @return string
2405          */
2406         protected function removeBadCharLast( $string ) {
2407                 $char = ord( $string[strlen( $string ) - 1] );
2408                 $m = array();
2409                 if ( $char >= 0xc0 ) {
2410                         # We got the first byte only of a multibyte char; remove it.
2411                         $string = substr( $string, 0, -1 );
2412                 } elseif ( $char >= 0x80 &&
2413                           preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
2414                                                   '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
2415                 {
2416                         # We chopped in the middle of a character; remove it
2417                         $string = $m[1];
2418                 }
2419                 return $string;
2420         }
2421
2422         /**
2423          * Remove bytes that represent an incomplete Unicode character
2424          * at the start of string (e.g. bytes of the char are missing)
2425          *
2426          * @param $string String
2427          * @return string
2428          */
2429         protected function removeBadCharFirst( $string ) {
2430                 $char = ord( $string[0] );
2431                 if ( $char >= 0x80 && $char < 0xc0 ) {
2432                         # We chopped in the middle of a character; remove the whole thing
2433                         $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
2434                 }
2435                 return $string;
2436         }
2437
2438         /*
2439          * Truncate a string of valid HTML to a specified length in bytes,
2440          * appending an optional string (e.g. for ellipses), and return valid HTML
2441          *
2442          * This is only intended for styled/linked text, such as HTML with
2443          * tags like <span> and <a>, were the tags are self-contained (valid HTML)
2444          *
2445          * Note: tries to fix broken HTML with MWTidy
2446          *
2447          * @param string $text HTML string to truncate
2448          * @param int $length (zero/positive) Maximum length (excluding ellipses)
2449          * @param string $ellipsis String to append to the truncated text
2450          * @returns string
2451          */
2452         function truncateHtml( $text, $length, $ellipsis = '...' ) {
2453                 # Use the localized ellipsis character
2454                 if ( $ellipsis == '...' ) {
2455                         $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
2456                 }
2457                 # Check if there is no need to truncate
2458                 if ( $length <= 0 ) {
2459                         return $ellipsis; // no text shown, nothing to format
2460                 } elseif ( strlen( $text ) <= $length ) {
2461                         return $text; // string short enough even *with* HTML
2462                 }
2463                 $text = MWTidy::tidy( $text ); // fix tags
2464                 $displayLen = 0; // innerHTML legth so far
2465                 $testingEllipsis = false; // checking if ellipses will make string longer/equal?
2466                 $tagType = 0; // 0-open, 1-close
2467                 $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
2468                 $entityState = 0; // 0-not entity, 1-entity
2469                 $tag = $ret = '';
2470                 $openTags = array(); // open tag stack
2471                 $textLen = strlen( $text );
2472                 for ( $pos = 0; $pos < $textLen; ++$pos ) {
2473                         $ch = $text[$pos];
2474                         $lastCh = $pos ? $text[$pos - 1] : '';
2475                         $ret .= $ch; // add to result string
2476                         if ( $ch == '<' ) {
2477                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
2478                                 $entityState = 0; // for bad HTML
2479                                 $bracketState = 1; // tag started (checking for backslash)
2480                         } elseif ( $ch == '>' ) {
2481                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
2482                                 $entityState = 0; // for bad HTML
2483                                 $bracketState = 0; // out of brackets
2484                         } elseif ( $bracketState == 1 ) {
2485                                 if ( $ch == '/' ) {
2486                                         $tagType = 1; // close tag (e.g. "</span>")
2487                                 } else {
2488                                         $tagType = 0; // open tag (e.g. "<span>")
2489                                         $tag .= $ch;
2490                                 }
2491                                 $bracketState = 2; // building tag name
2492                         } elseif ( $bracketState == 2 ) {
2493                                 if ( $ch != ' ' ) {
2494                                         $tag .= $ch;
2495                                 } else {
2496                                         // Name found (e.g. "<a href=..."), add on tag attributes...
2497                                         $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
2498                                 }
2499                         } elseif ( $bracketState == 0 ) {
2500                                 if ( $entityState ) {
2501                                         if ( $ch == ';' ) {
2502                                                 $entityState = 0;
2503                                                 $displayLen++; // entity is one displayed char
2504                                         }
2505                                 } else {
2506                                         if ( $ch == '&' ) {
2507                                                 $entityState = 1; // entity found, (e.g. "&#160;")
2508                                         } else {
2509                                                 $displayLen++; // this char is displayed
2510                                                 // Add on the other display text after this...
2511                                                 $skipped = $this->truncate_skip(
2512                                                         $ret, $text, "<>&", $pos + 1, $length - $displayLen );
2513                                                 $displayLen += $skipped;
2514                                                 $pos += $skipped;
2515                                         }
2516                                 }
2517                         }
2518                         # Consider truncation once the display length has reached the maximim.
2519                         # Double-check that we're not in the middle of a bracket/entity...
2520                         if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
2521                                 if ( !$testingEllipsis ) {
2522                                         $testingEllipsis = true;
2523                                         # Save where we are; we will truncate here unless
2524                                         # the ellipsis actually makes the string longer.
2525                                         $pOpenTags = $openTags; // save state
2526                                         $pRet = $ret; // save state
2527                                 } elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
2528                                         # Ellipsis won't make string longer/equal, the truncation point was OK.
2529                                         $openTags = $pOpenTags; // reload state
2530                                         $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
2531                                         $ret .= $ellipsis; // add ellipsis
2532                                         break;
2533                                 }
2534                         }
2535                 }
2536                 if ( $displayLen == 0 ) {
2537                         return ''; // no text shown, nothing to format
2538                 }
2539                 // Close the last tag if left unclosed by bad HTML
2540                 $this->truncate_endBracket( $tag, $text[$textLen - 1], $tagType, $openTags );
2541                 while ( count( $openTags ) > 0 ) {
2542                         $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
2543                 }
2544                 return $ret;
2545         }
2546
2547         // truncateHtml() helper function
2548         // like strcspn() but adds the skipped chars to $ret
2549         private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
2550                 $skipCount = 0;
2551                 if ( $start < strlen( $text ) ) {
2552                         $skipCount = strcspn( $text, $search, $start, $len );
2553                         $ret .= substr( $text, $start, $skipCount );
2554                 }
2555                 return $skipCount;
2556         }
2557
2558         /*
2559          * truncateHtml() helper function
2560          * (a) push or pop $tag from $openTags as needed
2561          * (b) clear $tag value
2562          * @param String &$tag Current HTML tag name we are looking at
2563          * @param int $tagType (0-open tag, 1-close tag)
2564          * @param char $lastCh Character before the '>' that ended this tag
2565          * @param array &$openTags Open tag stack (not accounting for $tag)
2566          */
2567         private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
2568                 $tag = ltrim( $tag );
2569                 if ( $tag != '' ) {
2570                         if ( $tagType == 0 && $lastCh != '/' ) {
2571                                 $openTags[] = $tag; // tag opened (didn't close itself)
2572                         } else if ( $tagType == 1 ) {
2573                                 if ( $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
2574                                         array_pop( $openTags ); // tag closed
2575                                 }
2576                         }
2577                         $tag = '';
2578                 }
2579         }
2580
2581         /**
2582          * Grammatical transformations, needed for inflected languages
2583          * Invoked by putting {{grammar:case|word}} in a message
2584          *
2585          * @param $word string
2586          * @param $case string
2587          * @return string
2588          */
2589         function convertGrammar( $word, $case ) {
2590                 global $wgGrammarForms;
2591                 if ( isset( $wgGrammarForms[$this->getCode()][$case][$word] ) ) {
2592                         return $wgGrammarForms[$this->getCode()][$case][$word];
2593                 }
2594                 return $word;
2595         }
2596
/**
 * Pick one of several alternative texts according to a gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If the second or third parameter is not specified, masculine is used.
 * These details may be overridden per language.
 *
 * @param $gender string: 'male' and 'female' select the first and second
 *      form; any other value selects the third form when it is set
 * @param $forms Array: the alternative texts
 * @return string the selected form, or '' when no forms were given
 */
function gender( $gender, $forms ) {
        if ( count( $forms ) === 0 ) {
                return '';
        }

        // Guarantee at least a masculine and a feminine slot.
        $forms = $this->preConvertPlural( $forms, 2 );

        if ( $gender === 'male' ) {
                $index = 0;
        } elseif ( $gender === 'female' ) {
                $index = 1;
        } else {
                // Neutral/unknown: third form if present, else masculine.
                $index = isset( $forms[2] ) ? 2 : 0;
        }
        return $forms[$index];
}
2618
/**
 * Plural form transformations, needed for some languages.
 * For example, Russian and Polish have three plural forms depending on
 * "count mod 10". See [[w:Plural]]. This base implementation knows two
 * forms only: the first is used when the count equals 1, the second
 * otherwise.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language,
 *      or '' when no forms were given
 */
function convertPlural( $count, $forms ) {
        if ( count( $forms ) === 0 ) {
                return '';
        }

        // Make sure both a singular and a plural slot exist.
        $forms = $this->preConvertPlural( $forms, 2 );

        if ( $count == 1 ) {
                return $forms[0];
        }
        return $forms[1];
}
2642
/**
 * Pads the list of forms given to convertPlural (or gender) up to the
 * requested amount of forms by appending copies of the last one.
 *
 * The last element is taken with end(), so the input does not need
 * contiguous 0-based keys. An empty input is padded with empty strings
 * instead of raising an undefined-offset notice. Arrays that already hold
 * at least $count forms are returned unchanged.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
        // $forms is a by-value copy, so moving its internal pointer with
        // end() does not affect the caller.
        $filler = count( $forms ) ? end( $forms ) : '';

        while ( count( $forms ) < $count ) {
                $forms[] = $filler;
        }
        return $forms;
}
2657
/**
 * For translating of expiry times.
 *
 * Reads the 'ipboptions' message as a comma-separated list of
 * "label:value" pairs and looks for the pair whose value equals $str.
 * A message of '-' turns the translation off. Entries without a colon
 * are skipped.
 *
 * @param $str String: the validated block time in English
 * @return String: the trimmed, HTML-escaped label of the matching option,
 *      or $str unchanged when nothing matches
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
        $optionList = $this->getMessageFromDB( 'ipboptions' );

        if ( $optionList != '-' ) {
                foreach ( explode( ',', $optionList ) as $pair ) {
                        if ( strpos( $pair, ':' ) !== false ) {
                                list( $label, $expiry ) = explode( ':', $pair );
                                if ( strcmp( $str, $expiry ) == 0 ) {
                                        return htmlspecialchars( trim( $label ) );
                                }
                        }
                }
        }

        return $str;
}
2683
/**
 * Hook for segmenting text before it is diffed. Languages like Chinese
 * need to be segmented in order for the diff to be of any use.
 * This implementation returns the text untouched.
 *
 * @param $text String
 * @return String
 */
function segmentForDiff( $text ) {
        return $text;
}
2694
/**
 * Counterpart of segmentForDiff(): undo the segmentation so the diff
 * result can be shown. This implementation returns the text untouched.
 *
 * @param $text String
 * @return String
 */
function unsegmentForDiff( $text ) {
        return $text;
}
2704
/**
 * Convert text to all supported variants.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's autoConvertToAllVariants() returns
 */
function autoConvertToAllVariants( $text ) {
        $converter = $this->mConverter;
        return $converter->autoConvertToAllVariants( $text );
}
2709
/**
 * Convert text to different variants of a language.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's convert() returns
 */
function convert( $text ) {
        $converter = $this->mConverter;
        return $converter->convert( $text );
}
2714
/**
 * Convert a Title object to a string in the preferred variant.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $title Title object
 * @return whatever the converter's convertTitle() returns
 */
function convertTitle( $title ) {
        $converter = $this->mConverter;
        return $converter->convertTitle( $title );
}
2719
/**
 * Check if this is a language with variants, i.e. whether getVariants()
 * lists more than one entry.
 *
 * @return bool
 */
function hasVariants() {
        $variants = $this->getVariants();
        return count( $variants ) > 1;
}
2724
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's armourMath() returns
 */
function armourMath( $text ) {
        $converter = $this->mConverter;
        return $converter->armourMath( $text );
}
2729
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title.
 *      It is forwarded as a second argument to convert(); note that
 *      convert() as declared in this class only takes $text.
 * @return string the converted text passed through htmlspecialchars()
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
        $converted = $this->convert( $text, $isTitle );
        return htmlspecialchars( $converted );
}
2740
/**
 * Convert a category key.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $key the category key
 * @return whatever the converter's convertCategoryKey() returns
 */
function convertCategoryKey( $key ) {
        $converter = $this->mConverter;
        return $converter->convertCategoryKey( $key );
}
2744
/**
 * Get the list of variants supported by this language, as reported by
 * the converter object held in $this->mConverter.
 * See the sample implementation in LanguageZh.php.
 *
 * @return array an array of language codes
 */
function getVariants() {
        $converter = $this->mConverter;
        return $converter->getVariants();
}
2754
/**
 * Get the preferred variant, as reported by the converter object held
 * in $this->mConverter.
 *
 * @return whatever the converter's getPreferredVariant() returns
 */
function getPreferredVariant() {
        $converter = $this->mConverter;
        return $converter->getPreferredVariant();
}
2758
/**
 * Get the default variant, as reported by the converter object held
 * in $this->mConverter.
 *
 * @return whatever the converter's getDefaultVariant() returns
 */
function getDefaultVariant() {
        $converter = $this->mConverter;
        return $converter->getDefaultVariant();
}
2762
/**
 * Get the URL variant, as reported by the converter object held
 * in $this->mConverter.
 *
 * @return whatever the converter's getURLVariant() returns
 */
function getURLVariant() {
        $converter = $this->mConverter;
        return $converter->getURLVariant();
}
2766
/**
 * If a language supports multiple variants, a link that does not exist
 * in one variant may still exist in another one. This method asks the
 * converter held in $this->mConverter to look for it.
 * See e.g. LanguageZh.php
 *
 * @param $link String: the name of the link
 * @param $nt Mixed: the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *      we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
        $converter = $this->mConverter;
        $converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
2782
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @deprecated Use autoConvertToAllVariants()
 */
function convertLinkToAllVariants( $text ) {
        $converter = $this->mConverter;
        return $converter->convertLinkToAllVariants( $text );
}
2793
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value comes from the
 * converter object held in $this->mConverter.
 *
 * @return string
 */
function getExtraHashOptions() {
        $converter = $this->mConverter;
        return $converter->getExtraHashOptions();
}
2803
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter held in $this->mConverter.
 *
 * @return string
 */
function getParsedTitle() {
        $converter = $this->mConverter;
        return $converter->getParsedTitle();
}
2814
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser. The work is done by the converter
 * object held in $this->mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse passed through to the converter unchanged
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
        $converter = $this->mConverter;
        return $converter->markNoConversion( $text, $noParse );
}
2826
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 * Read from the 'linkTrail' item of the static data cache for this
 * object's language code.
 *
 * @return string
 */
function linkTrail() {
        $cache = self::$dataCache;
        return $cache->getItem( $this->mCode, 'linkTrail' );
}
2836
/**
 * Get the language object, which is this very instance.
 *
 * @return the object the method was called on
 */
function getLangObj() {
        return $this;
}
2840
/**
 * Get the RFC 3066 code for this language object, as stored in
 * $this->mCode.
 *
 * @return the stored language code
 */
function getCode() {
        return $this->mCode;
}
2847
/**
 * Replace the language code stored in $this->mCode.
 * No validation is applied to the new value here.
 *
 * @param $code the new language code
 */
function setCode( $code ) {
        $this->mCode = $code;
}
2851
/**
 * Get the name of a file for a certain language code.
 * The code is mangled by upper-casing its first character and turning
 * every '-' into '_'.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws MWException if the code is rejected by Language::isValidCode()
 *      or contains a slash, a backslash or a NUL byte
 */
static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
        // Protect against path traversal: the code must be valid and must
        // be free of directory separators and NUL bytes.
        $isSafe = Language::isValidCode( $code )
                && strcspn( $code, "/\\\000" ) === strlen( $code );
        if ( !$isSafe ) {
                throw new MWException( "Invalid language code \"$code\"" );
        }

        $mangledCode = str_replace( '-', '_', ucfirst( $code ) );
        return $prefix . $mangledCode . $suffix;
}
2869
/**
 * Get the language code from a file name. Inverse of getFileName().
 * The code part must be one upper-case ASCII letter followed by one or
 * more lower-case ASCII letters or underscores; it is lower-cased and
 * its underscores become hyphens.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return Language code, or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
        $pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
                preg_quote( $suffix, '/' ) . '/';

        $matches = null;
        if ( !preg_match( $pattern, $filename, $matches ) ) {
                return false;
        }

        $mangledCode = strtolower( $matches[1] );
        return str_replace( '_', '-', $mangledCode );
}
2886
/**
 * Build the path of the messages file for a language code:
 * "$IP/languages/messages/Messages" . mangled code . ".php".
 *
 * @param $code string Language code
 * @return string as produced by getFileName()
 */
static function getMessagesFileName( $code ) {
        global $IP;

        $prefix = "$IP/languages/messages/Messages";
        return self::getFileName( $prefix, $code, '.php' );
}
2891
/**
 * Build the path of the class file for a language code:
 * "$IP/languages/classes/Language" . mangled code . ".php".
 *
 * @param $code string Language code
 * @return string as produced by getFileName()
 */
static function getClassFileName( $code ) {
        global $IP;

        $prefix = "$IP/languages/classes/Language";
        return self::getFileName( $prefix, $code, '.php' );
}
2896
/**
 * Get the fallback for a given language.
 *
 * @param $code string Language code
 * @return false for 'en', otherwise the 'fallback' item read from the
 *      localisation cache for $code
 */
static function getFallbackFor( $code ) {
        // Shortcut: 'en' is answered without touching the cache.
        if ( $code !== 'en' ) {
                return self::getLocalisationCache()->getItem( $code, 'fallback' );
        }
        return false;
}
2908
/**
 * Get all messages for a given language, i.e. the 'messages' item of
 * the localisation cache for that code.
 * WARNING: this may take a long time
 *
 * @param $code string Language code
 */
static function getMessagesFor( $code ) {
        $cache = self::getLocalisationCache();
        return $cache->getItem( $code, 'messages' );
}
2916
/**
 * Get a message for a given language, i.e. the subitem $key of the
 * 'messages' item of the localisation cache for that code.
 *
 * @param $key string Message key
 * @param $code string Language code
 */
static function getMessageFor( $key, $code ) {
        $cache = self::getLocalisationCache();
        return $cache->getSubitem( $code, 'messages', $key );
}
2923
/**
 * Expand the $1 placeholder in a namespace name.
 *
 * Names without '$1' are returned untouched. Otherwise '$1' is replaced
 * by $wgMetaNamespace, {{grammar:case|word}} constructs are resolved via
 * replaceGrammarInNamespace(), and spaces are turned into underscores.
 *
 * @param $talk string Namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
        if ( strpos( $talk, '$1' ) === false ) {
                return $talk;
        }

        global $wgMetaNamespace;
        $talk = str_replace( '$1', $wgMetaNamespace, $talk );

        # Allow grammar transformations
        # Allowing full message-style parsing would make simple requests
        # such as action=raw much more expensive than they need to be.
        # This will hopefully cover most cases.
        # The callback holds $this by value: objects are handles, so no
        # reference is needed, and taking a reference to $this is
        # rejected by newer PHP versions.
        $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
                array( $this, 'replaceGrammarInNamespace' ), $talk );
        return str_replace( ' ', '_', $talk );
}
2940
/**
 * preg_replace_callback() handler used by fixVariableInNamespace().
 * The first capture group is the grammatical case and the second one
 * the word; both are trimmed before being given to convertGrammar().
 *
 * @param $m array Match array of the {{grammar:case|word}} pattern
 * @return string the result of convertGrammar()
 */
function replaceGrammarInNamespace( $m ) {
        $case = trim( $m[1] );
        $word = trim( $m[2] );
        return $this->convertGrammar( $word, $case );
}
2944
/**
 * Get the upper-case and lower-case character maps.
 *
 * The maps are read from the precompiled file Utf8Case.ser on the first
 * call and kept in static variables for subsequent calls.
 *
 * @return array ( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException if wfGetPrecompiledData() reports the file missing
 */
static function getCaseMaps() {
        static $wikiUpperChars, $wikiLowerChars;
        if ( isset( $wikiUpperChars ) ) {
                return array( $wikiUpperChars, $wikiLowerChars );
        }

        wfProfileIn( __METHOD__ );
        $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
        if ( $arr === false ) {
                // Close the profiling section before bailing out so the
                // wfProfileIn() above does not stay unbalanced.
                wfProfileOut( __METHOD__ );
                throw new MWException(
                        "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
        }
        $wikiUpperChars = $arr['wikiUpperChars'];
        $wikiLowerChars = $arr['wikiLowerChars'];
        wfProfileOut( __METHOD__ );
        return array( $wikiUpperChars, $wikiLowerChars );
}
2962
/**
 * Format a duration given in seconds as a short text.
 *
 * - below 10 seconds (after rounding to tenths): one decimal place
 * - below 60 seconds (after rounding): whole seconds
 * - below 3600 seconds (after rounding): minutes and seconds
 * - otherwise: hours, minutes and seconds
 *
 * Numbers go through formatNum(); the unit suffixes are the messages
 * 'hours-abbrev', 'minutes-abbrev' and 'seconds-abbrev'.
 *
 * @param $seconds the duration in seconds, may be fractional
 * @return string
 */
function formatTimePeriod( $seconds ) {
        $tenths = round( $seconds * 10 );
        if ( $tenths < 100 ) {
                $value = sprintf( "%.1f", $tenths / 10 );
                return $this->formatNum( $value ) . $this->getMessageFromDB( 'seconds-abbrev' );
        }

        $wholeSeconds = round( $seconds );
        if ( $wholeSeconds < 60 ) {
                return $this->formatNum( $wholeSeconds ) . $this->getMessageFromDB( 'seconds-abbrev' );
        }

        if ( $wholeSeconds < 3600 ) {
                $minutes = floor( $seconds / 60 );
                $secondsPart = round( fmod( $seconds, 60 ) );
                // Rounding may yield a full minute: carry it over.
                if ( $secondsPart == 60 ) {
                        $secondsPart = 0;
                        $minutes++;
                }
                $text = $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ';
                $text .= $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
                return $text;
        }

        $hours = floor( $seconds / 3600 );
        $remainder = $seconds - $hours * 3600;
        $minutes = floor( $remainder / 60 );
        $secondsPart = round( $remainder - $minutes * 60 );
        // Rounding may yield a full minute, which may in turn complete
        // an hour: carry both over.
        if ( $secondsPart == 60 ) {
                $secondsPart = 0;
                $minutes++;
        }
        if ( $minutes == 60 ) {
                $minutes = 0;
                $hours++;
        }
        $text = $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ';
        $text .= $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ';
        $text .= $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
        return $text;
}
2994
/**
 * Format a bitrate for output, using the unit bps, kbps, Mbps or Gbps
 * that fits the magnitude (powers of 1000).
 *
 * Mantissas below 10 keep one decimal place, larger ones are rounded to
 * an integer. Values of zero or less are printed as is in bps. The unit
 * index is clamped to the unit table, so values below 1 bps stay in bps
 * and values of 1000 Gbps and above stay in Gbps instead of losing
 * their unit.
 *
 * @param $bps the bitrate in bits per second
 * @return string the number passed through formatNum(), followed by the unit
 */
function formatBitrate( $bps ) {
        $units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
        if ( $bps <= 0 ) {
                return $this->formatNum( $bps ) . $units[0];
        }

        // floor() returns a float; cast it so it is a proper array index,
        // then keep it inside the bounds of $units.
        $unitIndex = (int)floor( log10( $bps ) / 3 );
        $unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );

        $mantissa = $bps / pow( 1000, $unitIndex );
        if ( $mantissa < 10 ) {
                $mantissa = round( $mantissa, 1 );
        } else {
                $mantissa = round( $mantissa );
        }
        return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
3009
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question.
 *
 * The size is divided by 1024 for as long as it is greater than 1024,
 * at most three times. Bytes and kilobytes are rounded to integers,
 * megabytes and gigabytes to two decimal places. The formatted number
 * replaces '$1' in the matching 'size-*' message.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
        $messageKeys = array(
                'size-bytes',
                'size-kilobytes',
                'size-megabytes',
                'size-gigabytes',
        );

        // Climb the unit ladder while the value is still above 1024.
        $level = 0;
        while ( $level < 3 && $size > 1024 ) {
                $size = $size / 1024;
                $level++;
        }

        // For small sizes no decimal places necessary;
        // for MB and bigger two decimal places are smarter.
        $round = ( $level >= 2 ) ? 2 : 0;

        $size = round( $size, $round );
        $text = $this->getMessageFromDB( $messageKeys[$level] );
        return str_replace( '$1', $this->formatNum( $size ), $text );
}
3042
/**
 * Get the conversion rule title, if any, as reported by the converter
 * object held in $this->mConverter.
 *
 * @return whatever the converter's getConvRuleTitle() returns
 */
function getConvRuleTitle() {
        $converter = $this->mConverter;
        return $converter->getConvRuleTitle();
}
3049 }