languages/Language.php

   1 <?php
   2 /**
   3  * @defgroup Language Language
   4  *
   5  * @file
   6  * @ingroup Language
   7  */
   8
   9 if( !defined( 'MEDIAWIKI' ) ) {
  10         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
  11         exit( 1 );
  12 }
  13
  14 # Read language names
  15 global $wgLanguageNames;
  16 require_once( dirname( __FILE__ ) . '/Names.php' );
  17
  18 global $wgInputEncoding, $wgOutputEncoding;
  19
  20 /**
  21  * These are always UTF-8, they exist only for backwards compatibility
  22  */
  23 $wgInputEncoding    = 'UTF-8';
  24 $wgOutputEncoding       = 'UTF-8';
  25
  26 if( function_exists( 'mb_strtoupper' ) ) {
  27         mb_internal_encoding( 'UTF-8' );
  28 }
  29
  30 /**
  31  * a fake language converter
  32  *
  33  * @ingroup Language
  34  */
  35 class FakeConverter {
  36         var $mLang;
  37         function FakeConverter( $langobj ) { $this->mLang = $langobj; }
  38         function autoConvertToAllVariants( $text ) { return $text; }
  39         function convert( $t, $i ) { return $t; }
  40         function getVariants() { return array( $this->mLang->getCode() ); }
  41         function getPreferredVariant() { return $this->mLang->getCode(); }
  42         function getConvRuleTitle() { return false; }
  43         function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {}
  44         function getExtraHashOptions() { return ''; }
  45         function getParsedTitle() { return ''; }
  46         function markNoConversion( $text, $noParse = false ) { return $text; }
  47         function convertCategoryKey( $key ) { return $key; }
  48         function convertLinkToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
  49         function armourMath( $text ) { return $text; }
  50 }
  51
  52 /**
  53  * Internationalisation code
  54  * @ingroup Language
  55  */
  56 class Language {
  57         var $mConverter, $mVariants, $mCode, $mLoaded = false;
  58         var $mMagicExtensions = array(), $mMagicHookDone = false;
  59
  60         var $mNamespaceIds, $namespaceNames, $namespaceAliases;
  61         var $dateFormatStrings = array();
  62         var $mExtendedSpecialPageAliases;
  63
  64         /**
  65          * ReplacementArray object caches
  66          */
  67         var $transformData = array();
  68
  69         static public $dataCache;
  70         static public $mLangObjCache = array();
  71
  72         static public $mWeekdayMsgs = array(
  73                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
  74                 'friday', 'saturday'
  75         );
  76
  77         static public $mWeekdayAbbrevMsgs = array(
  78                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
  79         );
  80
  81         static public $mMonthMsgs = array(
  82                 'january', 'february', 'march', 'april', 'may_long', 'june',
  83                 'july', 'august', 'september', 'october', 'november',
  84                 'december'
  85         );
  86         static public $mMonthGenMsgs = array(
  87                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
  88                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
  89                 'december-gen'
  90         );
  91         static public $mMonthAbbrevMsgs = array(
  92                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
  93                 'sep', 'oct', 'nov', 'dec'
  94         );
  95
  96         static public $mIranianCalendarMonthMsgs = array(
  97                 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
  98                 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
  99                 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
 100                 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
 101         );
 102
 103         static public $mHebrewCalendarMonthMsgs = array(
 104                 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
 105                 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
 106                 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
 107                 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
 108                 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
 109         );
 110
 111         static public $mHebrewCalendarMonthGenMsgs = array(
 112                 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
 113                 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
 114                 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
 115                 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
 116                 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
 117         );
 118
 119         static public $mHijriCalendarMonthMsgs = array(
 120                 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
 121                 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
 122                 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
 123                 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
 124         );
 125
 126         /**
 127          * Get a cached language object for a given language code
 128          */
 129         static function factory( $code ) {
 130                 if ( !isset( self::$mLangObjCache[$code] ) ) {
 131                         if( count( self::$mLangObjCache ) > 10 ) {
 132                                 // Don't keep a billion objects around, that's stupid.
 133                                 self::$mLangObjCache = array();
 134                         }
 135                         self::$mLangObjCache[$code] = self::newFromCode( $code );
 136                 }
 137                 return self::$mLangObjCache[$code];
 138         }
 139
 140         /**
 141          * Create a language object for a given language code
 142          */
 143         protected static function newFromCode( $code ) {
 144                 global $IP;
 145                 static $recursionLevel = 0;
 146                 if ( $code == 'en' ) {
 147                         $class = 'Language';
 148                 } else {
 149                         $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 150                         // Preload base classes to work around APC/PHP5 bug
 151                         if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
 152                                 include_once( "$IP/languages/classes/$class.deps.php" );
 153                         }
 154                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 155                                 include_once( "$IP/languages/classes/$class.php" );
 156                         }
 157                 }
 158
 159                 if ( $recursionLevel > 5 ) {
 160                         throw new MWException( "Language fallback loop detected when creating class $class\n" );
 161                 }
 162
 163                 if( !class_exists( $class ) ) {
 164                         $fallback = Language::getFallbackFor( $code );
 165                         ++$recursionLevel;
 166                         $lang = Language::newFromCode( $fallback );
 167                         --$recursionLevel;
 168                         $lang->setCode( $code );
 169                 } else {
 170                         $lang = new $class;
 171                 }
 172                 return $lang;
 173         }
 174
 175         /**
 176          * Get the LocalisationCache instance
 177          */
 178         public static function getLocalisationCache() {
 179                 if ( is_null( self::$dataCache ) ) {
 180                         global $wgLocalisationCacheConf;
 181                         $class = $wgLocalisationCacheConf['class'];
 182                         self::$dataCache = new $class( $wgLocalisationCacheConf );
 183                 }
 184                 return self::$dataCache;
 185         }
 186
 187         function __construct() {
 188                 $this->mConverter = new FakeConverter( $this );
 189                 // Set the code to the name of the descendant
 190                 if ( get_class( $this ) == 'Language' ) {
 191                         $this->mCode = 'en';
 192                 } else {
 193                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 194                 }
 195                 self::getLocalisationCache();
 196         }
 197
 198         /**
 199          * Reduce memory usage
 200          */
 201         function __destruct() {
 202                 foreach ( $this as $name => $value ) {
 203                         unset( $this->$name );
 204                 }
 205         }
 206
 207         /**
 208          * Hook which will be called if this is the content language.
 209          * Descendants can use this to register hook functions or modify globals
 210          */
 211         function initContLang() {}
 212
 213         /**
 214          * @deprecated Use User::getDefaultOptions()
 215          * @return array
 216          */
 217         function getDefaultUserOptions() {
 218                 wfDeprecated( __METHOD__ );
 219                 return User::getDefaultOptions();
 220         }
 221
 222         function getFallbackLanguageCode() {
 223                 if ( $this->mCode === 'en' ) {
 224                         return false;
 225                 } else {
 226                         return self::$dataCache->getItem( $this->mCode, 'fallback' );
 227                 }
 228         }
 229
 230         /**
 231          * Exports $wgBookstoreListEn
 232          * @return array
 233          */
 234         function getBookstoreList() {
 235                 return self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
 236         }
 237
 238         /**
 239          * @return array
 240          */
 241         function getNamespaces() {
 242                 if ( is_null( $this->namespaceNames ) ) {
 243                         global $wgExtraNamespaces, $wgMetaNamespace, $wgMetaNamespaceTalk;
 244
 245                         $this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
 246                         if ( $wgExtraNamespaces ) {
 247                                 $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames;
 248                         }
 249
 250                         $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
 251                         if ( $wgMetaNamespaceTalk ) {
 252                                 $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
 253                         } else {
 254                                 $talk = $this->namespaceNames[NS_PROJECT_TALK];
 255                                 $this->namespaceNames[NS_PROJECT_TALK] =
 256                                         $this->fixVariableInNamespace( $talk );
 257                         }
 258
 259                         # The above mixing may leave namespaces out of canonical order.
 260                         # Re-order by namespace ID number...
 261                         ksort( $this->namespaceNames );
 262                 }
 263                 return $this->namespaceNames;
 264         }
 265
 266         /**
 267          * A convenience function that returns the same thing as
 268          * getNamespaces() except with the array values changed to ' '
 269          * where it found '_', useful for producing output to be displayed
 270          * e.g. in <select> forms.
 271          *
 272          * @return array
 273          */
 274         function getFormattedNamespaces() {
 275                 $ns = $this->getNamespaces();
 276                 foreach( $ns as $k => $v ) {
 277                         $ns[$k] = strtr( $v, '_', ' ' );
 278                 }
 279                 return $ns;
 280         }
 281
 282         /**
 283          * Get a namespace value by key
 284          * <code>
 285          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 286          * echo $mw_ns; // prints 'MediaWiki'
 287          * </code>
 288          *
 289          * @param $index Int: the array key of the namespace to return
 290          * @return mixed, string if the namespace value exists, otherwise false
 291          */
 292         function getNsText( $index ) {
 293                 $ns = $this->getNamespaces();
 294                 return isset( $ns[$index] ) ? $ns[$index] : false;
 295         }
 296
 297         /**
 298          * A convenience function that returns the same thing as
 299          * getNsText() except with '_' changed to ' ', useful for
 300          * producing output.
 301          *
 302          * @return array
 303          */
 304         function getFormattedNsText( $index ) {
 305                 $ns = $this->getNsText( $index );
 306                 return strtr( $ns, '_', ' ' );
 307         }
 308
 309         /**
 310          * Get a namespace key by value, case insensitive.
 311          * Only matches namespace names for the current language, not the
 312          * canonical ones defined in Namespace.php.
 313          *
 314          * @param $text String
 315          * @return mixed An integer if $text is a valid value otherwise false
 316          */
 317         function getLocalNsIndex( $text ) {
 318                 $lctext = $this->lc( $text );
 319                 $ids = $this->getNamespaceIds();
 320                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 321         }
 322
 323         function getNamespaceAliases() {
 324                 if ( is_null( $this->namespaceAliases ) ) {
 325                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
 326                         if ( !$aliases ) {
 327                                 $aliases = array();
 328                         } else {
 329                                 foreach ( $aliases as $name => $index ) {
 330                                         if ( $index === NS_PROJECT_TALK ) {
 331                                                 unset( $aliases[$name] );
 332                                                 $name = $this->fixVariableInNamespace( $name );
 333                                                 $aliases[$name] = $index;
 334                                         }
 335                                 }
 336                         }
 337                         $this->namespaceAliases = $aliases;
 338                 }
 339                 return $this->namespaceAliases;
 340         }
 341
 342         function getNamespaceIds() {
 343                 if ( is_null( $this->mNamespaceIds ) ) {
 344                         global $wgNamespaceAliases;
 345                         # Put namespace names and aliases into a hashtable.
 346                         # If this is too slow, then we should arrange it so that it is done
 347                         # before caching. The catch is that at pre-cache time, the above
 348                         # class-specific fixup hasn't been done.
 349                         $this->mNamespaceIds = array();
 350                         foreach ( $this->getNamespaces() as $index => $name ) {
 351                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 352                         }
 353                         foreach ( $this->getNamespaceAliases() as $name => $index ) {
 354                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 355                         }
 356                         if ( $wgNamespaceAliases ) {
 357                                 foreach ( $wgNamespaceAliases as $name => $index ) {
 358                                         $this->mNamespaceIds[$this->lc( $name )] = $index;
 359                                 }
 360                         }
 361                 }
 362                 return $this->mNamespaceIds;
 363         }
 364
 365
 366         /**
 367          * Get a namespace key by value, case insensitive.  Canonical namespace
 368          * names override custom ones defined for the current language.
 369          *
 370          * @param $text String
 371          * @return mixed An integer if $text is a valid value otherwise false
 372          */
 373         function getNsIndex( $text ) {
 374                 $lctext = $this->lc( $text );
 375                 if ( ( $ns = MWNamespace::getCanonicalIndex( $lctext ) ) !== null ) {
 376                         return $ns;
 377                 }
 378                 $ids = $this->getNamespaceIds();
 379                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 380         }
 381
 382         /**
 383          * short names for language variants used for language conversion links.
 384          *
 385          * @param $code String
 386          * @return string
 387          */
 388         function getVariantname( $code ) {
 389                 return $this->getMessageFromDB( "variantname-$code" );
 390         }
 391
 392         function specialPage( $name ) {
 393                 $aliases = $this->getSpecialPageAliases();
 394                 if ( isset( $aliases[$name][0] ) ) {
 395                         $name = $aliases[$name][0];
 396                 }
 397                 return $this->getNsText( NS_SPECIAL ) . ':' . $name;
 398         }
 399
 400         function getQuickbarSettings() {
 401                 return array(
 402                         $this->getMessage( 'qbsettings-none' ),
 403                         $this->getMessage( 'qbsettings-fixedleft' ),
 404                         $this->getMessage( 'qbsettings-fixedright' ),
 405                         $this->getMessage( 'qbsettings-floatingleft' ),
 406                         $this->getMessage( 'qbsettings-floatingright' )
 407                 );
 408         }
 409
 410         function getMathNames() {
 411                 return self::$dataCache->getItem( $this->mCode, 'mathNames' );
 412         }
 413
 414         function getDatePreferences() {
 415                 return self::$dataCache->getItem( $this->mCode, 'datePreferences' );
 416         }
 417
 418         function getDateFormats() {
 419                 return self::$dataCache->getItem( $this->mCode, 'dateFormats' );
 420         }
 421
 422         function getDefaultDateFormat() {
 423                 $df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
 424                 if ( $df === 'dmy or mdy' ) {
 425                         global $wgAmericanDates;
 426                         return $wgAmericanDates ? 'mdy' : 'dmy';
 427                 } else {
 428                         return $df;
 429                 }
 430         }
 431
 432         function getDatePreferenceMigrationMap() {
 433                 return self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
 434         }
 435
 436         function getImageFile( $image ) {
 437                 return self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
 438         }
 439
 440         function getDefaultUserOptionOverrides() {
 441                 return self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
 442         }
 443
 444         function getExtraUserToggles() {
 445                 return self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
 446         }
 447
 448         function getUserToggle( $tog ) {
 449                 return $this->getMessageFromDB( "tog-$tog" );
 450         }
 451
 452         /**
 453          * Get language names, indexed by code.
 454          * If $customisedOnly is true, only returns codes with a messages file
 455          */
 456         public static function getLanguageNames( $customisedOnly = false ) {
 457                 global $wgLanguageNames, $wgExtraLanguageNames;
 458                 $allNames = $wgExtraLanguageNames + $wgLanguageNames;
 459                 if ( !$customisedOnly ) {
 460                         return $allNames;
 461                 }
 462
 463                 global $IP;
 464                 $names = array();
 465                 $dir = opendir( "$IP/languages/messages" );
 466                 while( false !== ( $file = readdir( $dir ) ) ) {
 467                         $code = self::getCodeFromFileName( $file, 'Messages' );
 468                         if ( $code && isset( $allNames[$code] ) ) {
 469                                 $names[$code] = $allNames[$code];
 470                         }
 471                 }
 472                 closedir( $dir );
 473                 return $names;
 474         }
 475
 476         /**
 477          * Get a message from the MediaWiki namespace.
 478          *
 479          * @param $msg String: message name
 480          * @return string
 481          */
 482         function getMessageFromDB( $msg ) {
 483                 return wfMsgExt( $msg, array( 'parsemag', 'language' => $this ) );
 484         }
 485
 486         function getLanguageName( $code ) {
 487                 $names = self::getLanguageNames();
 488                 if ( !array_key_exists( $code, $names ) ) {
 489                         return '';
 490                 }
 491                 return $names[$code];
 492         }
 493
 494         function getMonthName( $key ) {
 495                 return $this->getMessageFromDB( self::$mMonthMsgs[$key - 1] );
 496         }
 497
 498         function getMonthNameGen( $key ) {
 499                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key - 1] );
 500         }
 501
 502         function getMonthAbbreviation( $key ) {
 503                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key - 1] );
 504         }
 505
 506         function getWeekdayName( $key ) {
 507                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key - 1] );
 508         }
 509
 510         function getWeekdayAbbreviation( $key ) {
 511                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key - 1] );
 512         }
 513
 514         function getIranianCalendarMonthName( $key ) {
 515                 return $this->getMessageFromDB( self::$mIranianCalendarMonthMsgs[$key - 1] );
 516         }
 517
 518         function getHebrewCalendarMonthName( $key ) {
 519                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthMsgs[$key - 1] );
 520         }
 521
 522         function getHebrewCalendarMonthNameGen( $key ) {
 523                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthGenMsgs[$key - 1] );
 524         }
 525
 526         function getHijriCalendarMonthName( $key ) {
 527                 return $this->getMessageFromDB( self::$mHijriCalendarMonthMsgs[$key - 1] );
 528         }
 529
 530         /**
 531          * Used by date() and time() to adjust the time output.
 532          *
 533          * @param $ts Int the time in date('YmdHis') format
 534          * @param $tz Mixed: adjust the time by this amount (default false, mean we
 535          *            get user timecorrection setting)
 536          * @return int
 537          */
 538         function userAdjust( $ts, $tz = false ) {
 539                 global $wgUser, $wgLocalTZoffset;
 540
 541                 if ( $tz === false ) {
 542                         $tz = $wgUser->getOption( 'timecorrection' );
 543                 }
 544
 545                 $data = explode( '|', $tz, 3 );
 546
 547                 if ( $data[0] == 'ZoneInfo' ) {
 548                         if ( function_exists( 'timezone_open' ) && @timezone_open( $data[2] ) !== false ) {
 549                                 $date = date_create( $ts, timezone_open( 'UTC' ) );
 550                                 date_timezone_set( $date, timezone_open( $data[2] ) );
 551                                 $date = date_format( $date, 'YmdHis' );
 552                                 return $date;
 553                         }
 554                         # Unrecognized timezone, default to 'Offset' with the stored offset.
 555                         $data[0] = 'Offset';
 556                 }
 557
 558                 $minDiff = 0;
 559                 if ( $data[0] == 'System' || $tz == '' ) {
 560                         # Global offset in minutes.
 561                         if( isset( $wgLocalTZoffset ) ) {
 562                                 $minDiff = $wgLocalTZoffset;
 563                         }
 564                 } else if ( $data[0] == 'Offset' ) {
 565                         $minDiff = intval( $data[1] );
 566                 } else {
 567                         $data = explode( ':', $tz );
 568                         if( count( $data ) == 2 ) {
 569                                 $data[0] = intval( $data[0] );
 570                                 $data[1] = intval( $data[1] );
 571                                 $minDiff = abs( $data[0] ) * 60 + $data[1];
 572                                 if ( $data[0] < 0 ) {
 573                                         $minDiff = -$minDiff;
 574                                 }
 575                         } else {
 576                                 $minDiff = intval( $data[0] ) * 60;
 577                         }
 578                 }
 579
 580                 # No difference ? Return time unchanged
 581                 if ( 0 == $minDiff ) {
 582                         return $ts;
 583                 }
 584
 585                 wfSuppressWarnings(); // E_STRICT system time bitching
 586                 # Generate an adjusted date; take advantage of the fact that mktime
 587                 # will normalize out-of-range values so we don't have to split $minDiff
 588                 # into hours and minutes.
 589                 $t = mktime( (
 590                   (int)substr( $ts, 8, 2) ), # Hours
 591                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
 592                   (int)substr( $ts, 12, 2 ), # Seconds
 593                   (int)substr( $ts, 4, 2 ), # Month
 594                   (int)substr( $ts, 6, 2 ), # Day
 595                   (int)substr( $ts, 0, 4 ) ); #Year
 596
 597                 $date = date( 'YmdHis', $t );
 598                 wfRestoreWarnings();
 599
 600                 return $date;
 601         }
 602
 603         /**
 604          * This is a workalike of PHP's date() function, but with better
 605          * internationalisation, a reduced set of format characters, and a better
 606          * escaping format.
 607          *
 608          * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
 609          * PHP manual for definitions. "o" format character is supported since
 610          * PHP 5.1.0, previous versions return literal o.
 611          * There are a number of extensions, which start with "x":
 612          *
 613          *    xn   Do not translate digits of the next numeric format character
 614          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 615          *    xr   Use roman numerals for the next numeric format character
 616          *    xh   Use hebrew numerals for the next numeric format character
 617          *    xx   Literal x
 618          *    xg   Genitive month name
 619          *
 620          *    xij  j (day number) in Iranian calendar
 621          *    xiF  F (month name) in Iranian calendar
 622          *    xin  n (month number) in Iranian calendar
 623          *    xiY  Y (full year) in Iranian calendar
 624          *
 625          *    xjj  j (day number) in Hebrew calendar
 626          *    xjF  F (month name) in Hebrew calendar
 627          *    xjt  t (days in month) in Hebrew calendar
 628          *    xjx  xg (genitive month name) in Hebrew calendar
 629          *    xjn  n (month number) in Hebrew calendar
 630          *    xjY  Y (full year) in Hebrew calendar
 631          *
 632          *    xmj  j (day number) in Hijri calendar
 633          *    xmF  F (month name) in Hijri calendar
 634          *    xmn  n (month number) in Hijri calendar
 635          *    xmY  Y (full year) in Hijri calendar
 636          *
 637          *    xkY  Y (full year) in Thai solar calendar. Months and days are
 638          *                       identical to the Gregorian calendar
 639          *    xoY  Y (full year) in Minguo calendar or Juche year.
 640          *                       Months and days are identical to the
 641          *                       Gregorian calendar
 642          *    xtY  Y (full year) in Japanese nengo. Months and days are
 643          *                       identical to the Gregorian calendar
 644          *
 645          * Characters enclosed in double quotes will be considered literal (with
 646          * the quotes themselves removed). Unmatched quotes will be considered
 647          * literal quotes. Example:
 648          *
 649          * "The month is" F       => The month is January
 650          * i's"                   => 20'11"
 651          *
 652          * Backslash escaping is also supported.
 653          *
 654          * Input timestamp is assumed to be pre-normalized to the desired local
 655          * time zone, if any.
 656          *
 657          * @param $format String
 658          * @param $ts String: 14-character timestamp
 659          *      YYYYMMDDHHMMSS
 660          *      01234567890123
 661          * @todo emulation of "o" format character for PHP pre 5.1.0
 662          * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 663          */
 664         function sprintfDate( $format, $ts ) {
 665                 $s = '';
 666                 $raw = false;
 667                 $roman = false;
 668                 $hebrewNum = false;
 669                 $unix = false;
 670                 $rawToggle = false;
 671                 $iranian = false;
 672                 $hebrew = false;
 673                 $hijri = false;
 674                 $thai = false;
 675                 $minguo = false;
 676                 $tenno = false;
 677                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
 678                         $num = false;
 679                         $code = $format[$p];
 680                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
 681                                 $code .= $format[++$p];
 682                         }
 683
 684                         if ( ( $code === 'xi' || $code == 'xj' || $code == 'xk' || $code == 'xm' || $code == 'xo' || $code == 'xt' ) && $p < strlen( $format ) - 1 ) {
 685                                 $code .= $format[++$p];
 686                         }
 687
 688                         switch ( $code ) {
 689                                 case 'xx':
 690                                         $s .= 'x';
 691                                         break;
 692                                 case 'xn':
 693                                         $raw = true;
 694                                         break;
 695                                 case 'xN':
 696                                         $rawToggle = !$rawToggle;
 697                                         break;
 698                                 case 'xr':
 699                                         $roman = true;
 700                                         break;
 701                                 case 'xh':
 702                                         $hebrewNum = true;
 703                                         break;
 704                                 case 'xg':
 705                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
 706                                         break;
 707                                 case 'xjx':
 708                                         if ( !$hebrew ) $hebrew = self::tsToHebrew( $ts );
 709                                         $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
 710                                         break;
 711                                 case 'd':
 712                                         $num = substr( $ts, 6, 2 );
 713                                         break;
 714                                 case 'D':
 715                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 716                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
 717                                         break;
 718                                 case 'j':
 719                                         $num = intval( substr( $ts, 6, 2 ) );
 720                                         break;
 721                                 case 'xij':
 722                                         if ( !$iranian ) {
 723                                                 $iranian = self::tsToIranian( $ts );
 724                                         }
 725                                         $num = $iranian[2];
 726                                         break;
 727                                 case 'xmj':
 728                                         if ( !$hijri ) {
 729                                                 $hijri = self::tsToHijri( $ts );
 730                                         }
 731                                         $num = $hijri[2];
 732                                         break;
 733                                 case 'xjj':
 734                                         if ( !$hebrew ) {
 735                                                 $hebrew = self::tsToHebrew( $ts );
 736                                         }
 737                                         $num = $hebrew[2];
 738                                         break;
 739                                 case 'l':
 740                                         if ( !$unix ) {
 741                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 742                                         }
 743                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
 744                                         break;
 745                                 case 'N':
 746                                         if ( !$unix ) {
 747                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 748                                         }
 749                                         $w = gmdate( 'w', $unix );
 750                                         $num = $w ? $w : 7;
 751                                         break;
 752                                 case 'w':
 753                                         if ( !$unix ) {
 754                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 755                                         }
 756                                         $num = gmdate( 'w', $unix );
 757                                         break;
 758                                 case 'z':
 759                                         if ( !$unix ) {
 760                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 761                                         }
 762                                         $num = gmdate( 'z', $unix );
 763                                         break;
 764                                 case 'W':
 765                                         if ( !$unix ) {
 766                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 767                                         }
 768                                         $num = gmdate( 'W', $unix );
 769                                         break;
 770                                 case 'F':
 771                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
 772                                         break;
 773                                 case 'xiF':
 774                                         if ( !$iranian ) {
 775                                                 $iranian = self::tsToIranian( $ts );
 776                                         }
 777                                         $s .= $this->getIranianCalendarMonthName( $iranian[1] );
 778                                         break;
 779                                 case 'xmF':
 780                                         if ( !$hijri ) {
 781                                                 $hijri = self::tsToHijri( $ts );
 782                                         }
 783                                         $s .= $this->getHijriCalendarMonthName( $hijri[1] );
 784                                         break;
 785                                 case 'xjF':
 786                                         if ( !$hebrew ) {
 787                                                 $hebrew = self::tsToHebrew( $ts );
 788                                         }
 789                                         $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
 790                                         break;
 791                                 case 'm':
 792                                         $num = substr( $ts, 4, 2 );
 793                                         break;
 794                                 case 'M':
 795                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
 796                                         break;
 797                                 case 'n':
 798                                         $num = intval( substr( $ts, 4, 2 ) );
 799                                         break;
 800                                 case 'xin':
 801                                         if ( !$iranian ) {
 802                                                 $iranian = self::tsToIranian( $ts );
 803                                         }
 804                                         $num = $iranian[1];
 805                                         break;
 806                                 case 'xmn':
 807                                         if ( !$hijri ) {
 808                                                 $hijri = self::tsToHijri ( $ts );
 809                                         }
 810                                         $num = $hijri[1];
 811                                         break;
 812                                 case 'xjn':
 813                                         if ( !$hebrew ) {
 814                                                 $hebrew = self::tsToHebrew( $ts );
 815                                         }
 816                                         $num = $hebrew[1];
 817                                         break;
 818                                 case 't':
 819                                         if ( !$unix ) {
 820                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 821                                         }
 822                                         $num = gmdate( 't', $unix );
 823                                         break;
 824                                 case 'xjt':
 825                                         if ( !$hebrew ) {
 826                                                 $hebrew = self::tsToHebrew( $ts );
 827                                         }
 828                                         $num = $hebrew[3];
 829                                         break;
 830                                 case 'L':
 831                                         if ( !$unix ) {
 832                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 833                                         }
 834                                         $num = gmdate( 'L', $unix );
 835                                         break;
 836                                 # 'o' is supported since PHP 5.1.0
 837                                 # return literal if not supported
 838                                 # TODO: emulation for pre 5.1.0 versions
 839                                 case 'o':
 840                                         if ( !$unix ) {
 841                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 842                                         }
 843                                         if ( version_compare( PHP_VERSION, '5.1.0' ) === 1 ) {
 844                                                 $num = date( 'o', $unix );
 845                                         } else {
 846                                                 $s .= 'o';
 847                                         }
 848                                         break;
 849                                 case 'Y':
 850                                         $num = substr( $ts, 0, 4 );
 851                                         break;
 852                                 case 'xiY':
 853                                         if ( !$iranian ) {
 854                                                 $iranian = self::tsToIranian( $ts );
 855                                         }
 856                                         $num = $iranian[0];
 857                                         break;
 858                                 case 'xmY':
 859                                         if ( !$hijri ) {
 860                                                 $hijri = self::tsToHijri( $ts );
 861                                         }
 862                                         $num = $hijri[0];
 863                                         break;
 864                                 case 'xjY':
 865                                         if ( !$hebrew ) {
 866                                                 $hebrew = self::tsToHebrew( $ts );
 867                                         }
 868                                         $num = $hebrew[0];
 869                                         break;
 870                                 case 'xkY':
 871                                         if ( !$thai ) {
 872                                                 $thai = self::tsToYear( $ts, 'thai' );
 873                                         }
 874                                         $num = $thai[0];
 875                                         break;
 876                                 case 'xoY':
 877                                         if ( !$minguo ) {
 878                                                 $minguo = self::tsToYear( $ts, 'minguo' );
 879                                         }
 880                                         $num = $minguo[0];
 881                                         break;
 882                                 case 'xtY':
 883                                         if ( !$tenno ) {
 884                                                 $tenno = self::tsToYear( $ts, 'tenno' );
 885                                         }
 886                                         $num = $tenno[0];
 887                                         break;
 888                                 case 'y':
 889                                         $num = substr( $ts, 2, 2 );
 890                                         break;
 891                                 case 'a':
 892                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
 893                                         break;
 894                                 case 'A':
 895                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
 896                                         break;
 897                                 case 'g':
 898                                         $h = substr( $ts, 8, 2 );
 899                                         $num = $h % 12 ? $h % 12 : 12;
 900                                         break;
 901                                 case 'G':
 902                                         $num = intval( substr( $ts, 8, 2 ) );
 903                                         break;
 904                                 case 'h':
 905                                         $h = substr( $ts, 8, 2 );
 906                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
 907                                         break;
 908                                 case 'H':
 909                                         $num = substr( $ts, 8, 2 );
 910                                         break;
 911                                 case 'i':
 912                                         $num = substr( $ts, 10, 2 );
 913                                         break;
 914                                 case 's':
 915                                         $num = substr( $ts, 12, 2 );
 916                                         break;
 917                                 case 'c':
 918                                         if ( !$unix ) {
 919                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 920                                         }
 921                                         $s .= gmdate( 'c', $unix );
 922                                         break;
 923                                 case 'r':
 924                                         if ( !$unix ) {
 925                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 926                                         }
 927                                         $s .= gmdate( 'r', $unix );
 928                                         break;
 929                                 case 'U':
 930                                         if ( !$unix ) {
 931                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 932                                         }
 933                                         $num = $unix;
 934                                         break;
 935                                 case '\\':
 936                                         # Backslash escaping
 937                                         if ( $p < strlen( $format ) - 1 ) {
 938                                                 $s .= $format[++$p];
 939                                         } else {
 940                                                 $s .= '\\';
 941                                         }
 942                                         break;
 943                                 case '"':
 944                                         # Quoted literal
 945                                         if ( $p < strlen( $format ) - 1 ) {
 946                                                 $endQuote = strpos( $format, '"', $p + 1 );
 947                                                 if ( $endQuote === false ) {
 948                                                         # No terminating quote, assume literal "
 949                                                         $s .= '"';
 950                                                 } else {
 951                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
 952                                                         $p = $endQuote;
 953                                                 }
 954                                         } else {
 955                                                 # Quote at end of string, assume literal "
 956                                                 $s .= '"';
 957                                         }
 958                                         break;
 959                                 default:
 960                                         $s .= $format[$p];
 961                         }
 962                         if ( $num !== false ) {
 963                                 if ( $rawToggle || $raw ) {
 964                                         $s .= $num;
 965                                         $raw = false;
 966                                 } elseif ( $roman ) {
 967                                         $s .= self::romanNumeral( $num );
 968                                         $roman = false;
 969                                 } elseif( $hebrewNum ) {
 970                                         $s .= self::hebrewNumeral( $num );
 971                                         $hebrewNum = false;
 972                                 } else {
 973                                         $s .= $this->formatNum( $num, true );
 974                                 }
 975                                 $num = false;
 976                         }
 977                 }
 978                 return $s;
 979         }
 980
 981         private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
 982         private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
 983         /**
 984          * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 985          * Gregorian dates to Iranian dates. Originally written in C, it
 986          * is released under the terms of GNU Lesser General Public
 987          * License. Conversion to PHP was performed by Niklas Laxström.
 988          *
 989          * Link: http://www.farsiweb.info/jalali/jalali.c
 990          */
 991         private static function tsToIranian( $ts ) {
 992                 $gy = substr( $ts, 0, 4 ) -1600;
 993                 $gm = substr( $ts, 4, 2 ) -1;
 994                 $gd = substr( $ts, 6, 2 ) -1;
 995
 996                 # Days passed from the beginning (including leap years)
 997                 $gDayNo = 365 * $gy
 998                         + floor( ( $gy + 3 ) / 4 )
 999                         - floor( ( $gy + 99 ) / 100 )
1000                         + floor( ( $gy + 399 ) / 400 );
1001
1002
1003                 // Add days of the past months of this year
1004                 for( $i = 0; $i < $gm; $i++ ) {
1005                         $gDayNo += self::$GREG_DAYS[$i];
1006                 }
1007
1008                 // Leap years
1009                 if ( $gm > 1 && ( ( $gy%4 === 0 && $gy%100 !== 0 || ( $gy%400 == 0 ) ) ) ) {
1010                         $gDayNo++;
1011                 }
1012
1013                 // Days passed in current month
1014                 $gDayNo += $gd;
1015
1016                 $jDayNo = $gDayNo - 79;
1017
1018                 $jNp = floor( $jDayNo / 12053 );
1019                 $jDayNo %= 12053;
1020
1021                 $jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
1022                 $jDayNo %= 1461;
1023
1024                 if ( $jDayNo >= 366 ) {
1025                         $jy += floor( ( $jDayNo - 1 ) / 365 );
1026                         $jDayNo = floor( ( $jDayNo - 1 ) % 365 );
1027                 }
1028
1029                 for ( $i = 0; $i < 11 && $jDayNo >= self::$IRANIAN_DAYS[$i]; $i++ ) {
1030                         $jDayNo -= self::$IRANIAN_DAYS[$i];
1031                 }
1032
1033                 $jm = $i + 1;
1034                 $jd = $jDayNo + 1;
1035
1036                 return array( $jy, $jm, $jd );
1037         }
1038
1039         /**
1040          * Converting Gregorian dates to Hijri dates.
1041          *
1042          * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1043          *
1044          * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1045          */
1046         private static function tsToHijri( $ts ) {
1047                 $year = substr( $ts, 0, 4 );
1048                 $month = substr( $ts, 4, 2 );
1049                 $day = substr( $ts, 6, 2 );
1050
1051                 $zyr = $year;
1052                 $zd = $day;
1053                 $zm = $month;
1054                 $zy = $zyr;
1055
1056                 if (
1057                         ( $zy > 1582 ) || ( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1058                         ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1059                 )
1060                 {
1061                         $zjd = (int)( ( 1461 * ( $zy + 4800 + (int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1062                                         (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1063                                         (int)( ( 3 * (int)( ( ( $zy + 4900 + (int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1064                                         $zd - 32075;
1065                 } else {
1066                         $zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1067                                                                 (int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
1068                 }
1069
1070                 $zl = $zjd-1948440 + 10632;
1071                 $zn = (int)( ( $zl - 1 ) / 10631 );
1072                 $zl = $zl - 10631 * $zn + 354;
1073                 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ((int)( ( 50 * $zl ) / 17719 ) ) + ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1074                 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ((int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;
1075                 $zm = (int)( ( 24 * $zl ) / 709 );
1076                 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1077                 $zy = 30 * $zn + $zj - 30;
1078
1079                 return array( $zy, $zm, $zd );
1080         }
1081
1082         /**
1083          * Converting Gregorian dates to Hebrew dates.
1084          *
1085          * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1086          * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1087          * to translate the relevant functions into PHP and release them under
1088          * GNU GPL.
1089          *
1090          * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1091          * and Adar II is 14. In a non-leap year, Adar is 6.
1092          */
1093         private static function tsToHebrew( $ts ) {
1094                 # Parse date
1095                 $year = substr( $ts, 0, 4 );
1096                 $month = substr( $ts, 4, 2 );
1097                 $day = substr( $ts, 6, 2 );
1098
1099                 # Calculate Hebrew year
1100                 $hebrewYear = $year + 3760;
1101
1102                 # Month number when September = 1, August = 12
1103                 $month += 4;
1104                 if( $month > 12 ) {
1105                         # Next year
1106                         $month -= 12;
1107                         $year++;
1108                         $hebrewYear++;
1109                 }
1110
1111                 # Calculate day of year from 1 September
1112                 $dayOfYear = $day;
1113                 for( $i = 1; $i < $month; $i++ ) {
1114                         if( $i == 6 ) {
1115                                 # February
1116                                 $dayOfYear += 28;
1117                                 # Check if the year is leap
1118                                 if( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
1119                                         $dayOfYear++;
1120                                 }
1121                         } elseif( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
1122                                 $dayOfYear += 30;
1123                         } else {
1124                                 $dayOfYear += 31;
1125                         }
1126                 }
1127
1128                 # Calculate the start of the Hebrew year
1129                 $start = self::hebrewYearStart( $hebrewYear );
1130
1131                 # Calculate next year's start
1132                 if( $dayOfYear <= $start ) {
1133                         # Day is before the start of the year - it is the previous year
1134                         # Next year's start
1135                         $nextStart = $start;
1136                         # Previous year
1137                         $year--;
1138                         $hebrewYear--;
1139                         # Add days since previous year's 1 September
1140                         $dayOfYear += 365;
1141                         if( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1142                                 # Leap year
1143                                 $dayOfYear++;
1144                         }
1145                         # Start of the new (previous) year
1146                         $start = self::hebrewYearStart( $hebrewYear );
1147                 } else {
1148                         # Next year's start
1149                         $nextStart = self::hebrewYearStart( $hebrewYear + 1 );
1150                 }
1151
1152                 # Calculate Hebrew day of year
1153                 $hebrewDayOfYear = $dayOfYear - $start;
1154
1155                 # Difference between year's days
1156                 $diff = $nextStart - $start;
1157                 # Add 12 (or 13 for leap years) days to ignore the difference between
1158                 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1159                 # difference is only about the year type
1160                 if( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1161                         $diff += 13;
1162                 } else {
1163                         $diff += 12;
1164                 }
1165
1166                 # Check the year pattern, and is leap year
1167                 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1168                 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1169                 # and non-leap years
1170                 $yearPattern = $diff % 30;
1171                 # Check if leap year
1172                 $isLeap = $diff >= 30;
1173
1174                 # Calculate day in the month from number of day in the Hebrew year
1175                 # Don't check Adar - if the day is not in Adar, we will stop before;
1176                 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1177                 $hebrewDay = $hebrewDayOfYear;
1178                 $hebrewMonth = 1;
1179                 $days = 0;
1180                 while( $hebrewMonth <= 12 ) {
1181                         # Calculate days in this month
1182                         if( $isLeap && $hebrewMonth == 6 ) {
1183                                 # Adar in a leap year
1184                                 if( $isLeap ) {
1185                                         # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1186                                         $days = 30;
1187                                         if( $hebrewDay <= $days ) {
1188                                                 # Day in Adar I
1189                                                 $hebrewMonth = 13;
1190                                         } else {
1191                                                 # Subtract the days of Adar I
1192                                                 $hebrewDay -= $days;
1193                                                 # Try Adar II
1194                                                 $days = 29;
1195                                                 if( $hebrewDay <= $days ) {
1196                                                         # Day in Adar II
1197                                                         $hebrewMonth = 14;
1198                                                 }
1199                                         }
1200                                 }
1201                         } elseif( $hebrewMonth == 2 && $yearPattern == 2 ) {
1202                                 # Cheshvan in a complete year (otherwise as the rule below)
1203                                 $days = 30;
1204                         } elseif( $hebrewMonth == 3 && $yearPattern == 0 ) {
1205                                 # Kislev in an incomplete year (otherwise as the rule below)
1206                                 $days = 29;
1207                         } else {
1208                                 # Odd months have 30 days, even have 29
1209                                 $days = 30 - ( $hebrewMonth - 1 ) % 2;
1210                         }
1211                         if( $hebrewDay <= $days ) {
1212                                 # In the current month
1213                                 break;
1214                         } else {
1215                                 # Subtract the days of the current month
1216                                 $hebrewDay -= $days;
1217                                 # Try in the next month
1218                                 $hebrewMonth++;
1219                         }
1220                 }
1221
1222                 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1223         }
1224
1225         /**
1226          * This calculates the Hebrew year start, as days since 1 September.
1227          * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1228          * Used for Hebrew date.
1229          */
1230         private static function hebrewYearStart( $year ) {
1231                 $a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
1232                 $b = intval( ( $year - 1 ) % 4 );
1233                 $m = 32.044093161144 + 1.5542417966212 * $a +  $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1234                 if( $m < 0 ) {
1235                         $m--;
1236                 }
1237                 $Mar = intval( $m );
1238                 if( $m < 0 ) {
1239                         $m++;
1240                 }
1241                 $m -= $Mar;
1242
1243                 $c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7);
1244                 if( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1245                         $Mar++;
1246                 } else if( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1247                         $Mar += 2;
1248                 } else if( $c == 2 || $c == 4 || $c == 6 ) {
1249                         $Mar++;
1250                 }
1251
1252                 $Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1253                 return $Mar;
1254         }
1255
1256         /**
1257          * Algorithm to convert Gregorian dates to Thai solar dates,
1258          * Minguo dates or Japanese nengō dates.
1259          *
1260          * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1261          *       http://en.wikipedia.org/wiki/Minguo_calendar
1262          *       http://en.wikipedia.org/wiki/Japanese_era_name
1263          *
1264          * @param $ts String: 14-character timestamp
1265          * @param $cName String: calendar name
1266          * @return Array: converted year, month, day
1267          */
1268         private static function tsToYear( $ts, $cName ) {
1269                 $gy = substr( $ts, 0, 4 );
1270                 $gm = substr( $ts, 4, 2 );
1271                 $gd = substr( $ts, 6, 2 );
1272
1273                 if ( !strcmp( $cName, 'thai' ) ) {
1274                         # Thai solar dates
1275                         # Add 543 years to the Gregorian calendar
1276                         # Months and days are identical
1277                         $gy_offset = $gy + 543;
1278                 } else if ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
1279                         # Minguo dates
1280                         # Deduct 1911 years from the Gregorian calendar
1281                         # Months and days are identical
1282                         $gy_offset = $gy - 1911;
1283                 } else if ( !strcmp( $cName, 'tenno' ) ) {
1284                         # Nengō dates up to Meiji period
1285                         # Deduct years from the Gregorian calendar
1286                         # depending on the nengo periods
1287                         # Months and days are identical
1288                         if ( ( $gy < 1912 ) || ( ( $gy == 1912 ) && ( $gm < 7 ) ) || ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1289                                 # Meiji period
1290                                 $gy_gannen = $gy - 1868 + 1;
1291                                 $gy_offset = $gy_gannen;
1292                                 if ( $gy_gannen == 1 ) {
1293                                         $gy_offset = '元';
1294                                 }
1295                                 $gy_offset = '明治' . $gy_offset;
1296                         } else if (
1297                                 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1298                                 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1299                                 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1300                                 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1301                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1302                         )
1303                         {
1304                                 # Taishō period
1305                                 $gy_gannen = $gy - 1912 + 1;
1306                                 $gy_offset = $gy_gannen;
1307                                 if ( $gy_gannen == 1 ) {
1308                                         $gy_offset = '元';
1309                                 }
1310                                 $gy_offset = '大正' . $gy_offset;
1311                         } else if (
1312                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1313                                 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1314                                 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1315                         )
1316                         {
1317                                 # Shōwa period
1318                                 $gy_gannen = $gy - 1926 + 1;
1319                                 $gy_offset = $gy_gannen;
1320                                 if ( $gy_gannen == 1 ) {
1321                                         $gy_offset = '元';
1322                                 }
1323                                 $gy_offset = '昭和' . $gy_offset;
1324                         } else {
1325                                 # Heisei period
1326                                 $gy_gannen = $gy - 1989 + 1;
1327                                 $gy_offset = $gy_gannen;
1328                                 if ( $gy_gannen == 1 ) {
1329                                         $gy_offset = '元';
1330                                 }
1331                                 $gy_offset = '平成' . $gy_offset;
1332                         }
1333                 } else {
1334                         $gy_offset = $gy;
1335                 }
1336
1337                 return array( $gy_offset, $gm, $gd );
1338         }
1339
/**
 * Format a number as a Roman numeral.
 *
 * @param $num Mixed: number to format; it is cast with intval()
 * @return Mixed: the Roman numeral as a string, or the integer itself
 *         when it lies outside the range 1..3000
 */
static function romanNumeral( $num ) {
        # One row per decimal place (units, tens, hundreds, thousands),
        # indexed by the digit found at that place
        static $table = array(
                array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
                array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
                array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
                array( '', 'M', 'MM', 'MMM' )
        );

        $num = intval( $num );
        if ( $num <= 0 || $num > 3000 ) {
                return $num;
        }

        # Walk from the thousands down to the units, emitting the symbol
        # for each digit and keeping the remainder for the next place
        $roman = '';
        foreach ( array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 ) as $row => $place ) {
                if ( $num >= $place ) {
                        $roman .= $table[$row][(int)( $num / $place )];
                }
                $num %= $place;
        }
        return $roman;
}
1365
/**
 * Hebrew Gematria number formatting up to 9999.
 *
 * The thousands letter is followed by a geresh ('). A result made of a
 * single letter gets a trailing geresh; longer results get a gershayim
 * (") inserted before the last letter, and that last letter is replaced
 * by its final form where one exists.
 *
 * @param $num Mixed: number to format; it is cast with intval()
 * @return Mixed: the Hebrew numeral as a string, or the integer itself
 *         when it lies outside the range 1..9999
 */
static function hebrewNumeral( $num ) {
        # One row per decimal place (units, tens, hundreds, thousands),
        # indexed by the digit found at that place
        static $table = array(
                array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
                array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
                array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
                array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
        );

        $num = intval( $num );
        if ( $num > 9999 || $num <= 0 ) {
                return $num;
        }

        # Round thousands consist of the thousands letter and its geresh
        # only. The byte-based punctuation code below expects the string to
        # end in a two-byte letter; applied to "letter + geresh" it would
        # split the letter in the middle and return invalid UTF-8.
        if ( $num % 1000 == 0 ) {
                return $table[3][intval( $num / 1000 )] . "'";
        }

        $s = '';
        for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
                if ( $num >= $pow10 ) {
                        if ( $num == 15 || $num == 16 ) {
                                # 15 and 16 are written as 9+6 and 9+7
                                # rather than 10+5 and 10+6
                                $s .= $table[0][9] . $table[0][$num - 9];
                                $num = 0;
                        } else {
                                $s .= $table[$i][intval( ( $num / $pow10 ) )];
                                if( $pow10 == 1000 ) {
                                        # Geresh marks the thousands letter
                                        $s .= "'";
                                }
                        }
                }
                $num = $num % $pow10;
        }

        # strlen()/substr() count bytes; each Hebrew letter above is two
        # bytes long, so "2" below means "one letter"
        if( strlen( $s ) == 2 ) {
                # Single letter: trailing geresh
                $str = $s . "'";
        } else  {
                # Several letters: gershayim before the last letter
                $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
                $str .= substr( $s, strlen( $s ) - 2, 2 );
        }

        # Swap the last letter for its final form where one exists
        $start = substr( $str, 0, strlen( $str ) - 2 );
        $end = substr( $str, strlen( $str ) - 2 );
        switch( $end ) {
                case 'כ':
                        $str = $start . 'ך';
                        break;
                case 'מ':
                        $str = $start . 'ם';
                        break;
                case 'נ':
                        $str = $start . 'ן';
                        break;
                case 'פ':
                        $str = $start . 'ף';
                        break;
                case 'צ':
                        $str = $start . 'ץ';
                        break;
        }
        return $str;
}
1424
/**
 * Work out which date preference time(), date() and timeanddate()
 * should use. Overriding implementations in child classes are expected
 * to call this as well:
 *
 *<code>
 * function timeanddate([...], $format = true) {
 *      $datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return string: the preference name, or 'default' when the
 *         preference compares equal to the empty string
 */
function dateFormat( $usePrefs = true ) {
        global $wgUser;

        if ( !is_bool( $usePrefs ) ) {
                # An explicit format was passed in
                $preference = (string)$usePrefs;
        } elseif ( $usePrefs ) {
                $preference = $wgUser->getDatePreference();
        } else {
                $preference = (string)User::getDefaultOption( 'date' );
        }

        return ( $preference == '' ) ? 'default' : $preference;
}
1462
/**
 * Get a format string for a given type and preference.
 *
 * The preference 'default', and any preference for which the data
 * cache has no "$pref $type" entry, is replaced by the result of
 * getDefaultDateFormat(). The result is remembered in
 * $this->dateFormatStrings under the preference that was finally used.
 *
 * @param $type May be date, time or both
 * @param $pref The format name as it appears in Messages*.php
 * @return the 'dateFormats' entry found for that type and preference
 */
function getDateFormatString( $type, $pref ) {
        if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
                return $this->dateFormatStrings[$type][$pref];
        }

        $formatString = null;
        if ( $pref != 'default' ) {
                $formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
        }

        if ( $pref == 'default' || is_null( $formatString ) ) {
                # Fall back to the language's default format
                $pref = $this->getDefaultDateFormat();
                $formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
        }

        $this->dateFormatStrings[$type][$pref] = $formatString;
        return $this->dateFormatStrings[$type][$pref];
}
1484
/**
 * Format the date part of a timestamp with this language's 'date'
 * format string.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
        # Normalise the input to TS_MW first, the same way timeanddate()
        # does, so that every timestamp format accepted by wfTimestamp()
        # works here too and not only 14-character strings
        $ts = wfTimestamp( TS_MW, $ts );
        if ( $adj ) {
                $ts = $this->userAdjust( $ts, $timecorrection );
        }
        $df = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );
        return $this->sprintfDate( $df, $ts );
}
1502
/**
 * Format the time part of a timestamp with this language's 'time'
 * format string.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
        # Normalise the input to TS_MW first, the same way timeanddate()
        # does, so that every timestamp format accepted by wfTimestamp()
        # works here too and not only 14-character strings
        $ts = wfTimestamp( TS_MW, $ts );
        if ( $adj ) {
                $ts = $this->userAdjust( $ts, $timecorrection );
        }
        $df = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );
        return $this->sprintfDate( $df, $ts );
}
1520
/**
 * Format both the date and the time of a timestamp with this
 * language's 'both' format string.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false) {
        $stamp = wfTimestamp( TS_MW, $ts );
        if ( $adj ) {
                $stamp = $this->userAdjust( $stamp, $timecorrection );
        }
        return $this->sprintfDate(
                $this->getDateFormatString( 'both', $this->dateFormat( $format ) ),
                $stamp
        );
}
1540
/**
 * Look up $key in the 'messages' item of the data cache for this
 * language code.
 *
 * @param $key the sub-item to fetch
 * @return whatever the data cache holds for that key
 */
function getMessage( $key ) {
        $message = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
        return $message;
}
1544
/**
 * Fetch the complete 'messages' item of the data cache for this
 * language code.
 *
 * @return whatever the data cache holds for 'messages'
 */
function getAllMessages() {
        $messages = self::$dataCache->getItem( $this->mCode, 'messages' );
        return $messages;
}
1548
/**
 * Wrapper for PHP's iconv() in all languages except esperanto,
 * which does some nasty x-conversions beforehand.
 *
 * Even with //IGNORE iconv can whine about illegal characters in
 * the *input* string, so warnings are suppressed around the call.
 * REF: http://bugs.php.net/bug.php?id=37166
 * REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
 *
 * @param $in String: source encoding
 * @param $out String: target encoding; '//IGNORE' is appended to it
 * @param $string String: text to convert
 * @return the result of iconv()
 */
function iconv( $in, $out, $string ) {
        $target = $out . '//IGNORE';

        wfSuppressWarnings();
        $converted = iconv( $in, $target, $string );
        wfRestoreWarnings();

        return $converted;
}
1562
1563         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
/**
 * preg_replace_callback() handler used by ucwordbreaks() for strings
 * without multibyte characters: upper-cases the first letter of the
 * first capture group.
 *
 * @param $matches Array: regex matches
 * @return string
 */
function ucwordbreaksCallbackAscii( $matches ) {
        $word = $matches[1];
        return $this->ucfirst( $word );
}
1567
/**
 * preg_replace_callback() handler used by ucwordbreaks() when the
 * mbstring functions are available: upper-cases the whole match.
 *
 * @param $matches Array: regex matches
 * @return string
 */
function ucwordbreaksCallbackMB( $matches ) {
        $matched = $matches[0];
        return mb_strtoupper( $matched );
}
1571
/**
 * preg_replace_callback() handler used by uc() when mbstring is not
 * available: translates the first capture group with the first of
 * the maps returned by getCaseMaps().
 *
 * @param $matches Array: regex matches
 * @return string
 */
function ucCallback( $matches ) {
        $caseMaps = self::getCaseMaps();
        return strtr( $matches[1], $caseMaps[0] );
}
1576
/**
 * preg_replace_callback() handler used by lc() when mbstring is not
 * available: translates the first capture group with the second of
 * the maps returned by getCaseMaps().
 *
 * @param $matches Array: regex matches
 * @return string
 */
function lcCallback( $matches ) {
        $caseMaps = self::getCaseMaps();
        return strtr( $matches[1], $caseMaps[1] );
}
1581
/**
 * preg_replace_callback() handler used by ucwords() when the mbstring
 * functions are available: upper-cases the whole match.
 *
 * @param $matches Array: regex matches
 * @return string
 */
function ucwordsCallbackMB( $matches ) {
        $matched = $matches[0];
        return mb_strtoupper( $matched );
}
1585
/**
 * preg_replace_callback() handler used by ucwords() and ucwordbreaks()
 * when mbstring is not available: translates the whole match with the
 * first of the maps returned by getCaseMaps().
 *
 * @param $matches Array: regex matches
 * @return string
 */
function ucwordsCallbackWiki( $matches ) {
        $caseMaps = self::getCaseMaps();
        return strtr( $matches[0], $caseMaps[0] );
}
1590
/**
 * Upper-case the first character of a string, choosing the cheapest
 * method that works for its first byte.
 *
 * @param $str String
 * @return string
 */
function ucfirst( $str ) {
        $firstByte = ord( $str );

        if ( $firstByte >= 128 ) {
                // fall back to more complex logic in case of multibyte strings
                return self::uc( $str, true );
        }
        if ( $firstByte >= 96 ) {
                // first byte is in the range holding the ASCII lower-case letters
                return ucfirst( $str );
        }
        // any lower byte value is returned untouched
        return $str;
}
1602
/**
 * Upper-case a string, or only its first character.
 *
 * Strings without bytes >= 0x80 go through PHP's ucfirst() or
 * strtoupper(). Other strings use the mbstring functions when they
 * exist, and a regex plus ucCallback() otherwise.
 *
 * @param $str String
 * @param $first Bool: true to change only the first character
 * @return string
 */
function uc( $str, $first = false ) {
        if ( !self::isMultibyte( $str ) ) {
                return $first ? ucfirst( $str ) : strtoupper( $str );
        }

        if ( function_exists( 'mb_strtoupper' ) ) {
                if ( !$first ) {
                        return mb_strtoupper( $str );
                }
                return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
        }

        # No mbstring: load the case maps up front; ucCallback() fetches
        # them again for every match
        $this->getCaseMaps();
        $anchor = $first ? '^' : '';
        return preg_replace_callback(
                "/$anchor([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
                array( $this, 'ucCallback' ),
                $str
        );
}
1628
/**
 * Lower-case the first character of a string, choosing the cheapest
 * method that works for its first byte.
 *
 * @param $str String
 * @return string
 */
function lcfirst( $str ) {
        $firstByte = ord( $str );

        if ( !$firstByte ) {
                // first byte is zero, which is also what ord() gives for ''
                return strval( $str );
        }
        if ( $firstByte >= 128 ) {
                // multibyte first character
                return self::lc( $str, true );
        }
        if ( $firstByte > 96 ) {
                // byte values 97..127 are returned untouched
                return $str;
        }

        $str[0] = strtolower( $str[0] );
        return $str;
}
1642
/**
 * Lower-case a string, or only its first character.
 *
 * Strings without bytes >= 0x80 go through PHP's strtolower(). Other
 * strings use the mbstring functions when they exist, and a regex plus
 * lcCallback() otherwise.
 *
 * @param $str String
 * @param $first Bool: true to change only the first character
 * @return string
 */
function lc( $str, $first = false ) {
        if ( !self::isMultibyte( $str ) ) {
                if ( !$first ) {
                        return strtolower( $str );
                }
                return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
        }

        if ( function_exists( 'mb_strtolower' ) ) {
                if ( !$first ) {
                        return mb_strtolower( $str );
                }
                return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
        }

        # No mbstring: load the case maps up front; lcCallback() fetches
        # them again for every match
        self::getCaseMaps();
        $anchor = $first ? '^' : '';
        return preg_replace_callback(
                "/$anchor([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
                array( $this, 'lcCallback' ),
                $str
        );
}
1668
/**
 * Tell whether a string contains at least one byte in the range
 * 0x80-0xff.
 *
 * @param $str String
 * @return bool
 */
function isMultibyte( $str ) {
        $found = preg_match( '/[\x80-\xff]/', $str );
        return $found == 1;
}
1672
/**
 * Lower-case a string, then capitalise the first letter of each word,
 * a word being whatever starts the string or follows a space.
 *
 * @param $str String
 * @return string
 */
function ucwords( $str ) {
        if ( !self::isMultibyte( $str ) ) {
                return ucwords( strtolower( $str ) );
        }

        $str = self::lc( $str );

        // one ASCII lower-case letter, or a lead byte with its trail bytes
        $letter = "([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)";

        // regexp to find first letter in each word (i.e. after each space)
        $replaceRegexp = "/^$letter| $letter/";

        // function to use to capitalize a single char
        $callback = function_exists( 'mb_strtoupper' )
                ? 'ucwordsCallbackMB'
                : 'ucwordsCallbackWiki';

        return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
1698
/**
 * Capitalize words at word breaks.
 *
 * Strings without bytes >= 0x80 are handled with the regex \b
 * assertion and are not lower-cased first. Other strings are
 * lower-cased, then the first letter after the start of the string
 * or after one of a fixed set of break characters is upper-cased.
 *
 * @param $str String
 * @return string
 */
function ucwordbreaks( $str ) {
        if ( !self::isMultibyte( $str ) ) {
                return preg_replace_callback(
                        '/\b([\w\x80-\xff]+)\b/',
                        array( $this, 'ucwordbreaksCallbackAscii' ),
                        $str
                );
        }

        $str = self::lc( $str );

        // since \b doesn't work for UTF-8, we explicitly define word break chars
        $breaks = "[ \-\(\)\}\{\.,\?!]";

        // one ASCII lower-case letter, or a lead byte with its trail bytes
        $letter = "([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)";

        // find first letter after word break
        $replaceRegexp = "/^$letter|$breaks$letter/";

        $callback = function_exists( 'mb_strtoupper' )
                ? 'ucwordbreaksCallbackMB'
                : 'ucwordsCallbackWiki';

        return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
1731
/**
 * Return a case-folded representation of $s
 *
 * The result is such that caseFold($s1)==caseFold($s2) whenever $s1 and
 * $s2 differ only in the case of their characters. The value does not
 * have to make sense when displayed; this implementation returns the
 * upper-cased string.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s String
 * @return string
 */
function caseFold( $s ) {
        $folded = $this->uc( $s );
        return $folded;
}
1746
/**
 * Make sure a title string is UTF-8. Strings that are pure ASCII or
 * already match the UTF-8 pattern below are returned unchanged;
 * anything else is converted from this language's fallback 8-bit
 * encoding.
 *
 * @param $s String
 * @return string
 */
function checkTitleEncoding( $s ) {
        if ( is_array( $s ) ) {
                wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
        }

        # Check for non-UTF-8 URLs: no high bytes means nothing to convert
        if ( !preg_match( '/[\x80-\xff]/', $s ) ) {
                return $s;
        }

        # Every character must be ASCII or a 2-, 3- or 4-byte sequence
        $utf8Pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
        if ( preg_match( $utf8Pattern, $s ) ) {
                return $s;
        }

        return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
}
1765
/**
 * Fetch the 'fallback8bitEncoding' item of the data cache for this
 * language code. checkTitleEncoding() uses it as the source encoding
 * for strings that are not valid UTF-8.
 *
 * @return whatever the data cache holds for 'fallback8bitEncoding'
 */
function fallback8bitEncoding() {
        $encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
        return $encoding;
}
1769
/**
 * Whether this language separates words with whitespace.
 *
 * Most writing systems do. Some languages such as Chinese
 * conventionally don't, which requires special handling when
 * breaking up words for searching etc. This implementation always
 * answers true.
 *
 * @return bool
 */
function hasWordBreaks() {
        return true;
}
1779
/**
 * Hook for word segmentation. Some languages such as Chinese require
 * it; a derived class for such a language specifies the segmentation
 * by overriding this method. This implementation returns its input
 * unchanged.
 *
 * @param $string String
 * @return String
 */
function segmentByWord( $string ) {
        return $string;
}
1790
/**
 * Normalise a string for searching. Some languages have special
 * punctuation that needs to be normalised; such changes belong here.
 * This implementation only applies convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
        $normalized = self::convertDoubleWidth( $string );
        return $normalized;
}
1801
/**
 * Convert double-width roman characters to single-width.
 * Only the digits 0-9 and the letters A-Z and a-z are replaced.
 *
 * @param $string String
 * @return String
 */
protected static function convertDoubleWidth( $string ) {
        # Both lists are built on the first call and kept afterwards
        static $fullChars = null;
        static $halfChars = null;

        if ( $fullChars === null ) {
                # Each full-width character is three bytes long in UTF-8
                $fullChars = str_split(
                        "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ",
                        3
                );
                $halfChars = str_split(
                        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
                );
        }

        return str_replace( $fullChars, $halfChars, $string );
}
1820
/**
 * Surround the first capture group of every match of $pattern with
 * spaces, then collapse each run of spaces into a single space.
 *
 * @param $string String
 * @param $pattern String: regular expression with a capture group
 * @return String
 */
protected static function insertSpace( $string, $pattern ) {
        $spaced = preg_replace( $pattern, ' $1 ', $string );
        return preg_replace( '/ +/', ' ', $spaced );
}
1826
/**
 * Hook for languages, e.g. Chinese, that need to convert search terms
 * in order for search results to be displayed correctly. This
 * implementation returns the terms unchanged.
 *
 * @param $termsArray Array
 * @return Array
 */
function convertForSearchResult( $termsArray ) {
        return $termsArray;
}
1832
/**
 * Get the first character of a string.
 *
 * A Hangul syllable (code points 0xac00 up to, but excluding, 0xd7a4)
 * is broken down and its first jamo is returned instead. A string that
 * does not start with a well-formed 1- to 4-byte sequence gives ''.
 *
 * @param $s string
 * @return string
 */
function firstChar( $s ) {
        # Exclusive upper code point of each run of syllables => the jamo
        # returned for that run; syllables at or above the last limit are
        # handled after the loop
        static $jamoByLimit = array(
                0xb098 => "\xe3\x84\xb1",
                0xb2e4 => "\xe3\x84\xb4",
                0xb77c => "\xe3\x84\xb7",
                0xb9c8 => "\xe3\x84\xb9",
                0xbc14 => "\xe3\x85\x81",
                0xc0ac => "\xe3\x85\x82",
                0xc544 => "\xe3\x85\x85",
                0xc790 => "\xe3\x85\x87",
                0xcc28 => "\xe3\x85\x88",
                0xce74 => "\xe3\x85\x8a",
                0xd0c0 => "\xe3\x85\x8b",
                0xd30c => "\xe3\x85\x8c",
                0xd558 => "\xe3\x85\x8d",
        );

        $matches = array();
        preg_match(
                '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
                $s,
                $matches
        );

        if ( !isset( $matches[1] ) ) {
                return '';
        }

        $char = $matches[1];
        if ( strlen( $char ) != 3 ) {
                // only three-byte characters can be Hangul syllables
                return $char;
        }

        // Break down Hangul syllables to grab the first jamo
        $code = utf8ToCodepoint( $char );
        if ( $code < 0xac00 || 0xd7a4 <= $code ) {
                return $char;
        }

        foreach ( $jamoByLimit as $limit => $jamo ) {
                if ( $code < $limit ) {
                        return $jamo;
                }
        }
        return "\xe3\x85\x8e";
}
1890
/**
 * Startup hook for languages with an alternate char encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 * If such a language is used as the primary content language, it can
 * set an override to the defaults here. This implementation does
 * nothing.
 */
function initEncoding() {
}
1897
/**
 * Recode text on its way into the edit box.
 *
 * For some languages we'll want to explicitly specify which
 * characters make it into the edit box raw or are converted in some
 * way or another. Note that if wgOutputEncoding is different from
 * wgInputEncoding, this text will be further converted to
 * wgOutputEncoding.
 *
 * @param $s String: UTF-8 text
 * @return string: $s itself when $wgEditEncoding is empty or 'UTF-8',
 *         otherwise $s converted from UTF-8 to $wgEditEncoding
 */
function recodeForEdit( $s ) {
        global $wgEditEncoding;

        $keepAsIs = ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
        if ( !$keepAsIs ) {
                return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
        }
        return $s;
}
1912
/**
 * Reverse of recodeForEdit(): convert submitted text from
 * $wgEditEncoding to UTF-8. An empty $wgEditEncoding counts as
 * 'UTF-8', in which case the text is returned unchanged.
 *
 * @param $s String
 * @return string
 */
function recodeInput( $s ) {
        global $wgEditEncoding;

        $enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

        return ( $enc == 'UTF-8' ) ? $s : $this->iconv( $enc, 'UTF-8', $s );
}
1927
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only. This
 * implementation hands the string to UtfNormal::cleanUp().
 *
 * @param $s String
 * @return the result of UtfNormal::cleanUp()
 */
function normalize( $s ) {
        $clean = UtfNormal::cleanUp( $s );
        return $clean;
}
1938
1939         /**
1940          * Transform a string using serialized data stored in the given file (which
1941          * must be in the serialized subdirectory of $IP). The file contains pairs
1942          * mapping source characters to destination characters.
1943          *
1944          * The data is cached in process memory. This will go faster if you have the
1945          * FastStringSearch extension.
1946          */
1947         function transformUsingPairFile( $file, $string ) {
1948                 if ( !isset( $this->transformData[$file] ) ) {
1949                         $data = wfGetPrecompiledData( $file );
1950                         if ( $data === false ) {
1951                                 throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
1952                         }
1953                         $this->transformData[$file] = new ReplacementArray( $data );
1954                 }
1955                 return $this->transformData[$file]->replace( $string );
1956         }
1957
1958         /**
1959          * For right-to-left language support
1960          *
1961          * @return bool
1962          */
1963         function isRTL() {
1964                 return self::$dataCache->getItem( $this->mCode, 'rtl' );
1965         }
1966
1967         /**
1968          * Return the correct HTML 'dir' attribute value for this language.
1969          * @return String
1970          */
1971         function getDir() {
1972                 return $this->isRTL() ? 'rtl' : 'ltr';
1973         }
1974
1975         /**
1976          * Return 'left' or 'right' as appropriate alignment for line-start
1977          * for this language's text direction.
1978          *
1979          * Should be equivalent to CSS3 'start' text-align value....
1980          *
1981          * @return String
1982          */
1983         function alignStart() {
1984                 return $this->isRTL() ? 'right' : 'left';
1985         }
1986
1987         /**
1988          * Return 'right' or 'left' as appropriate alignment for line-end
1989          * for this language's text direction.
1990          *
1991          * Should be equivalent to CSS3 'end' text-align value....
1992          *
1993          * @return String
1994          */
1995         function alignEnd() {
1996                 return $this->isRTL() ? 'left' : 'right';
1997         }
1998
1999         /**
2000          * A hidden direction mark (LRM or RLM), depending on the language direction
2001          *
2002          * @return string
2003          */
2004         function getDirMark() {
2005                 return $this->isRTL() ? "\xE2\x80\x8F" : "\xE2\x80\x8E";
2006         }
2007
2008         function capitalizeAllNouns() {
2009                 return self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
2010         }
2011
2012         /**
2013          * An arrow, depending on the language direction
2014          *
2015          * @return string
2016          */
2017         function getArrow() {
2018                 return $this->isRTL() ? '←' : '→';
2019         }
2020
2021         /**
2022          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
2023          *
2024          * @return bool
2025          */
2026         function linkPrefixExtension() {
2027                 return self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
2028         }
2029
2030         function getMagicWords() {
2031                 return self::$dataCache->getItem( $this->mCode, 'magicWords' );
2032         }
2033
2034         # Fill a MagicWord object with data from here
2035         function getMagic( $mw ) {
2036                 if ( !$this->mMagicHookDone ) {
2037                         $this->mMagicHookDone = true;
2038                         wfProfileIn( 'LanguageGetMagic' );
2039                         wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
2040                         wfProfileOut( 'LanguageGetMagic' );
2041                 }
2042                 if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
2043                         $rawEntry = $this->mMagicExtensions[$mw->mId];
2044                 } else {
2045                         $magicWords = $this->getMagicWords();
2046                         if ( isset( $magicWords[$mw->mId] ) ) {
2047                                 $rawEntry = $magicWords[$mw->mId];
2048                         } else {
2049                                 $rawEntry = false;
2050                         }
2051                 }
2052
2053                 if( !is_array( $rawEntry ) ) {
2054                         error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
2055                 } else {
2056                         $mw->mCaseSensitive = $rawEntry[0];
2057                         $mw->mSynonyms = array_slice( $rawEntry, 1 );
2058                 }
2059         }
2060
2061         /**
2062          * Add magic words to the extension array
2063          */
2064         function addMagicWordsByLang( $newWords ) {
2065                 $code = $this->getCode();
2066                 $fallbackChain = array();
2067                 while ( $code && !in_array( $code, $fallbackChain ) ) {
2068                         $fallbackChain[] = $code;
2069                         $code = self::getFallbackFor( $code );
2070                 }
2071                 if ( !in_array( 'en', $fallbackChain ) ) {
2072                         $fallbackChain[] = 'en';
2073                 }
2074                 $fallbackChain = array_reverse( $fallbackChain );
2075                 foreach ( $fallbackChain as $code ) {
2076                         if ( isset( $newWords[$code] ) ) {
2077                                 $this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
2078                         }
2079                 }
2080         }
2081
2082         /**
2083          * Get special page names, as an associative array
2084          *   case folded alias => real name
2085          */
2086         function getSpecialPageAliases() {
2087                 // Cache aliases because it may be slow to load them
2088                 if ( is_null( $this->mExtendedSpecialPageAliases ) ) {
2089                         // Initialise array
2090                         $this->mExtendedSpecialPageAliases =
2091                                 self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
2092                         wfRunHooks( 'LanguageGetSpecialPageAliases',
2093                                 array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );
2094                 }
2095
2096                 return $this->mExtendedSpecialPageAliases;
2097         }
2098
2099         /**
2100          * Italic is unsuitable for some languages
2101          *
2102          * @param $text String: the text to be emphasized.
2103          * @return string
2104          */
2105         function emphasize( $text ) {
2106                 return "<em>$text</em>";
2107         }
2108
2109          /**
2110           * Normally we output all numbers in plain en_US style, that is
2111           * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
2112           * point twohundredthirtyfive. However this is not sutable for all
2113           * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
2114           * Icelandic just want to use commas instead of dots, and dots instead
2115           * of commas like "293.291,235".
2116           *
2117           * An example of this function being called:
2118           * <code>
2119           * wfMsg( 'message', $wgLang->formatNum( $num ) )
2120           * </code>
2121           *
2122           * See LanguageGu.php for the Gujarati implementation and
2123           * $separatorTransformTable on MessageIs.php for
2124           * the , => . and . => , implementation.
2125           *
2126           * @todo check if it's viable to use localeconv() for the decimal
2127           *       separator thing.
2128           * @param $number Mixed: the string to be formatted, should be an integer
2129           *        or a floating point number.
2130           * @param $nocommafy Bool: set to true for special numbers like dates
2131           * @return string
2132           */
2133         function formatNum( $number, $nocommafy = false ) {
2134                 global $wgTranslateNumerals;
2135                 if ( !$nocommafy ) {
2136                         $number = $this->commafy( $number );
2137                         $s = $this->separatorTransformTable();
2138                         if ( $s ) {
2139                                 $number = strtr( $number, $s );
2140                         }
2141                 }
2142
2143                 if ( $wgTranslateNumerals ) {
2144                         $s = $this->digitTransformTable();
2145                         if ( $s ) {
2146                                 $number = strtr( $number, $s );
2147                         }
2148                 }
2149
2150                 return $number;
2151         }
2152
2153         function parseFormattedNumber( $number ) {
2154                 $s = $this->digitTransformTable();
2155                 if ( $s ) {
2156                         $number = strtr( $number, array_flip( $s ) );
2157                 }
2158
2159                 $s = $this->separatorTransformTable();
2160                 if ( $s ) {
2161                         $number = strtr( $number, array_flip( $s ) );
2162                 }
2163
2164                 $number = strtr( $number, array( ',' => '' ) );
2165                 return $number;
2166         }
2167
2168         /**
2169          * Adds commas to a given number
2170          *
2171          * @param $_ mixed
2172          * @return string
2173          */
2174         function commafy($_) {
2175                 return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
2176         }
2177
2178         function digitTransformTable() {
2179                 return self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
2180         }
2181
2182         function separatorTransformTable() {
2183                 return self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
2184         }
2185
2186         /**
2187          * Take a list of strings and build a locale-friendly comma-separated
2188          * list, using the local comma-separator message.
2189          * The last two strings are chained with an "and".
2190          *
2191          * @param $l Array
2192          * @return string
2193          */
2194         function listToText( $l ) {
2195                 $s = '';
2196                 $m = count( $l ) - 1;
2197                 if( $m == 1 ) {
2198                         return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
2199                 } else {
2200                         for ( $i = $m; $i >= 0; $i-- ) {
2201                                 if ( $i == $m ) {
2202                                         $s = $l[$i];
2203                                 } else if( $i == $m - 1 ) {
2204                                         $s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s;
2205                                 } else {
2206                                         $s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s;
2207                                 }
2208                         }
2209                         return $s;
2210                 }
2211         }
2212
2213         /**
2214          * Take a list of strings and build a locale-friendly comma-separated
2215          * list, using the local comma-separator message.
2216          * @param $list array of strings to put in a comma list
2217          * @return string
2218          */
2219         function commaList( $list ) {
2220                 return implode(
2221                         $list,
2222                         wfMsgExt(
2223                                 'comma-separator',
2224                                 array( 'parsemag', 'escapenoentities', 'language' => $this )
2225                         )
2226                 );
2227         }
2228
2229         /**
2230          * Take a list of strings and build a locale-friendly semicolon-separated
2231          * list, using the local semicolon-separator message.
2232          * @param $list array of strings to put in a semicolon list
2233          * @return string
2234          */
2235         function semicolonList( $list ) {
2236                 return implode(
2237                         $list,
2238                         wfMsgExt(
2239                                 'semicolon-separator',
2240                                 array( 'parsemag', 'escapenoentities', 'language' => $this )
2241                         )
2242                 );
2243         }
2244
2245         /**
2246          * Same as commaList, but separate it with the pipe instead.
2247          * @param $list array of strings to put in a pipe list
2248          * @return string
2249          */
2250         function pipeList( $list ) {
2251                 return implode(
2252                         $list,
2253                         wfMsgExt(
2254                                 'pipe-separator',
2255                                 array( 'escapenoentities', 'language' => $this )
2256                         )
2257                 );
2258         }
2259
2260         /**
2261          * Truncate a string to a specified length in bytes, appending an optional
2262          * string (e.g. for ellipses)
2263          *
2264          * The database offers limited byte lengths for some columns in the database;
2265          * multi-byte character sets mean we need to ensure that only whole characters
2266          * are included, otherwise broken characters can be passed to the user
2267          *
2268          * If $length is negative, the string will be truncated from the beginning
2269          *
2270          * @param $string String to truncate
2271          * @param $length Int: maximum length (excluding ellipses)
2272          * @param $ellipsis String to append to the truncated text
2273          * @return string
2274          */
2275         function truncate( $string, $length, $ellipsis = '...' ) {
2276                 # Use the localized ellipsis character
2277                 if ( $ellipsis == '...' ) {
2278                         $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
2279                 }
2280                 # Check if there is no need to truncate
2281                 if ( $length == 0 ) {
2282                         return $ellipsis;
2283                 } elseif ( strlen( $string ) <= abs( $length ) ) {
2284                         return $string;
2285                 }
2286                 $stringOriginal = $string;
2287                 if ( $length > 0 ) {
2288                         $string = substr( $string, 0, $length ); // xyz...
2289                         $string = $this->removeBadCharLast( $string );
2290                         $string = $string . $ellipsis;
2291                 } else {
2292                         $string = substr( $string, $length ); // ...xyz
2293                         $string = $this->removeBadCharFirst( $string );
2294                         $string = $ellipsis . $string;
2295                 }
2296                 # Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
2297                 if ( strlen( $string ) < strlen( $stringOriginal ) ) {
2298                         return $string;
2299                 } else {
2300                         return $stringOriginal;
2301                 }
2302         }
2303
2304         /**
2305          * Remove bytes that represent an incomplete Unicode character
2306          * at the end of string (e.g. bytes of the char are missing)
2307          *
2308          * @param $string String
2309          * @return string
2310          */
2311         protected function removeBadCharLast( $string ) {
2312                 $char = ord( $string[strlen( $string ) - 1] );
2313                 $m = array();
2314                 if ( $char >= 0xc0 ) {
2315                         # We got the first byte only of a multibyte char; remove it.
2316                         $string = substr( $string, 0, -1 );
2317                 } elseif ( $char >= 0x80 &&
2318                       preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
2319                                   '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
2320                 {
2321                         # We chopped in the middle of a character; remove it
2322                         $string = $m[1];
2323                 }
2324                 return $string;
2325         }
2326
2327         /**
2328          * Remove bytes that represent an incomplete Unicode character
2329          * at the start of string (e.g. bytes of the char are missing)
2330          *
2331          * @param $string String
2332          * @return string
2333          */
2334         protected function removeBadCharFirst( $string ) {
2335                 $char = ord( $string[0] );
2336                 if ( $char >= 0x80 && $char < 0xc0 ) {
2337                         # We chopped in the middle of a character; remove the whole thing
2338                         $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
2339                 }
2340                 return $string;
2341         }
2342
2343         /*
2344          * Truncate a string of valid HTML to a specified length in bytes,
2345          * appending an optional string (e.g. for ellipses), and return valid HTML
2346          *
2347          * This is only intended for styled/linked text, such as HTML with
2348          * tags like <span> and <a>, were the tags are self-contained (valid HTML)
2349          *
2350          * Note: tries to fix broken HTML with MWTidy
2351          *
2352          * @param string $text String to truncate
2353          * @param int $length (zero/positive) Maximum length (excluding ellipses)
2354          * @param string $ellipsis String to append to the truncated text
2355          * @returns string
2356          */
2357         function truncateHtml( $text, $length, $ellipsis = '...' ) {
2358                 # Use the localized ellipsis character
2359                 if ( $ellipsis == '...' ) {
2360                         $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
2361                 }
2362                 # Check if there is no need to truncate
2363                 if ( $length <= 0 ) {
2364                         return $ellipsis; // no text shown, nothing to format
2365                 } elseif ( strlen( $text ) <= $length ) {
2366                         return $text; // string short enough even *with* HTML
2367                 }
2368                 $text = MWTidy::tidy( $text ); // fix tags
2369                 $displayLen = 0; // innerHTML legth so far
2370                 $testingEllipsis = false; // checking if ellipses will make string longer/equal?
2371                 $tagType = 0; // 0-open, 1-close
2372                 $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
2373                 $entityState = 0; // 0-not entity, 1-entity
2374                 $tag = $ret = $ch = '';
2375                 $openTags = array();
2376                 $textLen = strlen($text);
2377                 for( $pos = 0; $pos < $textLen; ++$pos ) {
2378                         $ch = $text[$pos];
2379                         $lastCh = $pos ? $text[$pos - 1] : '';
2380                         $ret .= $ch; // add to result string
2381                         if ( $ch == '<' ) {
2382                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
2383                                 $entityState = 0; // for bad HTML
2384                                 $bracketState = 1; // tag started (checking for backslash)
2385                         } elseif ( $ch == '>' ) {
2386                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
2387                                 $entityState = 0; // for bad HTML
2388                                 $bracketState = 0; // out of brackets
2389                         } elseif ( $bracketState == 1 ) {
2390                                 if ( $ch == '/' ) {
2391                                         $tagType = 1; // close tag (e.g. "</span>")
2392                                 } else {
2393                                         $tagType = 0; // open tag (e.g. "<span>")
2394                                         $tag .= $ch;
2395                                 }
2396                                 $bracketState = 2; // building tag name
2397                         } elseif ( $bracketState == 2 ) {
2398                                 if ( $ch != ' ' ) {
2399                                         $tag .= $ch;
2400                                 } else {
2401                                         // Name found (e.g. "<a href=..."), add on tag attributes...
2402                                         $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
2403                                 }
2404                         } elseif ( $bracketState == 0 ) {
2405                                 if ( $entityState ) {
2406                                         if ( $ch == ';' ) {
2407                                                 $entityState = 0;
2408                                                 $displayLen++; // entity is one displayed char
2409                                         }
2410                                 } else {
2411                                         if ( $ch == '&' ) {
2412                                                 $entityState = 1; // entity found, (e.g. "&nbsp;")
2413                                         } else {
2414                                                 $displayLen++; // this char is displayed
2415                                                 // Add on the other display text after this...
2416                                                 $skipped = $this->truncate_skip(
2417                                                         $ret, $text, "<>&", $pos + 1, $length - $displayLen );
2418                                                 $displayLen += $skipped;
2419                                                 $pos += $skipped;
2420                                         }
2421                                 }
2422                         }
2423                         # Consider truncation once the display length has reached the maximim.
2424                         # Double-check that we're not in the middle of a bracket/entity...
2425                         if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
2426                                 if ( !$testingEllipsis ) {
2427                                         $testingEllipsis = true;
2428                                         # Save where we are; we will truncate here unless
2429                                         # the ellipsis actually makes the string longer.
2430                                         $pOpenTags = $openTags; // save state
2431                                         $pRet = $ret; // save state
2432                                 } elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
2433                                         # Ellipsis won't make string longer/equal, the truncation point was OK.
2434                                         $openTags = $pOpenTags; // reload state
2435                                         $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
2436                                         $ret .= $ellipsis; // add ellipsis
2437                                         break;
2438                                 }
2439                         }
2440                 }
2441                 if ( $displayLen == 0 ) {
2442                         return ''; // no text shown, nothing to format
2443                 }
2444                 $this->truncate_endBracket( $tag, $text[$textLen - 1], $tagType, $openTags ); // for bad HTML
2445                 while ( count( $openTags ) > 0 ) {
2446                         $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
2447                 }
2448                 return $ret;
2449         }
2450
2451         // truncateHtml() helper function
2452         // like strcspn() but adds the skipped chars to $ret
2453         private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
2454                 $skipCount = 0;
2455                 if( $start < strlen( $text ) ) {
2456                         $skipCount = strcspn( $text, $search, $start, $len );
2457                         $ret .= substr( $text, $start, $skipCount );
2458                 }
2459                 return $skipCount;
2460         }
2461
2462         // truncateHtml() helper function
2463         // (a) push or pop $tag from $openTags as needed
2464         // (b) clear $tag value
2465         private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
2466                 $tag = ltrim( $tag );
2467                 if( $tag != '' ) {
2468                         if( $tagType == 0 && $lastCh != '/' ) {
2469                                 $openTags[] = $tag; // tag opened (didn't close itself)
2470                         } else if( $tagType == 1 ) {
2471                                 if( $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
2472                                         array_pop( $openTags ); // tag closed
2473                                 }
2474                         }
2475                         $tag = '';
2476                 }
2477         }
2478
2479         /**
2480          * Grammatical transformations, needed for inflected languages
2481          * Invoked by putting {{grammar:case|word}} in a message
2482          *
2483          * @param $word string
2484          * @param $case string
2485          * @return string
2486          */
2487         function convertGrammar( $word, $case ) {
2488                 global $wgGrammarForms;
2489                 if ( isset( $wgGrammarForms[$this->getCode()][$case][$word] ) ) {
2490                         return $wgGrammarForms[$this->getCode()][$case][$word];
2491                 }
2492                 return $word;
2493         }
2494
2495         /**
2496          * Provides an alternative text depending on specified gender.
2497          * Usage {{gender:username|masculine|feminine|neutral}}.
2498          * username is optional, in which case the gender of current user is used,
2499          * but only in (some) interface messages; otherwise default gender is used.
2500          * If second or third parameter are not specified, masculine is used.
2501          * These details may be overriden per language.
2502          */
2503         function gender( $gender, $forms ) {
2504                 if ( !count( $forms ) ) {
2505                         return '';
2506                 }
2507                 $forms = $this->preConvertPlural( $forms, 2 );
2508                 if ( $gender === 'male' ) {
2509                         return $forms[0];
2510                 }
2511                 if ( $gender === 'female' ) {
2512                         return $forms[1];
2513                 }
2514                 return isset( $forms[2] ) ? $forms[2] : $forms[0];
2515         }
2516
2517         /**
2518          * Plural form transformations, needed for some languages.
2519          * For example, there are 3 form of plural in Russian and Polish,
2520          * depending on "count mod 10". See [[w:Plural]]
2521          * For English it is pretty simple.
2522          *
2523          * Invoked by putting {{plural:count|wordform1|wordform2}}
2524          * or {{plural:count|wordform1|wordform2|wordform3}}
2525          *
2526          * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
2527          *
2528          * @param $count Integer: non-localized number
2529          * @param $forms Array: different plural forms
2530          * @return string Correct form of plural for $count in this language
2531          */
2532         function convertPlural( $count, $forms ) {
2533                 if ( !count( $forms ) ) {
2534                         return '';
2535                 }
2536                 $forms = $this->preConvertPlural( $forms, 2 );
2537
2538                 return ( $count == 1 ) ? $forms[0] : $forms[1];
2539         }
2540
2541         /**
2542          * Checks that convertPlural was given an array and pads it to requested
2543          * amound of forms by copying the last one.
2544          *
2545          * @param $count Integer: How many forms should there be at least
2546          * @param $forms Array of forms given to convertPlural
2547          * @return array Padded array of forms or an exception if not an array
2548          */
2549         protected function preConvertPlural( /* Array */ $forms, $count ) {
2550                 while ( count( $forms ) < $count ) {
2551                         $forms[] = $forms[count( $forms ) - 1];
2552                 }
2553                 return $forms;
2554         }
2555
2556         /**
2557          * For translating of expiry times
2558          * @param $str String: the validated block time in English
2559          * @return Somehow translated block time
2560          * @see LanguageFi.php for example implementation
2561          */
2562         function translateBlockExpiry( $str ) {
2563                 $scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );
2564
2565                 if ( $scBlockExpiryOptions == '-' ) {
2566                         return $str;
2567                 }
2568
2569                 foreach ( explode( ',', $scBlockExpiryOptions) as $option ) {
2570                         if ( strpos( $option, ':' ) === false ) {
2571                                 continue;
2572                         }
2573                         list( $show, $value ) = explode( ':', $option );
2574                         if ( strcmp( $str, $value ) == 0 ) {
2575                                 return htmlspecialchars( trim( $show ) );
2576                         }
2577                 }
2578
2579                 return $str;
2580         }
2581
2582         /**
2583          * languages like Chinese need to be segmented in order for the diff
2584          * to be of any use
2585          *
2586          * @param $text String
2587          * @return String
2588          */
2589         function segmentForDiff( $text ) {
2590                 return $text;
2591         }
2592
2593         /**
2594          * and unsegment to show the result
2595          *
2596          * @param $text String
2597          * @return String
2598          */
2599         function unsegmentForDiff( $text ) {
2600                 return $text;
2601         }
2602
2603         # convert text to all supported variants
2604         function autoConvertToAllVariants( $text ) {
2605                 return $this->mConverter->autoConvertToAllVariants( $text );
2606         }
2607
2608         # convert text to different variants of a language.
2609         function convert( $text, $isTitle = false ) {
2610                 return $this->mConverter->convert( $text, $isTitle );
2611         }
2612
2613         # Check if this is a language with variants
2614         function hasVariants() {
2615                 return sizeof( $this->getVariants() ) > 1;
2616         }
2617
2618         # Put custom tags (e.g. -{ }-) around math to prevent conversion
2619         function armourMath( $text ) {
2620                 return $this->mConverter->armourMath( $text );
2621         }
2622
2623         /**
2624          * Perform output conversion on a string, and encode for safe HTML output.
2625          * @param $text String text to be converted
2626          * @param $isTitle Bool whether this conversion is for the article title
2627          * @return string
2628          * @todo this should get integrated somewhere sane
2629          */
2630         function convertHtml( $text, $isTitle = false ) {
2631                 return htmlspecialchars( $this->convert( $text, $isTitle ) );
2632         }
2633
2634         function convertCategoryKey( $key ) {
2635                 return $this->mConverter->convertCategoryKey( $key );
2636         }
2637
2638         /**
2639          * Get the list of variants supported by this langauge
2640          * see sample implementation in LanguageZh.php
2641          *
2642          * @return array an array of language codes
2643          */
2644         function getVariants() {
2645                 return $this->mConverter->getVariants();
2646         }
2647
2648         function getPreferredVariant( $fromUser = true, $fromHeader = false ) {
2649                 return $this->mConverter->getPreferredVariant( $fromUser, $fromHeader );
2650         }
2651
2652         /**
2653          * If a language supports multiple variants, it is
2654          * possible that non-existing link in one variant
2655          * actually exists in another variant. this function
2656          * tries to find it. See e.g. LanguageZh.php
2657          *
2658          * @param $link String: the name of the link
2659          * @param $nt Mixed: the title object of the link
2660          * @param $ignoreOtherCond Boolean: to disable other conditions when
2661          *      we need to transclude a template or update a category's link
2662          * @return null the input parameters may be modified upon return
2663          */
2664         function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
2665                 $this->mConverter->findVariantLink( $link, $nt, $ignoreOtherCond );
2666         }
2667
2668         /**
2669          * If a language supports multiple variants, converts text
2670          * into an array of all possible variants of the text:
2671          *  'variant' => text in that variant
2672          */
2673         function convertLinkToAllVariants( $text ) {
2674                 return $this->mConverter->convertLinkToAllVariants( $text );
2675         }
2676
2677         /**
2678          * returns language specific options used by User::getPageRenderHash()
2679          * for example, the preferred language variant
2680          *
2681          * @return string
2682          */
2683         function getExtraHashOptions() {
2684                 return $this->mConverter->getExtraHashOptions();
2685         }
2686
2687         /**
2688          * For languages that support multiple variants, the title of an
2689          * article may be displayed differently in different variants. this
2690          * function returns the apporiate title defined in the body of the article.
2691          *
2692          * @return string
2693          */
2694         function getParsedTitle() {
2695                 return $this->mConverter->getParsedTitle();
2696         }
2697
2698         /**
2699          * Enclose a string with the "no conversion" tag. This is used by
2700          * various functions in the Parser
2701          *
2702          * @param $text String: text to be tagged for no conversion
2703          * @param $noParse
2704          * @return string the tagged text
2705          */
2706         function markNoConversion( $text, $noParse = false ) {
2707                 return $this->mConverter->markNoConversion( $text, $noParse );
2708         }
2709
2710         /**
2711          * A regular expression to match legal word-trailing characters
2712          * which should be merged onto a link of the form [[foo]]bar.
2713          *
2714          * @return string
2715          */
2716         function linkTrail() {
2717                 return self::$dataCache->getItem( $this->mCode, 'linkTrail' );
2718         }
2719
2720         function getLangObj() {
2721                 return $this;
2722         }
2723
2724         /**
2725          * Get the RFC 3066 code for this language object
2726          */
2727         function getCode() {
2728                 return $this->mCode;
2729         }
2730
2731         function setCode( $code ) {
2732                 $this->mCode = $code;
2733         }
2734
2735         /**
2736          * Get the name of a file for a certain language code
2737          * @param $prefix string Prepend this to the filename
2738          * @param $code string Language code
2739          * @param $suffix string Append this to the filename
2740          * @return string $prefix . $mangledCode . $suffix
2741          */
2742         static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
2743                 return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
2744         }
2745
2746         /**
2747          * Get the language code from a file name. Inverse of getFileName()
2748          * @param $filename string $prefix . $languageCode . $suffix
2749          * @param $prefix string Prefix before the language code
2750          * @param $suffix string Suffix after the language code
2751          * @return Language code, or false if $prefix or $suffix isn't found
2752          */
2753         static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
2754                 $m = null;
2755                 preg_match( '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
2756                         preg_quote( $suffix, '/' ) . '/', $filename, $m );
2757                 if ( !count( $m ) ) {
2758                         return false;
2759                 }
2760                 return str_replace( '_', '-', strtolower( $m[1] ) );
2761         }
2762
2763         static function getMessagesFileName( $code ) {
2764                 global $IP;
2765                 return self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
2766         }
2767
2768         static function getClassFileName( $code ) {
2769                 global $IP;
2770                 return self::getFileName( "$IP/languages/classes/Language", $code, '.php' );
2771         }
2772
2773         /**
2774          * Get the fallback for a given language
2775          */
2776         static function getFallbackFor( $code ) {
2777                 if ( $code === 'en' ) {
2778                         // Shortcut
2779                         return false;
2780                 } else {
2781                         return self::getLocalisationCache()->getItem( $code, 'fallback' );
2782                 }
2783         }
2784
2785         /**
2786          * Get all messages for a given language
2787          * WARNING: this may take a long time
2788          */
2789         static function getMessagesFor( $code ) {
2790                 return self::getLocalisationCache()->getItem( $code, 'messages' );
2791         }
2792
2793         /**
2794          * Get a message for a given language
2795          */
2796         static function getMessageFor( $key, $code ) {
2797                 return self::getLocalisationCache()->getSubitem( $code, 'messages', $key );
2798         }
2799
2800         function fixVariableInNamespace( $talk ) {
2801                 if ( strpos( $talk, '$1' ) === false ) {
2802                         return $talk;
2803                 }
2804
2805                 global $wgMetaNamespace;
2806                 $talk = str_replace( '$1', $wgMetaNamespace, $talk );
2807
2808                 # Allow grammar transformations
2809                 # Allowing full message-style parsing would make simple requests
2810                 # such as action=raw much more expensive than they need to be.
2811                 # This will hopefully cover most cases.
2812                 $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
2813                         array( &$this, 'replaceGrammarInNamespace' ), $talk );
2814                 return str_replace( ' ', '_', $talk );
2815         }
2816
2817         function replaceGrammarInNamespace( $m ) {
2818                 return $this->convertGrammar( trim( $m[2] ), trim( $m[1] ) );
2819         }
2820
2821         static function getCaseMaps() {
2822                 static $wikiUpperChars, $wikiLowerChars;
2823                 if ( isset( $wikiUpperChars ) ) {
2824                         return array( $wikiUpperChars, $wikiLowerChars );
2825                 }
2826
2827                 wfProfileIn( __METHOD__ );
2828                 $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
2829                 if ( $arr === false ) {
2830                         throw new MWException(
2831                                 "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
2832                 }
2833                 extract( $arr );
2834                 wfProfileOut( __METHOD__ );
2835                 return array( $wikiUpperChars, $wikiLowerChars );
2836         }
2837
2838         function formatTimePeriod( $seconds ) {
2839                 if ( $seconds < 10 ) {
2840                         return $this->formatNum( sprintf( "%.1f", $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' );
2841                 } elseif ( $seconds < 60 ) {
2842                         return $this->formatNum( round( $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' );
2843                 } elseif ( $seconds < 3600 ) {
2844                         return $this->formatNum( floor( $seconds / 60 ) ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' .
2845                                 $this->formatNum( round( fmod( $seconds, 60 ) ) ) . ' ' . wfMsg( 'seconds-abbrev' );
2846                 } else {
2847                         $hours = floor( $seconds / 3600 );
2848                         $minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
2849                         $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
2850                         return $this->formatNum( $hours ) . ' ' . wfMsg( 'hours-abbrev' ) . ' ' .
2851                                 $this->formatNum( $minutes ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' .
2852                                 $this->formatNum( $secondsPart ) . ' ' . wfMsg( 'seconds-abbrev' );
2853                 }
2854         }
2855
2856         function formatBitrate( $bps ) {
2857                 $units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
2858                 if ( $bps <= 0 ) {
2859                         return $this->formatNum( $bps ) . $units[0];
2860                 }
2861                 $unitIndex = floor( log10( $bps ) / 3 );
2862                 $mantissa = $bps / pow( 1000, $unitIndex );
2863                 if ( $mantissa < 10 ) {
2864                         $mantissa = round( $mantissa, 1 );
2865                 } else {
2866                         $mantissa = round( $mantissa );
2867                 }
2868                 return $this->formatNum( $mantissa ) . $units[$unitIndex];
2869         }
2870
2871         /**
2872          * Format a size in bytes for output, using an appropriate
2873          * unit (B, KB, MB or GB) according to the magnitude in question
2874          *
2875          * @param $size Size to format
2876          * @return string Plain text (not HTML)
2877          */
2878         function formatSize( $size ) {
2879                 // For small sizes no decimal places necessary
2880                 $round = 0;
2881                 if( $size > 1024 ) {
2882                         $size = $size / 1024;
2883                         if( $size > 1024 ) {
2884                                 $size = $size / 1024;
2885                                 // For MB and bigger two decimal places are smarter
2886                                 $round = 2;
2887                                 if( $size > 1024 ) {
2888                                         $size = $size / 1024;
2889                                         $msg = 'size-gigabytes';
2890                                 } else {
2891                                         $msg = 'size-megabytes';
2892                                 }
2893                         } else {
2894                                 $msg = 'size-kilobytes';
2895                         }
2896                 } else {
2897                         $msg = 'size-bytes';
2898                 }
2899                 $size = round( $size, $round );
2900                 $text = $this->getMessageFromDB( $msg );
2901                 return str_replace( '$1', $this->formatNum( $size ), $text );
2902         }
2903
2904         /**
2905          * Get the conversion rule title, if any.
2906          */
2907         function getConvRuleTitle() {
2908                 return $this->mConverter->getConvRuleTitle();
2909         }
2910 }