languages/Language.php

   1 <?php
   2 /**
   3  * @addtogroup Language
   4  */
   5
   6 if( !defined( 'MEDIAWIKI' ) ) {
   7         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
   8         exit( 1 );
   9 }
  10
  11 # Read language names
  12 global $wgLanguageNames;
  13 require_once( dirname(__FILE__) . '/Names.php' ) ;
  14
  15 global $wgInputEncoding, $wgOutputEncoding;
  16
  17 /**
  18  * These are always UTF-8, they exist only for backwards compatibility
  19  */
  20 $wgInputEncoding    = "UTF-8";
  21 $wgOutputEncoding       = "UTF-8";
  22
  23 if( function_exists( 'mb_strtoupper' ) ) {
  24         mb_internal_encoding('UTF-8');
  25 }
  26
  27 /* a fake language converter */
  28 class FakeConverter {
  29         var $mLang;
  30         function FakeConverter($langobj) {$this->mLang = $langobj;}
  31         function convert($t, $i) {return $t;}
  32         function parserConvert($t, $p) {return $t;}
  33         function getVariants() { return array( $this->mLang->getCode() ); }
  34         function getPreferredVariant() {return $this->mLang->getCode(); }
  35         function findVariantLink(&$l, &$n) {}
  36         function getExtraHashOptions() {return '';}
  37         function getParsedTitle() {return '';}
  38         function markNoConversion($text, $noParse=false) {return $text;}
  39         function convertCategoryKey( $key ) {return $key; }
  40         function convertLinkToAllVariants($text){ return array( $this->mLang->getCode() => $text); }
  41         function armourMath($text){ return $text; }
  42 }
  43
  44 #--------------------------------------------------------------------------
  45 # Internationalisation code
  46 #--------------------------------------------------------------------------
  47
  48 class Language {
  49         var $mConverter, $mVariants, $mCode, $mLoaded = false;
  50         var $mMagicExtensions = array(), $mMagicHookDone = false;
  51
  52         static public $mLocalisationKeys = array( 'fallback', 'namespaceNames',
  53                 'skinNames', 'mathNames',
  54                 'bookstoreList', 'magicWords', 'messages', 'rtl', 'digitTransformTable',
  55                 'separatorTransformTable', 'fallback8bitEncoding', 'linkPrefixExtension',
  56                 'defaultUserOptionOverrides', 'linkTrail', 'namespaceAliases',
  57                 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap',
  58                 'defaultDateFormat', 'extraUserToggles', 'specialPageAliases' );
  59
  60         static public $mMergeableMapKeys = array( 'messages', 'namespaceNames', 'mathNames',
  61                 'dateFormats', 'defaultUserOptionOverrides', 'magicWords' );
  62
  63         static public $mMergeableListKeys = array( 'extraUserToggles' );
  64
  65         static public $mMergeableAliasListKeys = array( 'specialPageAliases' );
  66
  67         static public $mLocalisationCache = array();
  68
  69         static public $mWeekdayMsgs = array(
  70                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
  71                 'friday', 'saturday'
  72         );
  73
  74         static public $mWeekdayAbbrevMsgs = array(
  75                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
  76         );
  77
  78         static public $mMonthMsgs = array(
  79                 'january', 'february', 'march', 'april', 'may_long', 'june',
  80                 'july', 'august', 'september', 'october', 'november',
  81                 'december'
  82         );
  83         static public $mMonthGenMsgs = array(
  84                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
  85                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
  86                 'december-gen'
  87         );
  88         static public $mMonthAbbrevMsgs = array(
  89                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
  90                 'sep', 'oct', 'nov', 'dec'
  91         );
  92
  93         /**
  94          * Create a language object for a given language code
  95          */
  96         static function factory( $code ) {
  97                 global $IP;
  98                 static $recursionLevel = 0;
  99
 100                 if ( $code == 'en' ) {
 101                         $class = 'Language';
 102                 } else {
 103                         $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 104                         // Preload base classes to work around APC/PHP5 bug
 105                         if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
 106                                 include_once("$IP/languages/classes/$class.deps.php");
 107                         }
 108                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 109                                 include_once("$IP/languages/classes/$class.php");
 110                         }
 111                 }
 112
 113                 if ( $recursionLevel > 5 ) {
 114                         throw new MWException( "Language fallback loop detected when creating class $class\n" );
 115                 }
 116
 117                 if( ! class_exists( $class ) ) {
 118                         $fallback = Language::getFallbackFor( $code );
 119                         ++$recursionLevel;
 120                         $lang = Language::factory( $fallback );
 121                         --$recursionLevel;
 122                         $lang->setCode( $code );
 123                 } else {
 124                         $lang = new $class;
 125                 }
 126
 127                 return $lang;
 128         }
 129
 130         function __construct() {
 131                 $this->mConverter = new FakeConverter($this);
 132                 // Set the code to the name of the descendant
 133                 if ( get_class( $this ) == 'Language' ) {
 134                         $this->mCode = 'en';
 135                 } else {
 136                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 137                 }
 138         }
 139
 140         /**
 141          * Hook which will be called if this is the content language.
 142          * Descendants can use this to register hook functions or modify globals
 143          */
 144         function initContLang() {}
 145
 146         /**
 147          * @deprecated
 148          * @return array
 149          */
 150         function getDefaultUserOptions() {
 151                 return User::getDefaultOptions();
 152         }
 153
 154         function getFallbackLanguageCode() {
 155                 $this->load();
 156                 return $this->fallback;
 157         }
 158
 159         /**
 160          * Exports $wgBookstoreListEn
 161          * @return array
 162          */
 163         function getBookstoreList() {
 164                 $this->load();
 165                 return $this->bookstoreList;
 166         }
 167
 168         /**
 169          * @return array
 170          */
 171         function getNamespaces() {
 172                 $this->load();
 173                 return $this->namespaceNames;
 174         }
 175
 176         /**
 177          * A convenience function that returns the same thing as
 178          * getNamespaces() except with the array values changed to ' '
 179          * where it found '_', useful for producing output to be displayed
 180          * e.g. in <select> forms.
 181          *
 182          * @return array
 183          */
 184         function getFormattedNamespaces() {
 185                 $ns = $this->getNamespaces();
 186                 foreach($ns as $k => $v) {
 187                         $ns[$k] = strtr($v, '_', ' ');
 188                 }
 189                 return $ns;
 190         }
 191
 192         /**
 193          * Get a namespace value by key
 194          * <code>
 195          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 196          * echo $mw_ns; // prints 'MediaWiki'
 197          * </code>
 198          *
 199          * @param int $index the array key of the namespace to return
 200          * @return mixed, string if the namespace value exists, otherwise false
 201          */
 202         function getNsText( $index ) {
 203                 $ns = $this->getNamespaces();
 204                 return isset( $ns[$index] ) ? $ns[$index] : false;
 205         }
 206
 207         /**
 208          * A convenience function that returns the same thing as
 209          * getNsText() except with '_' changed to ' ', useful for
 210          * producing output.
 211          *
         * @return string
 213          */
 214         function getFormattedNsText( $index ) {
 215                 $ns = $this->getNsText( $index );
 216                 return strtr($ns, '_', ' ');
 217         }
 218
 219         /**
 220          * Get a namespace key by value, case insensitive.
 221          * Only matches namespace names for the current language, not the
 222          * canonical ones defined in Namespace.php.
 223          *
 224          * @param string $text
 225          * @return mixed An integer if $text is a valid value otherwise false
 226          */
 227         function getLocalNsIndex( $text ) {
 228                 $this->load();
 229                 $lctext = $this->lc($text);
 230                 return isset( $this->mNamespaceIds[$lctext] ) ? $this->mNamespaceIds[$lctext] : false;
 231         }
 232
 233         /**
 234          * Get a namespace key by value, case insensitive.  Canonical namespace
 235          * names override custom ones defined for the current language.
 236          *
 237          * @param string $text
 238          * @return mixed An integer if $text is a valid value otherwise false
 239          */
 240         function getNsIndex( $text ) {
 241                 $this->load();
 242                 $lctext = $this->lc($text);
 243                 if( ( $ns = Namespace::getCanonicalIndex( $lctext ) ) !== null ) return $ns;
 244                 return isset( $this->mNamespaceIds[$lctext] ) ? $this->mNamespaceIds[$lctext] : false;
 245         }
 246
 247         /**
 248          * short names for language variants used for language conversion links.
 249          *
 250          * @param string $code
 251          * @return string
 252          */
 253         function getVariantname( $code ) {
 254                 return $this->getMessageFromDB( "variantname-$code" );
 255         }
 256
 257         function specialPage( $name ) {
 258                 $aliases = $this->getSpecialPageAliases();
 259                 if ( isset( $aliases[$name][0] ) ) {
 260                         $name = $aliases[$name][0];
 261                 }
 262                 return $this->getNsText(NS_SPECIAL) . ':' . $name;
 263         }
 264
 265         function getQuickbarSettings() {
 266                 return array(
 267                         $this->getMessage( 'qbsettings-none' ),
 268                         $this->getMessage( 'qbsettings-fixedleft' ),
 269                         $this->getMessage( 'qbsettings-fixedright' ),
 270                         $this->getMessage( 'qbsettings-floatingleft' ),
 271                         $this->getMessage( 'qbsettings-floatingright' )
 272                 );
 273         }
 274
 275         function getSkinNames() {
 276                 $this->load();
 277                 return $this->skinNames;
 278         }
 279
 280         function getMathNames() {
 281                 $this->load();
 282                 return $this->mathNames;
 283         }
 284
 285         function getDatePreferences() {
 286                 $this->load();
 287                 return $this->datePreferences;
 288         }
 289
 290         function getDateFormats() {
 291                 $this->load();
 292                 return $this->dateFormats;
 293         }
 294
 295         function getDefaultDateFormat() {
 296                 $this->load();
 297                 return $this->defaultDateFormat;
 298         }
 299
 300         function getDatePreferenceMigrationMap() {
 301                 $this->load();
 302                 return $this->datePreferenceMigrationMap;
 303         }
 304
 305         function getDefaultUserOptionOverrides() {
 306                 $this->load();
 307                 # XXX - apparently some languageas get empty arrays, didn't get to it yet -- midom
 308                 if (is_array($this->defaultUserOptionOverrides)) {
 309                         return $this->defaultUserOptionOverrides;
 310                 } else {
 311                         return array();
 312                 }
 313         }
 314
 315         function getExtraUserToggles() {
 316                 $this->load();
 317                 return $this->extraUserToggles;
 318         }
 319
 320         function getUserToggle( $tog ) {
 321                 return $this->getMessageFromDB( "tog-$tog" );
 322         }
 323
 324         /**
 325          * Get language names, indexed by code.
 326          * If $customisedOnly is true, only returns codes with a messages file
 327          */
 328         public static function getLanguageNames( $customisedOnly = false ) {
 329                 global $wgLanguageNames;
 330                 if ( !$customisedOnly ) {
 331                         return $wgLanguageNames;
 332                 }
 333
 334                 global $IP;
 335                 $names = array();
 336                 $dir = opendir( "$IP/languages/messages" );
 337                 while( false !== ( $file = readdir( $dir ) ) ) {
 338                         $m = array();
 339                         if( preg_match( '/Messages([A-Z][a-z_]+)\.php$/', $file, $m ) ) {
 340                                 $code = str_replace( '_', '-', strtolower( $m[1] ) );
 341                                 if ( isset( $wgLanguageNames[$code] ) ) {
 342                                         $names[$code] = $wgLanguageNames[$code];
 343                                 }
 344                         }
 345                 }
 346                 closedir( $dir );
 347                 return $names;
 348         }
 349
 350         /**
 351          * Ugly hack to get a message maybe from the MediaWiki namespace, if this
 352          * language object is the content or user language.
 353          */
 354         function getMessageFromDB( $msg ) {
 355                 global $wgContLang, $wgLang;
 356                 if ( $wgContLang->getCode() == $this->getCode() ) {
 357                         # Content language
 358                         return wfMsgForContent( $msg );
 359                 } elseif ( $wgLang->getCode() == $this->getCode() ) {
 360                         # User language
 361                         return wfMsg( $msg );
 362                 } else {
 363                         # Neither, get from localisation
 364                         return $this->getMessage( $msg );
 365                 }
 366         }
 367
 368         function getLanguageName( $code ) {
 369                 global $wgLanguageNames;
 370                 if ( ! array_key_exists( $code, $wgLanguageNames ) ) {
 371                         return '';
 372                 }
 373                 return $wgLanguageNames[$code];
 374         }
 375
 376         function getMonthName( $key ) {
 377                 return $this->getMessageFromDB( self::$mMonthMsgs[$key-1] );
 378         }
 379
 380         function getMonthNameGen( $key ) {
 381                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key-1] );
 382         }
 383
 384         function getMonthAbbreviation( $key ) {
 385                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key-1] );
 386         }
 387
 388         function getWeekdayName( $key ) {
 389                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key-1] );
 390         }
 391
 392         function getWeekdayAbbreviation( $key ) {
 393                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key-1] );
 394         }
 395
 396         /**
 397          * Used by date() and time() to adjust the time output.
 398          * @public
 399          * @param int   $ts the time in date('YmdHis') format
 400          * @param mixed $tz adjust the time by this amount (default false,
 401          *                  mean we get user timecorrection setting)
         * @return mixed the adjusted time as a date('YmdHis') string, or $ts
         *               itself when no adjustment applies
 403          */
 404         function userAdjust( $ts, $tz = false ) {
 405                 global $wgUser, $wgLocalTZoffset;
 406
 407                 if (!$tz) {
 408                         $tz = $wgUser->getOption( 'timecorrection' );
 409                 }
 410
 411                 # minutes and hours differences:
 412                 $minDiff = 0;
 413                 $hrDiff  = 0;
 414
 415                 if ( $tz === '' ) {
 416                         # Global offset in minutes.
 417                         if( isset($wgLocalTZoffset) ) {
 418                                 if( $wgLocalTZoffset >= 0 ) {
 419                                         $hrDiff = floor($wgLocalTZoffset / 60);
 420                                 } else {
 421                                         $hrDiff = ceil($wgLocalTZoffset / 60);
 422                                 }
 423                                 $minDiff = $wgLocalTZoffset % 60;
 424                         }
 425                 } elseif ( strpos( $tz, ':' ) !== false ) {
 426                         $tzArray = explode( ':', $tz );
 427                         $hrDiff = intval($tzArray[0]);
 428                         $minDiff = intval($hrDiff < 0 ? -$tzArray[1] : $tzArray[1]);
 429                 } else {
 430                         $hrDiff = intval( $tz );
 431                 }
 432
 433                 # No difference ? Return time unchanged
 434                 if ( 0 == $hrDiff && 0 == $minDiff ) { return $ts; }
 435
 436                 wfSuppressWarnings(); // E_STRICT system time bitching
 437                 # Generate an adjusted date
 438                 $t = mktime( (
 439                   (int)substr( $ts, 8, 2) ) + $hrDiff, # Hours
 440                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
 441                   (int)substr( $ts, 12, 2 ), # Seconds
 442                   (int)substr( $ts, 4, 2 ), # Month
 443                   (int)substr( $ts, 6, 2 ), # Day
 444                   (int)substr( $ts, 0, 4 ) ); #Year
 445
 446                 $date = date( 'YmdHis', $t );
 447                 wfRestoreWarnings();
 448
 449                 return $date;
 450         }
 451
 452         /**
 453          * This is a workalike of PHP's date() function, but with better
 454          * internationalisation, a reduced set of format characters, and a better
 455          * escaping format.
 456          *
 457          * Supported format characters are dDjlNwzWFmMntLYyaAgGhHiscrU. See the
 458          * PHP manual for definitions. There are a number of extensions, which
 459          * start with "x":
 460          *
 461          *    xn   Do not translate digits of the next numeric format character
 462          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 463          *    xr   Use roman numerals for the next numeric format character
 464          *    xx   Literal x
 465          *    xg   Genitive month name
 466          *
 467          * Characters enclosed in double quotes will be considered literal (with
 468          * the quotes themselves removed). Unmatched quotes will be considered
 469          * literal quotes. Example:
 470          *
 471          * "The month is" F       => The month is January
 472          * i's"                   => 20'11"
 473          *
 474          * Backslash escaping is also supported.
 475          *
 476          * Input timestamp is assumed to be pre-normalized to the desired local
 477          * time zone, if any.
 478          *
 479          * @param string $format
 480          * @param string $ts 14-character timestamp
 481          *      YYYYMMDDHHMMSS
 482          *      01234567890123
 483          */
 484         function sprintfDate( $format, $ts ) {
 485                 $s = '';
 486                 $raw = false;
 487                 $roman = false;
 488                 $unix = false;
 489                 $rawToggle = false;
 490                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
 491                         $num = false;
 492                         $code = $format[$p];
 493                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
 494                                 $code .= $format[++$p];
 495                         }
 496
 497                         switch ( $code ) {
 498                                 case 'xx':
 499                                         $s .= 'x';
 500                                         break;
 501                                 case 'xn':
 502                                         $raw = true;
 503                                         break;
 504                                 case 'xN':
 505                                         $rawToggle = !$rawToggle;
 506                                         break;
 507                                 case 'xr':
 508                                         $roman = true;
 509                                         break;
 510                                 case 'xg':
 511                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
 512                                         break;
 513                                 case 'd':
 514                                         $num = substr( $ts, 6, 2 );
 515                                         break;
 516                                 case 'D':
 517                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 518                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
 519                                         break;
 520                                 case 'j':
 521                                         $num = intval( substr( $ts, 6, 2 ) );
 522                                         break;
 523                                 case 'l':
 524                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 525                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
 526                                         break;
 527                                 case 'N':
 528                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 529                                         $w = gmdate( 'w', $unix );
 530                                         $num = $w ? $w : 7;
 531                                         break;
 532                                 case 'w':
 533                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 534                                         $num = gmdate( 'w', $unix );
 535                                         break;
 536                                 case 'z':
 537                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 538                                         $num = gmdate( 'z', $unix );
 539                                         break;
 540                                 case 'W':
 541                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 542                                         $num = gmdate( 'W', $unix );
 543                                         break;
 544                                 case 'F':
 545                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
 546                                         break;
 547                                 case 'm':
 548                                         $num = substr( $ts, 4, 2 );
 549                                         break;
 550                                 case 'M':
 551                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
 552                                         break;
 553                                 case 'n':
 554                                         $num = intval( substr( $ts, 4, 2 ) );
 555                                         break;
 556                                 case 't':
 557                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 558                                         $num = gmdate( 't', $unix );
 559                                         break;
 560                                 case 'L':
 561                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 562                                         $num = gmdate( 'L', $unix );
 563                                         break;
 564                                 case 'Y':
 565                                         $num = substr( $ts, 0, 4 );
 566                                         break;
 567                                 case 'y':
 568                                         $num = substr( $ts, 2, 2 );
 569                                         break;
 570                                 case 'a':
 571                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
 572                                         break;
 573                                 case 'A':
 574                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
 575                                         break;
 576                                 case 'g':
 577                                         $h = substr( $ts, 8, 2 );
 578                                         $num = $h % 12 ? $h % 12 : 12;
 579                                         break;
 580                                 case 'G':
 581                                         $num = intval( substr( $ts, 8, 2 ) );
 582                                         break;
 583                                 case 'h':
 584                                         $h = substr( $ts, 8, 2 );
 585                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
 586                                         break;
 587                                 case 'H':
 588                                         $num = substr( $ts, 8, 2 );
 589                                         break;
 590                                 case 'i':
 591                                         $num = substr( $ts, 10, 2 );
 592                                         break;
 593                                 case 's':
 594                                         $num = substr( $ts, 12, 2 );
 595                                         break;
 596                                 case 'c':
 597                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 598                                         $s .= gmdate( 'c', $unix );
 599                                         break;
 600                                 case 'r':
 601                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 602                                         $s .= gmdate( 'r', $unix );
 603                                         break;
 604                                 case 'U':
 605                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 606                                         $num = $unix;
 607                                         break;
 608                                 case '\\':
 609                                         # Backslash escaping
 610                                         if ( $p < strlen( $format ) - 1 ) {
 611                                                 $s .= $format[++$p];
 612                                         } else {
 613                                                 $s .= '\\';
 614                                         }
 615                                         break;
 616                                 case '"':
 617                                         # Quoted literal
 618                                         if ( $p < strlen( $format ) - 1 ) {
 619                                                 $endQuote = strpos( $format, '"', $p + 1 );
 620                                                 if ( $endQuote === false ) {
 621                                                         # No terminating quote, assume literal "
 622                                                         $s .= '"';
 623                                                 } else {
 624                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
 625                                                         $p = $endQuote;
 626                                                 }
 627                                         } else {
 628                                                 # Quote at end of string, assume literal "
 629                                                 $s .= '"';
 630                                         }
 631                                         break;
 632                                 default:
 633                                         $s .= $format[$p];
 634                         }
 635                         if ( $num !== false ) {
 636                                 if ( $rawToggle || $raw ) {
 637                                         $s .= $num;
 638                                         $raw = false;
 639                                 } elseif ( $roman ) {
 640                                         $s .= self::romanNumeral( $num );
 641                                         $roman = false;
 642                                 } else {
 643                                         $s .= $this->formatNum( $num, true );
 644                                 }
 645                                 $num = false;
 646                         }
 647                 }
 648                 return $s;
 649         }
 650
 651         /**
 652          * Roman number formatting up to 3000
 653          */
 654         static function romanNumeral( $num ) {
 655                 static $table = array(
 656                         array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
 657                         array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
 658                         array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
 659                         array( '', 'M', 'MM', 'MMM' )
 660                 );
 661
 662                 $num = intval( $num );
 663                 if ( $num > 3000 || $num <= 0 ) {
 664                         return $num;
 665                 }
 666
 667                 $s = '';
 668                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
 669                         if ( $num >= $pow10 ) {
 670                                 $s .= $table[$i][floor($num / $pow10)];
 671                         }
 672                         $num = $num % $pow10;
 673                 }
 674                 return $s;
 675         }
 676
 677         /**
 678          * This is meant to be used by time(), date(), and timeanddate() to get
 679          * the date preference they're supposed to use, it should be used in
 680          * all children.
 681          *
 682          *<code>
 683          * function timeanddate([...], $format = true) {
 684          *      $datePreference = $this->dateFormat($format);
 685          * [...]
 686          * }
 687          *</code>
 688          *
 689          * @param mixed $usePrefs: if true, the user's preference is used
 690          *                         if false, the site/language default is used
 691          *                         if int/string, assumed to be a format.
 692          * @return string
 693          */
 694         function dateFormat( $usePrefs = true ) {
 695                 global $wgUser;
 696
 697                 if( is_bool( $usePrefs ) ) {
 698                         if( $usePrefs ) {
 699                                 $datePreference = $wgUser->getDatePreference();
 700                         } else {
 701                                 $options = User::getDefaultOptions();
 702                                 $datePreference = (string)$options['date'];
 703                         }
 704                 } else {
 705                         $datePreference = (string)$usePrefs;
 706                 }
 707
 708                 // return int
 709                 if( $datePreference == '' ) {
 710                         return 'default';
 711                 }
 712
 713                 return $datePreference;
 714         }
 715
 716         /**
 717          * @public
 718          * @param mixed  $ts the time format which needs to be turned into a
 719          *               date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 720          * @param bool   $adj whether to adjust the time output according to the
 721          *               user configured offset ($timecorrection)
 722          * @param mixed  $format true to use user's date format preference
 723          * @param string $timecorrection the time offset as returned by
 724          *               validateTimeZone() in Special:Preferences
 725          * @return string
 726          */
 727         function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
 728                 $this->load();
 729                 if ( $adj ) {
 730                         $ts = $this->userAdjust( $ts, $timecorrection );
 731                 }
 732
 733                 $pref = $this->dateFormat( $format );
 734                 if( $pref == 'default' || !isset( $this->dateFormats["$pref date"] ) ) {
 735                         $pref = $this->defaultDateFormat;
 736                 }
 737                 return $this->sprintfDate( $this->dateFormats["$pref date"], $ts );
 738         }
 739
 740         /**
 741         * @public
 742         * @param mixed  $ts the time format which needs to be turned into a
 743         *               date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 744         * @param bool   $adj whether to adjust the time output according to the
 745         *               user configured offset ($timecorrection)
 746         * @param mixed  $format true to use user's date format preference
 747         * @param string $timecorrection the time offset as returned by
 748         *               validateTimeZone() in Special:Preferences
 749         * @return string
 750         */
 751         function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
 752                 $this->load();
 753                 if ( $adj ) {
 754                         $ts = $this->userAdjust( $ts, $timecorrection );
 755                 }
 756
 757                 $pref = $this->dateFormat( $format );
 758                 if( $pref == 'default' || !isset( $this->dateFormats["$pref time"] ) ) {
 759                         $pref = $this->defaultDateFormat;
 760                 }
 761                 return $this->sprintfDate( $this->dateFormats["$pref time"], $ts );
 762         }
 763
        /**
         * @public
         * @param mixed  $ts the time format which needs to be turned into a
         *               date('YmdHis') format with wfTimestamp(TS_MW,$ts)
         * @param bool   $adj whether to adjust the time output according to the
         *               user configured offset ($timecorrection)
         * @param mixed  $format what format to return, if it's false output the
         *               default one (default true)
         * @param string $timecorrection the time offset as returned by
         *               validateTimeZone() in Special:Preferences
         * @return string
         */
 777         function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false) {
 778                 $this->load();
 779
 780                 $ts = wfTimestamp( TS_MW, $ts );
 781
 782                 if ( $adj ) {
 783                         $ts = $this->userAdjust( $ts, $timecorrection );
 784                 }
 785
 786                 $pref = $this->dateFormat( $format );
 787                 if( $pref == 'default' || !isset( $this->dateFormats["$pref both"] ) ) {
 788                         $pref = $this->defaultDateFormat;
 789                 }
 790
 791                 return $this->sprintfDate( $this->dateFormats["$pref both"], $ts );
 792         }
 793
 794         function getMessage( $key ) {
 795                 $this->load();
 796                 return isset( $this->messages[$key] ) ? $this->messages[$key] : null;
 797         }
 798
 799         function getAllMessages() {
 800                 $this->load();
 801                 return $this->messages;
 802         }
 803
 804         function iconv( $in, $out, $string ) {
 805                 # For most languages, this is a wrapper for iconv
 806                 return iconv( $in, $out . '//IGNORE', $string );
 807         }
 808
 809         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
 810         function ucwordbreaksCallbackAscii($matches){
 811                 return $this->ucfirst($matches[1]);
 812         }
 813
 814         function ucwordbreaksCallbackMB($matches){
 815                 return mb_strtoupper($matches[0]);
 816         }
 817
 818         function ucCallback($matches){
 819                 list( $wikiUpperChars ) = self::getCaseMaps();
 820                 return strtr( $matches[1], $wikiUpperChars );
 821         }
 822
 823         function lcCallback($matches){
 824                 list( , $wikiLowerChars ) = self::getCaseMaps();
 825                 return strtr( $matches[1], $wikiLowerChars );
 826         }
 827
 828         function ucwordsCallbackMB($matches){
 829                 return mb_strtoupper($matches[0]);
 830         }
 831
 832         function ucwordsCallbackWiki($matches){
 833                 list( $wikiUpperChars ) = self::getCaseMaps();
 834                 return strtr( $matches[0], $wikiUpperChars );
 835         }
 836
 837         function ucfirst( $str ) {
 838                 return self::uc( $str, true );
 839         }
 840
 841         function uc( $str, $first = false ) {
 842                 if ( function_exists( 'mb_strtoupper' ) ) {
 843                         if ( $first ) {
 844                                 if ( self::isMultibyte( $str ) ) {
 845                                         return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
 846                                 } else {
 847                                         return ucfirst( $str );
 848                                 }
 849                         } else {
 850                                 return self::isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
 851                         }
 852                 } else {
 853                         if ( self::isMultibyte( $str ) ) {
 854                                 list( $wikiUpperChars ) = $this->getCaseMaps();
 855                                 $x = $first ? '^' : '';
 856                                 return preg_replace_callback(
 857                                         "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
 858                                         array($this,"ucCallback"),
 859                                         $str
 860                                 );
 861                         } else {
 862                                 return $first ? ucfirst( $str ) : strtoupper( $str );
 863                         }
 864                 }
 865         }
 866
 867         function lcfirst( $str ) {
 868                 return self::lc( $str, true );
 869         }
 870
 871         function lc( $str, $first = false ) {
 872                 if ( function_exists( 'mb_strtolower' ) )
 873                         if ( $first )
 874                                 if ( self::isMultibyte( $str ) )
 875                                         return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
 876                                 else
 877                                         return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
 878                         else
 879                                 return self::isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
 880                 else
 881                         if ( self::isMultibyte( $str ) ) {
 882                                 list( , $wikiLowerChars ) = self::getCaseMaps();
 883                                 $x = $first ? '^' : '';
 884                                 return preg_replace_callback(
 885                                         "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
 886                                         array($this,"lcCallback"),
 887                                         $str
 888                                 );
 889                         } else
 890                                 return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
 891         }
 892
 893         function isMultibyte( $str ) {
 894                 return (bool)preg_match( '/[\x80-\xff]/', $str );
 895         }
 896
 897         function ucwords($str) {
 898                 if ( self::isMultibyte( $str ) ) {
 899                         $str = self::lc($str);
 900
 901                         // regexp to find first letter in each word (i.e. after each space)
 902                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
 903
 904                         // function to use to capitalize a single char
 905                         if ( function_exists( 'mb_strtoupper' ) )
 906                                 return preg_replace_callback(
 907                                         $replaceRegexp,
 908                                         array($this,"ucwordsCallbackMB"),
 909                                         $str
 910                                 );
 911                         else
 912                                 return preg_replace_callback(
 913                                         $replaceRegexp,
 914                                         array($this,"ucwordsCallbackWiki"),
 915                                         $str
 916                                 );
 917                 }
 918                 else
 919                         return ucwords( strtolower( $str ) );
 920         }
 921
 922   # capitalize words at word breaks
 923         function ucwordbreaks($str){
 924                 if (self::isMultibyte( $str ) ) {
 925                         $str = self::lc($str);
 926
 927                         // since \b doesn't work for UTF-8, we explicitely define word break chars
 928                         $breaks= "[ \-\(\)\}\{\.,\?!]";
 929
 930                         // find first letter after word break
 931                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
 932
 933                         if ( function_exists( 'mb_strtoupper' ) )
 934                                 return preg_replace_callback(
 935                                         $replaceRegexp,
 936                                         array($this,"ucwordbreaksCallbackMB"),
 937                                         $str
 938                                 );
 939                         else
 940                                 return preg_replace_callback(
 941                                         $replaceRegexp,
 942                                         array($this,"ucwordsCallbackWiki"),
 943                                         $str
 944                                 );
 945                 }
 946                 else
 947                         return preg_replace_callback(
 948                         '/\b([\w\x80-\xff]+)\b/',
 949                         array($this,"ucwordbreaksCallbackAscii"),
 950                         $str );
 951         }
 952
 953         /**
 954          * Return a case-folded representation of $s
 955          *
 956          * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 957          * and $s2 are the same except for the case of their characters. It is not
 958          * necessary for the value returned to make sense when displayed.
 959          *
 960          * Do *not* perform any other normalisation in this function. If a caller
 961          * uses this function when it should be using a more general normalisation
 962          * function, then fix the caller.
 963          */
 964         function caseFold( $s ) {
 965                 return $this->uc( $s );
 966         }
 967
 968         function checkTitleEncoding( $s ) {
 969                 if( is_array( $s ) ) {
 970                         wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
 971                 }
 972                 # Check for non-UTF-8 URLs
 973                 $ishigh = preg_match( '/[\x80-\xff]/', $s);
 974                 if(!$ishigh) return $s;
 975
 976                 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
 977                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
 978                 if( $isutf8 ) return $s;
 979
 980                 return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
 981         }
 982
 983         function fallback8bitEncoding() {
 984                 $this->load();
 985                 return $this->fallback8bitEncoding;
 986         }
 987
        /**
         * Some languages have special punctuation to strip out
         * or characters which need to be converted for MySQL's
         * indexing to grok it correctly. Make such changes here.
         *
         * @param string $string
         * @return string
         */
 996         function stripForSearch( $string ) {
 997                 global $wgDBtype;
 998                 if ( $wgDBtype != 'mysql' ) {
 999                         return $string;
1000                 }
1001
1002                 # MySQL fulltext index doesn't grok utf-8, so we
1003                 # need to fold cases and convert to hex
1004
1005                 wfProfileIn( __METHOD__ );
1006                 if( function_exists( 'mb_strtolower' ) ) {
1007                         $out = preg_replace(
1008                                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
1009                                 "'U8' . bin2hex( \"$1\" )",
1010                                 mb_strtolower( $string ) );
1011                 } else {
1012                         list( , $wikiLowerChars ) = self::getCaseMaps();
1013                         $out = preg_replace(
1014                                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
1015                                 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
1016                                 $string );
1017                 }
1018                 wfProfileOut( __METHOD__ );
1019                 return $out;
1020         }
1021
1022         function convertForSearchResult( $termsArray ) {
1023                 # some languages, e.g. Chinese, need to do a conversion
1024                 # in order for search results to be displayed correctly
1025                 return $termsArray;
1026         }
1027
1028         /**
1029          * Get the first character of a string.
1030          *
1031          * @param string $s
1032          * @return string
1033          */
1034         function firstChar( $s ) {
1035                 $matches = array();
1036                 preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1037                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);
1038
1039                 return isset( $matches[1] ) ? $matches[1] : "";
1040         }
1041
        /**
         * Start-up hook for languages with an alternate character encoding.
         * The base implementation is intentionally empty.
         */
        function initEncoding() {
                # Some languages may have an alternate char encoding option
                # (Esperanto X-coding, Japanese furigana conversion, etc)
                # If this language is used as the primary content language,
                # an override to the defaults can be set here on startup.
        }
1048
1049         function recodeForEdit( $s ) {
1050                 # For some languages we'll want to explicitly specify
1051                 # which characters make it into the edit box raw
1052                 # or are converted in some way or another.
1053                 # Note that if wgOutputEncoding is different from
1054                 # wgInputEncoding, this text will be further converted
1055                 # to wgOutputEncoding.
1056                 global $wgEditEncoding;
1057                 if( $wgEditEncoding == '' or
1058                   $wgEditEncoding == 'UTF-8' ) {
1059                         return $s;
1060                 } else {
1061                         return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
1062                 }
1063         }
1064
1065         function recodeInput( $s ) {
1066                 # Take the previous into account.
1067                 global $wgEditEncoding;
1068                 if($wgEditEncoding != "") {
1069                         $enc = $wgEditEncoding;
1070                 } else {
1071                         $enc = 'UTF-8';
1072                 }
1073                 if( $enc == 'UTF-8' ) {
1074                         return $s;
1075                 } else {
1076                         return $this->iconv( $enc, 'UTF-8', $s );
1077                 }
1078         }
1079
1080         /**
1081          * For right-to-left language support
1082          *
1083          * @return bool
1084          */
1085         function isRTL() {
1086                 $this->load();
1087                 return $this->rtl;
1088         }
1089
1090         /**
1091          * A hidden direction mark (LRM or RLM), depending on the language direction
1092          *
1093          * @return string
1094          */
1095         function getDirMark() {
1096                 return $this->isRTL() ? "\xE2\x80\x8F" : "\xE2\x80\x8E";
1097         }
1098
1099         /**
1100          * An arrow, depending on the language direction
1101          *
1102          * @return string
1103          */
1104         function getArrow() {
1105                 return $this->isRTL() ? '←' : '→';
1106         }
1107
1108         /**
1109          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
1110          *
1111          * @return bool
1112          */
1113         function linkPrefixExtension() {
1114                 $this->load();
1115                 return $this->linkPrefixExtension;
1116         }
1117
        /**
         * Return this language's magic word table.
         *
         * The function is declared to return by reference, so callers that
         * bind the result with =& get a reference to $this->magicWords.
         *
         * @return mixed the contents of $this->magicWords after load() has run
         */
        function &getMagicWords() {
                $this->load();
                return $this->magicWords;
        }
1122
1123         # Fill a MagicWord object with data from here
1124         function getMagic( &$mw ) {
1125                 if ( !$this->mMagicHookDone ) {
1126                         $this->mMagicHookDone = true;
1127                         wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
1128                 }
1129                 if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
1130                         $rawEntry = $this->mMagicExtensions[$mw->mId];
1131                 } else {
1132                         $magicWords =& $this->getMagicWords();
1133                         if ( isset( $magicWords[$mw->mId] ) ) {
1134                                 $rawEntry = $magicWords[$mw->mId];
1135                         } else {
1136                                 # Fall back to English if local list is incomplete
1137                                 $magicWords =& Language::getMagicWords();
1138                                 $rawEntry = $magicWords[$mw->mId];
1139                         }
1140                 }
1141
1142                 if( !is_array( $rawEntry ) ) {
1143                         error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
1144                 }
1145                 $mw->mCaseSensitive = $rawEntry[0];
1146                 $mw->mSynonyms = array_slice( $rawEntry, 1 );
1147         }
1148
1149         /**
1150          * Add magic words to the extension array
1151          */
1152         function addMagicWordsByLang( $newWords ) {
1153                 $code = $this->getCode();
1154                 $fallbackChain = array();
1155                 while ( $code && !in_array( $code, $fallbackChain ) ) {
1156                         $fallbackChain[] = $code;
1157                         $code = self::getFallbackFor( $code );
1158                 }
1159                 if ( !in_array( 'en', $fallbackChain ) ) {
1160                         $fallbackChain[] = 'en';
1161                 }
1162                 $fallbackChain = array_reverse( $fallbackChain );
1163                 foreach ( $fallbackChain as $code ) {
1164                         if ( isset( $newWords[$code] ) ) {
1165                                 $this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
1166                         }
1167                 }
1168         }
1169
1170         /**
1171          * Get special page names, as an associative array
1172          *   case folded alias => real name
1173          */
1174         function getSpecialPageAliases() {
1175                 $this->load();
1176                 if ( !isset( $this->mExtendedSpecialPageAliases ) ) {
1177                         $this->mExtendedSpecialPageAliases = $this->specialPageAliases;
1178                         wfRunHooks( 'LanguageGetSpecialPageAliases',
1179                                 array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );
1180                 }
1181                 return $this->mExtendedSpecialPageAliases;
1182         }
1183
1184         /**
1185          * Italic is unsuitable for some languages
1186          *
1187          * @public
1188          *
1189          * @param string $text The text to be emphasized.
1190          * @return string
1191          */
1192         function emphasize( $text ) {
1193                 return "<em>$text</em>";
1194         }
1195
        /**
         * Normally we output all numbers in plain en_US style, that is
         * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
         * point twohundredthirtyfive. However this is not suitable for all
         * languages, some such as Punjabi want ੨੯੩,੨੯੧.੨੩੫ and others such as
         * Icelandic just want to use commas instead of dots, and dots instead
         * of commas like "293.291,235".
         *
         * An example of this function being called:
         * <code>
         * wfMsg( 'message', $wgLang->formatNum( $num ) )
         * </code>
         *
         * See LanguageGu.php for the Gujarati implementation and
         * LanguageIs.php for the , => . and . => , implementation.
         *
         * @todo check if it's viable to use localeconv() for the decimal
         *       separator thing.
         * @public
         * @param mixed $number the string to be formatted, should be an integer or
         *        a floating point number.
         * @param bool $nocommafy Set to true for special numbers like dates
         * @return string
         */
1220         function formatNum( $number, $nocommafy = false ) {
1221                 global $wgTranslateNumerals;
1222                 if (!$nocommafy) {
1223                         $number = $this->commafy($number);
1224                         $s = $this->separatorTransformTable();
1225                         if (!is_null($s)) { $number = strtr($number, $s); }
1226                 }
1227
1228                 if ($wgTranslateNumerals) {
1229                         $s = $this->digitTransformTable();
1230                         if (!is_null($s)) { $number = strtr($number, $s); }
1231                 }
1232
1233                 return $number;
1234         }
1235
1236         function parseFormattedNumber( $number ) {
1237                 $s = $this->digitTransformTable();
1238                 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1239
1240                 $s = $this->separatorTransformTable();
1241                 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1242
1243                 $number = strtr( $number, array (',' => '') );
1244                 return $number;
1245         }
1246
1247         /**
1248          * Adds commas to a given number
1249          *
1250          * @param mixed $_
1251          * @return string
1252          */
1253         function commafy($_) {
1254                 return strrev((string)preg_replace('/(\d{3})(?=\d)(?!\d*\.)/','$1,',strrev($_)));
1255         }
1256
1257         function digitTransformTable() {
1258                 $this->load();
1259                 return $this->digitTransformTable;
1260         }
1261
1262         function separatorTransformTable() {
1263                 $this->load();
1264                 return $this->separatorTransformTable;
1265         }
1266
1267
1268         /**
1269          * For the credit list in includes/Credits.php (action=credits)
1270          *
1271          * @param array $l
1272          * @return string
1273          */
1274         function listToText( $l ) {
1275                 $s = '';
1276                 $m = count($l) - 1;
1277                 for ($i = $m; $i >= 0; $i--) {
1278                         if ($i == $m) {
1279                                 $s = $l[$i];
1280                         } else if ($i == $m - 1) {
1281                                 $s = $l[$i] . ' ' . $this->getMessageFromDB( 'and' ) . ' ' . $s;
1282                         } else {
1283                                 $s = $l[$i] . ', ' . $s;
1284                         }
1285                 }
1286                 return $s;
1287         }
1288
        /**
         * Truncate a string to a specified length in bytes, appending an optional
         * string (e.g. an ellipsis)
         *
         * The database offers limited byte lengths for some columns in the database;
         * multi-byte character sets mean we need to ensure that only whole characters
         * are included, otherwise broken characters can be passed to the user
         *
         * If $length is negative, the string will be truncated from the beginning
         *
         * @param string $string String to truncate
         * @param int $length Maximum length (excluding the ellipsis)
         * @param string $ellipsis String to append to the truncated text
         * @return string
         */
1304         function truncate( $string, $length, $ellipsis = "" ) {
1305                 if( $length == 0 ) {
1306                         return $ellipsis;
1307                 }
1308                 if ( strlen( $string ) <= abs( $length ) ) {
1309                         return $string;
1310                 }
1311                 if( $length > 0 ) {
1312                         $string = substr( $string, 0, $length );
1313                         $char = ord( $string[strlen( $string ) - 1] );
1314                         $m = array();
1315                         if ($char >= 0xc0) {
1316                                 # We got the first byte only of a multibyte char; remove it.
1317                                 $string = substr( $string, 0, -1 );
1318                         } elseif( $char >= 0x80 &&
1319                                   preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
1320                                               '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
1321                             # We chopped in the middle of a character; remove it
1322                                 $string = $m[1];
1323                         }
1324                         return $string . $ellipsis;
1325                 } else {
1326                         $string = substr( $string, $length );
1327                         $char = ord( $string[0] );
1328                         if( $char >= 0x80 && $char < 0xc0 ) {
1329                                 # We chopped in the middle of a character; remove the whole thing
1330                                 $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
1331                         }
1332                         return $ellipsis . $string;
1333                 }
1334         }
1335
1336         /**
1337          * Grammatical transformations, needed for inflected languages
1338          * Invoked by putting {{grammar:case|word}} in a message
1339          *
1340          * @param string $word
1341          * @param string $case
1342          * @return string
1343          */
1344         function convertGrammar( $word, $case ) {
1345                 global $wgGrammarForms;
1346                 if ( isset($wgGrammarForms['en'][$case][$word]) ) {
1347                         return $wgGrammarForms['en'][$case][$word];
1348                 }
1349                 return $word;
1350         }
1351
        /**
         * Plural form transformations, needed for some languages.
         * For example, there are 3 forms of plural in Russian and Polish,
         * depending on "count mod 10". See [[w:Plural]]
         * For English it is pretty simple.
         *
         * Invoked by putting {{plural:count|wordform1|wordform2}}
         * or {{plural:count|wordform1|wordform2|wordform3}}
         *
         * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
         *
         * @param integer $count
         * @param string $w1 wordform1
         * @param string $w2 wordform2
         * @param string $w3 wordform3 (optional in the {{plural:}} syntax)
         * @param string $w4 wordform4 (optional in the {{plural:}} syntax)
         * @param string $w5 wordform5 (optional in the {{plural:}} syntax)
         * @return string
         */
1371         function convertPlural( $count, $w1, $w2, $w3, $w4, $w5) {
1372                 return ( $count == '1' || $count == '-1' ) ? $w1 : $w2;
1373         }
1374
        /**
         * For translating expiry times
         * @param string $str The validated block time in English
         * @return string Somehow translated block time
         * @see LanguageFi.php for example implementation
         */
1381         function translateBlockExpiry( $str ) {
1382
1383                 $scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );
1384
1385                 if ( $scBlockExpiryOptions == '-') {
1386                         return $str;
1387                 }
1388
1389                 foreach (explode(',', $scBlockExpiryOptions) as $option) {
1390                         if ( strpos($option, ":") === false )
1391                                 continue;
1392                         list($show, $value) = explode(":", $option);
1393                         if ( strcmp ( $str, $value) == 0 ) {
1394                                 return htmlspecialchars( trim( $show ) );
1395                         }
1396                 }
1397
1398                 return $str;
1399         }
1400
/**
 * Segment text before it is handed to the diff engine.
 *
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation returns the text as is.
 *
 * @param string $text
 * @return string the same text, unmodified
 */
function segmentForDiff( $text ) {
    $segmented = $text;
    return $segmented;
}
1411
/**
 * Undo the segmentation applied for diffing so the result can be shown.
 *
 * This base implementation returns the text as is.
 *
 * @param string $text
 * @return string the same text, unmodified
 */
function unsegmentForDiff( $text ) {
    $unsegmented = $text;
    return $unsegmented;
}
1421
/**
 * Convert text to a different variant of the language.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param string $text
 * @param bool $isTitle passed through to the converter
 * @return string whatever the converter returns
 */
function convert( $text, $isTitle = false) {
    $converter = $this->mConverter;
    return $converter->convert( $text, $isTitle );
}
1426
/**
 * Convert text from within the Parser.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param string $text
 * @param mixed $parser taken by reference and handed on to the converter
 * @return string whatever the converter returns
 */
function parserConvert( $text, &$parser ) {
    $converter = $this->mConverter;
    return $converter->parserConvert( $text, $parser );
}
1431
/**
 * Check if this is a language with variants.
 *
 * @return bool true when getVariants() reports more than one entry
 */
function hasVariants(){
    $variantCount = count( $this->getVariants() );
    return $variantCount > 1;
}
1436
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param string $text
 * @return string whatever the converter returns
 */
function armourMath($text){
    $converter = $this->mConverter;
    return $converter->armourMath( $text );
}
1441
1442
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * The text is first run through convert() and the result is then passed
 * through htmlspecialchars().
 *
 * @param string $text
 * @param bool $isTitle forwarded unchanged to convert()
 * @return string
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
    $converted = $this->convert( $text, $isTitle );
    return htmlspecialchars( $converted );
}
1453
/**
 * Convert a category key.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param string $key
 * @return string whatever the converter returns
 */
function convertCategoryKey( $key ) {
    $converter = $this->mConverter;
    return $converter->convertCategoryKey( $key );
}
1457
/**
 * Get the list of variants supported by this language, as reported by
 * the converter object held in $this->mConverter.
 * See the sample implementation in LanguageZh.php.
 *
 * @return array an array of language codes
 */
function getVariants() {
    $converter = $this->mConverter;
    return $converter->getVariants();
}
1467
1468
/**
 * Get the preferred variant, as reported by the converter object held
 * in $this->mConverter.
 *
 * @param bool $fromUser forwarded unchanged to the converter
 * @return string whatever the converter returns
 */
function getPreferredVariant( $fromUser = true ) {
    $converter = $this->mConverter;
    return $converter->getPreferredVariant( $fromUser );
}
1472
/**
 * If a language supports multiple variants, it is possible that a link
 * that does not exist in one variant actually exists in another one.
 * This function tries to find it by handing both arguments, by
 * reference, to the converter. See e.g. LanguageZh.php
 *
 * @param string $link the name of the link
 * @param mixed $nt the title object of the link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt ) {
    $converter = $this->mConverter;
    $converter->findVariantLink( $link, $nt );
}
1486
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param string $text
 * @return array whatever the converter returns
 */
function convertLinkToAllVariants($text){
    $converter = $this->mConverter;
    return $converter->convertLinkToAllVariants( $text );
}
1496
1497
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value is obtained
 * from the converter object held in $this->mConverter.
 *
 * @return string
 * @public
 */
function getExtraHashOptions() {
    $converter = $this->mConverter;
    return $converter->getExtraHashOptions();
}
1508
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter object in $this->mConverter.
 *
 * @return string
 */
function getParsedTitle() {
    $converter = $this->mConverter;
    return $converter->getParsedTitle();
}
1519
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser. The work is done by the converter
 * object held in $this->mConverter.
 *
 * @param string $text text to be tagged for no conversion
 * @param bool $noParse forwarded unchanged to the converter
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse=false ) {
    $converter = $this->mConverter;
    return $converter->markNoConversion( $text, $noParse );
}
1530
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * Calls load() first so that the linkTrail property is populated.
 *
 * @return string
 * @public
 */
function linkTrail() {
    $this->load();
    $trail = $this->linkTrail;
    return $trail;
}
1542
/**
 * Get the language object itself.
 *
 * @return Language this very object
 */
function getLangObj() {
    $self = $this;
    return $self;
}
1546
/**
 * Get the RFC 3066 code for this language object
 *
 * @return string the value stored in $this->mCode
 */
function getCode() {
    $code = $this->mCode;
    return $code;
}
1553
/**
 * Set the language code of this object.
 *
 * @param string $code value stored in $this->mCode and later returned
 *                     by getCode()
 */
function setCode( $code ) {
    $this->mCode = $code;
    return;
}
1557
/**
 * Build a file name from a prefix, a language code and a suffix.
 *
 * The first character of the code is upper-cased and every '-' in it is
 * replaced by '_', so ( 'Language', 'zh-tw', '.php' ) gives
 * 'LanguageZh_tw.php'.
 *
 * $prefix used to declare a default of 'Language'. Because the required
 * $code parameter follows it, that default could never be used in a
 * valid call, and PHP 8 reports such signatures as deprecated, so the
 * default is no longer declared.
 *
 * @param string $prefix text placed before the transformed code
 * @param string $code language code
 * @param string $suffix text placed after the transformed code
 * @return string
 */
static function getFileName( $prefix, $code, $suffix = '.php' ) {
    return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
}
1561
/**
 * Get the path of the Messages source file for a language code.
 *
 * @param string $code language code
 * @return string path below $IP/languages/messages, built by getFileName()
 */
static function getMessagesFileName( $code ) {
    global $IP;
    $prefix = "$IP/languages/messages/Messages";
    return self::getFileName( $prefix, $code, '.php' );
}
1566
/**
 * Get the path of the Language class file for a language code.
 *
 * @param string $code language code
 * @return string path below $IP/languages/classes, built by getFileName()
 */
static function getClassFileName( $code ) {
    global $IP;
    $prefix = "$IP/languages/classes/Language";
    return self::getFileName( $prefix, $code, '.php' );
}
1571
/**
 * Get the complete localisation array for a language code.
 *
 * Makes sure the data is loaded through loadLocalisation() and then
 * returns the entry kept in the static localisation cache.
 *
 * @param string $code language code
 * @param bool $disableCache forwarded unchanged to loadLocalisation()
 * @return array
 */
static function getLocalisationArray( $code, $disableCache = false ) {
    self::loadLocalisation( $code, $disableCache );
    $localisation = self::$mLocalisationCache[$code];
    return $localisation;
}
1576
/**
 * Load localisation data for a given code into the static cache
 *
 * Unless $disableCache is set, the sources are tried in this order and
 * the first hit wins: the per-process cache, the precompiled serialized
 * data file, and the global cache in $wgMemc (accepted only if none of
 * its recorded dependency files is missing or newer than recorded).
 * Otherwise the Messages source file is included, merged with the
 * localisation of its fallback language, and the result is stored in
 * the per-process cache and, unless $disableCache is set, in $wgMemc.
 *
 * @param string $code Language code
 * @param bool $disableCache Skip all cache lookups and the $wgMemc write
 * @return array Dependencies, map of filenames to mtimes
 * @throws MWException for an empty code or a circular fallback chain
 */
static function loadLocalisation( $code, $disableCache = false ) {
    static $recursionGuard = array();
    global $wgMemc;

    if ( !$code ) {
        throw new MWException( "Invalid language code requested" );
    }

    if ( !$disableCache ) {
        # Try the per-process cache
        if ( isset( self::$mLocalisationCache[$code] ) ) {
            return self::$mLocalisationCache[$code]['deps'];
        }

        wfProfileIn( __METHOD__ );

        # Try the serialized directory
        $cache = wfGetPrecompiledData( self::getFileName( "Messages", $code, '.ser' ) );
        if ( $cache ) {
            self::$mLocalisationCache[$code] = $cache;
            wfDebug( "Language::loadLocalisation(): got localisation for $code from precompiled data file\n" );
            wfProfileOut( __METHOD__ );
            return self::$mLocalisationCache[$code]['deps'];
        }

        # Try the global cache
        $memcKey = wfMemcKey('localisation', $code );
        $cache = $wgMemc->get( $memcKey );
        if ( $cache ) {
            # Check file modification times in a single pass, remembering
            # which dependency invalidated the entry for the debug log
            $expiredFile = false;
            foreach ( $cache['deps'] as $file => $mtime ) {
                if ( !file_exists( $file ) || filemtime( $file ) > $mtime ) {
                    $expiredFile = $file;
                    break;
                }
            }
            if ( $expiredFile !== false ) {
                $wgMemc->delete( $memcKey );
                $cache = false;
                wfDebug( "Language::loadLocalisation(): localisation cache for $code had expired due to update of $expiredFile\n" );
            } else {
                self::$mLocalisationCache[$code] = $cache;
                wfDebug( "Language::loadLocalisation(): got localisation for $code from cache\n" );
                wfProfileOut( __METHOD__ );
                return $cache['deps'];
            }
        }
    } else {
        wfProfileIn( __METHOD__ );
    }

    # Default fallback, may be overridden when the messages file is included
    if ( $code != 'en' ) {
        $fallback = 'en';
    } else {
        $fallback = false;
    }

    # Load the primary localisation from the source file
    $filename = self::getMessagesFileName( $code );
    if ( !file_exists( $filename ) ) {
        wfDebug( "Language::loadLocalisation(): no localisation file for $code, using implicit fallback to en\n" );
        $cache = array();
        $deps = array();
    } else {
        $deps = array( $filename => filemtime( $filename ) );
        # The file is included into this function's scope; compact() then
        # collects the variables named in $mLocalisationKeys, $fallback
        # among them, which are defined at this point.
        require( $filename );
        $cache = compact( self::$mLocalisationKeys );
        wfDebug( "Language::loadLocalisation(): got localisation for $code from source\n" );
    }

    if ( !empty( $fallback ) ) {
        # Load the fallback localisation, with a circular reference guard
        if ( isset( $recursionGuard[$code] ) ) {
            # Keep profiling calls balanced before bailing out
            wfProfileOut( __METHOD__ );
            throw new MWException( "Error: Circular fallback reference in language code $code" );
        }
        $recursionGuard[$code] = true;
        $newDeps = self::loadLocalisation( $fallback, $disableCache );
        unset( $recursionGuard[$code] );

        $secondary = self::$mLocalisationCache[$fallback];
        $deps = array_merge( $deps, $newDeps );

        # Merge the fallback localisation with the current localisation
        foreach ( self::$mLocalisationKeys as $key ) {
            if ( isset( $cache[$key] ) ) {
                if ( isset( $secondary[$key] ) ) {
                    if ( in_array( $key, self::$mMergeableMapKeys ) ) {
                        # Map: entries of the current language win
                        $cache[$key] = $cache[$key] + $secondary[$key];
                    } elseif ( in_array( $key, self::$mMergeableListKeys ) ) {
                        $cache[$key] = array_merge( $secondary[$key], $cache[$key] );
                    } elseif ( in_array( $key, self::$mMergeableAliasListKeys ) ) {
                        $cache[$key] = array_merge_recursive( $cache[$key], $secondary[$key] );
                    }
                }
            } else {
                # Not defined by this language: take the fallback's value
                $cache[$key] = $secondary[$key];
            }
        }

        # Merge bookstore lists if requested
        if ( !empty( $cache['bookstoreList']['inherit'] ) ) {
            $cache['bookstoreList'] = array_merge( $cache['bookstoreList'], $secondary['bookstoreList'] );
        }
        if ( isset( $cache['bookstoreList']['inherit'] ) ) {
            unset( $cache['bookstoreList']['inherit'] );
        }
    }

    # Add dependencies to the cache entry
    $cache['deps'] = $deps;

    # Replace spaces with underscores in namespace names
    $cache['namespaceNames'] = str_replace( ' ', '_', $cache['namespaceNames'] );

    # Save to both caches
    self::$mLocalisationCache[$code] = $cache;
    if ( !$disableCache ) {
        $wgMemc->set( $memcKey, $cache );
    }

    wfProfileOut( __METHOD__ );
    return $deps;
}
1705
/**
 * Test if a given localisation cache is out of date with respect to the
 * source Messages files. This is done automatically for the global cache
 * in $wgMemc, but is only done on certain occasions for the serialized
 * data file.
 *
 * The cache counts as out of date as soon as one file listed in its
 * 'deps' map is missing or has a modification time later than the one
 * recorded.
 *
 * @param $cache mixed Either a language code or a cache array
 * @return bool true if out of date
 */
static function isLocalisationOutOfDate( $cache ) {
    if ( !is_array( $cache ) ) {
        # A language code was given: load it and inspect its cache entry
        self::loadLocalisation( $cache );
        $cache = self::$mLocalisationCache[$cache];
    }
    foreach ( $cache['deps'] as $path => $recordedMtime ) {
        $upToDate = file_exists( $path ) && !( filemtime( $path ) > $recordedMtime );
        if ( !$upToDate ) {
            return true;
        }
    }
    return false;
}
1728
/**
 * Get the fallback for a given language
 *
 * @param string $code language code
 * @return mixed the 'fallback' entry of the loaded localisation
 */
static function getFallbackFor( $code ) {
    self::loadLocalisation( $code );
    $localisation = self::$mLocalisationCache[$code];
    return $localisation['fallback'];
}
1736
/**
 * Get all messages for a given language
 *
 * @param string $code language code
 * @return mixed the 'messages' entry of the loaded localisation
 */
static function getMessagesFor( $code ) {
    self::loadLocalisation( $code );
    $localisation = self::$mLocalisationCache[$code];
    return $localisation['messages'];
}
1744
/**
 * Get a message for a given language
 *
 * @param string $key message key
 * @param string $code language code
 * @return mixed the message stored under $key, or null if there is none
 */
static function getMessageFor( $key, $code ) {
    self::loadLocalisation( $code );
    if ( isset( self::$mLocalisationCache[$code]['messages'][$key] ) ) {
        return self::$mLocalisationCache[$code]['messages'][$key];
    }
    return null;
}
1752
/**
 * Load localisation data for this object
 *
 * On the first call every key listed in $mLocalisationKeys is copied
 * from the static localisation cache into a property of the same name,
 * the object is flagged as loaded and fixUpSettings() is run. Later
 * calls do nothing.
 */
function load() {
    if ( $this->mLoaded ) {
        return;
    }

    $code = $this->getCode();
    self::loadLocalisation( $code );
    $data = self::$mLocalisationCache[$code];
    foreach ( self::$mLocalisationKeys as $property ) {
        $this->$property = $data[$property];
    }
    $this->mLoaded = true;

    $this->fixUpSettings();
}
1768
/**
 * Do any necessary post-cache-load settings adjustment
 *
 * Applies site configuration to the loaded localisation:
 *  - $wgExtraNamespaces entries are added to the namespace names and
 *    take precedence over the loaded ones
 *  - NS_PROJECT is named $wgMetaNamespace; NS_PROJECT_TALK is named
 *    $wgMetaNamespaceTalk if set, otherwise it is derived from the
 *    localised name by substituting $1 and {{grammar:...}}
 *  - $this->mNamespaceIds is rebuilt as a map of lower-cased names and
 *    aliases to namespace indexes
 *  - a default date format of 'dmy or mdy' is resolved according to
 *    $wgAmericanDates
 */
function fixUpSettings() {
    global $wgExtraNamespaces, $wgMetaNamespace, $wgMetaNamespaceTalk,
        $wgNamespaceAliases, $wgAmericanDates;
    wfProfileIn( __METHOD__ );
    if ( $wgExtraNamespaces ) {
        $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames;
    }

    $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
    if ( $wgMetaNamespaceTalk ) {
        $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
    } else {
        $talk = $this->namespaceNames[NS_PROJECT_TALK];
        $talk = str_replace( '$1', $wgMetaNamespace, $talk );

        # Allow grammar transformations
        # Allowing full message-style parsing would make simple requests
        # such as action=raw much more expensive than they need to be.
        # This will hopefully cover most cases.
        # The callback is built from $this by value: objects are handles,
        # and taking a reference to $this is a PHP 4 idiom that is
        # unnecessary in PHP 5 and hazardous under PHP 7.1 and later.
        $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
            array( $this, 'replaceGrammarInNamespace' ), $talk );
        $talk = str_replace( ' ', '_', $talk );
        $this->namespaceNames[NS_PROJECT_TALK] = $talk;
    }

    # The above mixing may leave namespaces out of canonical order.
    # Re-order by namespace ID number...
    ksort( $this->namespaceNames );

    # Put namespace names and aliases into a hashtable.
    # If this is too slow, then we should arrange it so that it is done
    # before caching. The catch is that at pre-cache time, the above
    # class-specific fixup hasn't been done.
    # Later assignments overwrite earlier ones, so site-wide aliases win
    # over language aliases, which win over the namespace names.
    $this->mNamespaceIds = array();
    foreach ( $this->namespaceNames as $index => $name ) {
        $this->mNamespaceIds[$this->lc($name)] = $index;
    }
    if ( $this->namespaceAliases ) {
        foreach ( $this->namespaceAliases as $name => $index ) {
            $this->mNamespaceIds[$this->lc($name)] = $index;
        }
    }
    if ( $wgNamespaceAliases ) {
        foreach ( $wgNamespaceAliases as $name => $index ) {
            $this->mNamespaceIds[$this->lc($name)] = $index;
        }
    }

    if ( $this->defaultDateFormat == 'dmy or mdy' ) {
        $this->defaultDateFormat = $wgAmericanDates ? 'mdy' : 'dmy';
    }
    wfProfileOut( __METHOD__ );
}
1825
/**
 * preg_replace_callback() handler used by fixUpSettings() for
 * {{grammar:case|word}} constructs in a namespace name.
 *
 * @param array $m regex match; $m[1] is the text before the pipe and
 *                 $m[2] the text after it
 * @return string result of convertGrammar() called with the trimmed
 *                $m[2] as first and the trimmed $m[1] as second argument
 */
function replaceGrammarInNamespace( $m ) {
    $word = trim( $m[2] );
    $case = trim( $m[1] );
    return $this->convertGrammar( $word, $case );
}
1829
/**
 * Get the upper-case and lower-case conversion tables.
 *
 * The tables are read from the precompiled data file 'Utf8Case.ser' on
 * the first call and kept in static variables for later calls.
 * NOTE(review): extract() presumably defines $wikiUpperChars and
 * $wikiLowerChars from that data -- confirm against the file contents.
 *
 * @return array two elements: $wikiUpperChars, then $wikiLowerChars
 * @throws MWException if the precompiled data cannot be read
 */
static function getCaseMaps() {
    static $wikiUpperChars, $wikiLowerChars;

    if ( !isset( $wikiUpperChars ) ) {
        wfProfileIn( __METHOD__ );
        $tables = wfGetPrecompiledData( 'Utf8Case.ser' );
        if ( $tables === false ) {
            throw new MWException(
                "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
        }
        # Assigns the array entries to same-named local (static) variables
        extract( $tables );
        wfProfileOut( __METHOD__ );
    }

    return array( $wikiUpperChars, $wikiLowerChars );
}
1846
/**
 * Format a time period given in seconds as a short string made of
 * numbers followed by the 'hours-abbrev', 'minutes-abbrev' and
 * 'seconds-abbrev' messages.
 *
 * Periods that round to less than 10 seconds are shown with one decimal
 * place. Longer periods are rounded to whole seconds first and only then
 * split into hours, minutes and seconds, so a component can never be
 * displayed as 60 (119.7 gives 2 minutes 0 seconds, not 1 minute
 * 60 seconds).
 *
 * @param float $seconds
 * @return string
 */
function formatTimePeriod( $seconds ) {
    # Decide on the rounded value, so that 9.96 is not shown as "10.0"
    if ( round( $seconds * 10 ) < 100 ) {
        return $this->formatNum( sprintf( "%.1f", $seconds ) ) . wfMsg( 'seconds-abbrev' );
    }

    # Round once, then decompose: rounding each component separately
    # would let the seconds part reach 60 without carrying over
    $total = round( $seconds );
    if ( $total < 60 ) {
        return $this->formatNum( $total ) . wfMsg( 'seconds-abbrev' );
    }

    if ( $total < 3600 ) {
        $minutes = floor( $total / 60 );
        $secondsPart = $total - $minutes * 60;
        return $this->formatNum( $minutes ) . wfMsg( 'minutes-abbrev' ) .
            $this->formatNum( $secondsPart ) . wfMsg( 'seconds-abbrev' );
    }

    $hours = floor( $total / 3600 );
    $minutes = floor( ( $total - $hours * 3600 ) / 60 );
    $secondsPart = $total - $hours * 3600 - $minutes * 60;
    return $this->formatNum( $hours ) . wfMsg( 'hours-abbrev' ) .
        $this->formatNum( $minutes ) . wfMsg( 'minutes-abbrev' ) .
        $this->formatNum( $secondsPart ) . wfMsg( 'seconds-abbrev' );
}
1864
/**
 * Format a bit rate using the largest fitting unit out of bps, kbps,
 * Mbps and Gbps (steps of 1000).
 *
 * Mantissas below 10 keep one decimal place, larger ones are rounded to
 * whole numbers. Values of zero or less are output as they are, in bps.
 * The unit index is clamped to the available units: rates below 1 bps
 * stay in bps and rates of 1000 Gbps and above stay in Gbps, instead of
 * indexing outside the unit table.
 *
 * @param float $bps bit rate in bits per second
 * @return string formatted number directly followed by the unit
 */
function formatBitrate( $bps ) {
    $units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
    if ( $bps <= 0 ) {
        return $this->formatNum( $bps ) . $units[0];
    }

    # floor() yields a float; cast it so it is a proper array index,
    # and keep it within the bounds of $units
    $unitIndex = (int)floor( log10( $bps ) / 3 );
    $unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );

    $mantissa = $bps / pow( 1000, $unitIndex );
    if ( $mantissa < 10 ) {
        $mantissa = round( $mantissa, 1 );
    } else {
        $mantissa = round( $mantissa );
    }
    return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
1879
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question
 *
 * The size is divided by 1024 for as long as it is greater than 1024
 * and a larger unit is available. Bytes and kilobytes are rounded to
 * whole numbers, megabytes and gigabytes to two decimal places. The
 * number is inserted for $1 in the message of the chosen unit.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
    $messageKeys = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );
    $largest = count( $messageKeys ) - 1;

    # Step up one unit at a time while the value still exceeds 1024
    $magnitude = 0;
    while ( $magnitude < $largest && $size > 1024 ) {
        $size = $size / 1024;
        $magnitude++;
    }

    # For small sizes no decimal places necessary;
    # for MB and bigger two decimal places are smarter
    $decimals = ( $magnitude >= 2 ) ? 2 : 0;
    $rounded = round( $size, $decimals );

    $template = $this->getMessageFromDB( $messageKeys[$magnitude] );
    return str_replace( '$1', $this->formatNum( $rounded ), $template );
}
1912 }
1913
1914
1915