languages/Language.php

   1 <?php
   2 /**
   3  * @addtogroup Language
   4  */
   5
   6 if( !defined( 'MEDIAWIKI' ) ) {
   7         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
   8         exit( 1 );
   9 }
  10
  11 #
  12 # In general you should not make customizations in these language files
  13 # directly, but should use the MediaWiki: special namespace to customize
  14 # user interface messages through the wiki.
  15 # See http://meta.wikipedia.org/wiki/MediaWiki_namespace
  16 #
  17 # NOTE TO TRANSLATORS: Do not copy this whole file when making translations!
  18 # A lot of common constants and a base class with inheritable methods are
  19 # defined here, which should not be redefined. See the other LanguageXx.php
  20 # files for examples.
  21 #
  22
  23 # Read language names
  24 global $wgLanguageNames;
  25 require_once( dirname(__FILE__) . '/Names.php' ) ;
  26
  27 global $wgInputEncoding, $wgOutputEncoding;
  28
  29 /**
  30  * These are always UTF-8, they exist only for backwards compatibility
  31  */
  32 $wgInputEncoding    = "UTF-8";
  33 $wgOutputEncoding       = "UTF-8";
  34
  35 if( function_exists( 'mb_strtoupper' ) ) {
  36         mb_internal_encoding('UTF-8');
  37 }
  38
  39 /* a fake language converter */
  40 class FakeConverter {
  41         var $mLang;
  42         function FakeConverter($langobj) {$this->mLang = $langobj;}
  43         function convert($t, $i) {return $t;}
  44         function parserConvert($t, $p) {return $t;}
  45         function getVariants() { return array( $this->mLang->getCode() ); }
  46         function getPreferredVariant() {return $this->mLang->getCode(); }
  47         function findVariantLink(&$l, &$n) {}
  48         function getExtraHashOptions() {return '';}
  49         function getParsedTitle() {return '';}
  50         function markNoConversion($text, $noParse=false) {return $text;}
  51         function convertCategoryKey( $key ) {return $key; }
  52         function convertLinkToAllVariants($text){ return array( $this->mLang->getCode() => $text); }
  53         function armourMath($text){ return $text; }
  54 }
  55
  56 #--------------------------------------------------------------------------
  57 # Internationalisation code
  58 #--------------------------------------------------------------------------
  59
  60 class Language {
  61         var $mConverter, $mVariants, $mCode, $mLoaded = false;
  62         var $mMagicExtensions = array(), $mMagicHookDone = false;
  63
  64         static public $mLocalisationKeys = array( 'fallback', 'namespaceNames',
  65                 'skinNames', 'mathNames',
  66                 'bookstoreList', 'magicWords', 'messages', 'rtl', 'digitTransformTable',
  67                 'separatorTransformTable', 'fallback8bitEncoding', 'linkPrefixExtension',
  68                 'defaultUserOptionOverrides', 'linkTrail', 'namespaceAliases',
  69                 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap',
  70                 'defaultDateFormat', 'extraUserToggles', 'specialPageAliases' );
  71
  72         static public $mMergeableMapKeys = array( 'messages', 'namespaceNames', 'mathNames',
  73                 'dateFormats', 'defaultUserOptionOverrides', 'magicWords' );
  74
  75         static public $mMergeableListKeys = array( 'extraUserToggles' );
  76
  77         static public $mMergeableAliasListKeys = array( 'specialPageAliases' );
  78
  79         static public $mLocalisationCache = array();
  80
  81         static public $mWeekdayMsgs = array(
  82                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
  83                 'friday', 'saturday'
  84         );
  85
  86         static public $mWeekdayAbbrevMsgs = array(
  87                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
  88         );
  89
  90         static public $mMonthMsgs = array(
  91                 'january', 'february', 'march', 'april', 'may_long', 'june',
  92                 'july', 'august', 'september', 'october', 'november',
  93                 'december'
  94         );
  95         static public $mMonthGenMsgs = array(
  96                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
  97                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
  98                 'december-gen'
  99         );
 100         static public $mMonthAbbrevMsgs = array(
 101                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
 102                 'sep', 'oct', 'nov', 'dec'
 103         );
 104
 105         /**
 106          * Create a language object for a given language code
 107          */
 108         static function factory( $code ) {
 109                 global $IP;
 110                 static $recursionLevel = 0;
 111
 112                 if ( $code == 'en' ) {
 113                         $class = 'Language';
 114                 } else {
 115                         $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 116                         // Preload base classes to work around APC/PHP5 bug
 117                         if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
 118                                 include_once("$IP/languages/classes/$class.deps.php");
 119                         }
 120                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 121                                 include_once("$IP/languages/classes/$class.php");
 122                         }
 123                 }
 124
 125                 if ( $recursionLevel > 5 ) {
 126                         throw new MWException( "Language fallback loop detected when creating class $class\n" );
 127                 }
 128
 129                 if( ! class_exists( $class ) ) {
 130                         $fallback = Language::getFallbackFor( $code );
 131                         ++$recursionLevel;
 132                         $lang = Language::factory( $fallback );
 133                         --$recursionLevel;
 134                         $lang->setCode( $code );
 135                 } else {
 136                         $lang = new $class;
 137                 }
 138
 139                 return $lang;
 140         }
 141
 142         function __construct() {
 143                 $this->mConverter = new FakeConverter($this);
 144                 // Set the code to the name of the descendant
 145                 if ( get_class( $this ) == 'Language' ) {
 146                         $this->mCode = 'en';
 147                 } else {
 148                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 149                 }
 150         }
 151
 152         /**
 153          * Hook which will be called if this is the content language.
 154          * Descendants can use this to register hook functions or modify globals
 155          */
 156         function initContLang() {}
 157
 158         /**
 159          * @deprecated
 160          * @return array
 161          */
 162         function getDefaultUserOptions() {
 163                 return User::getDefaultOptions();
 164         }
 165
 166         function getFallbackLanguageCode() {
 167                 $this->load();
 168                 return $this->fallback;
 169         }
 170
 171         /**
 172          * Exports $wgBookstoreListEn
 173          * @return array
 174          */
 175         function getBookstoreList() {
 176                 $this->load();
 177                 return $this->bookstoreList;
 178         }
 179
 180         /**
 181          * @return array
 182          */
 183         function getNamespaces() {
 184                 $this->load();
 185                 return $this->namespaceNames;
 186         }
 187
 188         /**
 189          * A convenience function that returns the same thing as
 190          * getNamespaces() except with the array values changed to ' '
 191          * where it found '_', useful for producing output to be displayed
 192          * e.g. in <select> forms.
 193          *
 194          * @return array
 195          */
 196         function getFormattedNamespaces() {
 197                 $ns = $this->getNamespaces();
 198                 foreach($ns as $k => $v) {
 199                         $ns[$k] = strtr($v, '_', ' ');
 200                 }
 201                 return $ns;
 202         }
 203
 204         /**
 205          * Get a namespace value by key
 206          * <code>
 207          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 208          * echo $mw_ns; // prints 'MediaWiki'
 209          * </code>
 210          *
 211          * @param int $index the array key of the namespace to return
 212          * @return mixed, string if the namespace value exists, otherwise false
 213          */
 214         function getNsText( $index ) {
 215                 $ns = $this->getNamespaces();
 216                 return isset( $ns[$index] ) ? $ns[$index] : false;
 217         }
 218
 219         /**
 220          * A convenience function that returns the same thing as
 221          * getNsText() except with '_' changed to ' ', useful for
 222          * producing output.
 223          *
  224          * @return string
 225          */
 226         function getFormattedNsText( $index ) {
 227                 $ns = $this->getNsText( $index );
 228                 return strtr($ns, '_', ' ');
 229         }
 230
 231         /**
 232          * Get a namespace key by value, case insensitive.
 233          * Only matches namespace names for the current language, not the
 234          * canonical ones defined in Namespace.php.
 235          *
 236          * @param string $text
 237          * @return mixed An integer if $text is a valid value otherwise false
 238          */
 239         function getLocalNsIndex( $text ) {
 240                 $this->load();
 241                 $lctext = $this->lc($text);
 242                 return isset( $this->mNamespaceIds[$lctext] ) ? $this->mNamespaceIds[$lctext] : false;
 243         }
 244
 245         /**
 246          * Get a namespace key by value, case insensitive.  Canonical namespace
 247          * names override custom ones defined for the current language.
 248          *
 249          * @param string $text
 250          * @return mixed An integer if $text is a valid value otherwise false
 251          */
 252         function getNsIndex( $text ) {
 253                 $this->load();
 254                 $lctext = $this->lc($text);
 255                 if( ( $ns = Namespace::getCanonicalIndex( $lctext ) ) !== null ) return $ns;
 256                 return isset( $this->mNamespaceIds[$lctext] ) ? $this->mNamespaceIds[$lctext] : false;
 257         }
 258
 259         /**
 260          * short names for language variants used for language conversion links.
 261          *
 262          * @param string $code
 263          * @return string
 264          */
 265         function getVariantname( $code ) {
 266                 return $this->getMessageFromDB( "variantname-$code" );
 267         }
 268
 269         function specialPage( $name ) {
 270                 $aliases = $this->getSpecialPageAliases();
 271                 if ( isset( $aliases[$name][0] ) ) {
 272                         $name = $aliases[$name][0];
 273                 }
 274                 return $this->getNsText(NS_SPECIAL) . ':' . $name;
 275         }
 276
 277         function getQuickbarSettings() {
 278                 return array(
 279                         $this->getMessage( 'qbsettings-none' ),
 280                         $this->getMessage( 'qbsettings-fixedleft' ),
 281                         $this->getMessage( 'qbsettings-fixedright' ),
 282                         $this->getMessage( 'qbsettings-floatingleft' ),
 283                         $this->getMessage( 'qbsettings-floatingright' )
 284                 );
 285         }
 286
 287         function getSkinNames() {
 288                 $this->load();
 289                 return $this->skinNames;
 290         }
 291
 292         function getMathNames() {
 293                 $this->load();
 294                 return $this->mathNames;
 295         }
 296
 297         function getDatePreferences() {
 298                 $this->load();
 299                 return $this->datePreferences;
 300         }
 301
 302         function getDateFormats() {
 303                 $this->load();
 304                 return $this->dateFormats;
 305         }
 306
 307         function getDefaultDateFormat() {
 308                 $this->load();
 309                 return $this->defaultDateFormat;
 310         }
 311
 312         function getDatePreferenceMigrationMap() {
 313                 $this->load();
 314                 return $this->datePreferenceMigrationMap;
 315         }
 316
 317         function getDefaultUserOptionOverrides() {
 318                 $this->load();
 319                 # XXX - apparently some languageas get empty arrays, didn't get to it yet -- midom
 320                 if (is_array($this->defaultUserOptionOverrides)) {
 321                         return $this->defaultUserOptionOverrides;
 322                 } else {
 323                         return array();
 324                 }
 325         }
 326
 327         function getExtraUserToggles() {
 328                 $this->load();
 329                 return $this->extraUserToggles;
 330         }
 331
 332         function getUserToggle( $tog ) {
 333                 return $this->getMessageFromDB( "tog-$tog" );
 334         }
 335
 336         /**
 337          * Get language names, indexed by code.
 338          * If $customisedOnly is true, only returns codes with a messages file
 339          */
 340         public static function getLanguageNames( $customisedOnly = false ) {
 341                 global $wgLanguageNames;
 342                 if ( !$customisedOnly ) {
 343                         return $wgLanguageNames;
 344                 }
 345
 346                 global $IP;
 347                 $names = array();
 348                 $dir = opendir( "$IP/languages/messages" );
 349                 while( false !== ( $file = readdir( $dir ) ) ) {
 350                         $m = array();
 351                         if( preg_match( '/Messages([A-Z][a-z_]+)\.php$/', $file, $m ) ) {
 352                                 $code = str_replace( '_', '-', strtolower( $m[1] ) );
 353                                 if ( isset( $wgLanguageNames[$code] ) ) {
 354                                         $names[$code] = $wgLanguageNames[$code];
 355                                 }
 356                         }
 357                 }
 358                 closedir( $dir );
 359                 return $names;
 360         }
 361
 362         /**
 363          * Ugly hack to get a message maybe from the MediaWiki namespace, if this
 364          * language object is the content or user language.
 365          */
 366         function getMessageFromDB( $msg ) {
 367                 global $wgContLang, $wgLang;
 368                 if ( $wgContLang->getCode() == $this->getCode() ) {
 369                         # Content language
 370                         return wfMsgForContent( $msg );
 371                 } elseif ( $wgLang->getCode() == $this->getCode() ) {
 372                         # User language
 373                         return wfMsg( $msg );
 374                 } else {
 375                         # Neither, get from localisation
 376                         return $this->getMessage( $msg );
 377                 }
 378         }
 379
 380         function getLanguageName( $code ) {
 381                 global $wgLanguageNames;
 382                 if ( ! array_key_exists( $code, $wgLanguageNames ) ) {
 383                         return '';
 384                 }
 385                 return $wgLanguageNames[$code];
 386         }
 387
 388         function getMonthName( $key ) {
 389                 return $this->getMessageFromDB( self::$mMonthMsgs[$key-1] );
 390         }
 391
 392         function getMonthNameGen( $key ) {
 393                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key-1] );
 394         }
 395
 396         function getMonthAbbreviation( $key ) {
 397                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key-1] );
 398         }
 399
 400         function getWeekdayName( $key ) {
 401                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key-1] );
 402         }
 403
 404         function getWeekdayAbbreviation( $key ) {
 405                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key-1] );
 406         }
 407
 408         /**
 409          * Used by date() and time() to adjust the time output.
 410          * @public
 411          * @param int   $ts the time in date('YmdHis') format
 412          * @param mixed $tz adjust the time by this amount (default false,
 413          *                  mean we get user timecorrection setting)
 414          * @return int
 415          */
 416         function userAdjust( $ts, $tz = false ) {
 417                 global $wgUser, $wgLocalTZoffset;
 418
 419                 if (!$tz) {
 420                         $tz = $wgUser->getOption( 'timecorrection' );
 421                 }
 422
 423                 # minutes and hours differences:
 424                 $minDiff = 0;
 425                 $hrDiff  = 0;
 426
 427                 if ( $tz === '' ) {
 428                         # Global offset in minutes.
 429                         if( isset($wgLocalTZoffset) ) {
 430                                 if( $wgLocalTZoffset >= 0 ) {
 431                                         $hrDiff = floor($wgLocalTZoffset / 60);
 432                                 } else {
 433                                         $hrDiff = ceil($wgLocalTZoffset / 60);
 434                                 }
 435                                 $minDiff = $wgLocalTZoffset % 60;
 436                         }
 437                 } elseif ( strpos( $tz, ':' ) !== false ) {
 438                         $tzArray = explode( ':', $tz );
 439                         $hrDiff = intval($tzArray[0]);
 440                         $minDiff = intval($hrDiff < 0 ? -$tzArray[1] : $tzArray[1]);
 441                 } else {
 442                         $hrDiff = intval( $tz );
 443                 }
 444
 445                 # No difference ? Return time unchanged
 446                 if ( 0 == $hrDiff && 0 == $minDiff ) { return $ts; }
 447
 448                 wfSuppressWarnings(); // E_STRICT system time bitching
 449                 # Generate an adjusted date
 450                 $t = mktime( (
 451                   (int)substr( $ts, 8, 2) ) + $hrDiff, # Hours
 452                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
 453                   (int)substr( $ts, 12, 2 ), # Seconds
 454                   (int)substr( $ts, 4, 2 ), # Month
 455                   (int)substr( $ts, 6, 2 ), # Day
 456                   (int)substr( $ts, 0, 4 ) ); #Year
 457
 458                 $date = date( 'YmdHis', $t );
 459                 wfRestoreWarnings();
 460
 461                 return $date;
 462         }
 463
 464         /**
 465          * This is a workalike of PHP's date() function, but with better
 466          * internationalisation, a reduced set of format characters, and a better
 467          * escaping format.
 468          *
 469          * Supported format characters are dDjlNwzWFmMntLYyaAgGhHiscrU. See the
 470          * PHP manual for definitions. There are a number of extensions, which
 471          * start with "x":
 472          *
 473          *    xn   Do not translate digits of the next numeric format character
 474          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 475          *    xr   Use roman numerals for the next numeric format character
 476          *    xx   Literal x
 477          *    xg   Genitive month name
 478          *
 479          * Characters enclosed in double quotes will be considered literal (with
 480          * the quotes themselves removed). Unmatched quotes will be considered
 481          * literal quotes. Example:
 482          *
 483          * "The month is" F       => The month is January
 484          * i's"                   => 20'11"
 485          *
 486          * Backslash escaping is also supported.
 487          *
 488          * Input timestamp is assumed to be pre-normalized to the desired local
 489          * time zone, if any.
 490          *
 491          * @param string $format
 492          * @param string $ts 14-character timestamp
 493          *      YYYYMMDDHHMMSS
 494          *      01234567890123
 495          */
 496         function sprintfDate( $format, $ts ) {
 497                 $s = '';
 498                 $raw = false;
 499                 $roman = false;
 500                 $unix = false;
 501                 $rawToggle = false;
 502                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
 503                         $num = false;
 504                         $code = $format[$p];
 505                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
 506                                 $code .= $format[++$p];
 507                         }
 508
 509                         switch ( $code ) {
 510                                 case 'xx':
 511                                         $s .= 'x';
 512                                         break;
 513                                 case 'xn':
 514                                         $raw = true;
 515                                         break;
 516                                 case 'xN':
 517                                         $rawToggle = !$rawToggle;
 518                                         break;
 519                                 case 'xr':
 520                                         $roman = true;
 521                                         break;
 522                                 case 'xg':
 523                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
 524                                         break;
 525                                 case 'd':
 526                                         $num = substr( $ts, 6, 2 );
 527                                         break;
 528                                 case 'D':
 529                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 530                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
 531                                         break;
 532                                 case 'j':
 533                                         $num = intval( substr( $ts, 6, 2 ) );
 534                                         break;
 535                                 case 'l':
 536                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 537                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
 538                                         break;
 539                                 case 'N':
 540                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 541                                         $w = gmdate( 'w', $unix );
 542                                         $num = $w ? $w : 7;
 543                                         break;
 544                                 case 'w':
 545                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 546                                         $num = gmdate( 'w', $unix );
 547                                         break;
 548                                 case 'z':
 549                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 550                                         $num = gmdate( 'z', $unix );
 551                                         break;
 552                                 case 'W':
 553                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 554                                         $num = gmdate( 'W', $unix );
 555                                         break;
 556                                 case 'F':
 557                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
 558                                         break;
 559                                 case 'm':
 560                                         $num = substr( $ts, 4, 2 );
 561                                         break;
 562                                 case 'M':
 563                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
 564                                         break;
 565                                 case 'n':
 566                                         $num = intval( substr( $ts, 4, 2 ) );
 567                                         break;
 568                                 case 't':
 569                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 570                                         $num = gmdate( 't', $unix );
 571                                         break;
 572                                 case 'L':
 573                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 574                                         $num = gmdate( 'L', $unix );
 575                                         break;
 576                                 case 'Y':
 577                                         $num = substr( $ts, 0, 4 );
 578                                         break;
 579                                 case 'y':
 580                                         $num = substr( $ts, 2, 2 );
 581                                         break;
 582                                 case 'a':
 583                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
 584                                         break;
 585                                 case 'A':
 586                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
 587                                         break;
 588                                 case 'g':
 589                                         $h = substr( $ts, 8, 2 );
 590                                         $num = $h % 12 ? $h % 12 : 12;
 591                                         break;
 592                                 case 'G':
 593                                         $num = intval( substr( $ts, 8, 2 ) );
 594                                         break;
 595                                 case 'h':
 596                                         $h = substr( $ts, 8, 2 );
 597                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
 598                                         break;
 599                                 case 'H':
 600                                         $num = substr( $ts, 8, 2 );
 601                                         break;
 602                                 case 'i':
 603                                         $num = substr( $ts, 10, 2 );
 604                                         break;
 605                                 case 's':
 606                                         $num = substr( $ts, 12, 2 );
 607                                         break;
 608                                 case 'c':
 609                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 610                                         $s .= gmdate( 'c', $unix );
 611                                         break;
 612                                 case 'r':
 613                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 614                                         $s .= gmdate( 'r', $unix );
 615                                         break;
 616                                 case 'U':
 617                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 618                                         $num = $unix;
 619                                         break;
 620                                 case '\\':
 621                                         # Backslash escaping
 622                                         if ( $p < strlen( $format ) - 1 ) {
 623                                                 $s .= $format[++$p];
 624                                         } else {
 625                                                 $s .= '\\';
 626                                         }
 627                                         break;
 628                                 case '"':
 629                                         # Quoted literal
 630                                         if ( $p < strlen( $format ) - 1 ) {
 631                                                 $endQuote = strpos( $format, '"', $p + 1 );
 632                                                 if ( $endQuote === false ) {
 633                                                         # No terminating quote, assume literal "
 634                                                         $s .= '"';
 635                                                 } else {
 636                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
 637                                                         $p = $endQuote;
 638                                                 }
 639                                         } else {
 640                                                 # Quote at end of string, assume literal "
 641                                                 $s .= '"';
 642                                         }
 643                                         break;
 644                                 default:
 645                                         $s .= $format[$p];
 646                         }
 647                         if ( $num !== false ) {
 648                                 if ( $rawToggle || $raw ) {
 649                                         $s .= $num;
 650                                         $raw = false;
 651                                 } elseif ( $roman ) {
 652                                         $s .= self::romanNumeral( $num );
 653                                         $roman = false;
 654                                 } else {
 655                                         $s .= $this->formatNum( $num, true );
 656                                 }
 657                                 $num = false;
 658                         }
 659                 }
 660                 return $s;
 661         }
 662
 663         /**
 664          * Roman number formatting up to 3000
 665          */
 666         static function romanNumeral( $num ) {
 667                 static $table = array(
 668                         array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
 669                         array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
 670                         array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
 671                         array( '', 'M', 'MM', 'MMM' )
 672                 );
 673
 674                 $num = intval( $num );
 675                 if ( $num > 3000 || $num <= 0 ) {
 676                         return $num;
 677                 }
 678
 679                 $s = '';
 680                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
 681                         if ( $num >= $pow10 ) {
 682                                 $s .= $table[$i][floor($num / $pow10)];
 683                         }
 684                         $num = $num % $pow10;
 685                 }
 686                 return $s;
 687         }
 688
 689         /**
 690          * This is meant to be used by time(), date(), and timeanddate() to get
 691          * the date preference they're supposed to use, it should be used in
 692          * all children.
 693          *
 694          *<code>
 695          * function timeanddate([...], $format = true) {
 696          *      $datePreference = $this->dateFormat($format);
 697          * [...]
 698          * }
 699          *</code>
 700          *
 701          * @param mixed $usePrefs: if true, the user's preference is used
 702          *                         if false, the site/language default is used
 703          *                         if int/string, assumed to be a format.
 704          * @return string
 705          */
 706         function dateFormat( $usePrefs = true ) {
 707                 global $wgUser;
 708
 709                 if( is_bool( $usePrefs ) ) {
 710                         if( $usePrefs ) {
 711                                 $datePreference = $wgUser->getDatePreference();
 712                         } else {
 713                                 $options = User::getDefaultOptions();
 714                                 $datePreference = (string)$options['date'];
 715                         }
 716                 } else {
 717                         $datePreference = (string)$usePrefs;
 718                 }
 719
 720                 // return int
 721                 if( $datePreference == '' ) {
 722                         return 'default';
 723                 }
 724
 725                 return $datePreference;
 726         }
 727
 728         /**
 729          * @public
 730          * @param mixed  $ts the time format which needs to be turned into a
 731          *               date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 732          * @param bool   $adj whether to adjust the time output according to the
 733          *               user configured offset ($timecorrection)
 734          * @param mixed  $format true to use user's date format preference
 735          * @param string $timecorrection the time offset as returned by
 736          *               validateTimeZone() in Special:Preferences
 737          * @return string
 738          */
 739         function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
 740                 $this->load();
 741                 if ( $adj ) {
 742                         $ts = $this->userAdjust( $ts, $timecorrection );
 743                 }
 744
 745                 $pref = $this->dateFormat( $format );
 746                 if( $pref == 'default' || !isset( $this->dateFormats["$pref date"] ) ) {
 747                         $pref = $this->defaultDateFormat;
 748                 }
 749                 return $this->sprintfDate( $this->dateFormats["$pref date"], $ts );
 750         }
 751
 752         /**
 753         * @public
 754         * @param mixed  $ts the time format which needs to be turned into a
 755         *               date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 756         * @param bool   $adj whether to adjust the time output according to the
 757         *               user configured offset ($timecorrection)
 758         * @param mixed  $format true to use user's date format preference
 759         * @param string $timecorrection the time offset as returned by
 760         *               validateTimeZone() in Special:Preferences
 761         * @return string
 762         */
 763         function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
 764                 $this->load();
 765                 if ( $adj ) {
 766                         $ts = $this->userAdjust( $ts, $timecorrection );
 767                 }
 768
 769                 $pref = $this->dateFormat( $format );
 770                 if( $pref == 'default' || !isset( $this->dateFormats["$pref time"] ) ) {
 771                         $pref = $this->defaultDateFormat;
 772                 }
 773                 return $this->sprintfDate( $this->dateFormats["$pref time"], $ts );
 774         }
 775
 776         /**
 777         * @public
 778         * @param mixed  $ts the time format which needs to be turned into a
 779         *               date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 780         * @param bool   $adj whether to adjust the time output according to the
 781         *               user configured offset ($timecorrection)
 782
 783         * @param mixed  $format what format to return, if it's false output the
 784         *               default one (default true)
 785         * @param string $timecorrection the time offset as returned by
 786         *               validateTimeZone() in Special:Preferences
 787         * @return string
 788         */
 789         function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false) {
 790                 $this->load();
 791
 792                 $ts = wfTimestamp( TS_MW, $ts );
 793
 794                 if ( $adj ) {
 795                         $ts = $this->userAdjust( $ts, $timecorrection );
 796                 }
 797
 798                 $pref = $this->dateFormat( $format );
 799                 if( $pref == 'default' || !isset( $this->dateFormats["$pref both"] ) ) {
 800                         $pref = $this->defaultDateFormat;
 801                 }
 802
 803                 return $this->sprintfDate( $this->dateFormats["$pref both"], $ts );
 804         }
 805
 806         function getMessage( $key ) {
 807                 $this->load();
 808                 return isset( $this->messages[$key] ) ? $this->messages[$key] : null;
 809         }
 810
 811         function getAllMessages() {
 812                 $this->load();
 813                 return $this->messages;
 814         }
 815
 816         function iconv( $in, $out, $string ) {
 817                 # For most languages, this is a wrapper for iconv
 818                 return iconv( $in, $out . '//IGNORE', $string );
 819         }
 820
 821         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
 822         function ucwordbreaksCallbackAscii($matches){
 823                 return $this->ucfirst($matches[1]);
 824         }
 825
 826         function ucwordbreaksCallbackMB($matches){
 827                 return mb_strtoupper($matches[0]);
 828         }
 829
 830         function ucCallback($matches){
 831                 list( $wikiUpperChars ) = self::getCaseMaps();
 832                 return strtr( $matches[1], $wikiUpperChars );
 833         }
 834
 835         function lcCallback($matches){
 836                 list( , $wikiLowerChars ) = self::getCaseMaps();
 837                 return strtr( $matches[1], $wikiLowerChars );
 838         }
 839
 840         function ucwordsCallbackMB($matches){
 841                 return mb_strtoupper($matches[0]);
 842         }
 843
 844         function ucwordsCallbackWiki($matches){
 845                 list( $wikiUpperChars ) = self::getCaseMaps();
 846                 return strtr( $matches[0], $wikiUpperChars );
 847         }
 848
 849         function ucfirst( $str ) {
 850                 return self::uc( $str, true );
 851         }
 852
 853         function uc( $str, $first = false ) {
 854                 if ( function_exists( 'mb_strtoupper' ) ) {
 855                         if ( $first ) {
 856                                 if ( self::isMultibyte( $str ) ) {
 857                                         return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
 858                                 } else {
 859                                         return ucfirst( $str );
 860                                 }
 861                         } else {
 862                                 return self::isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
 863                         }
 864                 } else {
 865                         if ( self::isMultibyte( $str ) ) {
 866                                 list( $wikiUpperChars ) = $this->getCaseMaps();
 867                                 $x = $first ? '^' : '';
 868                                 return preg_replace_callback(
 869                                         "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
 870                                         array($this,"ucCallback"),
 871                                         $str
 872                                 );
 873                         } else {
 874                                 return $first ? ucfirst( $str ) : strtoupper( $str );
 875                         }
 876                 }
 877         }
 878
 879         function lcfirst( $str ) {
 880                 return self::lc( $str, true );
 881         }
 882
 883         function lc( $str, $first = false ) {
 884                 if ( function_exists( 'mb_strtolower' ) )
 885                         if ( $first )
 886                                 if ( self::isMultibyte( $str ) )
 887                                         return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
 888                                 else
 889                                         return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
 890                         else
 891                                 return self::isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
 892                 else
 893                         if ( self::isMultibyte( $str ) ) {
 894                                 list( , $wikiLowerChars ) = self::getCaseMaps();
 895                                 $x = $first ? '^' : '';
 896                                 return preg_replace_callback(
 897                                         "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
 898                                         array($this,"lcCallback"),
 899                                         $str
 900                                 );
 901                         } else
 902                                 return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
 903         }
 904
 905         function isMultibyte( $str ) {
 906                 return (bool)preg_match( '/[\x80-\xff]/', $str );
 907         }
 908
 909         function ucwords($str) {
 910                 if ( self::isMultibyte( $str ) ) {
 911                         $str = self::lc($str);
 912
 913                         // regexp to find first letter in each word (i.e. after each space)
 914                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
 915
 916                         // function to use to capitalize a single char
 917                         if ( function_exists( 'mb_strtoupper' ) )
 918                                 return preg_replace_callback(
 919                                         $replaceRegexp,
 920                                         array($this,"ucwordsCallbackMB"),
 921                                         $str
 922                                 );
 923                         else
 924                                 return preg_replace_callback(
 925                                         $replaceRegexp,
 926                                         array($this,"ucwordsCallbackWiki"),
 927                                         $str
 928                                 );
 929                 }
 930                 else
 931                         return ucwords( strtolower( $str ) );
 932         }
 933
 934   # capitalize words at word breaks
 935         function ucwordbreaks($str){
 936                 if (self::isMultibyte( $str ) ) {
 937                         $str = self::lc($str);
 938
 939                         // since \b doesn't work for UTF-8, we explicitely define word break chars
 940                         $breaks= "[ \-\(\)\}\{\.,\?!]";
 941
 942                         // find first letter after word break
 943                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
 944
 945                         if ( function_exists( 'mb_strtoupper' ) )
 946                                 return preg_replace_callback(
 947                                         $replaceRegexp,
 948                                         array($this,"ucwordbreaksCallbackMB"),
 949                                         $str
 950                                 );
 951                         else
 952                                 return preg_replace_callback(
 953                                         $replaceRegexp,
 954                                         array($this,"ucwordsCallbackWiki"),
 955                                         $str
 956                                 );
 957                 }
 958                 else
 959                         return preg_replace_callback(
 960                         '/\b([\w\x80-\xff]+)\b/',
 961                         array($this,"ucwordbreaksCallbackAscii"),
 962                         $str );
 963         }
 964
 965         /**
 966          * Return a case-folded representation of $s
 967          *
 968          * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 969          * and $s2 are the same except for the case of their characters. It is not
 970          * necessary for the value returned to make sense when displayed.
 971          *
 972          * Do *not* perform any other normalisation in this function. If a caller
 973          * uses this function when it should be using a more general normalisation
 974          * function, then fix the caller.
 975          */
 976         function caseFold( $s ) {
 977                 return $this->uc( $s );
 978         }
 979
 980         function checkTitleEncoding( $s ) {
 981                 if( is_array( $s ) ) {
 982                         wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
 983                 }
 984                 # Check for non-UTF-8 URLs
 985                 $ishigh = preg_match( '/[\x80-\xff]/', $s);
 986                 if(!$ishigh) return $s;
 987
 988                 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
 989                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
 990                 if( $isutf8 ) return $s;
 991
 992                 return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
 993         }
 994
 995         function fallback8bitEncoding() {
 996                 $this->load();
 997                 return $this->fallback8bitEncoding;
 998         }
 999
1000         /**
1001          * Some languages have special punctuation to strip out
1002          * or characters which need to be converted for MySQL's
1003          * indexing to grok it correctly. Make such changes here.
1004          *
1005          * @param string $in
1006          * @return string
1007          */
1008         function stripForSearch( $string ) {
1009                 global $wgDBtype;
1010                 if ( $wgDBtype != 'mysql' ) {
1011                         return $string;
1012                 }
1013
1014                 # MySQL fulltext index doesn't grok utf-8, so we
1015                 # need to fold cases and convert to hex
1016
1017                 wfProfileIn( __METHOD__ );
1018                 if( function_exists( 'mb_strtolower' ) ) {
1019                         $out = preg_replace(
1020                                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
1021                                 "'U8' . bin2hex( \"$1\" )",
1022                                 mb_strtolower( $string ) );
1023                 } else {
1024                         list( , $wikiLowerChars ) = self::getCaseMaps();
1025                         $out = preg_replace(
1026                                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
1027                                 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
1028                                 $string );
1029                 }
1030                 wfProfileOut( __METHOD__ );
1031                 return $out;
1032         }
1033
1034         function convertForSearchResult( $termsArray ) {
1035                 # some languages, e.g. Chinese, need to do a conversion
1036                 # in order for search results to be displayed correctly
1037                 return $termsArray;
1038         }
1039
1040         /**
1041          * Get the first character of a string.
1042          *
1043          * @param string $s
1044          * @return string
1045          */
1046         function firstChar( $s ) {
1047                 $matches = array();
1048                 preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1049                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);
1050
1051                 return isset( $matches[1] ) ? $matches[1] : "";
1052         }
1053
1054         function initEncoding() {
1055                 # Some languages may have an alternate char encoding option
1056                 # (Esperanto X-coding, Japanese furigana conversion, etc)
1057                 # If this language is used as the primary content language,
1058                 # an override to the defaults can be set here on startup.
1059         }
1060
1061         function recodeForEdit( $s ) {
1062                 # For some languages we'll want to explicitly specify
1063                 # which characters make it into the edit box raw
1064                 # or are converted in some way or another.
1065                 # Note that if wgOutputEncoding is different from
1066                 # wgInputEncoding, this text will be further converted
1067                 # to wgOutputEncoding.
1068                 global $wgEditEncoding;
1069                 if( $wgEditEncoding == '' or
1070                   $wgEditEncoding == 'UTF-8' ) {
1071                         return $s;
1072                 } else {
1073                         return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
1074                 }
1075         }
1076
1077         function recodeInput( $s ) {
1078                 # Take the previous into account.
1079                 global $wgEditEncoding;
1080                 if($wgEditEncoding != "") {
1081                         $enc = $wgEditEncoding;
1082                 } else {
1083                         $enc = 'UTF-8';
1084                 }
1085                 if( $enc == 'UTF-8' ) {
1086                         return $s;
1087                 } else {
1088                         return $this->iconv( $enc, 'UTF-8', $s );
1089                 }
1090         }
1091
1092         /**
1093          * For right-to-left language support
1094          *
1095          * @return bool
1096          */
1097         function isRTL() {
1098                 $this->load();
1099                 return $this->rtl;
1100         }
1101
1102         /**
1103          * A hidden direction mark (LRM or RLM), depending on the language direction
1104          *
1105          * @return string
1106          */
1107         function getDirMark() {
1108                 return $this->isRTL() ? "\xE2\x80\x8F" : "\xE2\x80\x8E";
1109         }
1110
1111         /**
1112          * An arrow, depending on the language direction
1113          *
1114          * @return string
1115          */
1116         function getArrow() {
1117                 return $this->isRTL() ? '←' : '→';
1118         }
1119
1120         /**
1121          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
1122          *
1123          * @return bool
1124          */
1125         function linkPrefixExtension() {
1126                 $this->load();
1127                 return $this->linkPrefixExtension;
1128         }
1129
1130         function &getMagicWords() {
1131                 $this->load();
1132                 return $this->magicWords;
1133         }
1134
1135         # Fill a MagicWord object with data from here
1136         function getMagic( &$mw ) {
1137                 if ( !$this->mMagicHookDone ) {
1138                         $this->mMagicHookDone = true;
1139                         wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
1140                 }
1141                 if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
1142                         $rawEntry = $this->mMagicExtensions[$mw->mId];
1143                 } else {
1144                         $magicWords =& $this->getMagicWords();
1145                         if ( isset( $magicWords[$mw->mId] ) ) {
1146                                 $rawEntry = $magicWords[$mw->mId];
1147                         } else {
1148                                 # Fall back to English if local list is incomplete
1149                                 $magicWords =& Language::getMagicWords();
1150                                 $rawEntry = $magicWords[$mw->mId];
1151                         }
1152                 }
1153
1154                 if( !is_array( $rawEntry ) ) {
1155                         error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
1156                 }
1157                 $mw->mCaseSensitive = $rawEntry[0];
1158                 $mw->mSynonyms = array_slice( $rawEntry, 1 );
1159         }
1160
1161         /**
1162          * Add magic words to the extension array
1163          */
1164         function addMagicWordsByLang( $newWords ) {
1165                 $code = $this->getCode();
1166                 $fallbackChain = array();
1167                 while ( $code && !in_array( $code, $fallbackChain ) ) {
1168                         $fallbackChain[] = $code;
1169                         $code = self::getFallbackFor( $code );
1170                 }
1171                 if ( !in_array( 'en', $fallbackChain ) ) {
1172                         $fallbackChain[] = 'en';
1173                 }
1174                 $fallbackChain = array_reverse( $fallbackChain );
1175                 foreach ( $fallbackChain as $code ) {
1176                         if ( isset( $newWords[$code] ) ) {
1177                                 $this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
1178                         }
1179                 }
1180         }
1181
1182         /**
1183          * Get special page names, as an associative array
1184          *   case folded alias => real name
1185          */
1186         function getSpecialPageAliases() {
1187                 $this->load();
1188                 if ( !isset( $this->mExtendedSpecialPageAliases ) ) {
1189                         $this->mExtendedSpecialPageAliases = $this->specialPageAliases;
1190                         wfRunHooks( 'LangugeGetSpecialPageAliases',
1191                                 array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );
1192                 }
1193                 return $this->mExtendedSpecialPageAliases;
1194         }
1195
1196         /**
1197          * Italic is unsuitable for some languages
1198          *
1199          * @public
1200          *
1201          * @param string $text The text to be emphasized.
1202          * @return string
1203          */
1204         function emphasize( $text ) {
1205                 return "<em>$text</em>";
1206         }
1207
1208          /**
1209          * Normally we output all numbers in plain en_US style, that is
1210          * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
1211          * point twohundredthirtyfive. However this is not sutable for all
1212          * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
1213          * Icelandic just want to use commas instead of dots, and dots instead
1214          * of commas like "293.291,235".
1215          *
1216          * An example of this function being called:
1217          * <code>
1218          * wfMsg( 'message', $wgLang->formatNum( $num ) )
1219          * </code>
1220          *
1221          * See LanguageGu.php for the Gujarati implementation and
1222          * LanguageIs.php for the , => . and . => , implementation.
1223          *
1224          * @todo check if it's viable to use localeconv() for the decimal
1225          *       seperator thing.
1226          * @public
1227          * @param mixed $number the string to be formatted, should be an integer or
1228          *        a floating point number.
1229          * @param bool $nocommafy Set to true for special numbers like dates
1230          * @return string
1231          */
1232         function formatNum( $number, $nocommafy = false ) {
1233                 global $wgTranslateNumerals;
1234                 if (!$nocommafy) {
1235                         $number = $this->commafy($number);
1236                         $s = $this->separatorTransformTable();
1237                         if (!is_null($s)) { $number = strtr($number, $s); }
1238                 }
1239
1240                 if ($wgTranslateNumerals) {
1241                         $s = $this->digitTransformTable();
1242                         if (!is_null($s)) { $number = strtr($number, $s); }
1243                 }
1244
1245                 return $number;
1246         }
1247
1248         function parseFormattedNumber( $number ) {
1249                 $s = $this->digitTransformTable();
1250                 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1251
1252                 $s = $this->separatorTransformTable();
1253                 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1254
1255                 $number = strtr( $number, array (',' => '') );
1256                 return $number;
1257         }
1258
1259         /**
1260          * Adds commas to a given number
1261          *
1262          * @param mixed $_
1263          * @return string
1264          */
1265         function commafy($_) {
1266                 return strrev((string)preg_replace('/(\d{3})(?=\d)(?!\d*\.)/','$1,',strrev($_)));
1267         }
1268
1269         function digitTransformTable() {
1270                 $this->load();
1271                 return $this->digitTransformTable;
1272         }
1273
1274         function separatorTransformTable() {
1275                 $this->load();
1276                 return $this->separatorTransformTable;
1277         }
1278
1279
1280         /**
1281          * For the credit list in includes/Credits.php (action=credits)
1282          *
1283          * @param array $l
1284          * @return string
1285          */
1286         function listToText( $l ) {
1287                 $s = '';
1288                 $m = count($l) - 1;
1289                 for ($i = $m; $i >= 0; $i--) {
1290                         if ($i == $m) {
1291                                 $s = $l[$i];
1292                         } else if ($i == $m - 1) {
1293                                 $s = $l[$i] . ' ' . $this->getMessageFromDB( 'and' ) . ' ' . $s;
1294                         } else {
1295                                 $s = $l[$i] . ', ' . $s;
1296                         }
1297                 }
1298                 return $s;
1299         }
1300
1301         /**
1302          * Truncate a string to a specified length in bytes, appending an optional
1303          * string (e.g. for ellipses)
1304          *
1305          * The database offers limited byte lengths for some columns in the database;
1306          * multi-byte character sets mean we need to ensure that only whole characters
1307          * are included, otherwise broken characters can be passed to the user
1308          *
1309          * If $length is negative, the string will be truncated from the beginning
1310          *
1311          * @param string $string String to truncate
1312          * @param int $length Maximum length (excluding ellipses)
1313          * @param string $ellipses String to append to the truncated text
1314          * @return string
1315          */
1316         function truncate( $string, $length, $ellipsis = "" ) {
1317                 if( $length == 0 ) {
1318                         return $ellipsis;
1319                 }
1320                 if ( strlen( $string ) <= abs( $length ) ) {
1321                         return $string;
1322                 }
1323                 if( $length > 0 ) {
1324                         $string = substr( $string, 0, $length );
1325                         $char = ord( $string[strlen( $string ) - 1] );
1326                         $m = array();
1327                         if ($char >= 0xc0) {
1328                                 # We got the first byte only of a multibyte char; remove it.
1329                                 $string = substr( $string, 0, -1 );
1330                         } elseif( $char >= 0x80 &&
1331                                   preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
1332                                               '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
1333                             # We chopped in the middle of a character; remove it
1334                                 $string = $m[1];
1335                         }
1336                         return $string . $ellipsis;
1337                 } else {
1338                         $string = substr( $string, $length );
1339                         $char = ord( $string[0] );
1340                         if( $char >= 0x80 && $char < 0xc0 ) {
1341                                 # We chopped in the middle of a character; remove the whole thing
1342                                 $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
1343                         }
1344                         return $ellipsis . $string;
1345                 }
1346         }
1347
1348         /**
1349          * Grammatical transformations, needed for inflected languages
1350          * Invoked by putting {{grammar:case|word}} in a message
1351          *
1352          * @param string $word
1353          * @param string $case
1354          * @return string
1355          */
1356         function convertGrammar( $word, $case ) {
1357                 global $wgGrammarForms;
1358                 if ( isset($wgGrammarForms['en'][$case][$word]) ) {
1359                         return $wgGrammarForms['en'][$case][$word];
1360                 }
1361                 return $word;
1362         }
1363
1364         /**
1365          * Plural form transformations, needed for some languages.
1366          * For example, where are 3 form of plural in Russian and Polish,
1367          * depending on "count mod 10". See [[w:Plural]]
1368          * For English it is pretty simple.
1369          *
1370          * Invoked by putting {{plural:count|wordform1|wordform2}}
1371          * or {{plural:count|wordform1|wordform2|wordform3}}
1372          *
1373          * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
1374          *
1375          * @param integer $count
1376          * @param string $wordform1
1377          * @param string $wordform2
1378          * @param string $wordform3 (optional)
1379          * @param string $wordform4 (optional)
1380          * @param string $wordform5 (optional)
1381          * @return string
1382          */
1383         function convertPlural( $count, $w1, $w2, $w3, $w4, $w5) {
1384                 return ( $count == '1' || $count == '-1' ) ? $w1 : $w2;
1385         }
1386
1387         /**
1388          * For translaing of expiry times
1389          * @param string The validated block time in English
1390          * @return Somehow translated block time
1391          * @see LanguageFi.php for example implementation
1392          */
1393         function translateBlockExpiry( $str ) {
1394
1395                 $scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );
1396
1397                 if ( $scBlockExpiryOptions == '-') {
1398                         return $str;
1399                 }
1400
1401                 foreach (explode(',', $scBlockExpiryOptions) as $option) {
1402                         if ( strpos($option, ":") === false )
1403                                 continue;
1404                         list($show, $value) = explode(":", $option);
1405                         if ( strcmp ( $str, $value) == 0 ) {
1406                                 return htmlspecialchars( trim( $show ) );
1407                         }
1408                 }
1409
1410                 return $str;
1411         }
1412
1413         /**
1414          * languages like Chinese need to be segmented in order for the diff
1415          * to be of any use
1416          *
1417          * @param string $text
1418          * @return string
1419          */
1420         function segmentForDiff( $text ) {
1421                 return $text;
1422         }
1423
1424         /**
1425          * and unsegment to show the result
1426          *
1427          * @param string $text
1428          * @return string
1429          */
1430         function unsegmentForDiff( $text ) {
1431                 return $text;
1432         }
1433
1434         # convert text to different variants of a language.
1435         function convert( $text, $isTitle = false) {
1436                 return $this->mConverter->convert($text, $isTitle);
1437         }
1438
1439         # Convert text from within Parser
1440         function parserConvert( $text, &$parser ) {
1441                 return $this->mConverter->parserConvert( $text, $parser );
1442         }
1443
1444         # Check if this is a language with variants
1445         function hasVariants(){
1446                 return sizeof($this->getVariants())>1;
1447         }
1448
1449         # Put custom tags (e.g. -{ }-) around math to prevent conversion
1450         function armourMath($text){
1451                 return $this->mConverter->armourMath($text);
1452         }
1453
1454
1455         /**
1456          * Perform output conversion on a string, and encode for safe HTML output.
1457          * @param string $text
1458          * @param bool $isTitle Passed through unchanged to convert()
1459          * @return string
1460          * @todo this should get integrated somewhere sane
1461          */
1462         function convertHtml( $text, $isTitle = false ) {
1463                 return htmlspecialchars( $this->convert( $text, $isTitle ) );
1464         }
1465
1466         function convertCategoryKey( $key ) {
1467                 return $this->mConverter->convertCategoryKey( $key );
1468         }
1469
1470         /**
1471          * get the list of variants supported by this language
1472          * see sample implementation in LanguageZh.php
1473          *
1474          * @return array an array of language codes
1475          */
1476         function getVariants() {
1477                 return $this->mConverter->getVariants();
1478         }
1479
1480
1481         function getPreferredVariant( $fromUser = true ) {
1482                 return $this->mConverter->getPreferredVariant( $fromUser );
1483         }
1484
1485         /**
1486          * if a language supports multiple variants, it is
1487          * possible that non-existing link in one variant
1488          * actually exists in another variant. this function
1489          * tries to find it. See e.g. LanguageZh.php
1490          *
1491          * @param string $link the name of the link
1492          * @param mixed $nt the title object of the link
1493          * @return null the input parameters may be modified upon return
1494          */
1495         function findVariantLink( &$link, &$nt ) {
1496                 $this->mConverter->findVariantLink($link, $nt);
1497         }
1498
1499         /**
1500          * If a language supports multiple variants, converts text
1501          * into an array of all possible variants of the text:
1502          *  'variant' => text in that variant
1503          */
1504
1505         function convertLinkToAllVariants($text){
1506                 return $this->mConverter->convertLinkToAllVariants($text);
1507         }
1508
1509
1510         /**
1511          * returns language specific options used by User::getPageRenderHash()
1512          * for example, the preferred language variant
1513          *
1514          * @return string
1515          * @public
1516          */
1517         function getExtraHashOptions() {
1518                 return $this->mConverter->getExtraHashOptions();
1519         }
1520
1521         /**
1522          * for languages that support multiple variants, the title of an
1523          * article may be displayed differently in different variants. this
1524          * function returns the appropriate title defined in the body of the article.
1525          *
1526          * @return string
1527          */
1528         function getParsedTitle() {
1529                 return $this->mConverter->getParsedTitle();
1530         }
1531
1532         /**
1533          * Enclose a string with the "no conversion" tag. This is used by
1534          * various functions in the Parser
1535          *
1536          * @param string $text text to be tagged for no conversion
1537          * @return string the tagged text
1538         */
1539         function markNoConversion( $text, $noParse=false ) {
1540                 return $this->mConverter->markNoConversion( $text, $noParse );
1541         }
1542
1543         /**
1544          * A regular expression to match legal word-trailing characters
1545          * which should be merged onto a link of the form [[foo]]bar.
1546          *
1547          * @return string
1548          * @public
1549          */
1550         function linkTrail() {
1551                 $this->load();
1552                 return $this->linkTrail;
1553         }
1554
1555         function getLangObj() {
1556                 return $this;
1557         }
1558
1559         /**
1560          * Get the RFC 3066 code for this language object
1561          */
1562         function getCode() {
1563                 return $this->mCode;
1564         }
1565
1566         function setCode( $code ) {
1567                 $this->mCode = $code;
1568         }
1569
1570         static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
1571                 return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
1572         }
1573
1574         static function getMessagesFileName( $code ) {
1575                 global $IP;
1576                 return self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
1577         }
1578
1579         static function getClassFileName( $code ) {
1580                 global $IP;
1581                 return self::getFileName( "$IP/languages/classes/Language", $code, '.php' );
1582         }
1583
1584         static function getLocalisationArray( $code, $disableCache = false ) {
1585                 self::loadLocalisation( $code, $disableCache );
1586                 return self::$mLocalisationCache[$code];
1587         }
1588
1589         /**
1590          * Load localisation data for a given code into the static cache
1591          *
1592          * @return array Dependencies, map of filenames to mtimes
1593          */
1594         static function loadLocalisation( $code, $disableCache = false ) {
                     # Fills self::$mLocalisationCache[$code] and returns that entry's
                     # 'deps' array. Unless $disableCache is set, the per-process cache,
                     # the precompiled .ser data and $wgMemc are tried in that order
                     # before falling back to the Messages source file.
                     #
                     # $recursionGuard holds the codes whose fallback is currently being
                     # loaded by an outer call, so that circular fallbacks are detected.
1595                 static $recursionGuard = array();
1596                 global $wgMemc;
1597
1598                 if ( !$code ) {
1599                         throw new MWException( "Invalid language code requested" );
1600                 }
1601
1602                 if ( !$disableCache ) {
1603                         # Try the per-process cache
1604                         if ( isset( self::$mLocalisationCache[$code] ) ) {
1605                                 return self::$mLocalisationCache[$code]['deps'];
1606                         }
1607
1608                         wfProfileIn( __METHOD__ );
1609
1610                         # Try the serialized directory
1611                         $cache = wfGetPrecompiledData( self::getFileName( "Messages", $code, '.ser' ) );
1612                         if ( $cache ) {
1613                                 self::$mLocalisationCache[$code] = $cache;
1614                                 wfDebug( "Language::loadLocalisation(): got localisation for $code from precompiled data file\n" );
1615                                 wfProfileOut( __METHOD__ );
1616                                 return self::$mLocalisationCache[$code]['deps'];
1617                         }
1618
1619                         # Try the global cache
1620                         $memcKey = wfMemcKey('localisation', $code );
1621                         $cache = $wgMemc->get( $memcKey );
1622                         if ( $cache ) {
1623                                 # Check file modification times
                                     # NOTE: the loop below decides nothing by itself; it only
                                     # stops with $file pointing at the first stale dependency
                                     # so that the debug message further down can name it. The
                                     # actual verdict comes from isLocalisationOutOfDate().
1624                                 foreach ( $cache['deps'] as $file => $mtime ) {
1625                                         if ( !file_exists( $file ) || filemtime( $file ) > $mtime ) {
1626                                                 break;
1627                                         }
1628                                 }
1629                                 if ( self::isLocalisationOutOfDate( $cache ) ) {
                                             # Stale entry: drop it from $wgMemc and fall through
                                             # to a rebuild from source.
1630                                         $wgMemc->delete( $memcKey );
1631                                         $cache = false;
1632                                         wfDebug( "Language::loadLocalisation(): localisation cache for $code had expired due to update of $file\n" );
1633                                 } else {
1634                                         self::$mLocalisationCache[$code] = $cache;
1635                                         wfDebug( "Language::loadLocalisation(): got localisation for $code from cache\n" );
1636                                         wfProfileOut( __METHOD__ );
1637                                         return $cache['deps'];
1638                                 }
1639                         }
1640                 } else {
                             # Caches are skipped, but the profiling section still has to
                             # be opened to match the wfProfileOut() at the end.
1641                         wfProfileIn( __METHOD__ );
1642                 }
1643
1644                 # Default fallback, may be overridden when the messages file is included
1645                 if ( $code != 'en' ) {
1646                         $fallback = 'en';
1647                 } else {
1648                         $fallback = false;
1649                 }
1650
1651                 # Load the primary localisation from the source file
1652                 $filename = self::getMessagesFileName( $code );
1653                 if ( !file_exists( $filename ) ) {
1654                         wfDebug( "Language::loadLocalisation(): no localisation file for $code, using implicit fallback to en\n" );
1655                         $cache = array();
1656                         $deps = array();
1657                 } else {
1658                         $deps = array( $filename => filemtime( $filename ) );
                             # The file is included in this function's scope, so the
                             # variables it assigns become locals here; compact() then
                             # gathers whichever of the names in self::$mLocalisationKeys
                             # exist as local variables. Local variable names in this
                             # function therefore matter.
1659                         require( $filename );
1660                         $cache = compact( self::$mLocalisationKeys );
1661                         wfDebug( "Language::loadLocalisation(): got localisation for $code from source\n" );
1662                 }
1663
1664                 if ( !empty( $fallback ) ) {
1665                         # Load the fallback localisation, with a circular reference guard
1666                         if ( isset( $recursionGuard[$code] ) ) {
1667                                 throw new MWException( "Error: Circular fallback reference in language code $code" );
1668                         }
1669                         $recursionGuard[$code] = true;
                             # The recursive call returns the fallback's dependency list
1670                         $newDeps = self::loadLocalisation( $fallback, $disableCache );
1671                         unset( $recursionGuard[$code] );
1672
1673                         $secondary = self::$mLocalisationCache[$fallback];
1674                         $deps = array_merge( $deps, $newDeps );
1675
1676                         # Merge the fallback localisation with the current localisation
                             # - map keys: array union, this language's entries win on
                             #   colliding keys
                             # - list keys: fallback entries first, then this language's
                             # - alias list keys: recursive merge, this language's first
                             # - any other key set by this language is left as it is
                             # - keys this language did not set are taken from the fallback
1677                         foreach ( self::$mLocalisationKeys as $key ) {
1678                                 if ( isset( $cache[$key] ) ) {
1679                                         if ( isset( $secondary[$key] ) ) {
1680                                                 if ( in_array( $key, self::$mMergeableMapKeys ) ) {
1681                                                         $cache[$key] = $cache[$key] + $secondary[$key];
1682                                                 } elseif ( in_array( $key, self::$mMergeableListKeys ) ) {
1683                                                         $cache[$key] = array_merge( $secondary[$key], $cache[$key] );
1684                                                 } elseif ( in_array( $key, self::$mMergeableAliasListKeys ) ) {
1685                                                         $cache[$key] = array_merge_recursive( $cache[$key], $secondary[$key] );
1686                                                 }
1687                                         }
1688                                 } else {
1689                                         $cache[$key] = $secondary[$key];
1690                                 }
1691                         }
1692
1693                         # Merge bookstore lists if requested
                             # (requested by a non-empty 'inherit' entry in the list; the
                             # marker entry itself is removed afterwards)
1694                         if ( !empty( $cache['bookstoreList']['inherit'] ) ) {
1695                                 $cache['bookstoreList'] = array_merge( $cache['bookstoreList'], $secondary['bookstoreList'] );
1696                         }
1697                         if ( isset( $cache['bookstoreList']['inherit'] ) ) {
1698                                 unset( $cache['bookstoreList']['inherit'] );
1699                         }
1700                 }
1701
1702                 # Add dependencies to the cache entry
1703                 $cache['deps'] = $deps;
1704
1705                 # Replace spaces with underscores in namespace names
1706                 $cache['namespaceNames'] = str_replace( ' ', '_', $cache['namespaceNames'] );
1707
1708                 # Save to both caches
1709                 self::$mLocalisationCache[$code] = $cache;
1710                 if ( !$disableCache ) {
                             # $memcKey was assigned in the cache lookup branch near the top,
                             # which is always taken when $disableCache is false.
1711                         $wgMemc->set( $memcKey, $cache );
1712                 }
1713
1714                 wfProfileOut( __METHOD__ );
1715                 return $deps;
1716         }
1717
1718         /**
1719          * Test if a given localisation cache is out of date with respect to the
1720          * source Messages files. This is done automatically for the global cache
1721          * in $wgMemc, but is only done on certain occasions for the serialized
1722          * data file.
1723          *
1724          * @param $cache mixed Either a language code or a cache array
1725          */
1726         static function isLocalisationOutOfDate( $cache ) {
1727                 if ( !is_array( $cache ) ) {
1728                         self::loadLocalisation( $cache );
1729                         $cache = self::$mLocalisationCache[$cache];
1730                 }
1731                 $expired = false;
1732                 foreach ( $cache['deps'] as $file => $mtime ) {
1733                         if ( !file_exists( $file ) || filemtime( $file ) > $mtime ) {
1734                                 $expired = true;
1735                                 break;
1736                         }
1737                 }
1738                 return $expired;
1739         }
1740
1741         /**
1742          * Get the fallback for a given language
1743          */
1744         static function getFallbackFor( $code ) {
1745                 self::loadLocalisation( $code );
1746                 return self::$mLocalisationCache[$code]['fallback'];
1747         }
1748
1749         /**
1750          * Get all messages for a given language
1751          */
1752         static function getMessagesFor( $code ) {
1753                 self::loadLocalisation( $code );
1754                 return self::$mLocalisationCache[$code]['messages'];
1755         }
1756
1757         /**
1758          * Get a message for a given language
1759          */
1760         static function getMessageFor( $key, $code ) {
1761                 self::loadLocalisation( $code );
1762                 return isset( self::$mLocalisationCache[$code]['messages'][$key] ) ? self::$mLocalisationCache[$code]['messages'][$key] : null;
1763         }
1764
1765         /**
1766          * Load localisation data for this object
1767          */
1768         function load() {
1769                 if ( !$this->mLoaded ) {
1770                         self::loadLocalisation( $this->getCode() );
1771                         $cache =& self::$mLocalisationCache[$this->getCode()];
1772                         foreach ( self::$mLocalisationKeys as $key ) {
1773                                 $this->$key = $cache[$key];
1774                         }
1775                         $this->mLoaded = true;
1776
1777                         $this->fixUpSettings();
1778                 }
1779         }
1780
1781         /**
1782          * Do any necessary post-cache-load settings adjustment
1783          */
1784         function fixUpSettings() {
1785                 global $wgExtraNamespaces, $wgMetaNamespace, $wgMetaNamespaceTalk,
1786                         $wgNamespaceAliases, $wgAmericanDates;
1787                 wfProfileIn( __METHOD__ );
1788                 if ( $wgExtraNamespaces ) {
1789                         $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames;
1790                 }
1791
1792                 $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
1793                 if ( $wgMetaNamespaceTalk ) {
1794                         $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
1795                 } else {
1796                         $talk = $this->namespaceNames[NS_PROJECT_TALK];
1797                         $talk = str_replace( '$1', $wgMetaNamespace, $talk );
1798
1799                         # Allow grammar transformations
1800                         # Allowing full message-style parsing would make simple requests
1801                         # such as action=raw much more expensive than they need to be.
1802                         # This will hopefully cover most cases.
1803                         $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
1804                                 array( &$this, 'replaceGrammarInNamespace' ), $talk );
1805                         $talk = str_replace( ' ', '_', $talk );
1806                         $this->namespaceNames[NS_PROJECT_TALK] = $talk;
1807                 }
1808
1809                 # The above mixing may leave namespaces out of canonical order.
1810                 # Re-order by namespace ID number...
1811                 ksort( $this->namespaceNames );
1812
1813                 # Put namespace names and aliases into a hashtable.
1814                 # If this is too slow, then we should arrange it so that it is done
1815                 # before caching. The catch is that at pre-cache time, the above
1816                 # class-specific fixup hasn't been done.
1817                 $this->mNamespaceIds = array();
1818                 foreach ( $this->namespaceNames as $index => $name ) {
1819                         $this->mNamespaceIds[$this->lc($name)] = $index;
1820                 }
1821                 if ( $this->namespaceAliases ) {
1822                         foreach ( $this->namespaceAliases as $name => $index ) {
1823                                 $this->mNamespaceIds[$this->lc($name)] = $index;
1824                         }
1825                 }
1826                 if ( $wgNamespaceAliases ) {
1827                         foreach ( $wgNamespaceAliases as $name => $index ) {
1828                                 $this->mNamespaceIds[$this->lc($name)] = $index;
1829                         }
1830                 }
1831
1832                 if ( $this->defaultDateFormat == 'dmy or mdy' ) {
1833                         $this->defaultDateFormat = $wgAmericanDates ? 'mdy' : 'dmy';
1834                 }
1835                 wfProfileOut( __METHOD__ );
1836         }
1837
1838         function replaceGrammarInNamespace( $m ) {
1839                 return $this->convertGrammar( trim( $m[2] ), trim( $m[1] ) );
1840         }
1841
1842         static function getCaseMaps() {
1843                 static $wikiUpperChars, $wikiLowerChars;
1844                 if ( isset( $wikiUpperChars ) ) {
1845                         return array( $wikiUpperChars, $wikiLowerChars );
1846                 }
1847
1848                 wfProfileIn( __METHOD__ );
1849                 $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
1850                 if ( $arr === false ) {
1851                         throw new MWException(
1852                                 "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
1853                 }
1854                 extract( $arr );
1855                 wfProfileOut( __METHOD__ );
1856                 return array( $wikiUpperChars, $wikiLowerChars );
1857         }
1858
1859         function formatTimePeriod( $seconds ) {
1860                 if ( $seconds < 10 ) {
1861                         return $this->formatNum( sprintf( "%.1f", $seconds ) ) . wfMsg( 'seconds-abbrev' );
1862                 } elseif ( $seconds < 60 ) {
1863                         return $this->formatNum( round( $seconds ) ) . wfMsg( 'seconds-abbrev' );
1864                 } elseif ( $seconds < 3600 ) {
1865                         return $this->formatNum( floor( $seconds / 60 ) ) . wfMsg( 'minutes-abbrev' ) .
1866                                 $this->formatNum( round( fmod( $seconds, 60 ) ) ) . wfMsg( 'seconds-abbrev' );
1867                 } else {
1868                         $hours = floor( $seconds / 3600 );
1869                         $minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
1870                         $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
1871                         return $this->formatNum( $hours ) . wfMsg( 'hours-abbrev' ) .
1872                                 $this->formatNum( $minutes ) . wfMsg( 'minutes-abbrev' ) .
1873                                 $this->formatNum( $secondsPart ) . wfMsg( 'seconds-abbrev' );
1874                 }
1875         }
1876
1877         function formatBitrate( $bps ) {
1878                 $units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
1879                 if ( $bps <= 0 ) {
1880                         return $this->formatNum( $bps ) . $units[0];
1881                 }
1882                 $unitIndex = floor( log10( $bps ) / 3 );
1883                 $mantissa = $bps / pow( 1000, $unitIndex );
1884                 if ( $mantissa < 10 ) {
1885                         $mantissa = round( $mantissa, 1 );
1886                 } else {
1887                         $mantissa = round( $mantissa );
1888                 }
1889                 return $this->formatNum( $mantissa ) . $units[$unitIndex];
1890         }
1891
1892         /**
1893          * Format a size in bytes for output, using an appropriate
1894          * unit (B, KB, MB or GB) according to the magnitude in question
1895          *
1896          * @param $size Size to format
1897          * @return string Plain text (not HTML)
1898          */
1899         function formatSize( $size ) {
1900                 // For small sizes no decimal places necessary
1901                 $round = 0;
1902                 if( $size > 1024 ) {
1903                         $size = $size / 1024;
1904                         if( $size > 1024 ) {
1905                                 $size = $size / 1024;
1906                                 // For MB and bigger two decimal places are smarter
1907                                 $round = 2;
1908                                 if( $size > 1024 ) {
1909                                         $size = $size / 1024;
1910                                         $msg = 'size-gigabytes';
1911                                 } else {
1912                                         $msg = 'size-megabytes';
1913                                 }
1914                         } else {
1915                                 $msg = 'size-kilobytes';
1916                         }
1917                 } else {
1918                         $msg = 'size-bytes';
1919                 }
1920                 $size = round( $size, $round );
1921                 $text = $this->getMessageFromDB( $msg );
1922                 return str_replace( '$1', $this->formatNum( $size ), $text );
1923         }
1924 }
1925
1926
1927