do nothing for empty strings... how can you upper-case the void?
[lhc/web/wiklou.git] / languages / Language.php
1 <?php
2 /**
3 * @addtogroup Language
4 */
5
# Refuse to run when loaded outside of MediaWiki.
if ( !defined( 'MEDIAWIKI' ) ) {
	echo "This file is part of MediaWiki, it is not a valid entry point.\n";
	exit( 1 );
}

# Read language names
global $wgLanguageNames;
require_once( dirname( __FILE__ ) . '/Names.php' );

/**
 * These are always UTF-8, they exist only for backwards compatibility
 */
global $wgInputEncoding, $wgOutputEncoding;
$wgInputEncoding = $wgOutputEncoding = "UTF-8";

# Only touch mbstring when the extension is actually available.
if ( function_exists( 'mb_strtoupper' ) ) {
	mb_internal_encoding( 'UTF-8' );
}
26
/**
 * A fake language converter.
 *
 * Every conversion method hands its input back unchanged, and the only
 * "variant" reported is the code of the language object given at
 * construction time.
 */
class FakeConverter {
	/** Language object this converter was created for */
	public $mLang;

	/**
	 * Uses __construct() rather than a PHP 4-style class-named constructor:
	 * class-named constructors are no longer invoked by `new` on PHP 8, which
	 * would leave $mLang unset.
	 *
	 * @param object $langobj language object; must provide getCode()
	 */
	function __construct( $langobj ) {
		$this->mLang = $langobj;
	}

	/** @return mixed $t unchanged */
	function convert( $t, $i ) {
		return $t;
	}

	/** @return mixed $t unchanged */
	function parserConvert( $t, $p ) {
		return $t;
	}

	/** @return array one-element list holding the language code */
	function getVariants() {
		return array( $this->mLang->getCode() );
	}

	/** @return mixed the language code */
	function getPreferredVariant() {
		return $this->mLang->getCode();
	}

	/** No-op: neither reference argument is modified. */
	function findVariantLink( &$l, &$n ) {
	}

	/** @return string always '' */
	function getExtraHashOptions() {
		return '';
	}

	/** @return string always '' */
	function getParsedTitle() {
		return '';
	}

	/** @return mixed $text unchanged */
	function markNoConversion( $text, $noParse = false ) {
		return $text;
	}

	/** @return mixed $key unchanged */
	function convertCategoryKey( $key ) {
		return $key;
	}

	/** @return array map of language code => $text */
	function convertLinkToAllVariants( $text ) {
		return array( $this->mLang->getCode() => $text );
	}

	/** @return mixed $text unchanged */
	function armourMath( $text ) {
		return $text;
	}
}
43
44 #--------------------------------------------------------------------------
45 # Internationalisation code
46 #--------------------------------------------------------------------------
47
48 class Language {
49 var $mConverter, $mVariants, $mCode, $mLoaded = false;
50 var $mMagicExtensions = array(), $mMagicHookDone = false;
51
52 static public $mLocalisationKeys = array( 'fallback', 'namespaceNames',
53 'skinNames', 'mathNames',
54 'bookstoreList', 'magicWords', 'messages', 'rtl', 'digitTransformTable',
55 'separatorTransformTable', 'fallback8bitEncoding', 'linkPrefixExtension',
56 'defaultUserOptionOverrides', 'linkTrail', 'namespaceAliases',
57 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap',
58 'defaultDateFormat', 'extraUserToggles', 'specialPageAliases' );
59
60 static public $mMergeableMapKeys = array( 'messages', 'namespaceNames', 'mathNames',
61 'dateFormats', 'defaultUserOptionOverrides', 'magicWords' );
62
63 static public $mMergeableListKeys = array( 'extraUserToggles' );
64
65 static public $mMergeableAliasListKeys = array( 'specialPageAliases' );
66
67 static public $mLocalisationCache = array();
68
69 static public $mWeekdayMsgs = array(
70 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
71 'friday', 'saturday'
72 );
73
74 static public $mWeekdayAbbrevMsgs = array(
75 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
76 );
77
78 static public $mMonthMsgs = array(
79 'january', 'february', 'march', 'april', 'may_long', 'june',
80 'july', 'august', 'september', 'october', 'november',
81 'december'
82 );
83 static public $mMonthGenMsgs = array(
84 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
85 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
86 'december-gen'
87 );
88 static public $mMonthAbbrevMsgs = array(
89 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
90 'sep', 'oct', 'nov', 'dec'
91 );
92
93 static public $mIranianCalendarMonthMsgs = array(
94 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
95 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
96 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
97 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
98 );
99
/**
 * Create a language object for a given language code.
 *
 * 'en' maps to the Language class itself. Any other code maps to a class
 * named "Language" + ucfirst( code ) with '-' replaced by '_'; its optional
 * ".deps.php" and ".php" files under $IP/languages/classes are included
 * when they exist. If the class still does not exist, an object for the
 * fallback language is created instead and given the requested code.
 *
 * @param string $code language code
 * @return Language
 * @throws MWException when the fallback recursion goes deeper than 5 levels
 */
static function factory( $code ) {
	global $IP;
	static $recursionLevel = 0;

	$class = 'Language';
	if ( $code != 'en' ) {
		$class .= str_replace( '-', '_', ucfirst( $code ) );
		$base = "$IP/languages/classes/$class";
		// Preload base classes to work around APC/PHP5 bug
		foreach ( array( "$base.deps.php", "$base.php" ) as $file ) {
			if ( file_exists( $file ) ) {
				include_once( $file );
			}
		}
	}

	if ( $recursionLevel > 5 ) {
		throw new MWException( "Language fallback loop detected when creating class $class\n" );
	}

	if ( class_exists( $class ) ) {
		return new $class;
	}

	// No dedicated class: build the fallback language and relabel it.
	$fallback = Language::getFallbackFor( $code );
	++$recursionLevel;
	$lang = Language::factory( $fallback );
	--$recursionLevel;
	$lang->setCode( $code );

	return $lang;
}
136
/**
 * Install a FakeConverter and derive the language code from the class name:
 * 'en' for Language itself, otherwise the lower-cased class name with its
 * first 8 characters ("Language") removed and '_' turned into '-'.
 */
function __construct() {
	$this->mConverter = new FakeConverter( $this );

	// Set the code to the name of the descendant
	$className = get_class( $this );
	$this->mCode = ( $className == 'Language' )
		? 'en'
		: str_replace( '_', '-', strtolower( substr( $className, 8 ) ) );
}
146
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 * The base implementation does nothing.
 */
function initContLang() {
}
152
/**
 * Thin wrapper around User::getDefaultOptions().
 *
 * @deprecated
 * @return array
 */
function getDefaultUserOptions() {
	$defaults = User::getDefaultOptions();
	return $defaults;
}
160
/**
 * Call load(), then return the 'fallback' localisation item.
 *
 * @return mixed value of $this->fallback
 */
function getFallbackLanguageCode() {
	$this->load();

	return $this->fallback;
}
165
/**
 * Call load(), then return the 'bookstoreList' localisation item.
 * Exports $wgBookstoreListEn
 *
 * @return array
 */
function getBookstoreList() {
	$this->load();

	return $this->bookstoreList;
}
174
/**
 * Call load(), then return the 'namespaceNames' localisation item.
 *
 * @return array
 */
function getNamespaces() {
	$this->load();

	return $this->namespaceNames;
}
182
/**
 * Same as getNamespaces(), except that every '_' in the array values is
 * replaced by ' '. Useful for producing output to be displayed, e.g. in
 * select forms. Array keys are kept as they are.
 *
 * @return array
 */
function getFormattedNamespaces() {
	$names = $this->getNamespaces();
	foreach ( $names as $index => $rawName ) {
		$names[$index] = strtr( $rawName, '_', ' ' );
	}

	return $names;
}
198
/**
 * Get a namespace value by key.
 *
 * Example:
 *   $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 *   echo $mw_ns; // prints 'MediaWiki'
 *
 * @param int $index the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}

	return false;
}
213
/**
 * Same as getNsText(), except that '_' is changed to ' ' in the result.
 * Useful for producing output.
 *
 * @param int $index the array key of the namespace to return
 * @return string result of strtr() applied to whatever getNsText() returned
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
225
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param string $text
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$this->load();

	// Lookup is done on the lower-cased name
	$needle = $this->lc( $text );
	if ( !isset( $this->mNamespaceIds[$needle] ) ) {
		return false;
	}

	return $this->mNamespaceIds[$needle];
}
239
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param string $text
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$this->load();
	$needle = $this->lc( $text );

	// A canonical match (anything other than null) wins over local names
	$canonical = Namespace::getCanonicalIndex( $needle );
	if ( $canonical !== null ) {
		return $canonical;
	}

	if ( isset( $this->mNamespaceIds[$needle] ) ) {
		return $this->mNamespaceIds[$needle];
	}

	return false;
}
253
/**
 * Short names for language variants used for language conversion links.
 * Looks up the message "variantname-" followed by $code.
 *
 * @param string $code
 * @return string
 */
function getVariantname( $code ) {
	$msgKey = "variantname-$code";

	return $this->getMessageFromDB( $msgKey );
}
263
/**
 * Build "<Special namespace text>:<page name>", using the first entry of
 * the special page aliases for $name when one is set.
 *
 * @param string $name special page name
 * @return string
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;

	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
271
/**
 * Return the five qbsettings-* messages as a list, in the order
 * none, fixedleft, fixedright, floatingleft, floatingright.
 *
 * @return array
 */
function getQuickbarSettings() {
	$msgKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright'
	);

	$settings = array();
	foreach ( $msgKeys as $msgKey ) {
		$settings[] = $this->getMessage( $msgKey );
	}

	return $settings;
}
281
/**
 * Call load(), then return the 'skinNames' localisation item.
 *
 * @return mixed value of $this->skinNames
 */
function getSkinNames() {
	$this->load();

	return $this->skinNames;
}
286
/**
 * Call load(), then return the 'mathNames' localisation item.
 *
 * @return mixed value of $this->mathNames
 */
function getMathNames() {
	$this->load();

	return $this->mathNames;
}
291
/**
 * Call load(), then return the 'datePreferences' localisation item.
 *
 * @return mixed value of $this->datePreferences
 */
function getDatePreferences() {
	$this->load();

	return $this->datePreferences;
}
296
/**
 * Call load(), then return the 'dateFormats' localisation item.
 *
 * @return mixed value of $this->dateFormats
 */
function getDateFormats() {
	$this->load();

	return $this->dateFormats;
}
301
/**
 * Call load(), then return the 'defaultDateFormat' localisation item.
 *
 * @return mixed value of $this->defaultDateFormat
 */
function getDefaultDateFormat() {
	$this->load();

	return $this->defaultDateFormat;
}
306
/**
 * Call load(), then return the 'datePreferenceMigrationMap' localisation item.
 *
 * @return mixed value of $this->datePreferenceMigrationMap
 */
function getDatePreferenceMigrationMap() {
	$this->load();

	return $this->datePreferenceMigrationMap;
}
311
/**
 * Call load(), then return the 'defaultUserOptionOverrides' localisation
 * item, or an empty array when that item is not an array.
 *
 * @return array
 */
function getDefaultUserOptionOverrides() {
	$this->load();

	# XXX - apparently some languages do not end up with an array here,
	# didn't get to it yet -- midom
	$overrides = $this->defaultUserOptionOverrides;

	return is_array( $overrides ) ? $overrides : array();
}
321
/**
 * Call load(), then return the 'extraUserToggles' localisation item.
 *
 * @return mixed value of $this->extraUserToggles
 */
function getExtraUserToggles() {
	$this->load();

	return $this->extraUserToggles;
}
326
/**
 * Look up the message "tog-" followed by $tog through getMessageFromDB().
 *
 * @param string $tog toggle name
 * @return mixed whatever getMessageFromDB() returns
 */
function getUserToggle( $tog ) {
	$msgKey = "tog-$tog";

	return $this->getMessageFromDB( $msgKey );
}
330
/**
 * Get language names, indexed by code.
 * If $customisedOnly is true, only returns codes with a messages file,
 * i.e. a file matching Messages<Code>.php in $IP/languages/messages whose
 * code is also present in $wgLanguageNames.
 *
 * @param bool $customisedOnly
 * @return array language code => language name; empty when the messages
 *         directory cannot be opened
 */
public static function getLanguageNames( $customisedOnly = false ) {
	global $wgLanguageNames, $IP;

	if ( !$customisedOnly ) {
		return $wgLanguageNames;
	}

	$names = array();
	$dir = opendir( "$IP/languages/messages" );
	if ( $dir === false ) {
		// Unreadable or missing directory: do not hand false to
		// readdir()/closedir(), just report that nothing is customised.
		return $names;
	}

	while ( false !== ( $file = readdir( $dir ) ) ) {
		$m = array();
		if ( preg_match( '/Messages([A-Z][a-z_]+)\.php$/', $file, $m ) ) {
			// File names use '_' and a capital first letter; codes use '-'
			// and lower case.
			$code = str_replace( '_', '-', strtolower( $m[1] ) );
			if ( isset( $wgLanguageNames[$code] ) ) {
				$names[$code] = $wgLanguageNames[$code];
			}
		}
	}
	closedir( $dir );

	return $names;
}
356
/**
 * Ugly hack to get a message maybe from the MediaWiki namespace, if this
 * language object is the content or user language.
 *
 * The content language is checked first (wfMsgForContent), then the user
 * language (wfMsg); otherwise the message comes from getMessage().
 *
 * @param string $msg message key
 * @return mixed
 */
function getMessageFromDB( $msg ) {
	global $wgContLang, $wgLang;

	# Content language
	if ( $wgContLang->getCode() == $this->getCode() ) {
		return wfMsgForContent( $msg );
	}

	# User language
	if ( $wgLang->getCode() == $this->getCode() ) {
		return wfMsg( $msg );
	}

	# Neither, get from localisation
	return $this->getMessage( $msg );
}
374
/**
 * Look up a language name in $wgLanguageNames.
 *
 * @param string $code language code
 * @return mixed the entry for $code, or '' when the code is not a key
 */
function getLanguageName( $code ) {
	global $wgLanguageNames;

	return array_key_exists( $code, $wgLanguageNames )
		? $wgLanguageNames[$code]
		: '';
}
382
/**
 * Get a month name via getMessageFromDB().
 *
 * @param int $key 1-based month number; entry $key - 1 of self::$mMonthMsgs is used
 * @return mixed
 */
function getMonthName( $key ) {
	$msgKey = self::$mMonthMsgs[$key - 1];

	return $this->getMessageFromDB( $msgKey );
}
386
/**
 * Get a genitive month name via getMessageFromDB().
 *
 * @param int $key 1-based month number; entry $key - 1 of self::$mMonthGenMsgs is used
 * @return mixed
 */
function getMonthNameGen( $key ) {
	$msgKey = self::$mMonthGenMsgs[$key - 1];

	return $this->getMessageFromDB( $msgKey );
}
390
/**
 * Get an abbreviated month name via getMessageFromDB().
 *
 * @param int $key 1-based month number; entry $key - 1 of self::$mMonthAbbrevMsgs is used
 * @return mixed
 */
function getMonthAbbreviation( $key ) {
	$msgKey = self::$mMonthAbbrevMsgs[$key - 1];

	return $this->getMessageFromDB( $msgKey );
}
394
/**
 * Get a weekday name via getMessageFromDB().
 *
 * @param int $key 1-based weekday number (1 selects 'sunday'); entry
 *        $key - 1 of self::$mWeekdayMsgs is used
 * @return mixed
 */
function getWeekdayName( $key ) {
	$msgKey = self::$mWeekdayMsgs[$key - 1];

	return $this->getMessageFromDB( $msgKey );
}
398
/**
 * Get an abbreviated weekday name via getMessageFromDB().
 *
 * @param int $key 1-based weekday number (1 selects 'sun'); entry
 *        $key - 1 of self::$mWeekdayAbbrevMsgs is used
 * @return mixed
 */
function getWeekdayAbbreviation( $key ) {
	$msgKey = self::$mWeekdayAbbrevMsgs[$key - 1];

	return $this->getMessageFromDB( $msgKey );
}
402
/**
 * Get an Iranian calendar month name via getMessageFromDB().
 *
 * @param int $key 1-based month number; entry $key - 1 of
 *        self::$mIranianCalendarMonthMsgs is used
 * @return mixed
 */
function getIranianCalendarMonthName( $key ) {
	$msgKey = self::$mIranianCalendarMonthMsgs[$key - 1];

	return $this->getMessageFromDB( $msgKey );
}
406
407
/**
 * Used by date() and time() to adjust the time output.
 *
 * The offset is taken from $tz, or from the user's 'timecorrection'
 * option when $tz is false (or otherwise empty). An empty string
 * offset means "use $wgLocalTZoffset" (minutes); a value containing ':'
 * is read as hours:minutes; anything else is read as whole hours.
 *
 * @public
 * @param int $ts the time in date('YmdHis') format
 * @param mixed $tz adjust the time by this amount (default false,
 *        mean we get user timecorrection setting)
 * @return int the adjusted time in date('YmdHis') format, or $ts itself
 *         when the offset is zero
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( !$tz ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	# minutes and hours differences:
	$minDiff = 0;
	$hrDiff = 0;

	if ( $tz === '' ) {
		# Global offset in minutes.
		if ( isset( $wgLocalTZoffset ) ) {
			# Round the hours towards zero; the remainder keeps the sign.
			if ( $wgLocalTZoffset >= 0 ) {
				$hrDiff = floor( $wgLocalTZoffset / 60 );
			} else {
				$hrDiff = ceil( $wgLocalTZoffset / 60 );
			}
			$minDiff = $wgLocalTZoffset % 60;
		}
	} elseif ( strpos( $tz, ':' ) !== false ) {
		$tzArray = explode( ':', $tz );
		$hrDiff = intval( $tzArray[0] );
		$minDiff = intval( $tzArray[1] );
		# The minutes share the sign of the hours field. The sign is read
		# from the text rather than from $hrDiff so that "-0:30" (which has
		# an hour value of 0) is still applied as minus thirty minutes.
		if ( strpos( ltrim( $tzArray[0] ), '-' ) === 0 ) {
			$minDiff = -$minDiff;
		}
	} else {
		$hrDiff = intval( $tz );
	}

	# No difference ? Return time unchanged
	if ( 0 == $hrDiff && 0 == $minDiff ) {
		return $ts;
	}

	wfSuppressWarnings(); // E_STRICT system time bitching
	# Generate an adjusted date
	$t = mktime(
		(int)substr( $ts, 8, 2 ) + $hrDiff,   # Hours
		(int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
		(int)substr( $ts, 12, 2 ),            # Seconds
		(int)substr( $ts, 4, 2 ),             # Month
		(int)substr( $ts, 6, 2 ),             # Day
		(int)substr( $ts, 0, 4 )              # Year
	);

	$date = date( 'YmdHis', $t );
	wfRestoreWarnings();

	return $date;
}
463
/**
 * This is a workalike of PHP's date() function, but with better
 * internationalisation, a reduced set of format characters, and a better
 * escaping format.
 *
 * Supported format characters are dDjlNwzWFmMntLYyaAgGhHiscrU. See the
 * PHP manual for definitions. There are a number of extensions, which
 * start with "x":
 *
 *    xn   Do not translate digits of the next numeric format character
 *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 *    xr   Use roman numerals for the next numeric format character
 *    xx   Literal x
 *    xg   Genitive month name
 *
 *    xij  j (day number) in Iranian calendar
 *    xiF  F (month name) in Iranian calendar
 *    xin  n (month number) in Iranian calendar
 *    xiY  Y (full year) in Iranian calendar
 *
 * Characters enclosed in double quotes will be considered literal (with
 * the quotes themselves removed). Unmatched quotes will be considered
 * literal quotes. Example:
 *
 *    "The month is" F  => The month is January
 *    i's"              => 20'11"
 *
 * Backslash escaping is also supported.
 *
 * Input timestamp is assumed to be pre-normalized to the desired local
 * time zone, if any.
 *
 * @param string $format
 * @param string $ts 14-character timestamp
 *      YYYYMMDDHHMMSS
 *      01234567890123
 * @return string
 */
function sprintfDate( $format, $ts ) {
	$out = '';
	$len = strlen( $format );

	$rawNext = false;    # set by xn, cleared after the next number
	$rawSticky = false;  # toggled by xN
	$romanNext = false;  # set by xr, cleared after the next roman number

	# Computed on first use only
	$unix = false;
	$iranian = false;

	for ( $pos = 0; $pos < $len; $pos++ ) {
		$num = false;

		# Read one format code; "x" and "xi" pull in the following character
		$code = $format[$pos];
		if ( $code == 'x' && $pos < $len - 1 ) {
			$code .= $format[++$pos];
		}
		if ( $code === 'xi' && $pos < $len - 1 ) {
			$code .= $format[++$pos];
		}

		switch ( $code ) {
			# Flags and the literal x
			case 'xx':
				$out .= 'x';
				break;
			case 'xn':
				$rawNext = true;
				break;
			case 'xN':
				$rawSticky = !$rawSticky;
				break;
			case 'xr':
				$romanNext = true;
				break;

			# Year
			case 'Y':
				$num = substr( $ts, 0, 4 );
				break;
			case 'y':
				$num = substr( $ts, 2, 2 );
				break;

			# Month
			case 'm':
				$num = substr( $ts, 4, 2 );
				break;
			case 'n':
				$num = intval( substr( $ts, 4, 2 ) );
				break;
			case 'F':
				$out .= $this->getMonthName( substr( $ts, 4, 2 ) );
				break;
			case 'xg':
				$out .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
				break;
			case 'M':
				$out .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
				break;

			# Day of month
			case 'd':
				$num = substr( $ts, 6, 2 );
				break;
			case 'j':
				$num = intval( substr( $ts, 6, 2 ) );
				break;

			# Time of day
			case 'H':
				$num = substr( $ts, 8, 2 );
				break;
			case 'G':
				$num = intval( substr( $ts, 8, 2 ) );
				break;
			case 'h':
				$h = substr( $ts, 8, 2 );
				$num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
				break;
			case 'g':
				$h = substr( $ts, 8, 2 );
				$num = $h % 12 ? $h % 12 : 12;
				break;
			case 'a':
				$out .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
				break;
			case 'A':
				$out .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
				break;
			case 'i':
				$num = substr( $ts, 10, 2 );
				break;
			case 's':
				$num = substr( $ts, 12, 2 );
				break;

			# Weekday names and numbers, derived from the Unix timestamp
			case 'D':
				if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
				$out .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
				break;
			case 'l':
				if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
				$out .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
				break;
			case 'N':
				if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
				$w = gmdate( 'w', $unix );
				$num = $w ? $w : 7;
				break;

			# Numbers that gmdate() produces under the very same format letter
			case 'w':
			case 'z':
			case 'W':
			case 't':
			case 'L':
				if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
				$num = gmdate( $code, $unix );
				break;

			# Full date strings from gmdate(), appended untranslated
			case 'c':
			case 'r':
				if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
				$out .= gmdate( $code, $unix );
				break;

			case 'U':
				if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
				$num = $unix;
				break;

			# Iranian calendar: tsToIranian() gives array( year, month, day )
			case 'xiY':
				if ( !$iranian ) $iranian = self::tsToIranian( $ts );
				$num = $iranian[0];
				break;
			case 'xin':
				if ( !$iranian ) $iranian = self::tsToIranian( $ts );
				$num = $iranian[1];
				break;
			case 'xiF':
				if ( !$iranian ) $iranian = self::tsToIranian( $ts );
				$out .= $this->getIranianCalendarMonthName( $iranian[1] );
				break;
			case 'xij':
				if ( !$iranian ) $iranian = self::tsToIranian( $ts );
				$num = $iranian[2];
				break;

			case '\\':
				# Backslash escaping
				if ( $pos < $len - 1 ) {
					$out .= $format[++$pos];
				} else {
					$out .= '\\';
				}
				break;

			case '"':
				# Quoted literal
				$endQuote = false;
				if ( $pos < $len - 1 ) {
					$endQuote = strpos( $format, '"', $pos + 1 );
				}
				if ( $endQuote === false ) {
					# Quote at end of string or no terminating quote,
					# assume literal "
					$out .= '"';
				} else {
					$out .= substr( $format, $pos + 1, $endQuote - $pos - 1 );
					$pos = $endQuote;
				}
				break;

			default:
				$out .= $format[$pos];
		}

		# Emit a pending number: raw, roman, or through formatNum()
		if ( $num !== false ) {
			if ( $rawSticky || $rawNext ) {
				$out .= $num;
				$rawNext = false;
			} elseif ( $romanNext ) {
				$out .= self::romanNumeral( $num );
				$romanNext = false;
			} else {
				$out .= $this->formatNum( $num, true );
			}
		}
	}

	return $out;
}
688
/** Days per Gregorian month, January first, February without the leap day */
private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );

/** Days per Iranian month, first month first */
private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );

/**
 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 * Gregorian dates to Iranian dates. Originally written in C, it
 * is released under the terms of GNU Lesser General Public
 * License. Conversion to PHP was performed by Niklas Laxström.
 *
 * Link: http://www.farsiweb.info/jalali/jalali.c
 *
 * @param string $ts timestamp starting with YYYYMMDD
 * @return array array( year, month, day ) in the Iranian calendar, month
 *         and day 1-based
 */
private static function tsToIranian( $ts ) {
	# Zero-based Gregorian fields, years counted from 1600
	$gregYear = substr( $ts, 0, 4 ) - 1600;
	$gregMonth = substr( $ts, 4, 2 ) - 1;
	$gregDay = substr( $ts, 6, 2 ) - 1;

	# Days passed from the beginning (including leap years)
	$gregDayNo = 365 * $gregYear
		+ floor( ( $gregYear + 3 ) / 4 )
		- floor( ( $gregYear + 99 ) / 100 )
		+ floor( ( $gregYear + 399 ) / 400 );

	# Add days of the past months of this year
	for ( $month = 0; $month < $gregMonth; $month++ ) {
		$gregDayNo += self::$GREG_DAYS[$month];
	}

	# Leap years: one extra day once February is over
	$isLeap = ( $gregYear % 4 === 0 && $gregYear % 100 !== 0 ) || ( $gregYear % 400 == 0 );
	if ( $gregMonth > 1 && $isLeap ) {
		$gregDayNo++;
	}

	# Days passed in current month
	$gregDayNo += $gregDay;

	$iranDayNo = $gregDayNo - 79;

	# Whole blocks of 12053 days, each worth 33 Iranian years
	$blocks = floor( $iranDayNo / 12053 );
	$iranDayNo %= 12053;

	# Whole blocks of 1461 days, each worth 4 Iranian years
	$iranYear = 979 + 33 * $blocks + 4 * floor( $iranDayNo / 1461 );
	$iranDayNo %= 1461;

	if ( $iranDayNo >= 366 ) {
		$iranYear += floor( ( $iranDayNo - 1 ) / 365 );
		$iranDayNo = floor( ( $iranDayNo - 1 ) % 365 );
	}

	# Strip whole months off the remaining days; $i is the zero-based month
	for ( $i = 0; $i < 11 && $iranDayNo >= self::$IRANIAN_DAYS[$i]; $i++ ) {
		$iranDayNo -= self::$IRANIAN_DAYS[$i];
	}

	$iranMonth = $i + 1;
	$iranDay = $iranDayNo + 1;

	return array( $iranYear, $iranMonth, $iranDay );
}
746
/**
 * Roman number formatting up to 3000
 *
 * @param mixed $num number to format; passed through intval() first
 * @return mixed the roman numeral as a string, or the intval() of $num
 *         when that is below 1 or above 3000
 */
static function romanNumeral( $num ) {
	# One row per decimal position (ones, tens, hundreds, thousands),
	# indexed by the digit at that position
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$value = intval( $num );
	if ( $value <= 0 || $value > 3000 ) {
		return $value;
	}

	$roman = '';
	# Walk from the thousands down to the ones
	foreach ( array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 ) as $row => $unit ) {
		if ( $value >= $unit ) {
			$roman .= $table[$row][(int)floor( $value / $unit )];
		}
		$value = $value % $unit;
	}

	return $roman;
}
772
/**
 * This is meant to be used by time(), date(), and timeanddate() to get
 * the date preference they're supposed to use, it should be used in
 * all children.
 *
 * Example:
 *   function timeanddate([...], $format = true) {
 *       $datePreference = $this->dateFormat($format);
 *       [...]
 *   }
 *
 * @param mixed $usePrefs: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return string the preference, or 'default' when it compares equal to ''
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		# An explicit format was passed in
		$preference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$preference = $wgUser->getDatePreference();
	} else {
		$options = User::getDefaultOptions();
		$preference = (string)$options['date'];
	}

	return ( $preference == '' ) ? 'default' : $preference;
}
811
/**
 * Format the date part of a timestamp.
 *
 * @public
 * @param mixed $ts the time format which needs to be turned into a
 *        date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj whether to adjust the time output according to the
 *        user configured offset ($timecorrection)
 * @param mixed $format true to use user's date format preference
 * @param string $timecorrection the time offset as returned by
 *        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$this->load();

	# Normalise the input exactly as timeanddate() does; userAdjust() and
	# sprintfDate() both slice the timestamp by fixed YYYYMMDDHHMMSS offsets.
	$ts = wfTimestamp( TS_MW, $ts );

	if ( $adj ) {
		$ts = $this->userAdjust( $ts, $timecorrection );
	}

	# Fall back to the language default when there is no "<pref> date" format
	$pref = $this->dateFormat( $format );
	if ( $pref == 'default' || !isset( $this->dateFormats["$pref date"] ) ) {
		$pref = $this->defaultDateFormat;
	}

	return $this->sprintfDate( $this->dateFormats["$pref date"], $ts );
}
835
/**
 * Format the time-of-day part of a timestamp.
 *
 * @public
 * @param mixed $ts the time format which needs to be turned into a
 *        date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj whether to adjust the time output according to the
 *        user configured offset ($timecorrection)
 * @param mixed $format true to use user's date format preference
 * @param string $timecorrection the time offset as returned by
 *        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$this->load();

	# Normalise the input exactly as timeanddate() does; userAdjust() and
	# sprintfDate() both slice the timestamp by fixed YYYYMMDDHHMMSS offsets.
	$ts = wfTimestamp( TS_MW, $ts );

	if ( $adj ) {
		$ts = $this->userAdjust( $ts, $timecorrection );
	}

	# Fall back to the language default when there is no "<pref> time" format
	$pref = $this->dateFormat( $format );
	if ( $pref == 'default' || !isset( $this->dateFormats["$pref time"] ) ) {
		$pref = $this->defaultDateFormat;
	}

	return $this->sprintfDate( $this->dateFormats["$pref time"], $ts );
}
859
/**
 * Format both the time and the date of a timestamp.
 *
 * @public
 * @param mixed $ts the time format which needs to be turned into a
 *        date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj whether to adjust the time output according to the
 *        user configured offset ($timecorrection)
 * @param mixed $format what format to return, if it's false output the
 *        default one (default true)
 * @param string $timecorrection the time offset as returned by
 *        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false) {
	$this->load();

	$ts = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$ts = $this->userAdjust( $ts, $timecorrection );
	}

	# Use the language default when there is no "<pref> both" format
	$pref = $this->dateFormat( $format );
	$formatKey = "$pref both";
	if ( $pref == 'default' || !isset( $this->dateFormats[$formatKey] ) ) {
		$formatKey = "{$this->defaultDateFormat} both";
	}

	return $this->sprintfDate( $this->dateFormats[$formatKey], $ts );
}
889
/**
 * Call load(), then look up one entry of the 'messages' localisation item.
 *
 * @param string $key message key
 * @return mixed the message, or null when the key is not set
 */
function getMessage( $key ) {
	$this->load();

	if ( isset( $this->messages[$key] ) ) {
		return $this->messages[$key];
	}

	return null;
}
894
/**
 * Call load(), then return the whole 'messages' localisation item.
 *
 * @return mixed value of $this->messages
 */
function getAllMessages() {
	$this->load();

	return $this->messages;
}
899
/**
 * For most languages, this is a wrapper for iconv.
 * '//IGNORE' is appended to the output charset before the call.
 *
 * @param string $in input charset
 * @param string $out output charset
 * @param string $string text to convert
 * @return mixed whatever PHP's iconv() returns
 */
function iconv( $in, $out, $string ) {
	$target = $out . '//IGNORE';

	return iconv( $in, $target, $string );
}
904
// callback functions for uc(), lc(), ucwords(), ucwordbreaks()

/**
 * preg_replace_callback() handler: ucfirst() of the first capture group.
 *
 * @param array $matches
 * @return string
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];

	return $this->ucfirst( $word );
}
909
/**
 * preg_replace_callback() handler: mb_strtoupper() of the whole match.
 *
 * @param array $matches
 * @return string
 */
function ucwordbreaksCallbackMB( $matches ) {
	$whole = $matches[0];

	return mb_strtoupper( $whole );
}
913
/**
 * preg_replace_callback() handler: map the first capture group through
 * the first element of getCaseMaps().
 *
 * @param array $matches
 * @return string
 */
function ucCallback( $matches ) {
	list( $upperMap ) = self::getCaseMaps();

	return strtr( $matches[1], $upperMap );
}
918
/**
 * preg_replace_callback() handler: map the first capture group through
 * the second element of getCaseMaps().
 *
 * @param array $matches
 * @return string
 */
function lcCallback( $matches ) {
	list( , $lowerMap ) = self::getCaseMaps();

	return strtr( $matches[1], $lowerMap );
}
923
/**
 * preg_replace_callback() handler: mb_strtoupper() of the whole match.
 *
 * @param array $matches
 * @return string
 */
function ucwordsCallbackMB( $matches ) {
	$whole = $matches[0];

	return mb_strtoupper( $whole );
}
927
/**
 * preg_replace_callback() handler: map the whole match through the first
 * element of getCaseMaps().
 *
 * @param array $matches
 * @return string
 */
function ucwordsCallbackWiki( $matches ) {
	list( $upperMap ) = self::getCaseMaps();

	return strtr( $matches[0], $upperMap );
}
932
/**
 * Upper-case the first character of a string.
 *
 * @param string $str
 * @return string "" for input comparing equal to "", otherwise the string
 *         with its first character upper-cased
 */
function ucfirst( $str ) {
	// Nothing to do for empty strings
	if ( $str == "" ) {
		return "";
	}

	// First byte below 128: PHP's own ucfirst() is enough. Otherwise
	// fall back to more complex logic in case of multibyte strings.
	return ( ord( $str[0] ) < 128 )
		? ucfirst( $str )
		: self::uc( $str, true );
}
938
/**
 * Upper-case a string, or only its first character.
 *
 * Uses the mb_* functions for multibyte input when mb_strtoupper() exists,
 * a regex plus ucCallback() for multibyte input when it does not, and the
 * plain PHP functions for input without bytes in the 0x80-0xff range.
 *
 * @param string $str
 * @param bool $first true to change the first character only
 * @return string
 */
function uc( $str, $first = false ) {
	$multibyte = self::isMultibyte( $str );

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return $multibyte ? mb_strtoupper( $str ) : strtoupper( $str );
		}
		if ( !$multibyte ) {
			return ucfirst( $str );
		}
		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	if ( !$multibyte ) {
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	// The result is not used here; ucCallback() asks for the maps again.
	list( $wikiUpperChars ) = $this->getCaseMaps();

	// Anchor the pattern at the start when only the first character counts
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, "ucCallback" ),
		$str
	);
}
964
/**
 * Lower-case the first character of a string.
 *
 * @param string $str
 * @return string "" for input comparing equal to "", otherwise the string
 *         with its first character lower-cased
 */
function lcfirst( $str ) {
	if ( $str == "" ) {
		return "";
	}

	// First byte at 128 or above: leave it to the multibyte-aware lc()
	if ( ord( $str[0] ) >= 128 ) {
		return self::lc( $str, true );
	}

	// Single-byte first character: overwrite it in place
	$str[0] = strtolower( $str[0] );

	return $str;
}
974
/**
 * Lower-case a string, or only its first character.
 *
 * Uses the mb_* functions for multibyte input when mb_strtolower() exists,
 * a regex plus lcCallback() for multibyte input when it does not, and the
 * plain PHP functions for input without bytes in the 0x80-0xff range.
 *
 * @param string $str
 * @param bool $first true to change the first character only
 * @return string
 */
function lc( $str, $first = false ) {
	$multibyte = self::isMultibyte( $str );

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return $multibyte ? mb_strtolower( $str ) : strtolower( $str );
		}
		if ( $multibyte ) {
			return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
	}

	if ( !$multibyte ) {
		return $first
			? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 )
			: strtolower( $str );
	}

	// The result is not used here; lcCallback() asks for the maps again.
	list( , $wikiLowerChars ) = self::getCaseMaps();

	// Anchor the pattern at the start when only the first character counts
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, "lcCallback" ),
		$str
	);
}
996
/**
 * Tell whether a string contains at least one byte in the range 0x80-0xff.
 *
 * @param string $str
 * @return bool
 */
function isMultibyte( $str ) {
	$hasHighByte = preg_match( '/[\x80-\xff]/', $str );

	return (bool)$hasHighByte;
}
1000
/**
 * Lower-case a string, then capitalise the first letter of each word,
 * where words are separated by single spaces.
 *
 * @param string $str
 * @return string
 */
function ucwords( $str ) {
	// Without high bytes the built-in functions do the job
	if ( !self::isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = self::lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$callback = function_exists( 'mb_strtoupper' )
		? "ucwordsCallbackMB"
		: "ucwordsCallbackWiki";

	return preg_replace_callback(
		$replaceRegexp,
		array( $this, $callback ),
		$str
	);
}
1025
/**
 * Capitalize words at word breaks.
 *
 * Multibyte input is lower-cased first and then capitalised after an
 * explicit list of break characters; other input is run through a \b
 * based pattern with ucwordbreaksCallbackAscii().
 *
 * @param string $str
 * @return string
 */
function ucwordbreaks( $str ) {
	if ( !self::isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, "ucwordbreaksCallbackAscii" ),
			$str
		);
	}

	$str = self::lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks= "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$callback = function_exists( 'mb_strtoupper' )
		? "ucwordbreaksCallbackMB"
		: "ucwordsCallbackWiki";

	return preg_replace_callback(
		$replaceRegexp,
		array( $this, $callback ),
		$str
	);
}
1056
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * The folding used here is uc().
 *
 * @param string $s
 * @return string
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );

	return $folded;
}
1071
/**
 * Return $s unchanged when it has no bytes in the 0x80-0xff range or when
 * it matches the UTF-8 sequence pattern below; otherwise convert it from
 * the fallback 8-bit encoding to utf-8.
 *
 * @param string $s
 * @return string
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# Check for non-UTF-8 URLs
	if ( !preg_match( '/[\x80-\xff]/', $s ) ) {
		return $s;
	}

	$isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );

	return $isutf8
		? $s
		: $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
}
1086
/**
 * Call load(), then return the 'fallback8bitEncoding' localisation item.
 *
 * @return mixed value of $this->fallback8bitEncoding
 */
function fallback8bitEncoding() {
	$this->load();

	return $this->fallback8bitEncoding;
}
1091
/**
 * Some languages have special punctuation to strip out
 * or characters which need to be converted for MySQL's
 * indexing to grok it correctly. Make such changes here.
 *
 * When $wgDBtype is 'mysql', the text is lower-cased and every sequence
 * of a lead byte (0xc0-0xff) plus its continuation bytes (0x80-0xbf) is
 * replaced by 'U8' followed by the hex form of those bytes. For any other
 * database type the input is returned untouched.
 *
 * The replacement is done with preg_replace_callback(); the "/e" pattern
 * modifier that used to evaluate the replacement as PHP code was
 * deprecated in PHP 5.5 and removed in PHP 7.0.
 *
 * @param string $string
 * @return string
 */
function stripForSearch( $string ) {
	global $wgDBtype;
	if ( $wgDBtype != 'mysql' ) {
		return $string;
	}

	# MySQL fulltext index doesn't grok utf-8, so we
	# need to fold cases and convert to hex

	wfProfileIn( __METHOD__ );
	if ( function_exists( 'mb_strtolower' ) ) {
		$out = preg_replace_callback(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			array( $this, 'stripForSearchCallback' ),
			mb_strtolower( $string ) );
	} else {
		# No mbstring: the callback lower-cases each sequence itself
		$out = preg_replace_callback(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			array( $this, 'stripForSearchCallbackWiki' ),
			$string );
	}
	wfProfileOut( __METHOD__ );
	return $out;
}

/**
 * stripForSearch() helper: hex-encode one already lower-cased sequence.
 *
 * @param array $matches
 * @return string 'U8' followed by bin2hex() of the first capture group
 */
function stripForSearchCallback( $matches ) {
	return 'U8' . bin2hex( $matches[1] );
}

/**
 * stripForSearch() helper: lower-case one sequence through the second
 * element of getCaseMaps(), then hex-encode it.
 *
 * @param array $matches
 * @return string 'U8' followed by bin2hex() of the lower-cased capture group
 */
function stripForSearchCallbackWiki( $matches ) {
	list( , $wikiLowerChars ) = self::getCaseMaps();
	return 'U8' . bin2hex( strtr( $matches[1], $wikiLowerChars ) );
}
1125
/**
 * Hook point for languages (e.g. Chinese) that must convert search terms
 * before search results can be displayed correctly.
 *
 * This implementation performs no conversion.
 *
 * @param array $termsArray
 * @return array The input, unchanged
 */
function convertForSearchResult( $termsArray ) {
	$converted = $termsArray;
	return $converted;
}
1131
/**
 * Get the first character of a string.
 *
 * A "character" here is one well-formed UTF-8 sequence of 1 to 4 bytes.
 * If the string is empty or does not start with such a sequence, an
 * empty string is returned.
 *
 * @param string $s
 * @return string
 */
function firstChar( $s ) {
	$leadingCharPattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/';

	$found = array();
	preg_match( $leadingCharPattern, $s, $found );
	if ( isset( $found[1] ) ) {
		return $found[1];
	}
	return "";
}
1145
/**
 * Startup hook for alternate character encodings.
 *
 * Some languages may have an alternate char encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc). If such a
 * language is used as the primary content language, an override to the
 * defaults can be set here on startup.
 *
 * This implementation intentionally does nothing.
 */
function initEncoding() {
	# Nothing to do here.
}
1152
/**
 * Recode text before it is placed in the edit box.
 *
 * For some languages we'll want to explicitly specify which characters
 * make it into the edit box raw or are converted in some way or another.
 * Note that if wgOutputEncoding is different from wgInputEncoding, this
 * text will be further converted to wgOutputEncoding.
 *
 * @param string $s UTF-8 text
 * @return string $s unchanged when $wgEditEncoding is empty or 'UTF-8',
 *   otherwise $s converted from UTF-8 to $wgEditEncoding
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$keepAsIs = ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	if( $keepAsIs ) {
		return $s;
	}
	return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
}
1168
/**
 * Recode submitted text back to UTF-8; the counterpart of recodeForEdit().
 *
 * @param string $s Text in $wgEditEncoding (UTF-8 if that setting is empty)
 * @return string $s unchanged when the edit encoding is UTF-8, otherwise
 *   $s converted from the edit encoding to UTF-8
 */
function recodeInput( $s ) {
	global $wgEditEncoding;

	# An empty setting means plain UTF-8
	$enc = ( $wgEditEncoding != "" ) ? $wgEditEncoding : 'UTF-8';

	if( $enc != 'UTF-8' ) {
		return $this->iconv( $enc, 'UTF-8', $s );
	}
	return $s;
}
1183
/**
 * For right-to-left language support.
 *
 * Loads the localisation data if needed and returns its 'rtl' value.
 *
 * @return bool
 */
function isRTL() {
	$this->load();
	$rightToLeft = $this->rtl;
	return $rightToLeft;
}
1193
/**
 * A hidden direction mark, depending on the language direction.
 *
 * @return string The UTF-8 bytes E2 80 8F (RLM) when isRTL() is true,
 *   otherwise E2 80 8E (LRM)
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
1202
/**
 * An arrow, depending on the language direction.
 *
 * @return string A left-pointing arrow when isRTL() is true, otherwise a
 *   right-pointing arrow
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
1211
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar".
 *
 * Loads the localisation data if needed and returns its
 * 'linkPrefixExtension' value.
 *
 * @return bool
 */
function linkPrefixExtension() {
	$this->load();
	$enabled = $this->linkPrefixExtension;
	return $enabled;
}
1221
/**
 * Get the magic word table of this language.
 *
 * The table is returned by reference, so the return statement must stay a
 * plain property access.
 *
 * @return array Reference to $this->magicWords
 */
function &getMagicWords() {
	# Make sure the localisation data has been copied onto this object
	$this->load();
	return $this->magicWords;
}
1226
/**
 * Fill a MagicWord object with data from here.
 *
 * The entry for $mw->mId is looked up first in the extension-provided
 * words (collected once per object through the 'LanguageGetMagic' hook),
 * then in this object's getMagicWords(), and finally in
 * Language::getMagicWords(). Element 0 of the entry is stored in
 * $mw->mCaseSensitive and the remaining elements in $mw->mSynonyms.
 *
 * @param object $mw Object with an mId property; modified in place
 */
function getMagic( &$mw ) {
	if ( !$this->mMagicHookDone ) {
		# Flag first so the hook is run at most once for this object
		$this->mMagicHookDone = true;
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
	}

	if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId];
	} else {
		$magicWords =& $this->getMagicWords();
		if ( !isset( $magicWords[$mw->mId] ) ) {
			# Fall back to English if local list is incomplete
			$magicWords =& Language::getMagicWords();
		}
		$rawEntry = $magicWords[$mw->mId];
	}

	# NOTE(review): an invalid entry is only logged; execution continues
	# and the entry is still indexed and sliced below.
	if( !is_array( $rawEntry ) ) {
		error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
	}

	$mw->mCaseSensitive = $rawEntry[0];
	$mw->mSynonyms = array_slice( $rawEntry, 1 );
}
1252
/**
 * Add magic words to the extension array.
 *
 * The fallback chain of this language is walked via getFallbackFor(),
 * 'en' is appended if the chain does not already contain it, and the
 * entries of $newWords are then applied from the end of the chain back to
 * this language, so that words of languages earlier in the chain take
 * precedence over those of their fallbacks.
 *
 * @param array $newWords Map of language code => array of magic words
 */
function addMagicWordsByLang( $newWords ) {
	# Collect this language followed by its fallbacks, stopping on a loop
	$chain = array();
	$lang = $this->getCode();
	while ( $lang && !in_array( $lang, $chain ) ) {
		$chain[] = $lang;
		$lang = self::getFallbackFor( $lang );
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	# Apply from the most distant fallback to the language itself; the
	# array union keeps the keys of its left operand, so later (closer)
	# languages override what was added before them.
	for ( $i = count( $chain ) - 1; $i >= 0; $i-- ) {
		$lang = $chain[$i];
		if ( isset( $newWords[$lang] ) ) {
			$this->mMagicExtensions = $newWords[$lang] + $this->mMagicExtensions;
		}
	}
}
1273
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * On first use the localisation's specialPageAliases are copied and passed
 * through the 'LanguageGetSpecialPageAliases' hook; the result is kept on
 * the object and returned directly on later calls.
 *
 * @return array
 */
function getSpecialPageAliases() {
	$this->load();

	if ( isset( $this->mExtendedSpecialPageAliases ) ) {
		return $this->mExtendedSpecialPageAliases;
	}

	$this->mExtendedSpecialPageAliases = $this->specialPageAliases;
	wfRunHooks( 'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );

	return $this->mExtendedSpecialPageAliases;
}
1287
/**
 * Wrap text in emphasis markup. Italic is unsuitable for some languages,
 * which is why this is a method of the language object.
 *
 * @public
 *
 * @param string $text The text to be emphasized.
 * @return string $text enclosed in an <em> element; $text is not escaped
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
1299
/**
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages: some want their own digit characters (e.g. ੨੯੩,੨੯੧.੨੩੫) and
 * others, such as Icelandic, just want to use commas instead of dots, and
 * dots instead of commas, like "293.291,235".
 *
 * An example of this function being called:
 * <code>
 * wfMsg( 'message', $wgLang->formatNum( $num ) )
 * </code>
 *
 * See LanguageGu.php for the Gujarati implementation and
 * LanguageIs.php for the , => . and . => , implementation.
 *
 * @todo check if it's viable to use localeconv() for the decimal
 *       separator thing.
 * @public
 * @param mixed $number the string to be formatted, should be an integer or
 *        a floating point number.
 * @param bool $nocommafy Set to true for special numbers like dates; this
 *        skips both commafy() and the separator transform table
 * @return string
 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		# Group digits en_US style, then swap in the local separators
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( !is_null( $separators ) ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		# Replace digits with the language's own digit characters
		$digits = $this->digitTransformTable();
		if ( !is_null( $digits ) ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
1339
/**
 * Undo the transformations of formatNum().
 *
 * The digit transform table and then the separator transform table are
 * applied in reverse (flipped) direction, after which every remaining ','
 * is removed.
 *
 * @param string $number
 * @return string
 */
function parseFormattedNumber( $number ) {
	$digits = $this->digitTransformTable();
	if ( !is_null( $digits ) ) {
		$number = strtr( $number, array_flip( $digits ) );
	}

	$separators = $this->separatorTransformTable();
	if ( !is_null( $separators ) ) {
		$number = strtr( $number, array_flip( $separators ) );
	}

	# Drop the grouping commas
	return strtr( $number, array( ',' => '' ) );
}
1350
/**
 * Adds commas to a given number.
 *
 * The number is processed back to front: a comma is inserted after every
 * group of three digits that is followed by another digit, except inside
 * the part after a decimal point.
 *
 * @param mixed $_
 * @return string
 */
function commafy($_) {
	# Work on the reversed string so groups are counted from the right
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
1360
/**
 * Get the 'digitTransformTable' localisation value, loading the
 * localisation data first if necessary.
 *
 * @return mixed Callers in this class treat null as "no table"
 */
function digitTransformTable() {
	$this->load();
	$table = $this->digitTransformTable;
	return $table;
}
1365
/**
 * Get the 'separatorTransformTable' localisation value, loading the
 * localisation data first if necessary.
 *
 * @return mixed Callers in this class treat null as "no table"
 */
function separatorTransformTable() {
	$this->load();
	$table = $this->separatorTransformTable;
	return $table;
}
1370
1371
/**
 * For the credit list in includes/Credits.php (action=credits)
 *
 * Joins the items with ', ', except for the last two, which are joined
 * with the 'and' message surrounded by spaces. An empty list gives an
 * empty string and a single item is returned as it is.
 *
 * @param array $l List with consecutive integer keys starting at 0
 * @return string
 */
function listToText( $l ) {
	$last = count( $l ) - 1;
	if ( $last < 0 ) {
		return '';
	}

	$text = $l[$last];
	if ( $last >= 1 ) {
		# Final pair: "<item> <and> <item>"
		$text = $l[$last - 1] . ' ' . $this->getMessageFromDB( 'and' ) . ' ' . $text;
	}
	# Everything before the final pair is comma separated
	for ( $i = $last - 2; $i >= 0; $i-- ) {
		$text = $l[$i] . ', ' . $text;
	}
	return $text;
}
1392
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning
 * and the ellipsis is prepended instead of appended.
 *
 * A string that already fits into abs( $length ) bytes is returned
 * unchanged, without ellipsis. A $length of 0 returns just the ellipsis.
 *
 * @param string $string String to truncate
 * @param int $length Maximum length in bytes (excluding ellipsis)
 * @param string $ellipsis String to append to the truncated text
 * @return string
 */
function truncate( $string, $length, $ellipsis = "" ) {
	if( $length == 0 ) {
		return $ellipsis;
	}
	if ( strlen( $string ) <= abs( $length ) ) {
		return $string;
	}
	if( $length > 0 ) {
		$string = substr( $string, 0, $length );
		$char = ord( $string[strlen( $string ) - 1] );
		$m = array();
		if ($char >= 0xc0) {
			# We got the first byte only of a multibyte char; remove it.
			$string = substr( $string, 0, -1 );
		} elseif( $char >= 0x80 &&
			# The "s" modifier lets (.*) span newlines; without it the
			# pattern never matches multi-line text and the partial
			# character would be left in place.
			preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
			'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) ) {
			# We chopped in the middle of a character; remove it
			$string = $m[1];
		}
		return $string . $ellipsis;
	} else {
		$string = substr( $string, $length );
		$char = ord( $string[0] );
		if( $char >= 0x80 && $char < 0xc0 ) {
			# We chopped in the middle of a character; remove the whole thing
			$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
		}
		return $ellipsis . $string;
	}
}
1439
/**
 * Grammatical transformations, needed for inflected languages.
 * Invoked by putting {{grammar:case|word}} in a message.
 *
 * This implementation only consults $wgGrammarForms['en'][$case][$word]
 * and returns the word unchanged when no such entry exists.
 *
 * @param string $word
 * @param string $case
 * @return string
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	if ( !isset( $wgGrammarForms['en'][$case][$word] ) ) {
		return $word;
	}
	return $wgGrammarForms['en'][$case][$word];
}
1455
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 forms of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param integer $count
 * @param string $w1 Form returned when $count equals 1 or -1
 * @param string $w2 Form returned for every other count
 * @param string $w3 Not used by this implementation
 * @param string $w4 Not used by this implementation
 * @param string $w5 Not used by this implementation
 * @return string
 */
function convertPlural( $count, $w1, $w2, $w3, $w4, $w5) {
	if ( $count == '1' || $count == '-1' ) {
		return $w1;
	}
	return $w2;
}
1478
/**
 * For translating expiry times.
 *
 * The 'ipboptions' message is read as a comma separated list of
 * "label:value" pairs. When $str equals one of the values, the trimmed,
 * HTML-escaped label is returned. When the message is '-' or no value
 * matches, $str is returned unchanged.
 *
 * @param string $str The validated block time in English
 * @return string Somehow translated block time
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );
	if ( $scBlockExpiryOptions == '-') {
		return $str;
	}

	foreach ( explode( ',', $scBlockExpiryOptions ) as $option ) {
		$parts = explode( ":", $option );
		if ( count( $parts ) < 2 ) {
			# No colon in this option, nothing to compare against
			continue;
		}
		$show = $parts[0];
		$value = $parts[1];
		if ( strcmp( $str, $value ) == 0 ) {
			return htmlspecialchars( trim( $show ) );
		}
	}

	return $str;
}
1504
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use.
 *
 * This implementation performs no segmentation.
 *
 * @param string $text
 * @return string The input, unchanged
 */
function segmentForDiff( $text ) {
	$segmented = $text;
	return $segmented;
}
1515
/**
 * Counterpart of segmentForDiff(): unsegment the text to show the result.
 *
 * This implementation leaves the text as it is.
 *
 * @param string $text
 * @return string The input, unchanged
 */
function unsegmentForDiff( $text ) {
	$unsegmented = $text;
	return $unsegmented;
}
1525
/**
 * Convert text to different variants of a language.
 *
 * Delegates to the convert() method of the converter object.
 *
 * @param string $text
 * @param bool $isTitle Passed through to the converter
 * @return string
 */
function convert( $text, $isTitle = false) {
	$converted = $this->mConverter->convert( $text, $isTitle );
	return $converted;
}
1530
/**
 * Convert text from within Parser.
 *
 * Delegates to the parserConvert() method of the converter object.
 *
 * @param string $text
 * @param object $parser Passed through to the converter
 * @return string
 */
function parserConvert( $text, &$parser ) {
	$converted = $this->mConverter->parserConvert( $text, $parser );
	return $converted;
}
1535
/**
 * Check if this is a language with variants.
 *
 * @return bool True when getVariants() reports more than one variant
 */
function hasVariants(){
	$variantCount = count( $this->getVariants() );
	return $variantCount > 1;
}
1540
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 *
 * Delegates to the armourMath() method of the converter object.
 *
 * @param string $text
 * @return string
 */
function armourMath($text){
	$armoured = $this->mConverter->armourMath( $text );
	return $armoured;
}
1545
1546
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param string $text
 * @param bool $isTitle Passed through to convert()
 * @return string Result of convert(), run through htmlspecialchars()
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
1557
/**
 * Convert a category key by delegating to the convertCategoryKey() method
 * of the converter object.
 *
 * @param string $key
 * @return string
 */
function convertCategoryKey( $key ) {
	$converted = $this->mConverter->convertCategoryKey( $key );
	return $converted;
}
1561
/**
 * Get the list of variants supported by this language, as reported by the
 * converter object. See the sample implementation in LanguageZh.php.
 *
 * @return array an array of language codes
 */
function getVariants() {
	$variants = $this->mConverter->getVariants();
	return $variants;
}
1571
1572
/**
 * Get the preferred variant, as reported by the converter object.
 *
 * @param bool $fromUser Passed through to the converter
 * @return string
 */
function getPreferredVariant( $fromUser = true ) {
	$variant = $this->mConverter->getPreferredVariant( $fromUser );
	return $variant;
}
1576
/**
 * If a language supports multiple variants, it is possible that a
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it by delegating to the converter object.
 * See e.g. LanguageZh.php
 *
 * @param string $link the name of the link
 * @param mixed $nt the title object of the link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt ) {
	# Both arguments are passed on by reference so that the converter
	# can update them in place.
	$this->mConverter->findVariantLink( $link, $nt );
}
1590
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * Delegates to the converter object.
 *
 * @param string $text
 * @return array
 */
function convertLinkToAllVariants($text){
	$allVariants = $this->mConverter->convertLinkToAllVariants( $text );
	return $allVariants;
}
1600
1601
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value comes from the
 * converter object.
 *
 * @return string
 * @public
 */
function getExtraHashOptions() {
	$options = $this->mConverter->getExtraHashOptions();
	return $options;
}
1612
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter object.
 *
 * @return string
 */
function getParsedTitle() {
	$title = $this->mConverter->getParsedTitle();
	return $title;
}
1623
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser. Delegates to the converter object.
 *
 * @param string $text text to be tagged for no conversion
 * @param bool $noParse Passed through to the converter
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse=false ) {
	$marked = $this->mConverter->markNoConversion( $text, $noParse );
	return $marked;
}
1634
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * Loads the localisation data if needed and returns $this->linkTrail.
 *
 * @return string
 * @public
 */
function linkTrail() {
	$this->load();
	$trail = $this->linkTrail;
	return $trail;
}
1646
/**
 * Get the language object itself.
 *
 * @return Language This object
 */
function getLangObj() {
	$self = $this;
	return $self;
}
1650
/**
 * Get the RFC 3066 code for this language object.
 *
 * @return string The value last stored in $this->mCode
 */
function getCode() {
	$code = $this->mCode;
	return $code;
}
1657
/**
 * Set the language code of this object; getCode() returns it afterwards.
 *
 * @param string $code
 */
function setCode( $code ) {
	# Stored verbatim, no validation is done here
	$this->mCode = $code;
}
1661
/**
 * Build a file name from a language code.
 *
 * The first character of the code is upper-cased and every '-' is
 * replaced by '_'; the result is placed between $prefix and $suffix,
 * e.g. ( 'Messages', 'zh-tw', '.ser' ) gives 'MessagesZh_tw.ser'.
 *
 * @param string $prefix Text placed before the transformed code
 * @param string $code Language code
 * @param string $suffix Text placed after the transformed code
 * @return string
 */
static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
	$namePart = ucfirst( $code );
	$namePart = str_replace( '-', '_', $namePart );
	return $prefix . $namePart . $suffix;
}
1665
/**
 * Get the path of the messages file for a language code, located in
 * languages/messages/ below $IP.
 *
 * @param string $code Language code
 * @return string
 */
static function getMessagesFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
1670
/**
 * Get the path of the language class file for a language code, located
 * in languages/classes/ below $IP.
 *
 * @param string $code Language code
 * @return string
 */
static function getClassFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
1675
/**
 * Get the complete localisation cache entry for a language code,
 * loading it first if necessary.
 *
 * @param string $code Language code
 * @param bool $disableCache Passed through to loadLocalisation()
 * @return array
 */
static function getLocalisationArray( $code, $disableCache = false ) {
	# Populates self::$mLocalisationCache[$code]
	self::loadLocalisation( $code, $disableCache );
	return self::$mLocalisationCache[$code];
}
1680
/**
 * Load localisation data for a given code into the static cache
 *
 * Unless $disableCache is set, the sources are tried in this order: the
 * per-process cache, the precompiled data file, the global cache in
 * $wgMemc (discarded when one of its dependency files has changed) and
 * finally the Messages source file. Data loaded from source is merged
 * with the data of the fallback language and stored in the per-process
 * cache and, unless $disableCache is set, in $wgMemc.
 *
 * The local variable names in this function are significant: the
 * messages file is require()d into this scope and its variables are
 * collected with compact(), so the locals must not be renamed.
 *
 * @param string $code Language code
 * @param bool $disableCache Skip all cache lookups and the $wgMemc store
 * @return array Dependencies, map of filenames to mtimes
 */
static function loadLocalisation( $code, $disableCache = false ) {
	static $recursionGuard = array();
	global $wgMemc;

	if ( !$code ) {
		throw new MWException( "Invalid language code requested" );
	}

	if ( $disableCache ) {
		wfProfileIn( __METHOD__ );
	} else {
		# Try the per-process cache
		if ( isset( self::$mLocalisationCache[$code] ) ) {
			return self::$mLocalisationCache[$code]['deps'];
		}

		wfProfileIn( __METHOD__ );

		# Try the serialized directory
		$cache = wfGetPrecompiledData( self::getFileName( "Messages", $code, '.ser' ) );
		if ( $cache ) {
			self::$mLocalisationCache[$code] = $cache;
			wfDebug( "Language::loadLocalisation(): got localisation for $code from precompiled data file\n" );
			wfProfileOut( __METHOD__ );
			return self::$mLocalisationCache[$code]['deps'];
		}

		# Try the global cache
		$memcKey = wfMemcKey('localisation', $code );
		$cache = $wgMemc->get( $memcKey );
		if ( $cache ) {
			# Check file modification times. This scan only serves to leave
			# the first changed file in $file for the debug message below.
			foreach ( $cache['deps'] as $file => $mtime ) {
				if ( !file_exists( $file ) || filemtime( $file ) > $mtime ) {
					break;
				}
			}
			if ( !self::isLocalisationOutOfDate( $cache ) ) {
				self::$mLocalisationCache[$code] = $cache;
				wfDebug( "Language::loadLocalisation(): got localisation for $code from cache\n" );
				wfProfileOut( __METHOD__ );
				return $cache['deps'];
			}
			$wgMemc->delete( $memcKey );
			$cache = false;
			wfDebug( "Language::loadLocalisation(): localisation cache for $code had expired due to update of $file\n" );
		}
	}

	# Default fallback, may be overridden when the messages file is included
	$fallback = ( $code != 'en' ) ? 'en' : false;

	# Load the primary localisation from the source file
	$filename = self::getMessagesFileName( $code );
	if ( file_exists( $filename ) ) {
		$deps = array( $filename => filemtime( $filename ) );
		require( $filename );
		$cache = compact( self::$mLocalisationKeys );
		wfDebug( "Language::loadLocalisation(): got localisation for $code from source\n" );
	} else {
		wfDebug( "Language::loadLocalisation(): no localisation file for $code, using implicit fallback to en\n" );
		$cache = array();
		$deps = array();
	}

	if ( !empty( $fallback ) ) {
		# Load the fallback localisation, with a circular reference guard
		if ( isset( $recursionGuard[$code] ) ) {
			throw new MWException( "Error: Circular fallback reference in language code $code" );
		}
		$recursionGuard[$code] = true;
		$newDeps = self::loadLocalisation( $fallback, $disableCache );
		unset( $recursionGuard[$code] );

		$secondary = self::$mLocalisationCache[$fallback];
		$deps = array_merge( $deps, $newDeps );

		# Merge the fallback localisation with the current localisation
		foreach ( self::$mLocalisationKeys as $key ) {
			if ( !isset( $cache[$key] ) ) {
				# Not defined for this language: inherit the fallback's value
				$cache[$key] = $secondary[$key];
				continue;
			}
			if ( !isset( $secondary[$key] ) ) {
				continue;
			}
			if ( in_array( $key, self::$mMergeableMapKeys ) ) {
				$cache[$key] = $cache[$key] + $secondary[$key];
			} elseif ( in_array( $key, self::$mMergeableListKeys ) ) {
				$cache[$key] = array_merge( $secondary[$key], $cache[$key] );
			} elseif ( in_array( $key, self::$mMergeableAliasListKeys ) ) {
				$cache[$key] = array_merge_recursive( $cache[$key], $secondary[$key] );
			}
		}

		# Merge bookstore lists if requested
		if ( !empty( $cache['bookstoreList']['inherit'] ) ) {
			$cache['bookstoreList'] = array_merge( $cache['bookstoreList'], $secondary['bookstoreList'] );
		}
		# The 'inherit' marker itself is not part of the list
		if ( isset( $cache['bookstoreList']['inherit'] ) ) {
			unset( $cache['bookstoreList']['inherit'] );
		}
	}

	# Add dependencies to the cache entry
	$cache['deps'] = $deps;

	# Replace spaces with underscores in namespace names
	$cache['namespaceNames'] = str_replace( ' ', '_', $cache['namespaceNames'] );

	# Save to both caches
	self::$mLocalisationCache[$code] = $cache;
	if ( !$disableCache ) {
		$wgMemc->set( $memcKey, $cache );
	}

	wfProfileOut( __METHOD__ );
	return $deps;
}
1809
/**
 * Test if a given localisation cache is out of date with respect to the
 * source Messages files. This is done automatically for the global cache
 * in $wgMemc, but is only done on certain occasions for the serialized
 * data file.
 *
 * A cache is out of date when one of the files listed in its 'deps' no
 * longer exists or has a modification time later than the recorded one.
 *
 * @param $cache mixed Either a language code or a cache array
 * @return bool
 */
static function isLocalisationOutOfDate( $cache ) {
	if ( !is_array( $cache ) ) {
		# Given a language code: load it and use its cache entry
		self::loadLocalisation( $cache );
		$cache = self::$mLocalisationCache[$cache];
	}

	foreach ( $cache['deps'] as $file => $mtime ) {
		if ( !file_exists( $file ) || filemtime( $file ) > $mtime ) {
			return true;
		}
	}
	return false;
}
1832
/**
 * Get the fallback for a given language
 *
 * @param string $code Language code
 * @return mixed The 'fallback' entry of the language's localisation data
 */
static function getFallbackFor( $code ) {
	# Populates self::$mLocalisationCache[$code]
	self::loadLocalisation( $code );
	return self::$mLocalisationCache[$code]['fallback'];
}
1840
/**
 * Get all messages for a given language
 *
 * @param string $code Language code
 * @return mixed The 'messages' entry of the language's localisation data
 */
static function getMessagesFor( $code ) {
	# Populates self::$mLocalisationCache[$code]
	self::loadLocalisation( $code );
	return self::$mLocalisationCache[$code]['messages'];
}
1848
/**
 * Get a message for a given language
 *
 * @param string $key Message key
 * @param string $code Language code
 * @return mixed The message, or null when the language's localisation
 *   data has no message with that key
 */
static function getMessageFor( $key, $code ) {
	self::loadLocalisation( $code );
	if ( isset( self::$mLocalisationCache[$code]['messages'][$key] ) ) {
		return self::$mLocalisationCache[$code]['messages'][$key];
	}
	return null;
}
1856
/**
 * Load localisation data for this object
 *
 * On the first call every key listed in self::$mLocalisationKeys is
 * copied from the static localisation cache onto a property of the same
 * name, after which fixUpSettings() is run. Later calls do nothing.
 */
function load() {
	if ( $this->mLoaded ) {
		return;
	}

	self::loadLocalisation( $this->getCode() );
	$cache =& self::$mLocalisationCache[$this->getCode()];
	foreach ( self::$mLocalisationKeys as $key ) {
		$this->$key = $cache[$key];
	}
	# Mark as loaded before the fix-up runs, as the original order does
	$this->mLoaded = true;

	$this->fixUpSettings();
}
1872
/**
 * Do any necessary post-cache-load settings adjustment
 *
 * Applies the site configuration to the loaded namespace names, builds
 * the lower-cased name => ID lookup table in $this->mNamespaceIds, and
 * resolves the 'dmy or mdy' default date format through $wgAmericanDates.
 */
function fixUpSettings() {
	global $wgExtraNamespaces, $wgMetaNamespace, $wgMetaNamespaceTalk,
		$wgNamespaceAliases, $wgAmericanDates;
	wfProfileIn( __METHOD__ );

	# Site-defined namespaces win over the localised names
	if ( $wgExtraNamespaces ) {
		$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames;
	}

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	if ( $wgMetaNamespaceTalk ) {
		$projectTalk = $wgMetaNamespaceTalk;
	} else {
		# Derive the talk namespace from the localised pattern
		$projectTalk = str_replace( '$1', $wgMetaNamespace,
			$this->namespaceNames[NS_PROJECT_TALK] );

		# Allow grammar transformations
		# Allowing full message-style parsing would make simple requests
		# such as action=raw much more expensive than they need to be.
		# This will hopefully cover most cases.
		$projectTalk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
			array( $this, 'replaceGrammarInNamespace' ), $projectTalk );
		$projectTalk = str_replace( ' ', '_', $projectTalk );
	}
	$this->namespaceNames[NS_PROJECT_TALK] = $projectTalk;

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();
	foreach ( $this->namespaceNames as $nsId => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $nsId;
	}
	# Aliases are added after the names, so an alias that lower-cases to
	# an existing key replaces it; site aliases come last.
	if ( $this->namespaceAliases ) {
		foreach ( $this->namespaceAliases as $alias => $nsId ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
		}
	}
	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $nsId ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
		}
	}

	if ( $this->defaultDateFormat == 'dmy or mdy' ) {
		$this->defaultDateFormat = $wgAmericanDates ? 'mdy' : 'dmy';
	}
	wfProfileOut( __METHOD__ );
}
1929
/**
 * preg_replace_callback() helper used by fixUpSettings() for
 * {{grammar:case|word}} constructs in namespace names.
 *
 * @param array $m Regex match; $m[1] is the case and $m[2] the word
 * @return string Result of convertGrammar() for the trimmed word and case
 */
function replaceGrammarInNamespace( $m ) {
	$word = trim( $m[2] );
	$case = trim( $m[1] );
	return $this->convertGrammar( $word, $case );
}
1933
/**
 * Get the upper-case and lower-case character maps.
 *
 * The maps come from the precompiled data file 'Utf8Case.ser' and are
 * kept in static variables, so the file is read at most once per process.
 * The variable names must match the keys of the precompiled array,
 * because extract() is what fills them in.
 *
 * @return array array( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException When the precompiled data cannot be loaded
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;

	if ( !isset( $wikiUpperChars ) ) {
		wfProfileIn( __METHOD__ );
		$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
		if ( $arr === false ) {
			throw new MWException(
				"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
		}
		# Fills the two static variables declared above
		extract( $arr );
		wfProfileOut( __METHOD__ );
	}

	return array( $wikiUpperChars, $wikiLowerChars );
}
1950
/**
 * Format a time period given in seconds as abbreviated text.
 *
 * Periods below ten seconds are shown with one decimal place. Longer
 * periods are rounded to whole seconds and shown as seconds, as minutes
 * and seconds, or as hours, minutes and seconds. Each number goes through
 * formatNum() and is followed by the 'seconds-abbrev', 'minutes-abbrev'
 * or 'hours-abbrev' message.
 *
 * @param float $seconds
 * @return string
 */
function formatTimePeriod( $seconds ) {
	if ( $seconds < 10 ) {
		return $this->formatNum( sprintf( "%.1f", $seconds ) ) . wfMsg( 'seconds-abbrev' );
	}

	# Round once, before splitting into units. Rounding the remainder
	# afterwards could round it up to a full 60 (e.g. "1m60s" for 119.6).
	$seconds = round( $seconds );

	if ( $seconds < 60 ) {
		return $this->formatNum( $seconds ) . wfMsg( 'seconds-abbrev' );
	}

	if ( $seconds < 3600 ) {
		return $this->formatNum( floor( $seconds / 60 ) ) . wfMsg( 'minutes-abbrev' ) .
			$this->formatNum( fmod( $seconds, 60 ) ) . wfMsg( 'seconds-abbrev' );
	}

	$hours = floor( $seconds / 3600 );
	$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
	$secondsPart = $seconds - $hours * 3600 - $minutes * 60;
	return $this->formatNum( $hours ) . wfMsg( 'hours-abbrev' ) .
		$this->formatNum( $minutes ) . wfMsg( 'minutes-abbrev' ) .
		$this->formatNum( $secondsPart ) . wfMsg( 'seconds-abbrev' );
}
1968
/**
 * Format a bit rate using the largest fitting unit of bps, kbps, Mbps
 * and Gbps (steps of 1000).
 *
 * Values below 10 in the chosen unit keep one decimal place, larger ones
 * are rounded to whole numbers. Rates of zero or less are shown in bps
 * without scaling. Rates between 0 and 1 are shown in bps, and rates
 * beyond the Gbps range stay in Gbps.
 *
 * @param float $bps Bit rate in bits per second
 * @return string
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		return $this->formatNum( $bps ) . $units[0];
	}

	$unitIndex = (int)floor( log10( $bps ) / 3 );
	# Keep the index inside the units table: log10() is negative for
	# rates below 1 and yields 4 or more from 10^12 upwards.
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );

	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
1983
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question
 *
 * The size is divided by 1024 for as long as it is greater than 1024, at
 * most three times. Bytes and kilobytes are rounded to whole numbers,
 * megabytes and gigabytes to two decimal places. The number is inserted
 * for '$1' in the matching 'size-*' message.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	# Message keys by number of divisions applied
	$unitMessages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );

	$unit = 0;
	while ( $unit < 3 && $size > 1024 ) {
		$size = $size / 1024;
		$unit++;
	}

	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter
	$round = ( $unit >= 2 ) ? 2 : 0;

	$size = round( $size, $round );
	$text = $this->getMessageFromDB( $unitMessages[$unit] );
	return str_replace( '$1', $this->formatNum( $size ), $text );
}
2016 }