b8515f36694eee2d7c4244f581c78d3a16a6b273
[lhc/web/wiklou.git] / languages / Language.php
1 <?php
2 /**
3 * Internationalisation code
4 *
5 * @file
6 * @ingroup Language
7 */
8
9 /**
10 * @defgroup Language Language
11 */
12
// This file only defines classes and globals for MediaWiki; refuse to run
// when it is requested directly.
if ( defined( 'MEDIAWIKI' ) === false ) {
	echo "This file is part of MediaWiki, it is not a valid entry point.\n";
	exit( 1 );
}

// Declared global before Names.php is loaded so that the assignments below
// (and whatever Names.php assigns to $wgLanguageNames) land in global scope
// even when this file is included from inside a function.
global $wgLanguageNames, $wgInputEncoding, $wgOutputEncoding;

# Read language names
require_once( dirname( __FILE__ ) . '/Names.php' );

/**
 * These are always UTF-8, they exist only for backwards compatibility
 */
$wgInputEncoding = 'UTF-8';
$wgOutputEncoding = 'UTF-8';

// Only touch mbstring when the extension is actually loaded.
if ( function_exists( 'mb_strtoupper' ) ) {
	mb_internal_encoding( 'UTF-8' );
}
33
/**
 * A fake language converter: a pass-through stand-in that performs no
 * conversion. Text is returned unchanged and the only "variant" reported
 * is the code of the wrapped language object.
 *
 * @ingroup Language
 */
class FakeConverter {
	/** Language object this converter was created for */
	public $mLang;

	/**
	 * @param $langobj Object providing getCode()
	 */
	function __construct( $langobj ) {
		$this->mLang = $langobj;
	}

	/** @return array Single entry: language code => unchanged text */
	function autoConvertToAllVariants( $text ) {
		$code = $this->mLang->getCode();
		return array( $code => $text );
	}

	/** @return mixed The argument, unchanged */
	function convert( $t ) {
		return $t;
	}

	/** @return mixed Result of $t->getPrefixedText() */
	function convertTitle( $t ) {
		return $t->getPrefixedText();
	}

	/** @return array List containing only the language's own code */
	function getVariants() {
		$code = $this->mLang->getCode();
		return array( $code );
	}

	function getPreferredVariant() {
		return $this->mLang->getCode();
	}

	function getDefaultVariant() {
		return $this->mLang->getCode();
	}

	function getURLVariant() {
		return '';
	}

	function getConvRuleTitle() {
		return false;
	}

	/** Intentionally a no-op: both reference arguments are left untouched. */
	function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
	}

	function getExtraHashOptions() {
		return '';
	}

	function getParsedTitle() {
		return '';
	}

	function markNoConversion( $text, $noParse = false ) {
		return $text;
	}

	function convertCategoryKey( $key ) {
		return $key;
	}

	/** Same result as autoConvertToAllVariants(). */
	function convertLinkToAllVariants( $text ) {
		return $this->autoConvertToAllVariants( $text );
	}

	function armourMath( $text ) {
		return $text;
	}
}
58
59 /**
60 * Internationalisation code
61 * @ingroup Language
62 */
63 class Language {
public $mConverter, $mVariants, $mCode, $mLoaded = false;
public $mMagicExtensions = array(), $mMagicHookDone = false;

public $mNamespaceIds, $namespaceNames, $namespaceAliases;
public $dateFormatStrings = array();
public $mExtendedSpecialPageAliases;

/**
 * ReplacementArray object caches
 */
public $transformData = array();

/** Shared LocalisationCache instance, see getLocalisationCache() */
public static $dataCache;
/** Language objects created by factory(), keyed by language code */
public static $mLangObjCache = array();

/** Message keys of the weekday names, Sunday first */
public static $mWeekdayMsgs = array(
	'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
	'friday', 'saturday'
);

/** Message keys of the abbreviated weekday names, Sunday first */
public static $mWeekdayAbbrevMsgs = array(
	'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
);

/** Message keys of the month names */
public static $mMonthMsgs = array(
	'january', 'february', 'march', 'april', 'may_long', 'june',
	'july', 'august', 'september', 'october', 'november',
	'december'
);

/** Message keys of the genitive month names */
public static $mMonthGenMsgs = array(
	'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
	'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
	'december-gen'
);

/** Message keys of the abbreviated month names */
public static $mMonthAbbrevMsgs = array(
	'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
	'sep', 'oct', 'nov', 'dec'
);

/** Message keys of the Iranian calendar month names */
public static $mIranianCalendarMonthMsgs = array(
	'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
	'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
	'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
	'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
);

/** Message keys of the Hebrew calendar month names (14 entries, including m6a/m6b) */
public static $mHebrewCalendarMonthMsgs = array(
	'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
	'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
	'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
	'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
	'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
);

/** Message keys of the genitive Hebrew calendar month names */
public static $mHebrewCalendarMonthGenMsgs = array(
	'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
	'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
	'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
	'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
	'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
);

/** Message keys of the Hijri calendar month names */
public static $mHijriCalendarMonthMsgs = array(
	'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
	'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
	'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
	'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
);
132
/**
 * Return the shared language object for a language code, creating and
 * caching it on first use.
 *
 * The cache is emptied whenever it already holds more than ten objects
 * and a new one has to be added.
 *
 * @param $code String
 * @return Language
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	if ( count( self::$mLangObjCache ) > 10 ) {
		// Don't keep a billion objects around, that's stupid.
		self::$mLangObjCache = array();
	}

	$lang = self::newFromCode( $code );
	self::$mLangObjCache[$code] = $lang;
	return $lang;
}
148
/**
 * Create a language object for a given language code.
 *
 * For any code other than 'en' the class name is 'Language' followed by
 * the code with its first letter upper-cased and '-' replaced by '_';
 * the matching files under $IP/languages/classes are included when they
 * exist. If no such class is defined, the object is built for the code
 * returned by Language::getFallbackFor() and then relabelled with the
 * requested code through setCode().
 *
 * @param $code String
 * @return Language
 * @throws MWException if the code is not of a valid form, or if fallback
 *   resolution nests more than five levels deep
 */
protected static function newFromCode( $code ) {
	global $IP;
	static $recursionLevel = 0;

	// Protect against path traversal below
	if ( !Language::isValidCode( $code ) ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	if ( $code == 'en' ) {
		$class = 'Language';
	} else {
		$class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
		// Preload base classes to work around APC/PHP5 bug
		if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
			include_once( "$IP/languages/classes/$class.deps.php" );
		}
		if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
			include_once( "$IP/languages/classes/$class.php" );
		}
	}

	if ( $recursionLevel > 5 ) {
		throw new MWException( "Language fallback loop detected when creating class $class\n" );
	}

	if ( class_exists( $class ) ) {
		return new $class;
	}

	$fallback = Language::getFallbackFor( $code );
	++$recursionLevel;
	try {
		$lang = Language::newFromCode( $fallback );
	} catch ( Exception $e ) {
		// The counter is static and therefore survives this call: restore
		// it before propagating, otherwise one failed lookup would make
		// later, unrelated lookups trip the loop detection above.
		--$recursionLevel;
		throw $e;
	}
	--$recursionLevel;
	$lang->setCode( $code );
	return $lang;
}
191
/**
 * Tell whether a language code string has a valid form, regardless of
 * whether such a language exists.
 *
 * A code is accepted when it contains no forward slash, no backslash and
 * no NUL byte.
 *
 * @param $code String
 * @return bool
 */
public static function isValidCode( $code ) {
	// Length of the leading run that is free of the forbidden characters;
	// the code is valid when that run is the whole string.
	$cleanPrefixLength = strcspn( $code, "/\\\000" );
	return $cleanPrefixLength === strlen( $code );
}
199
/**
 * Get the shared LocalisationCache instance, creating it on first use.
 *
 * The class to instantiate is read from $wgLocalisationCacheConf['class']
 * and the whole configuration array is passed to its constructor.
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	if ( self::$dataCache === null ) {
		global $wgLocalisationCacheConf;
		$cacheClass = $wgLocalisationCacheConf['class'];
		self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	}
	return self::$dataCache;
}
213
/**
 * Attach a FakeConverter, derive the language code from the class name
 * and make sure the shared localisation cache exists.
 */
function __construct() {
	$this->mConverter = new FakeConverter( $this );

	// Set the code to the name of the descendant: everything after the
	// 8-character 'Language' prefix, lower-cased, with '_' turned into '-'.
	$className = get_class( $this );
	$this->mCode = ( $className == 'Language' )
		? 'en'
		: str_replace( '_', '-', strtolower( substr( $className, 8 ) ) );

	self::getLocalisationCache();
}
224
/**
 * Reduce memory usage by unsetting every property visible from here.
 */
function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
233
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 * The base implementation does nothing.
 */
function initContLang() {
}
239
/**
 * Records a deprecation notice through wfDeprecated() and forwards to
 * User::getDefaultOptions().
 *
 * @deprecated Use User::getDefaultOptions()
 * @return array
 */
function getDefaultUserOptions() {
	wfDeprecated( __METHOD__ );
	$defaults = User::getDefaultOptions();
	return $defaults;
}
248
/**
 * Get the 'fallback' item of this language from the localisation cache.
 *
 * @return mixed false when this language's code is 'en', otherwise the
 *   cached 'fallback' value
 */
function getFallbackLanguageCode() {
	if ( $this->mCode === 'en' ) {
		return false;
	}
	return self::$dataCache->getItem( $this->mCode, 'fallback' );
}
256
/**
 * Exports $wgBookstoreListEn: returns the 'bookstoreList' item of this
 * language from the localisation cache.
 *
 * @return array
 */
function getBookstoreList() {
	$list = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $list;
}
264
/**
 * Get the namespace names of this language, keyed by namespace index.
 *
 * The list is built once and then kept in $this->namespaceNames. It
 * combines, in order of precedence, $wgExtraNamespaces, the localised
 * names from the localisation cache and the canonical names; the project
 * and project talk names are then taken from $wgMetaNamespace and
 * $wgMetaNamespaceTalk.
 *
 * @return array
 */
function getNamespaces() {
	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	if ( !is_null( $this->namespaceNames ) ) {
		return $this->namespaceNames;
	}

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$validNamespaces = MWNamespace::getCanonicalNamespaces();

	// Array union: for a given index the left-most array wins.
	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	if ( $wgMetaNamespaceTalk ) {
		$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
	} else {
		// No explicit talk name configured: post-process the localised one.
		$this->namespaceNames[NS_PROJECT_TALK] =
			$this->fixVariableInNamespace( $this->namespaceNames[NS_PROJECT_TALK] );
	}

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( array_keys( $this->namespaceNames ) as $index ) {
		if ( !isset( $validNamespaces[$index] ) ) {
			unset( $this->namespaceNames[$index] );
		}
	}

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	return $this->namespaceNames;
}
299
/**
 * Same list as getNamespaces(), but with every '_' in the names replaced
 * by ' '. Useful for producing output to be displayed, e.g. in <select>
 * forms.
 *
 * @return array
 */
function getFormattedNamespaces() {
	$formatted = array();
	foreach ( $this->getNamespaces() as $index => $name ) {
		$formatted[$index] = strtr( $name, '_', ' ' );
	}
	return $formatted;
}
315
/**
 * Get a namespace name by its index.
 * <code>
 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 * echo $mw_ns; // prints 'MediaWiki'
 * </code>
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}
	return false;
}
330
/**
 * Same as getNsText(), but with every '_' replaced by ' '; useful for
 * producing output.
 *
 * @param $index Int: namespace index
 * @return string
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
342
/**
 * Returns the gender-dependent namespace alias if one is defined for the
 * given index and gender, otherwise the result of getNsText().
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key (male, female... )
 * @return String
 * @since 1.18
 */
function getGenderNsText( $index, $gender ) {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}
	return $this->getNsText( $index );
}
354
/**
 * Whether this language distinguishes genders, for example in
 * namespaces: true when the language's 'namespaceGenderAliases' item
 * is non-empty.
 *
 * @return bool
 * @since 1.18
 */
function needsGenderDistinction() {
	return count( self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' ) ) > 0;
}
365
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$needle = $this->lc( $text );
	$idsByName = $this->getNamespaceIds();
	if ( !isset( $idsByName[$needle] ) ) {
		return false;
	}
	return $idsByName[$needle];
}
379
/**
 * Get the namespace aliases of this language as a map of alias => index.
 *
 * Built once and then kept in $this->namespaceAliases. Aliases pointing
 * at NS_PROJECT_TALK are passed through fixVariableInNamespace(), and all
 * forms listed in 'namespaceGenderAliases' are added as aliases too.
 *
 * @return array
 */
function getNamespaceAliases() {
	if ( !is_null( $this->namespaceAliases ) ) {
		return $this->namespaceAliases;
	}

	$aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
	if ( $aliases ) {
		// foreach works on a copy of the array, so re-keying entries
		// inside the loop does not disturb the iteration.
		foreach ( $aliases as $alias => $nsIndex ) {
			if ( $nsIndex !== NS_PROJECT_TALK ) {
				continue;
			}
			unset( $aliases[$alias] );
			$fixedAlias = $this->fixVariableInNamespace( $alias );
			$aliases[$fixedAlias] = $nsIndex;
		}
	} else {
		$aliases = array();
	}

	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	foreach ( $genderAliases as $nsIndex => $forms ) {
		foreach ( $forms as $form ) {
			$aliases[$form] = $nsIndex;
		}
	}

	$this->namespaceAliases = $aliases;
	return $this->namespaceAliases;
}
406
/**
 * Get a map of lower-cased namespace name or alias => namespace index.
 *
 * Built once and then kept in $this->mNamespaceIds. Later sources
 * overwrite earlier ones: localised names first, then the language's
 * aliases, then $wgNamespaceAliases.
 *
 * @return array
 */
function getNamespaceIds() {
	global $wgNamespaceAliases;

	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	foreach ( $this->getNamespaces() as $nsIndex => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $nsIndex;
	}

	foreach ( $this->getNamespaceAliases() as $alias => $nsIndex ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $nsIndex;
	}

	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $nsIndex ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $nsIndex;
		}
	}

	return $this->mNamespaceIds;
}
429
430
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$lctext = $this->lc( $text );

	// The canonical lookup is tried first and wins whenever it gives a
	// non-null answer.
	$canonicalIndex = MWNamespace::getCanonicalIndex( $lctext );
	if ( $canonicalIndex !== null ) {
		return $canonicalIndex;
	}

	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$lctext] ) ) {
		return $idsByName[$lctext];
	}
	return false;
}
446
/**
 * Short names for language variants used for language conversion links.
 * Returns the message "variantname-<code>".
 *
 * @param $code String
 * @return string
 */
function getVariantname( $code ) {
	$messageKey = "variantname-$code";
	return $this->getMessageFromDB( $messageKey );
}
456
/**
 * Build "<special namespace name>:<page name>" for a special page, using
 * the first alias listed for $name by getSpecialPageAliases() when there
 * is one and $name itself otherwise.
 *
 * @param $name String
 * @return string
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
464
/**
 * Get the five quickbar setting labels (none, fixed left, fixed right,
 * floating left, floating right), in that order.
 *
 * @return array
 */
function getQuickbarSettings() {
	$messageKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright',
	);

	$labels = array();
	foreach ( $messageKeys as $messageKey ) {
		$labels[] = $this->getMessage( $messageKey );
	}
	return $labels;
}
474
/**
 * Get the 'mathNames' item of this language from the localisation cache.
 */
function getMathNames() {
	$item = self::$dataCache->getItem( $this->mCode, 'mathNames' );
	return $item;
}
478
/**
 * Get the 'datePreferences' item of this language from the localisation cache.
 */
function getDatePreferences() {
	$item = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $item;
}
482
/**
 * Get the 'dateFormats' item of this language from the localisation cache.
 */
function getDateFormats() {
	$item = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $item;
}
486
/**
 * Get the default date format of this language.
 *
 * The cached value 'dmy or mdy' is resolved through $wgAmericanDates:
 * 'mdy' when it is set, 'dmy' otherwise. Any other value is returned
 * as is.
 */
function getDefaultDateFormat() {
	global $wgAmericanDates;

	$format = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $format !== 'dmy or mdy' ) {
		return $format;
	}
	return $wgAmericanDates ? 'mdy' : 'dmy';
}
496
/**
 * Get the 'datePreferenceMigrationMap' item of this language from the
 * localisation cache.
 */
function getDatePreferenceMigrationMap() {
	$item = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $item;
}
500
/**
 * Get the entry for $image from this language's 'imageFiles' item in the
 * localisation cache.
 *
 * @param $image String: sub-item key
 */
function getImageFile( $image ) {
	$entry = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $entry;
}
504
/**
 * Get the 'defaultUserOptionOverrides' item of this language from the
 * localisation cache.
 */
function getDefaultUserOptionOverrides() {
	$item = self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
	return $item;
}
508
/**
 * Get the 'extraUserToggles' item of this language from the localisation cache.
 */
function getExtraUserToggles() {
	$item = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return $item;
}
512
/**
 * Get the message "tog-<name>" for a user toggle.
 *
 * @param $tog String: toggle name
 * @return string
 */
function getUserToggle( $tog ) {
	$messageKey = "tog-$tog";
	return $this->getMessageFromDB( $messageKey );
}
516
/**
 * Get language names, indexed by code.
 *
 * The full list is $wgExtraLanguageNames combined with $wgLanguageNames;
 * for a code present in both, the $wgExtraLanguageNames entry wins.
 *
 * If $customisedOnly is true, only codes for which a messages file is
 * found in $IP/languages/messages are returned. When that directory
 * cannot be opened the result is an empty array.
 *
 * @param $customisedOnly bool
 * @return array language code => language name
 */
public static function getLanguageNames( $customisedOnly = false ) {
	global $wgLanguageNames, $wgExtraLanguageNames, $IP;

	$allNames = $wgExtraLanguageNames + $wgLanguageNames;
	if ( !$customisedOnly ) {
		return $allNames;
	}

	$names = array();
	$dir = opendir( "$IP/languages/messages" );
	if ( $dir === false ) {
		// opendir() has already raised its own warning; do not hand the
		// failed handle to readdir()/closedir().
		return $names;
	}

	while ( false !== ( $file = readdir( $dir ) ) ) {
		$code = self::getCodeFromFileName( $file, 'Messages' );
		if ( $code && isset( $allNames[$code] ) ) {
			$names[$code] = $allNames[$code];
		}
	}
	closedir( $dir );

	return $names;
}
540
/**
 * Get translated language names. This is done on best effort and
 * by default this is exactly the same as Language::getLanguageNames.
 * The CLDR extension provides translated names.
 *
 * Handlers of the 'LanguageGetTranslatedLanguageNames' hook fill in the
 * list first; every code they leave unset is then given the name from
 * getLanguageNames().
 *
 * @param $code String Language code.
 * @return Array language code => language name
 * @since 1.18.0
 */
public static function getTranslatedLanguageNames( $code ) {
	$names = array();
	wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );

	$untranslated = self::getLanguageNames();
	foreach ( $untranslated as $langCode => $langName ) {
		if ( isset( $names[$langCode] ) ) {
			continue;
		}
		$names[$langCode] = $langName;
	}

	return $names;
}
559
/**
 * Get a message from the MediaWiki namespace, fetched through wfMsgExt()
 * with the 'parsemag' option and this object as the language.
 *
 * @param $msg String: message name
 * @return string
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
569
/**
 * Get the name of a language from getLanguageNames().
 *
 * @param $code String: language code
 * @return string The name, or '' when the code is not in the list
 */
function getLanguageName( $code ) {
	$names = self::getLanguageNames();
	return array_key_exists( $code, $names ) ? $names[$code] : '';
}
577
/**
 * Get the month name message.
 *
 * @param $key Int: 1-based position in self::$mMonthMsgs
 * @return string
 */
function getMonthName( $key ) {
	$messageKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
581
/**
 * Get the genitive month name message.
 *
 * @param $key Int: 1-based position in self::$mMonthGenMsgs
 * @return string
 */
function getMonthNameGen( $key ) {
	$messageKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
585
/**
 * Get the abbreviated month name message.
 *
 * @param $key Int: 1-based position in self::$mMonthAbbrevMsgs
 * @return string
 */
function getMonthAbbreviation( $key ) {
	$messageKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
589
/**
 * Get the weekday name message.
 *
 * @param $key Int: 1-based position in self::$mWeekdayMsgs (1 is 'sunday')
 * @return string
 */
function getWeekdayName( $key ) {
	$messageKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
593
/**
 * Get the abbreviated weekday name message.
 *
 * @param $key Int: 1-based position in self::$mWeekdayAbbrevMsgs (1 is 'sun')
 * @return string
 */
function getWeekdayAbbreviation( $key ) {
	$messageKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
597
/**
 * Get the Iranian calendar month name message.
 *
 * @param $key Int: 1-based position in self::$mIranianCalendarMonthMsgs
 * @return string
 */
function getIranianCalendarMonthName( $key ) {
	$messageKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
601
/**
 * Get the Hebrew calendar month name message.
 *
 * @param $key Int: 1-based position in self::$mHebrewCalendarMonthMsgs
 * @return string
 */
function getHebrewCalendarMonthName( $key ) {
	$messageKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
605
/**
 * Get the genitive Hebrew calendar month name message.
 *
 * @param $key Int: 1-based position in self::$mHebrewCalendarMonthGenMsgs
 * @return string
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$messageKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
609
/**
 * Get the Hijri calendar month name message.
 *
 * @param $key Int: 1-based position in self::$mHijriCalendarMonthMsgs
 * @return string
 */
function getHijriCalendarMonthName( $key ) {
	$messageKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
613
/**
 * Used by date() and time() to adjust the time output.
 *
 * $tz is understood in these forms:
 *  - "ZoneInfo|<offset>|<zone name>": converted with the named time zone
 *    when PHP recognises it, otherwise treated like "Offset"
 *  - "Offset|<minutes>": shifted by that many minutes
 *  - "System|..." or an empty string: shifted by $wgLocalTZoffset minutes
 *  - "hh:mm" or "hh": shifted by that many hours (and minutes); a leading
 *    '-' makes the whole offset negative, including "-0:30"
 *
 * @param $ts Int the time in date('YmdHis') format
 * @param $tz Mixed: adjust the time by this amount (default false, mean we
 *   get user timecorrection setting)
 * @return int
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( $tz === false ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	$data = explode( '|', $tz, 3 );

	if ( $data[0] == 'ZoneInfo' ) {
		if ( isset( $data[2] ) && function_exists( 'timezone_open' ) ) {
			// Open the zone once; the warning for an unknown name is
			// suppressed because that case is handled by the fallback below.
			$userZone = @timezone_open( $data[2] );
			if ( $userZone !== false ) {
				$date = date_create( $ts, timezone_open( 'UTC' ) );
				date_timezone_set( $date, $userZone );
				return date_format( $date, 'YmdHis' );
			}
		}
		# Unrecognized timezone, default to 'Offset' with the stored offset.
		$data[0] = 'Offset';
	}

	$minDiff = 0;
	if ( $data[0] == 'System' || $tz == '' ) {
		#  Global offset in minutes.
		if ( isset( $wgLocalTZoffset ) ) {
			$minDiff = $wgLocalTZoffset;
		}
	} elseif ( $data[0] == 'Offset' ) {
		// A value without the minutes part counts as no offset.
		$minDiff = isset( $data[1] ) ? intval( $data[1] ) : 0;
	} else {
		$data = explode( ':', $tz );
		if ( count( $data ) == 2 ) {
			// Take the sign from the text, not from intval(): intval( '-0' )
			// is 0, which would turn "-0:30" into +30 minutes.
			$isNegative = ( substr( ltrim( $data[0] ), 0, 1 ) === '-' );
			$minDiff = abs( intval( $data[0] ) ) * 60 + intval( $data[1] );
			if ( $isNegative ) {
				$minDiff = -$minDiff;
			}
		} else {
			$minDiff = intval( $data[0] ) * 60;
		}
	}

	# No difference ? Return time unchanged
	if ( 0 == $minDiff ) {
		return $ts;
	}

	wfSuppressWarnings(); // E_STRICT system time bitching
	# Generate an adjusted date; take advantage of the fact that mktime
	# will normalize out-of-range values so we don't have to split $minDiff
	# into hours and minutes.
	$t = mktime(
		(int)substr( $ts, 8, 2 ), # Hours
		(int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
		(int)substr( $ts, 12, 2 ), # Seconds
		(int)substr( $ts, 4, 2 ), # Month
		(int)substr( $ts, 6, 2 ), # Day
		(int)substr( $ts, 0, 4 ) ); # Year

	$date = date( 'YmdHis', $t );
	wfRestoreWarnings();

	return $date;
}
686
/**
 * This is a workalike of PHP's date() function, but with better
 * internationalisation, a reduced set of format characters, and a better
 * escaping format.
 *
 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
 * PHP manual for definitions. There are a number of extensions, which
 * start with "x":
 *
 *    xn   Do not translate digits of the next numeric format character
 *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 *    xr   Use roman numerals for the next numeric format character
 *    xh   Use hebrew numerals for the next numeric format character
 *    xx   Literal x
 *    xg   Genitive month name
 *
 *    xij  j (day number) in Iranian calendar
 *    xiF  F (month name) in Iranian calendar
 *    xin  n (month number) in Iranian calendar
 *    xiY  Y (full year) in Iranian calendar
 *
 *    xjj  j (day number) in Hebrew calendar
 *    xjF  F (month name) in Hebrew calendar
 *    xjt  t (days in month) in Hebrew calendar
 *    xjx  xg (genitive month name) in Hebrew calendar
 *    xjn  n (month number) in Hebrew calendar
 *    xjY  Y (full year) in Hebrew calendar
 *
 *    xmj  j (day number) in Hijri calendar
 *    xmF  F (month name) in Hijri calendar
 *    xmn  n (month number) in Hijri calendar
 *    xmY  Y (full year) in Hijri calendar
 *
 *    xkY  Y (full year) in Thai solar calendar. Months and days are
 *         identical to the Gregorian calendar
 *    xoY  Y (full year) in Minguo calendar or Juche year.
 *         Months and days are identical to the
 *         Gregorian calendar
 *    xtY  Y (full year) in Japanese nengo. Months and days are
 *         identical to the Gregorian calendar
 *
 * Characters enclosed in double quotes will be considered literal (with
 * the quotes themselves removed). Unmatched quotes will be considered
 * literal quotes. Example:
 *
 * "The month is" F       => The month is January
 * i's"                   => 20'11"
 *
 * Backslash escaping is also supported.
 *
 * Input timestamp is assumed to be pre-normalized to the desired local
 * time zone, if any. All format characters that go through a Unix
 * timestamp are therefore rendered with gmdate(), so the server's own
 * time zone never influences the result.
 *
 * @param $format String
 * @param $ts String: 14-character timestamp
 *      YYYYMMDDHHMMSS
 *      01234567890123
 * @return string The formatted date
 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 */
function sprintfDate( $format, $ts ) {
	$s = '';
	$raw = false;
	$roman = false;
	$hebrewNum = false;
	$unix = false;
	$rawToggle = false;
	$iranian = false;
	$hebrew = false;
	$hijri = false;
	$thai = false;
	$minguo = false;
	$tenno = false;
	$formatLength = strlen( $format );

	for ( $p = 0; $p < $formatLength; $p++ ) {
		// $num stays false for codes that append text directly to $s;
		// numeric codes set it and are rendered after the switch.
		$num = false;
		$code = $format[$p];
		if ( $code == 'x' && $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}

		// Calendar prefixes take one more character (e.g. "xiY").
		if ( ( $code === 'xi' || $code == 'xj' || $code == 'xk' || $code == 'xm' || $code == 'xo' || $code == 'xt' ) && $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}

		// These single-character codes all need the Unix timestamp;
		// convert lazily, the first time one of them is met.
		if ( !$unix && strlen( $code ) === 1 && strpos( 'DlNwzWtLocrU', $code ) !== false ) {
			$unix = wfTimestamp( TS_UNIX, $ts );
		}

		switch ( $code ) {
			case 'xx':
				$s .= 'x';
				break;
			case 'xn':
				$raw = true;
				break;
			case 'xN':
				$rawToggle = !$rawToggle;
				break;
			case 'xr':
				$roman = true;
				break;
			case 'xh':
				$hebrewNum = true;
				break;
			case 'xg':
				$s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
				break;
			case 'xjx':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
				break;
			case 'd':
				$num = substr( $ts, 6, 2 );
				break;
			case 'D':
				$s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
				break;
			case 'j':
				$num = intval( substr( $ts, 6, 2 ) );
				break;
			case 'xij':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[2];
				break;
			case 'xmj':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[2];
				break;
			case 'xjj':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[2];
				break;
			case 'l':
				$s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
				break;
			case 'N':
				// gmdate( 'w' ) numbers Sunday as 0; 'N' wants 7 instead.
				$w = gmdate( 'w', $unix );
				$num = $w ? $w : 7;
				break;
			case 'w':
			case 'z':
			case 'W':
			case 't':
			case 'L':
			case 'o':
				// Same meaning as the PHP format character of that name.
				// 'o' is rendered with gmdate() like the others so that
				// the ISO year does not shift with the server time zone.
				$num = gmdate( $code, $unix );
				break;
			case 'F':
				$s .= $this->getMonthName( substr( $ts, 4, 2 ) );
				break;
			case 'xiF':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$s .= $this->getIranianCalendarMonthName( $iranian[1] );
				break;
			case 'xmF':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$s .= $this->getHijriCalendarMonthName( $hijri[1] );
				break;
			case 'xjF':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
				break;
			case 'm':
				$num = substr( $ts, 4, 2 );
				break;
			case 'M':
				$s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
				break;
			case 'n':
				$num = intval( substr( $ts, 4, 2 ) );
				break;
			case 'xin':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[1];
				break;
			case 'xmn':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[1];
				break;
			case 'xjn':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[1];
				break;
			case 'xjt':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[3];
				break;
			case 'Y':
				$num = substr( $ts, 0, 4 );
				break;
			case 'xiY':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[0];
				break;
			case 'xmY':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[0];
				break;
			case 'xjY':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[0];
				break;
			case 'xkY':
				if ( !$thai ) {
					$thai = self::tsToYear( $ts, 'thai' );
				}
				$num = $thai[0];
				break;
			case 'xoY':
				if ( !$minguo ) {
					$minguo = self::tsToYear( $ts, 'minguo' );
				}
				$num = $minguo[0];
				break;
			case 'xtY':
				if ( !$tenno ) {
					$tenno = self::tsToYear( $ts, 'tenno' );
				}
				$num = $tenno[0];
				break;
			case 'y':
				$num = substr( $ts, 2, 2 );
				break;
			case 'a':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
				break;
			case 'A':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
				break;
			case 'g':
				$h = substr( $ts, 8, 2 );
				$num = $h % 12 ? $h % 12 : 12;
				break;
			case 'G':
				$num = intval( substr( $ts, 8, 2 ) );
				break;
			case 'h':
				$h = substr( $ts, 8, 2 );
				$num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
				break;
			case 'H':
				$num = substr( $ts, 8, 2 );
				break;
			case 'i':
				$num = substr( $ts, 10, 2 );
				break;
			case 's':
				$num = substr( $ts, 12, 2 );
				break;
			case 'c':
			case 'r':
				// Full date strings, appended without digit translation.
				$s .= gmdate( $code, $unix );
				break;
			case 'U':
				$num = $unix;
				break;
			case '\\':
				# Backslash escaping
				if ( $p < $formatLength - 1 ) {
					$s .= $format[++$p];
				} else {
					$s .= '\\';
				}
				break;
			case '"':
				# Quoted literal
				if ( $p < $formatLength - 1 ) {
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote === false ) {
						# No terminating quote, assume literal "
						$s .= '"';
					} else {
						$s .= substr( $format, $p + 1, $endQuote - $p - 1 );
						$p = $endQuote;
					}
				} else {
					# Quote at end of string, assume literal "
					$s .= '"';
				}
				break;
			default:
				$s .= $format[$p];
		}

		if ( $num !== false ) {
			// One-shot flags (xn, xr, xh) are consumed by the number
			// they apply to; the xN toggle is left alone.
			if ( $rawToggle || $raw ) {
				$s .= $num;
				$raw = false;
			} elseif ( $roman ) {
				$s .= self::romanNumeral( $num );
				$roman = false;
			} elseif ( $hebrewNum ) {
				$s .= self::hebrewNumeral( $num );
				$hebrewNum = false;
			} else {
				$s .= $this->formatNum( $num, true );
			}
		}
	}
	return $s;
}
1054
/** Days per Gregorian month in a non-leap year, January first */
private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
/** Days per Iranian month, first month first */
private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );

/**
 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 * Gregorian dates to Iranian dates. Originally written in C, it
 * is released under the terms of GNU Lesser General Public
 * License. Conversion to PHP was performed by Niklas Laxström.
 *
 * Link: http://www.farsiweb.info/jalali/jalali.c
 *
 * @param $ts String: timestamp starting with YYYYMMDD
 * @return array ( year, month, day ) in the Iranian calendar
 */
private static function tsToIranian( $ts ) {
	// Year counted from 1600; month and day made zero-based.
	$gy = substr( $ts, 0, 4 ) - 1600;
	$gm = substr( $ts, 4, 2 ) - 1;
	$gd = substr( $ts, 6, 2 ) - 1;

	# Days passed from the beginning (including leap years)
	$leapDays = floor( ( $gy + 3 ) / 4 )
		- floor( ( $gy + 99 ) / 100 )
		+ floor( ( $gy + 399 ) / 400 );
	$gDayNo = 365 * $gy + $leapDays;

	// Add days of the past months of this year
	for ( $month = 0; $month < $gm; $month++ ) {
		$gDayNo += self::$GREG_DAYS[$month];
	}

	// Leap years: one extra day once February is over
	$isLeapYear = ( $gy % 4 === 0 && $gy % 100 !== 0 ) || $gy % 400 == 0;
	if ( $gm > 1 && $isLeapYear ) {
		$gDayNo++;
	}

	// Days passed in current month, then shift to the Iranian day count
	$jDayNo = $gDayNo + $gd - 79;

	// Whole 12053-day periods count for 33 years each ...
	$jNp = floor( $jDayNo / 12053 );
	$jDayNo %= 12053;

	// ... and whole 1461-day periods for 4 years each.
	$jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
	$jDayNo %= 1461;

	if ( $jDayNo >= 366 ) {
		$jy += floor( ( $jDayNo - 1 ) / 365 );
		$jDayNo = floor( ( $jDayNo - 1 ) % 365 );
	}

	// Walk through the months until the remaining days fit into one.
	$monthIndex = 0;
	while ( $monthIndex < 11 && $jDayNo >= self::$IRANIAN_DAYS[$monthIndex] ) {
		$jDayNo -= self::$IRANIAN_DAYS[$monthIndex];
		$monthIndex++;
	}

	return array( $jy, $monthIndex + 1, $jDayNo + 1 );
}
1112
/**
 * Converting Gregorian dates to Hijri dates.
 *
 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
 *
 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return Array: Hijri year, month and day
 */
private static function tsToHijri( $ts ) {
	# Split the timestamp into year, month and day (numeric strings)
	$year = substr( $ts, 0, 4 );
	$month = substr( $ts, 4, 2 );
	$day = substr( $ts, 6, 2 );

	$zyr = $year;
	$zd = $day;
	$zm = $month;
	$zy = $zyr;

	# Dates after 14 October 1582 use the first formula, earlier dates the second.
	# NOTE(review): $zjd looks like a Julian day number computed with the
	# Gregorian and Julian calendar rules respectively - confirm.
	if (
		( $zy > 1582 ) || ( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
		( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
	)
	{
		$zjd = (int)( ( 1461 * ( $zy + 4800 + (int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
				(int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
				(int)( ( 3 * (int)( ( ( $zy + 4900 + (int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
				$zd - 32075;
	} else {
		$zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
				(int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
	}

	# Reduce the day number to a position inside a block of 10631 days;
	# each such block contributes 30 years to the result (see $zy below)
	$zl = $zjd -1948440 + 10632;
	$zn = (int)( ( $zl - 1 ) / 10631 );
	$zl = $zl - 10631 * $zn + 354;
	# $zj: year within the block; $zl then becomes the day offset within that year
	$zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) + ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
	$zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;
	# Month and day are derived from the day offset; year from block count and $zj
	$zm = (int)( ( 24 * $zl ) / 709 );
	$zd = $zl - (int)( ( 709 * $zm ) / 24 );
	$zy = 30 * $zn + $zj - 30;

	return array( $zy, $zm, $zd );
}
1155
/**
 * Converting Gregorian dates to Hebrew dates.
 *
 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
 * to translate the relevant functions into PHP and release them under
 * GNU GPL.
 *
 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
 * and Adar II is 14. In a non-leap year, Adar is 6.
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return Array: Hebrew year, Hebrew month, day of that month, and the
 *   number of days in that month
 */
private static function tsToHebrew( $ts ) {
	# Parse date
	$year = substr( $ts, 0, 4 );
	$month = substr( $ts, 4, 2 );
	$day = substr( $ts, 6, 2 );

	# Calculate Hebrew year
	$hebrewYear = $year + 3760;

	# Month number when September = 1, August = 12
	$month += 4;
	if ( $month > 12 ) {
		# Next year
		$month -= 12;
		$year++;
		$hebrewYear++;
	}

	# Calculate day of year from 1 September
	$dayOfYear = $day;
	for ( $i = 1; $i < $month; $i++ ) {
		if ( $i == 6 ) {
			# February
			$dayOfYear += 28;
			# Check if the year is leap
			if ( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
				$dayOfYear++;
			}
		} elseif ( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
			# April, June, September and November in the September-based numbering
			$dayOfYear += 30;
		} else {
			$dayOfYear += 31;
		}
	}

	# Calculate the start of the Hebrew year
	$start = self::hebrewYearStart( $hebrewYear );

	# Calculate next year's start
	if ( $dayOfYear <= $start ) {
		# Day is before the start of the year - it is the previous year
		# Next year's start
		$nextStart = $start;
		# Previous year
		$year--;
		$hebrewYear--;
		# Add days since previous year's 1 September
		$dayOfYear += 365;
		if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
			# Leap year
			$dayOfYear++;
		}
		# Start of the new (previous) year
		$start = self::hebrewYearStart( $hebrewYear );
	} else {
		# Next year's start
		$nextStart = self::hebrewYearStart( $hebrewYear + 1 );
	}

	# Calculate Hebrew day of year
	$hebrewDayOfYear = $dayOfYear - $start;

	# Difference between year's days
	$diff = $nextStart - $start;
	# Add 12 (or 13 for leap years) days to ignore the difference between
	# Hebrew and Gregorian year (353 at least vs. 365/6) - now the
	# difference is only about the year type
	if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
		$diff += 13;
	} else {
		$diff += 12;
	}

	# Check the year pattern, and is leap year
	# 0 means an incomplete year, 1 means a regular year, 2 means a complete year
	# This is mod 30, to work on both leap years (which add 30 days of Adar I)
	# and non-leap years
	$yearPattern = $diff % 30;
	# Check if leap year
	$isLeap = $diff >= 30;

	# Calculate day in the month from number of day in the Hebrew year
	# Don't check Adar - if the day is not in Adar, we will stop before;
	# if it is in Adar, we will use it to check if it is Adar I or Adar II
	$hebrewDay = $hebrewDayOfYear;
	$hebrewMonth = 1;
	$days = 0;
	while ( $hebrewMonth <= 12 ) {
		# Calculate days in this month
		if ( $isLeap && $hebrewMonth == 6 ) {
			# Adar in a leap year
			# NOTE(review): the inner $isLeap test repeats the outer condition
			# and is always true here
			if ( $isLeap ) {
				# Leap year - has Adar I, with 30 days, and Adar II, with 29 days
				$days = 30;
				if ( $hebrewDay <= $days ) {
					# Day in Adar I
					$hebrewMonth = 13;
				} else {
					# Subtract the days of Adar I
					$hebrewDay -= $days;
					# Try Adar II
					$days = 29;
					if ( $hebrewDay <= $days ) {
						# Day in Adar II
						$hebrewMonth = 14;
					}
				}
			}
		} elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
			# Cheshvan in a complete year (otherwise as the rule below)
			$days = 30;
		} elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
			# Kislev in an incomplete year (otherwise as the rule below)
			$days = 29;
		} else {
			# Odd months have 30 days, even have 29
			$days = 30 - ( $hebrewMonth - 1 ) % 2;
		}
		if ( $hebrewDay <= $days ) {
			# In the current month
			# (also reached with $hebrewMonth set to 13 or 14 by the Adar branch,
			# which is what ends the loop for leap-year Adar dates)
			break;
		} else {
			# Subtract the days of the current month
			$hebrewDay -= $days;
			# Try in the next month
			$hebrewMonth++;
		}
	}

	return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
}
1298
/**
 * This calculates the Hebrew year start, as days since 1 September.
 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
 * Used for Hebrew date.
 *
 * @param $year Integer: Hebrew year
 * @return Integer: day offset of the start of $year, counted from 1 September
 */
private static function hebrewYearStart( $year ) {
	# Position of the previous year in the 19-year and 4-year cycles
	$a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
	$b = intval( ( $year - 1 ) % 4 );
	$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
	# intval() truncates towards zero; stepping a negative $m down first
	# rounds it downwards instead, and the step is undone afterwards
	if ( $m < 0 ) {
		$m--;
	}
	$Mar = intval( $m );
	if ( $m < 0 ) {
		$m++;
	}
	# Keep only the part of $m that was not absorbed into $Mar
	$m -= $Mar;

	# $c is a value modulo 7; together with $a and the fraction $m it decides
	# by how many days the start is pushed forward
	# NOTE(review): presumably the weekday-based postponement rules - confirm
	$c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7 );
	if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
		$Mar++;
	} else if ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
		$Mar += 2;
	} else if ( $c == 2 || $c == 4 || $c == 6 ) {
		$Mar++;
	}

	# Century corrections ($year - 3761 is the Hebrew year shifted by the same
	# offset tsToHebrew() uses) and the constant shift that yields the returned value
	$Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
	return $Mar;
}
1329
/**
 * Convert a Gregorian date to a calendar that only counts years
 * differently: Thai solar, Minguo, Juche or Japanese nengō (era names).
 * Month and day are passed through untouched.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * @param $ts String: 14-character timestamp
 * @param $cName String: calendar name ('thai', 'minguo', 'juche' or
 *   'tenno'); with any other name the year is returned unchanged
 * @return Array: converted year, month, day
 */
private static function tsToYear( $ts, $cName ) {
	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );
	$calendar = (string)$cName;

	if ( $calendar === 'thai' ) {
		# Thai solar years run 543 ahead of the Gregorian count
		$gy_offset = $gy + 543;
	} elseif ( $calendar === 'minguo' || $calendar === 'juche' ) {
		# Minguo and Juche years both count from 1912 as year 1
		$gy_offset = $gy - 1911;
	} elseif ( $calendar === 'tenno' ) {
		# Eras after Meiji, newest first: first day (YYYYMMDD),
		# Gregorian year of the era's first year, era name
		$laterEras = array(
			array( 19890108, 1989, '平成' ),
			array( 19261226, 1926, '昭和' ),
			array( 19120731, 1912, '大正' ),
		);
		# Anything before the first Taishō day is counted as Meiji
		$firstYear = 1868;
		$eraName = '明治';
		$ymd = intval( $gy . $gm . $gd );
		foreach ( $laterEras as $era ) {
			if ( $ymd >= $era[0] ) {
				$firstYear = $era[1];
				$eraName = $era[2];
				break;
			}
		}
		# The first year of an era is written with 元 instead of the number 1
		$eraYear = $gy - $firstYear + 1;
		$gy_offset = $eraName . ( $eraYear == 1 ? '元' : $eraYear );
	} else {
		$gy_offset = $gy;
	}

	return array( $gy_offset, $gm, $gd );
}
1413
/**
 * Format a number as a Roman numeral.
 *
 * @param $num Mixed: number to format, converted with intval()
 * @return Mixed: the Roman numeral for 1..3000; outside that range the
 *   integer value itself is returned
 */
static function romanNumeral( $num ) {
	# One row per decimal place (units, tens, hundreds, thousands),
	# indexed by the digit found at that place
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 3000 ) {
		return $num;
	}

	$roman = '';
	# Walk the decimal places from thousands down to units
	foreach ( array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 ) as $place => $unit ) {
		$digit = intval( $num / $unit );
		if ( $digit > 0 ) {
			$roman .= $table[$place][$digit];
		}
		$num = $num % $unit;
	}
	return $roman;
}
1439
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * A single-letter number gets a geresh (') appended; a number of several
 * letters gets gershayim (") before its last letter, and that last letter
 * is written in its final form where one exists. A thousands digit is
 * followed by a geresh.
 *
 * Round thousands (1000, 2000, ...) are written as the thousands letter
 * followed by the Hebrew word for thousand(s). They used to fall into the
 * multi-letter branch with a three-byte string, which cut the letter in
 * half and returned invalid UTF-8.
 *
 * @param $num Mixed: number to format, converted with intval()
 * @return Mixed: the formatted string for 1..9999; outside that range the
 *   integer value itself is returned
 */
static function hebrewNumeral( $num ) {
	# One row per decimal place (units, tens, hundreds, thousands)
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);
	# Letters that change shape at the end of a word
	static $finalForms = array(
		'כ' => 'ך',
		'מ' => 'ם',
		'נ' => 'ן',
		'פ' => 'ף',
		'צ' => 'ץ'
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	# Round thousands have their own notation
	if ( $num == 1000 ) {
		return "א' אלף";
	} elseif ( $num % 1000 == 0 ) {
		return $table[0][intval( $num / 1000 )] . "' אלפים";
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				# 15 and 16 are written 9+6 and 9+7 rather than 10+5 and 10+6
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	# Every Hebrew letter is two bytes in UTF-8, and $s ends with a letter
	# here because round thousands were handled above
	if ( strlen( $s ) == 2 ) {
		# Single letter: append a geresh
		return $s . "'";
	}

	$head = substr( $s, 0, strlen( $s ) - 2 );
	$last = substr( $s, strlen( $s ) - 2 );
	if ( isset( $finalForms[$last] ) ) {
		$last = $finalForms[$last];
	}
	# Several letters: gershayim goes before the last one
	return $head . '"' . $last;
}
1498
/**
 * Work out which date preference time(), date() and timeanddate()
 * should use. Children overriding those methods should call this too.
 *
 *<code>
 * function timeanddate([...], $format = true) {
 * 	$datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return string the preference name, or 'default' when it is empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( $usePrefs === true ) {
		$chosen = $wgUser->getDatePreference();
	} elseif ( $usePrefs === false ) {
		$chosen = (string)User::getDefaultOption( 'date' );
	} else {
		# Anything that is not a boolean is taken to be the format itself
		$chosen = (string)$usePrefs;
	}

	return ( $chosen == '' ) ? 'default' : $chosen;
}
1536
/**
 * Get a format string for a given type and preference
 *
 * Results are memoised in $this->dateFormatStrings under the preference
 * that was actually used. 'default' is resolved to the language's default
 * format before the memo is consulted; resolving it afterwards meant the
 * lookup key was always 'default' while the entry was stored under the
 * resolved name, so the memo never hit for the most common request.
 *
 * @param $type May be date, time or both
 * @param $pref The format name as it appears in Messages*.php
 * @return The format string, or null if the data cache has none
 */
function getDateFormatString( $type, $pref ) {
	$wasDefault = ( $pref == 'default' );
	if ( $wasDefault ) {
		$pref = $this->getDefaultDateFormat();
	}

	if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		if ( !$wasDefault && is_null( $df ) ) {
			# Unknown preference: fall back to the language default
			$pref = $this->getDefaultDateFormat();
			$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		}
		$this->dateFormatStrings[$type][$pref] = $df;
	}
	return $this->dateFormatStrings[$type][$pref];
}
1558
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'date', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
1577
/**
 * Format the time-of-day part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'time', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
1596
/**
 * Format both the date and the time of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'both', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
1616
/**
 * Look up a single entry of this language's 'messages' item in the data cache.
 *
 * @param $key String: message key
 * @return whatever the data cache holds for that key
 */
function getMessage( $key ) {
	$message = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
	return $message;
}
1620
/**
 * Fetch this language's complete 'messages' item from the data cache.
 *
 * @return whatever the data cache holds for 'messages'
 */
function getAllMessages() {
	$messages = self::$dataCache->getItem( $this->mCode, 'messages' );
	return $messages;
}
1624
/**
 * Wrapper around PHP's iconv(), used by all languages except Esperanto,
 * which does some x-conversions of its own beforehand.
 *
 * @param $in String: source encoding
 * @param $out String: target encoding ('//IGNORE' is appended)
 * @param $string String: text to convert
 * @return the result of iconv()
 */
function iconv( $in, $out, $string ) {
	$target = $out . '//IGNORE';

	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	wfSuppressWarnings();
	$converted = iconv( $in, $target, $string );
	wfRestoreWarnings();

	return $converted;
}
1638
1639 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
/**
 * preg_replace_callback() handler for the ASCII branch of ucwordbreaks().
 *
 * @param $matches Array: regex match; group 1 is the word
 * @return string the word with its first character uppercased
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
1643
/**
 * preg_replace_callback() handler for the mbstring branch of ucwordbreaks().
 *
 * @param $matches Array: regex match; element 0 is the whole match
 * @return string the whole match uppercased with mb_strtoupper()
 */
function ucwordbreaksCallbackMB( $matches ) {
	$matched = $matches[0];
	return mb_strtoupper( $matched );
}
1647
/**
 * preg_replace_callback() handler used by uc() when mbstring is missing.
 *
 * @param $matches Array: regex match; group 1 is the character to convert
 * @return string group 1 translated through the first map of getCaseMaps()
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$wikiUpperChars = $caseMaps[0];
	return strtr( $matches[1], $wikiUpperChars );
}
1652
/**
 * preg_replace_callback() handler used by lc() when mbstring is missing.
 *
 * @param $matches Array: regex match; group 1 is the character to convert
 * @return string group 1 translated through the second map of getCaseMaps()
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$wikiLowerChars = $caseMaps[1];
	return strtr( $matches[1], $wikiLowerChars );
}
1657
/**
 * preg_replace_callback() handler for the mbstring branch of ucwords().
 *
 * @param $matches Array: regex match; element 0 is the whole match
 * @return string the whole match uppercased with mb_strtoupper()
 */
function ucwordsCallbackMB( $matches ) {
	$matched = $matches[0];
	return mb_strtoupper( $matched );
}
1661
/**
 * preg_replace_callback() handler used by ucwords() and ucwordbreaks()
 * when mbstring is missing.
 *
 * @param $matches Array: regex match; element 0 is the whole match
 * @return string the whole match translated through the first map of getCaseMaps()
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	$wikiUpperChars = $caseMaps[0];
	return strtr( $matches[0], $wikiUpperChars );
}
1666
/**
 * Make a string's first character uppercase
 *
 * @param $str String
 * @return string
 */
function ucfirst( $str ) {
	$firstByte = ord( $str );

	if ( $firstByte >= 128 ) {
		// multibyte lead byte: needs the more complex logic in uc()
		return $this->uc( $str, true );
	}
	if ( $firstByte >= 96 ) {
		// ASCII from '`' upwards: PHP's own ucfirst() is enough
		return ucfirst( $str );
	}
	// anything below that has no uppercase form to convert to
	return $str;
}
1681
/**
 * Convert a string to uppercase
 *
 * @param $str String
 * @param $first Bool: only convert the first character
 * @return string
 */
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		# Plain ASCII: PHP's byte-wise functions are safe
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return mb_strtoupper( $str );
		}
		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	# No mbstring: convert character by character through ucCallback()
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
1709
/**
 * Make a string's first character lowercase
 *
 * @param $str String
 * @return string
 */
function lcfirst( $str ) {
	$firstByte = ord( $str );

	if ( !$firstByte ) {
		# Empty string, or one starting with a NUL byte
		return strval( $str );
	}
	if ( $firstByte >= 128 ) {
		# Multibyte lead byte: let lc() deal with it
		return $this->lc( $str, true );
	}
	if ( $firstByte <= 96 ) {
		# ASCII up to '`': lowercase the first byte in place
		$str[0] = strtolower( $str[0] );
	}
	return $str;
}
1723
/**
 * Convert a string to lowercase
 *
 * @param $str String
 * @param $first Bool: only convert the first character
 * @return string
 */
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		# Plain ASCII: PHP's byte-wise functions are safe
		return $first
			? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 )
			: strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return mb_strtolower( $str );
		}
		return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	# No mbstring: convert character by character through lcCallback()
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
1748
/**
 * Tell whether a string contains any byte outside the 7-bit ASCII range.
 *
 * @param $str String
 * @return bool
 */
function isMultibyte( $str ) {
	$found = preg_match( '/[\x80-\xff]/', $str );
	return $found === 1;
}
1752
/**
 * Lowercase a string, then capitalize the first letter of every word
 * (the start of the string and anything following a space).
 *
 * @param $str String
 * @return string
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$lowered = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// pick the method that capitalizes a single match
	$handler = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $firstLetters, array( $this, $handler ), $lowered );
}
1778
/**
 * Capitalize words at word breaks.
 *
 * Multibyte strings are lowercased first and then get the first letter
 * after the start of the string or after one of a fixed set of break
 * characters uppercased. Other strings get every \b-delimited word passed
 * through ucfirst(), without being lowercased first.
 *
 * @param $str String
 * @return string
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$lowered = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$wordBreak = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$wordBreak([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$handler = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $firstLetters, array( $this, $handler ), $lowered );
}
1811
/**
 * Return a case-folded representation of $s
 *
 * Two strings that differ only in the case of their characters fold to
 * the same value, i.e. caseFold($s1)==caseFold($s2). The value returned
 * does not have to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s String
 * @return string
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
1826
/**
 * Make sure a title string is UTF-8: strings that are pure ASCII or
 * already match the UTF-8 byte patterns are returned untouched, anything
 * else is converted from the language's fallback 8-bit encoding.
 *
 * @param $s String
 * @return string
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# Check for non-UTF-8 URLs
	$hasHighBytes = preg_match( '/[\x80-\xff]/', $s );
	if ( !$hasHighBytes ) {
		return $s;
	}

	$utf8Pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	if ( preg_match( $utf8Pattern, $s ) ) {
		return $s;
	}

	$legacyEncoding = $this->fallback8bitEncoding();
	return $this->iconv( $legacyEncoding, 'utf-8', $s );
}
1845
/**
 * Fetch this language's 'fallback8bitEncoding' item from the data cache;
 * checkTitleEncoding() uses it as the source encoding for non-UTF-8 input.
 *
 * @return whatever the data cache holds for 'fallback8bitEncoding'
 */
function fallback8bitEncoding() {
	$encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
	return $encoding;
}
1849
/**
 * Most writing systems use whitespace to break up words.
 * Some languages such as Chinese don't conventionally do this,
 * which requires special handling when breaking up words for
 * searching etc.
 *
 * This base implementation always answers true.
 *
 * @return bool
 */
function hasWordBreaks() {
	return true;
}
1859
/**
 * Some languages such as Chinese require word segmentation.
 * Specify such segmentation when overriding this in a derived class;
 * this base implementation returns the string unchanged.
 *
 * @param $string String
 * @return String
 */
function segmentByWord( $string ) {
	return $string;
}
1870
/**
 * Some languages have special punctuation that needs to be normalized
 * before searching. Make such changes here. This base implementation
 * only runs the text through convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
1881
/**
 * convert double-width roman characters to single-width.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only the digits and the upper- and lowercase Latin letters are
 * converted. The full-width forms are generated from their code points
 * (U+FF00 + ASCII code - 0x20) rather than typed in as a literal, so the
 * table cannot be damaged when the file is re-encoded or normalised. A
 * search table holding plain ASCII would be cut into three-character
 * chunks and rewrite ordinary text.
 *
 * @param $string String
 * @return String
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$half = str_split( $halfWidth );
		$full = array();
		foreach ( $half as $char ) {
			# Code point of the full-width form, encoded by hand as the
			# three-byte UTF-8 sequence 1110xxxx 10xxxxxx 10xxxxxx
			$code = 0xFF00 + ord( $char ) - 0x20;
			$full[] = chr( 0xE0 | ( $code >> 12 ) )
				. chr( 0x80 | ( ( $code >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $code & 0x3F ) );
		}
	}

	$string = str_replace( $full, $half, $string );
	return $string;
}
1900
/**
 * Surround every match of $pattern with spaces, then collapse runs of
 * spaces into a single one.
 *
 * @param $string String
 * @param $pattern String: regular expression; its first group is re-inserted
 * @return String
 */
protected static function insertSpace( $string, $pattern ) {
	$spaced = preg_replace( $pattern, " $1 ", $string );
	return preg_replace( '/ +/', ' ', $spaced );
}
1906
/**
 * Hook for languages that must convert search terms before results are
 * displayed. This base implementation returns its argument unchanged.
 *
 * @param $termsArray the search terms
 * @return the same value that was passed in
 */
function convertForSearchResult( $termsArray ) {
	# some languages, e.g. Chinese, need to do a conversion
	# in order for search results to be displayed correctly
	return $termsArray;
}
1912
/**
 * Get the first character of a string.
 *
 * A three-byte character whose code point lies in 0xac00..0xd7a3 (Hangul
 * syllables) is replaced by a jamo chosen from the range it falls in.
 *
 * @param $s string
 * @return string the first character, or '' if the string does not start
 *   with a sequence matching the UTF-8 byte patterns
 */
function firstChar( $s ) {
	# Upper bound (exclusive) of each syllable range => jamo returned for it
	static $jamoByLimit = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	// Break down Hangul syllables to grab the first jamo
	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}
	foreach ( $jamoByLimit as $limit => $jamo ) {
		if ( $code < $limit ) {
			return $jamo;
		}
	}
	return "\xe3\x85\x8e";
}
1970
/**
 * Start-up hook for languages with an alternate character encoding
 * option. This base implementation does nothing.
 */
function initEncoding() {
	# Some languages may have an alternate char encoding option
	# (Esperanto X-coding, Japanese furigana conversion, etc)
	# If this language is used as the primary content language,
	# an override to the defaults can be set here on startup.
}
1977
/**
 * Convert text from UTF-8 to $wgEditEncoding before it is placed in the
 * edit box. Nothing is done when that setting is empty or 'UTF-8'.
 *
 * @param $s String
 * @return string
 */
function recodeForEdit( $s ) {
	# For some languages we'll want to explicitly specify
	# which characters make it into the edit box raw
	# or are converted in some way or another.
	# Note that if wgOutputEncoding is different from
	# wgInputEncoding, this text will be further converted
	# to wgOutputEncoding.
	global $wgEditEncoding;

	$needsRecoding = !( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	if ( $needsRecoding ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}
	return $s;
}
1992
/**
 * Counterpart of recodeForEdit(): convert submitted text from
 * $wgEditEncoding back to UTF-8. An empty setting counts as 'UTF-8',
 * in which case the text is returned untouched.
 *
 * @param $s String
 * @return string
 */
function recodeInput( $s ) {
	# Take the previous into account.
	global $wgEditEncoding;

	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';
	if ( $enc != 'UTF-8' ) {
		return $this->iconv( $enc, 'UTF-8', $s );
	}
	return $s;
}
2007
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only.
 *
 * @param $s String
 * @return string
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	# Apply the Arabic fixes first, then the Malayalam ones
	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}
	return $s;
}
2025
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param $file String: name of the serialized pair file
 * @param $string String: text to transform
 * @return string
 * @throws MWException if the file's data cannot be loaded
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	$pairs = wfGetPrecompiledData( $file );
	if ( $pairs === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}

	$replacer = new ReplacementArray( $pairs );
	$this->transformData[$file] = $replacer;
	return $replacer->replace( $string );
}
2044
/**
 * For right-to-left language support: fetch this language's 'rtl' item
 * from the data cache.
 *
 * @return bool
 */
function isRTL() {
	$rtl = self::$dataCache->getItem( $this->mCode, 'rtl' );
	return $rtl;
}
2053
/**
 * Return the correct HTML 'dir' attribute value for this language.
 *
 * @return String: 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}
	return 'ltr';
}
2061
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
2073
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
2085
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction
 *
 * @return string
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		# U+200F RIGHT-TO-LEFT MARK
		return "\xE2\x80\x8F";
	}
	# U+200E LEFT-TO-RIGHT MARK
	return "\xE2\x80\x8E";
}
2094
/**
 * Fetch this language's 'capitalizeAllNouns' item from the data cache.
 *
 * @return whatever the data cache holds for 'capitalizeAllNouns'
 */
function capitalizeAllNouns() {
	$setting = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
	return $setting;
}
2098
/**
 * An arrow, depending on the language direction
 *
 * @return string a leftwards arrow for right-to-left languages, a
 *   rightwards arrow otherwise
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
2107
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar".
 * Fetches this language's 'linkPrefixExtension' item from the data cache.
 *
 * @return bool
 */
function linkPrefixExtension() {
	$enabled = self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
	return $enabled;
}
2116
/**
 * Fetch this language's 'magicWords' item from the data cache.
 *
 * @return whatever the data cache holds for 'magicWords'
 */
function getMagicWords() {
	$words = self::$dataCache->getItem( $this->mCode, 'magicWords' );
	return $words;
}
2120
/**
 * Run the 'LanguageGetMagic' hook once per object so that extensions can
 * fill $this->mMagicExtensions. Later calls do nothing.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		# Flag first, so the hook is not run again from inside itself
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( 'LanguageGetMagic' );
	}
}
2130
/**
 * Fill a MagicWord object with data from here.
 *
 * Entries supplied by extensions take precedence over the language's own
 * magic words. The first element of an entry is stored as the
 * case-sensitivity flag, the remaining elements as the synonyms. If no
 * array entry is found, a message is sent to error_log() and the object
 * is left untouched.
 *
 * @param $mw object whose mId is looked up and whose mCaseSensitive and
 *   mSynonyms are set
 */
function getMagic( $mw ) {
	$this->doMagicHook();

	$rawEntry = false;
	if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId];
	} else {
		$ownWords = $this->getMagicWords();
		if ( isset( $ownWords[$mw->mId] ) ) {
			$rawEntry = $ownWords[$mw->mId];
		}
	}

	if ( is_array( $rawEntry ) ) {
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
	} else {
		error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
	}
}
2153
/**
 * Add magic words to the extension array.
 *
 * The fallback chain of this language is collected (with 'en' appended
 * if it is not already part of it) and then applied from the end of the
 * chain to this language, so that entries for this language's own code
 * override entries from its fallbacks.
 *
 * @param $newWords Array: language code => array of magic word entries
 */
function addMagicWordsByLang( $newWords ) {
	$chain = array();
	// Follow the fallbacks until the chain ends or loops back on itself
	for ( $code = $this->getCode();
		$code && !in_array( $code, $chain );
		$code = self::getFallbackFor( $code )
	) {
		$chain[] = $code;
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	foreach ( array_reverse( $chain ) as $lang ) {
		if ( isset( $newWords[$lang] ) ) {
			// Array union: keys already in $newWords[$lang] win
			$this->mMagicExtensions = $newWords[$lang] + $this->mMagicExtensions;
		}
	}
}
2174
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * The list is built on first use from the 'specialPageAliases' item of
 * the localisation data cache, passed through the
 * 'LanguageGetSpecialPageAliases' hook, and then kept on the object
 * because loading it may be slow.
 *
 * @return array
 */
function getSpecialPageAliases() {
	if ( $this->mExtendedSpecialPageAliases === null ) {
		// First call: initialise from the cache, then let hooks extend it
		$this->mExtendedSpecialPageAliases =
			self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
		$hookArgs = array( &$this->mExtendedSpecialPageAliases, $this->getCode() );
		wfRunHooks( 'LanguageGetSpecialPageAliases', $hookArgs );
	}

	return $this->mExtendedSpecialPageAliases;
}
2191
/**
 * Wrap text in emphasis markup. This base implementation uses <em>;
 * italic is unsuitable for some languages.
 *
 * @param $text String: the text to be emphasized.
 * @return string
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
2201
/**
 * Format a number for display in this language.
 *
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages: some want digits in their own script (for example
 * ੨੯੩,੨੯੧.੨੩੫) and others, such as Icelandic, just want to use commas
 * instead of dots and dots instead of commas, like "293.291,235".
 *
 * An example of this function being called:
 * <code>
 * wfMsg( 'message', $wgLang->formatNum( $num ) )
 * </code>
 *
 * See LanguageGu.php for the Gujarati implementation and
 * $separatorTransformTable in MessagesIs.php for
 * the , => . and . => , implementation.
 *
 * Digits are only translated when $wgTranslateNumerals is set.
 *
 * @todo check if it's viable to use localeconv() for the decimal
 *  separator thing.
 * @param $number Mixed: the string to be formatted, should be an integer
 *  or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates;
 *  skips both the thousands grouping and the separator transformation
 * @return string
 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	$digits = $wgTranslateNumerals ? $this->digitTransformTable() : null;
	if ( $digits ) {
		$number = strtr( $number, $digits );
	}

	return $number;
}
2245
/**
 * Undo the transformations of formatNum(): map digits and separators
 * back through the flipped transform tables, then strip the commas.
 *
 * @param $number String: formatted number
 * @return string
 */
function parseFormattedNumber( $number ) {
	// Same order as before: digits first, then separators
	foreach ( array( 'digitTransformTable', 'separatorTransformTable' ) as $getter ) {
		$table = $this->$getter();
		if ( $table ) {
			$number = strtr( $number, array_flip( $table ) );
		}
	}

	return strtr( $number, array( ',' => '' ) );
}
2260
/**
 * Adds commas as thousands separators to a given number.
 *
 * The work is done on the reversed string so that groups of three
 * digits are counted from the right; the lookahead keeps digits
 * after a decimal point from being grouped.
 *
 * @param $_ mixed
 * @return string
 */
function commafy( $_ ) {
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
2270
/**
 * Look up the 'digitTransformTable' item for this language's code in
 * the localisation data cache.
 *
 * @return mixed the cached item (used as a strtr() table when non-empty)
 */
function digitTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
	return $table;
}
2274
/**
 * Look up the 'separatorTransformTable' item for this language's code
 * in the localisation data cache.
 *
 * @return mixed the cached item (used as a strtr() table when non-empty)
 */
function separatorTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
	return $table;
}
2278
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with an "and".
 *
 * The keys of $l are ignored, so lists with gaps (e.g. the result of
 * array_filter()) and associative arrays are handled as well.
 *
 * @param $l Array
 * @return string Empty string for an empty list; the sole element
 *   itself for a one-element list
 */
function listToText( $l ) {
	// Normalise to consecutive 0-based keys
	$items = array_values( (array)$l );
	$count = count( $items );
	if ( $count == 0 ) {
		return '';
	}
	if ( $count == 1 ) {
		return $items[0];
	}

	$last = array_pop( $items );
	// Each message is looked up once instead of once per element
	$and = $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' );
	if ( count( $items ) > 1 ) {
		$head = implode( $this->getMessageFromDB( 'comma-separator' ), $items );
	} else {
		$head = $items[0];
	}
	return $head . $and . $last;
}
2305
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 *
 * @param $list array of strings to put in a comma list
 * @return string
 */
function commaList( $list ) {
	$separator = wfMsgExt(
		'comma-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the reversed (array, separator) order is a legacy
	// form that newer PHP versions no longer accept.
	return implode( $separator, $list );
}
2321
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 *
 * @param $list array of strings to put in a semicolon list
 * @return string
 */
function semicolonList( $list ) {
	$separator = wfMsgExt(
		'semicolon-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the reversed (array, separator) order is a legacy
	// form that newer PHP versions no longer accept.
	return implode( $separator, $list );
}
2337
/**
 * Same as commaList, but separate it with the pipe instead
 * (the local pipe-separator message).
 *
 * @param $list array of strings to put in a pipe list
 * @return string
 */
function pipeList( $list ) {
	$separator = wfMsgExt(
		'pipe-separator',
		array( 'escapenoentities', 'language' => $this )
	);
	// Separator first: the reversed (array, separator) order is a legacy
	// form that newer PHP versions no longer accept.
	return implode( $separator, $list );
}
2352
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning.
 * If the truncated result (ellipsis included) would not be shorter than
 * the input, the input is returned unchanged.
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (excluding ellipses)
 * @param $ellipsis String to append to the truncated text; the default
 *   '...' is replaced by the localized 'ellipsis' message
 * @return string
 */
function truncate( $string, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Cases where nothing needs to be cut
	if ( $length == 0 ) {
		return $ellipsis;
	}
	if ( strlen( $string ) <= abs( $length ) ) {
		return $string;
	}

	if ( $length > 0 ) {
		# Keep the head: xyz...
		$truncated = $this->removeBadCharLast( substr( $string, 0, $length ) ) . $ellipsis;
	} else {
		# Keep the tail: ...xyz
		$truncated = $ellipsis . $this->removeBadCharFirst( substr( $string, $length ) );
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
	return strlen( $truncated ) < strlen( $string ) ? $truncated : $string;
}
2396
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharLast( $string ) {
	if ( (string)$string === '' ) {
		// Nothing to inspect; avoids reading an offset that does not exist
		return $string;
	}
	$char = ord( $string[strlen( $string ) - 1] );
	$m = array();
	if ( $char >= 0xc0 ) {
		# We got the first byte only of a multibyte char; remove it.
		$string = substr( $string, 0, -1 );
	} elseif ( $char >= 0x80 &&
		// The "s" modifier lets (.*) span newlines, so the check also
		// works on multi-line text
		preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
			'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
	{
		# We chopped in the middle of a character; remove it
		$string = $m[1];
	}
	return $string;
}
2419
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharFirst( $string ) {
	if ( (string)$string === '' ) {
		// Nothing to inspect; avoids reading an offset that does not exist
		return $string;
	}
	$char = ord( $string[0] );
	if ( $char >= 0x80 && $char < 0xc0 ) {
		# The string starts with continuation bytes, so we chopped in the
		# middle of a character; remove the whole thing
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
2435
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML)
 *
 * Note: tries to fix broken HTML with MWTidy
 *
 * @param $text String: HTML string to truncate
 * @param $length Int: (zero/positive) maximum length (excluding ellipses)
 * @param $ellipsis String to append to the truncated text; the default
 *   '...' is replaced by the localized 'ellipsis' message
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}
	# Check if there is no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML
	}
	$text = MWTidy::tidy( $text ); // fix tags
	$displayLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = '';
	$openTags = array(); // open tag stack
	$textLen = strlen( $text );
	for ( $pos = 0; $pos < $textLen; ++$pos ) {
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for slash)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$displayLen++; // entity is one displayed char
				}
			} else {
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$displayLen++; // this char is displayed
					// Add on the other display text after this...
					$skipped = $this->truncate_skip(
						$ret, $text, "<>&", $pos + 1, $length - $displayLen );
					$displayLen += $skipped;
					$pos += $skipped;
				}
			}
		}
		# Consider truncation once the display length has reached the maximum.
		# Double-check that we're not in the middle of a bracket/entity...
		if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless
				# the ellipsis actually makes the string longer.
				$pOpenTags = $openTags; // save state
				$pRet = $ret; // save state
			} elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
				# Ellipsis won't make string longer/equal, the truncation point was OK.
				$openTags = $pOpenTags; // reload state
				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
	}
	if ( $displayLen == 0 ) {
		return ''; // no text shown, nothing to format
	}
	// Close the last tag if left unclosed by bad HTML.
	// Argument order is ( &$tag, $tagType, $lastCh, &$openTags ): the tag
	// type comes before the last character.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
2544
/**
 * truncateHtml() helper function
 * like strcspn() but adds the skipped chars to $ret
 *
 * @param String &$ret Result string; the skipped characters are appended
 * @param String $text Text being scanned
 * @param String $search Characters at which to stop skipping
 * @param int $start Offset in $text at which to start
 * @param int $len Maximum number of characters to skip; a negative
 *   value (the default) means no limit
 * @return int Number of characters skipped
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
	$skipCount = 0;
	if ( $start < strlen( $text ) ) {
		if ( $len < 0 ) {
			// strcspn() reads a negative length as "stop that many
			// characters before the end", which would silently drop the
			// tail of $text; omit the argument to scan without a limit.
			$skipCount = strcspn( $text, $search, $start );
		} else {
			$skipCount = strcspn( $text, $search, $start, $len );
		}
		$ret .= substr( $text, $start, $skipCount );
	}
	return $skipCount;
}
2555
/**
 * truncateHtml() helper function
 * (a) push or pop $tag from $openTags as needed
 * (b) clear $tag value
 *
 * Nothing happens when $tag is empty after trimming leading whitespace.
 *
 * @param String &$tag Current HTML tag name we are looking at
 * @param int $tagType (0-open tag, 1-close tag)
 * @param char $lastCh Character before the '>' that ended this tag
 * @param array &$openTags Open tag stack (not accounting for $tag)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return;
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		// Tag opened (and didn't close itself)
		$openTags[] = $tag;
	} elseif ( $tagType == 1 ) {
		// A closing tag only pops the stack when it matches the top entry
		$top = count( $openTags ) - 1;
		if ( $openTags && $tag == $openTags[$top] ) {
			array_pop( $openTags );
		}
	}
	$tag = '';
}
2578
/**
 * Grammatical transformations, needed for inflected languages
 * Invoked by putting {{grammar:case|word}} in a message
 *
 * This base implementation only consults $wgGrammarForms for an exact
 * entry for this language code, case and word.
 *
 * @param $word string
 * @param $case string
 * @return string the configured form, or $word unchanged if there is none
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	$code = $this->getCode();
	return isset( $wgGrammarForms[$code][$case][$word] )
		? $wgGrammarForms[$code][$case][$word]
		: $word;
}
2594
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If second or third parameter are not specified, masculine is used.
 * These details may be overridden per language.
 *
 * @param $gender string 'male', 'female', or anything else for neutral
 * @param $forms array of forms: masculine, feminine, neutral
 * @return string
 */
function gender( $gender, $forms ) {
	if ( count( $forms ) == 0 ) {
		return '';
	}

	// Make sure at least the first two forms exist
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $gender === 'male' ) {
		$chosen = $forms[0];
	} elseif ( $gender === 'female' ) {
		$chosen = $forms[1];
	} elseif ( isset( $forms[2] ) ) {
		$chosen = $forms[2];
	} else {
		$chosen = $forms[0];
	}
	return $chosen;
}
2616
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 forms of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language
 */
function convertPlural( $count, $forms ) {
	if ( count( $forms ) == 0 ) {
		return '';
	}

	// Make sure both the singular and the plural form exist
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $count == 1 ) {
		return $forms[0];
	}
	return $forms[1];
}
2640
/**
 * Pads the array of forms given to convertPlural to the requested
 * amount of forms by copying the last one.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	$missing = $count - count( $forms );
	for ( $i = 0; $i < $missing; $i++ ) {
		// Append another copy of the current last form
		$forms[] = $forms[count( $forms ) - 1];
	}
	return $forms;
}
2655
/**
 * For translating of expiry times
 *
 * The 'ipboptions' message is read as a comma-separated list of
 * "label:value" pairs. When $str equals one of the values, the trimmed,
 * HTML-escaped label is returned. When the message is '-' or no value
 * matches, $str is returned unchanged.
 *
 * @param $str String: the validated block time in English
 * @return Somehow translated block time
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$optionsMsg = $this->getMessageFromDB( 'ipboptions' );

	if ( $optionsMsg != '-' ) {
		foreach ( explode( ',', $optionsMsg ) as $option ) {
			// Only entries of the form "label:value" are usable
			if ( strpos( $option, ':' ) !== false ) {
				list( $show, $value ) = explode( ':', $option );
				if ( strcmp( $str, $value ) == 0 ) {
					return htmlspecialchars( trim( $show ) );
				}
			}
		}
	}

	return $str;
}
2681
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation returns the text as is.
 *
 * @param $text String
 * @return String
 */
function segmentForDiff( $text ) {
	$segmented = $text;
	return $segmented;
}
2692
/**
 * Counterpart of segmentForDiff(): unsegment the text to show the
 * result. This base implementation returns the text as is.
 *
 * @param $text String
 * @return String
 */
function unsegmentForDiff( $text ) {
	$unsegmented = $text;
	return $unsegmented;
}
2702
/**
 * Convert text to all supported variants.
 * Delegates to the converter object in $this->mConverter.
 *
 * @param $text string
 * @return mixed whatever the converter returns
 */
function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
2707
/**
 * Convert text to different variants of a language.
 * Delegates to the converter object in $this->mConverter.
 *
 * @param $text string
 * @return mixed whatever the converter returns
 */
function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
2712
/**
 * Convert a Title object to a string in the preferred variant.
 * Delegates to the converter object in $this->mConverter.
 *
 * @param $title Title object
 * @return mixed whatever the converter returns
 */
function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
2717
/**
 * Check if this is a language with variants, i.e. whether
 * getVariants() lists more than one entry.
 *
 * @return bool
 */
function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
2722
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Delegates to the converter object in $this->mConverter.
 *
 * @param $text string
 * @return mixed whatever the converter returns
 */
function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
2727
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title;
 *   passed on to convert() as a second argument
 * @return string
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
2738
/**
 * Convert a category key.
 * Delegates to the converter object in $this->mConverter.
 *
 * @param $key string
 * @return mixed whatever the converter returns
 */
function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
2742
/**
 * Get the list of variants supported by this language, as reported
 * by the converter object in $this->mConverter.
 * See the sample implementation in LanguageZh.php
 *
 * @return array an array of language codes
 */
function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
2752
/**
 * Get the preferred variant, as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed whatever the converter returns
 */
function getPreferredVariant() {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
2756
/**
 * Get the default variant, as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed whatever the converter returns
 */
function getDefaultVariant() {
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
2760
/**
 * Get the URL variant, as reported by the converter object
 * in $this->mConverter.
 *
 * @return mixed whatever the converter returns
 */
function getURLVariant() {
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
2764
/**
 * If a language supports multiple variants, it is
 * possible that a non-existing link in one variant
 * actually exists in another variant. This function
 * tries to find it. See e.g. LanguageZh.php
 *
 * The work is delegated to the converter object in $this->mConverter.
 *
 * @param $link String: the name of the link
 * @param $nt Mixed: the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *   we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
2780
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * Delegates to the converter object in $this->mConverter.
 *
 * @deprecated Use autoConvertToAllVariants()
 */
function convertLinkToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
2791
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value comes from
 * the converter object in $this->mConverter.
 *
 * @return string
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
2801
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter object in $this->mConverter.
 *
 * @return string
 */
function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
2812
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser. The work is delegated to the
 * converter object in $this->mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse passed through to the converter unchanged
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
2824
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 * Read from the 'linkTrail' item of the localisation data cache.
 *
 * @return string
 */
function linkTrail() {
	$trail = self::$dataCache->getItem( $this->mCode, 'linkTrail' );
	return $trail;
}
2834
/**
 * Get the language object, which is this object itself.
 *
 * @return Language $this
 */
function getLangObj() {
	$langObj = $this;
	return $langObj;
}
2838
/**
 * Get the RFC 3066 code for this language object
 *
 * @return mixed the value of $this->mCode
 */
function getCode() {
	$code = $this->mCode;
	return $code;
}
2845
/**
 * Set the language code stored on this object ($this->mCode).
 *
 * @param $code string
 */
function setCode( $code ) {
	$this->mCode = $code;
}
2849
/**
 * Get the name of a file for a certain language code.
 * The code is mangled by upper-casing its first letter and replacing
 * dashes with underscores.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws MWException if Language::isValidCode() rejects $code
 */
static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
	// Protect against path traversal
	if ( Language::isValidCode( $code ) ) {
		$mangledCode = str_replace( '-', '_', ucfirst( $code ) );
		return $prefix . $mangledCode . $suffix;
	}

	throw new MWException( "Invalid language code \"$code\"" );
}
2865
/**
 * Get the language code from a file name. Inverse of getFileName()
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return Language code, or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	// The mangled code is one capital letter followed by lower case
	// letters and underscores
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';

	$matches = null;
	preg_match( $pattern, $filename, $matches );
	if ( count( $matches ) == 0 ) {
		return false;
	}

	$mangledCode = strtolower( $matches[1] );
	return str_replace( '_', '-', $mangledCode );
}
2882
/**
 * Get the path of the messages file for a language code, below
 * $IP/languages/messages.
 *
 * @param $code string Language code
 * @return string
 */
static function getMessagesFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
2887
/**
 * Get the path of the language class file for a language code, below
 * $IP/languages/classes.
 *
 * @param $code string Language code
 * @return string
 */
static function getClassFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
2892
/**
 * Get the fallback for a given language
 *
 * @param $code string Language code
 * @return mixed false for 'en', otherwise the 'fallback' item of the
 *   localisation cache for $code
 */
static function getFallbackFor( $code ) {
	if ( $code !== 'en' ) {
		return self::getLocalisationCache()->getItem( $code, 'fallback' );
	}
	// Shortcut: 'en' is answered without consulting the cache
	return false;
}
2904
/**
 * Get all messages for a given language
 * WARNING: this may take a long time
 *
 * @param $code string Language code
 * @return mixed the 'messages' item of the localisation cache
 */
static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
2912
/**
 * Get a message for a given language
 *
 * @param $key string Message key
 * @param $code string Language code
 * @return mixed the sub-item $key of the 'messages' item of the
 *   localisation cache
 */
static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
2919
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * '$1' is replaced by $wgMetaNamespace, {{grammar:case|word}}
 * constructs are resolved through convertGrammar(), and spaces are
 * turned into underscores. Names without '$1' are returned untouched.
 *
 * @param $talk string Namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback holds $this by value; taking a reference to $this
	# is a PHP 4 idiom that is not needed here.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
2936
/**
 * preg_replace_callback() callback for fixVariableInNamespace().
 *
 * @param $m array of matches: [1] is the grammar case, [2] is the word
 * @return string result of convertGrammar() on the trimmed word and case
 */
function replaceGrammarInNamespace( $m ) {
	$word = trim( $m[2] );
	$case = trim( $m[1] );
	return $this->convertGrammar( $word, $case );
}
2940
/**
 * Get the upper and lower case maps loaded from the precompiled
 * 'Utf8Case.ser' data. The maps are kept in static variables, so the
 * data is only loaded on the first call.
 *
 * @return array ( upper case map, lower case map )
 * @throws MWException if the precompiled data cannot be loaded
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before bailing out so that
		// wfProfileIn()/wfProfileOut() stay balanced
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );
	return array( $wikiUpperChars, $wikiLowerChars );
}
2958
/**
 * Format a time period given in seconds for display.
 *
 * Periods that round to less than 10 seconds are shown with one
 * decimal place, periods below a minute as whole seconds, periods
 * below an hour as minutes and seconds, and anything longer as hours,
 * minutes and seconds. Numbers go through formatNum() and the unit
 * labels are the 'hours-abbrev', 'minutes-abbrev' and 'seconds-abbrev'
 * messages.
 *
 * @param $seconds number length of the period in seconds
 * @return string
 */
function formatTimePeriod( $seconds ) {
	$tenths = round( $seconds * 10 );
	if ( $tenths < 100 ) {
		return $this->formatNum( sprintf( "%.1f", $tenths / 10 ) ) .
			$this->getMessageFromDB( 'seconds-abbrev' );
	}

	$wholeSeconds = round( $seconds );
	if ( $wholeSeconds < 60 ) {
		return $this->formatNum( $wholeSeconds ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	$parts = array();
	if ( $wholeSeconds < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		// Rounding may yield a full minute; carry it over
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
	} else {
		$hours = floor( $seconds / 3600 );
		$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
		$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
		// Rounding may yield a full minute or hour; carry it over
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		if ( $minutes == 60 ) {
			$minutes = 0;
			$hours++;
		}
		$parts[] = $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' );
	}
	$parts[] = $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' );
	$parts[] = $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );

	return implode( ' ', $parts );
}
2990
/**
 * Format a bitrate for output, using an appropriate unit
 * (bps, kbps, Mbps or Gbps) according to the magnitude in question.
 *
 * Mantissas below 10 are rounded to one decimal place, larger ones
 * to whole numbers. Rates beyond the largest unit are expressed in
 * that unit, and positive rates below 1 are expressed in bps.
 *
 * @param $bps number bitrate in bits per second
 * @return string
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		return $this->formatNum( $bps ) . $units[0];
	}
	// Clamp to the available units: log10() is negative for rates below
	// 1 and the index would run past the array for rates of 1e12 and up.
	$unitIndex = (int)floor( log10( $bps ) / 3 );
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );
	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
3005
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question
 *
 * A size of exactly 1024 of one unit is expressed as 1 of the next
 * unit. Sizes in bytes and kilobytes are rounded to whole numbers,
 * larger units to two decimal places.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	$messages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );
	$maxIndex = count( $messages ) - 1;

	$index = 0;
	// ">=" so that exactly 1024 of a unit moves up to the next unit
	while ( $index < $maxIndex && $size >= 1024 ) {
		$size = $size / 1024;
		$index++;
	}

	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter
	$round = $index >= 2 ? 2 : 0;
	$size = round( $size, $round );
	$text = $this->getMessageFromDB( $messages[$index] );
	return str_replace( '$1', $this->formatNum( $size ), $text );
}
3038
/**
 * Get the conversion rule title, if any, as reported by the
 * converter object in $this->mConverter.
 *
 * @return mixed whatever the converter returns
 */
function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
3045 }