3501bd5a7a33b0fe80a74c9e8b6419f72dc46285
[lhc/web/wiklou.git] / languages / Language.php
1 <?php
2 /**
3 * Internationalisation code
4 *
5 * @file
6 * @ingroup Language
7 */
8
9 /**
10 * @defgroup Language Language
11 */
12
// Entry-point guard: when the MEDIAWIKI constant is not defined, print a
// notice and terminate with exit status 1.
if ( !defined( 'MEDIAWIKI' ) ) {
	echo "This file is part of MediaWiki, it is not a valid entry point.\n";
	exit( 1 );
}

# Read language names
# NOTE(review): Names.php presumably populates $wgLanguageNames - confirm.
global $wgLanguageNames;
require_once( dirname( __FILE__ ) . '/Names.php' );

global $wgInputEncoding, $wgOutputEncoding;

/**
 * These are always UTF-8, they exist only for backwards compatibility
 */
$wgInputEncoding = 'UTF-8';
$wgOutputEncoding = 'UTF-8';

// When the mbstring extension is loaded, make its functions default to UTF-8.
if ( function_exists( 'mb_strtoupper' ) ) {
	mb_internal_encoding( 'UTF-8' );
}
33
/**
 * Stand-in language converter: every method either returns its input
 * unchanged, reports the wrapped language's own code as the only variant,
 * or returns an empty/false value.
 *
 * @ingroup Language
 */
class FakeConverter {
	/** Language object supplied to the constructor */
	public $mLang;

	function __construct( $langobj ) {
		$this->mLang = $langobj;
	}

	/** Map the language's own code to the untouched text. */
	function autoConvertToAllVariants( $text ) {
		$code = $this->mLang->getCode();
		return array( $code => $text );
	}

	function convert( $t ) {
		return $t;
	}

	/** $t must provide getPrefixedText(). */
	function convertTitle( $t ) {
		return $t->getPrefixedText();
	}

	function getVariants() {
		$code = $this->mLang->getCode();
		return array( $code );
	}

	function getPreferredVariant() {
		return $this->mLang->getCode();
	}

	function getDefaultVariant() {
		return $this->mLang->getCode();
	}

	function getURLVariant() {
		return '';
	}

	function getConvRuleTitle() {
		return false;
	}

	/** Intentionally a no-op: both reference arguments are left untouched. */
	function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
	}

	function getExtraHashOptions() {
		return '';
	}

	function getParsedTitle() {
		return '';
	}

	function markNoConversion( $text, $noParse = false ) {
		return $text;
	}

	function convertCategoryKey( $key ) {
		return $key;
	}

	function convertLinkToAllVariants( $text ) {
		return $this->autoConvertToAllVariants( $text );
	}

	function armourMath( $text ) {
		return $text;
	}
}
58
59 /**
60 * Internationalisation code
61 * @ingroup Language
62 */
63 class Language {
// $mConverter is set to a FakeConverter and $mCode to the language code by
// the constructor.
var $mConverter, $mVariants, $mCode, $mLoaded = false;
var $mMagicExtensions = array(), $mMagicHookDone = false;

// Lazily filled per-object caches: $mNamespaceIds by getNamespaceIds(),
// $namespaceNames by getNamespaces(), $namespaceAliases by
// getNamespaceAliases().
var $mNamespaceIds, $namespaceNames, $namespaceAliases;
var $dateFormatStrings = array();
var $mExtendedSpecialPageAliases;

/**
 * ReplacementArray object caches
 */
var $transformData = array();

// Shared cache object, created on demand by getLocalisationCache().
static public $dataCache;
// Language objects keyed by language code, managed by factory().
static public $mLangObjCache = array();

// Message keys for weekday names; getWeekdayName() reads index $key - 1,
// so key 1 maps to 'sunday'.
static public $mWeekdayMsgs = array(
	'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
	'friday', 'saturday'
);

// Message keys for abbreviated weekday names, same ordering as above.
static public $mWeekdayAbbrevMsgs = array(
	'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
);

// Message keys for month names; the getters read index $key - 1, so key 1
// maps to the first entry of each list.
static public $mMonthMsgs = array(
	'january', 'february', 'march', 'april', 'may_long', 'june',
	'july', 'august', 'september', 'october', 'november',
	'december'
);
static public $mMonthGenMsgs = array(
	'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
	'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
	'december-gen'
);
static public $mMonthAbbrevMsgs = array(
	'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
	'sep', 'oct', 'nov', 'dec'
);

// Message keys for Iranian calendar month names.
static public $mIranianCalendarMonthMsgs = array(
	'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
	'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
	'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
	'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
);

// Message keys for Hebrew calendar month names; entries 13 and 14 are the
// 'm6a' / 'm6b' keys.
static public $mHebrewCalendarMonthMsgs = array(
	'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
	'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
	'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
	'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
	'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
);

// Genitive counterparts of the Hebrew calendar month message keys.
static public $mHebrewCalendarMonthGenMsgs = array(
	'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
	'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
	'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
	'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
	'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
);

// Message keys for Hijri calendar month names.
static public $mHijriCalendarMonthMsgs = array(
	'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
	'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
	'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
	'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
);
132
/**
 * Return the shared Language object for a language code, building and
 * caching it on first request.
 *
 * @param $code String
 * @return Language
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	// Keep the cache bounded: once it holds more than ten objects, drop
	// all of them before adding the new one.
	if ( count( self::$mLangObjCache ) > 10 ) {
		self::$mLangObjCache = array();
	}

	self::$mLangObjCache[$code] = self::newFromCode( $code );
	return self::$mLangObjCache[$code];
}
148
/**
 * Create a language object for a given language code.
 *
 * For any code other than 'en' the class name is "Language" followed by the
 * code with its first letter upper-cased and '-' replaced by '_'; the class
 * file (and an optional ".deps.php" companion) is loaded from
 * $IP/languages/classes when present. If no such class exists, the object
 * is built from the fallback language and re-labelled with $code.
 *
 * @param $code String
 * @return Language
 * @throws MWException if $code is not a valid code, or if following
 *   fallbacks nests more than five levels deep
 */
protected static function newFromCode( $code ) {
	global $IP;
	static $recursionLevel = 0;

	// Protect against path traversal below
	if ( !Language::isValidCode( $code )
		|| strcspn( $code, "/\\\000" ) !== strlen( $code ) )
	{
		throw new MWException( "Invalid language code \"$code\"" );
	}

	if ( $code == 'en' ) {
		$class = 'Language';
	} else {
		$class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
		// Preload base classes to work around APC/PHP5 bug
		if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
			include_once( "$IP/languages/classes/$class.deps.php" );
		}
		if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
			include_once( "$IP/languages/classes/$class.php" );
		}
	}

	if ( $recursionLevel > 5 ) {
		throw new MWException( "Language fallback loop detected when creating class $class\n" );
	}

	if ( !class_exists( $class ) ) {
		$fallback = Language::getFallbackFor( $code );
		++$recursionLevel;
		try {
			$lang = Language::newFromCode( $fallback );
		} catch ( Exception $e ) {
			// The counter is static: restore it before propagating so a
			// caught failure does not leave it inflated for later calls.
			--$recursionLevel;
			throw $e;
		}
		--$recursionLevel;
		$lang->setCode( $code );
	} else {
		$lang = new $class;
	}
	return $lang;
}
193
/**
 * Tell whether a language code string has a valid form, regardless of
 * whether such a language exists. A code is rejected only when it contains
 * a slash, a backslash or a NUL byte.
 *
 * @param $code String
 * @return bool
 */
public static function isValidCode( $code ) {
	$forbidden = "/\\\000";
	// Length of the leading run that is free of forbidden characters
	$cleanLength = strcspn( $code, $forbidden );
	return $cleanLength === strlen( $code );
}
201
/**
 * Return the shared localisation cache, creating it on first use from
 * $wgLocalisationCacheConf (its 'class' entry names the class to
 * instantiate; the whole array is passed to the constructor).
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	if ( self::$dataCache === null ) {
		global $wgLocalisationCacheConf;
		$cacheClass = $wgLocalisationCacheConf['class'];
		self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	}
	return self::$dataCache;
}
215
/**
 * Install a FakeConverter, derive the language code from the class name
 * and make sure the localisation cache exists.
 */
function __construct() {
	$this->mConverter = new FakeConverter( $this );

	$className = get_class( $this );
	if ( $className == 'Language' ) {
		$this->mCode = 'en';
	} else {
		// Descendants are named "Language" + code: drop the 8-character
		// prefix, lower-case the rest and turn '_' back into '-'.
		$suffix = strtolower( substr( $className, 8 ) );
		$this->mCode = str_replace( '_', '-', $suffix );
	}

	self::getLocalisationCache();
}
226
/**
 * Reduce memory usage by unsetting every property visible from this class
 * when the object is destroyed.
 */
function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
235
/**
 * Hook called when this object is the content language. The base
 * implementation does nothing; descendants can override it to register
 * hook functions or modify globals.
 */
function initContLang() {
}
241
/**
 * Deprecated wrapper that logs a deprecation notice and forwards to
 * User::getDefaultOptions().
 *
 * @deprecated Use User::getDefaultOptions()
 * @return array
 */
function getDefaultUserOptions() {
	wfDeprecated( __METHOD__ );
	$defaults = User::getDefaultOptions();
	return $defaults;
}
250
/**
 * Return the 'fallback' item stored for this language in the localisation
 * cache, or false when this language is 'en'.
 *
 * @return mixed
 */
function getFallbackLanguageCode() {
	return $this->mCode === 'en'
		? false
		: self::$dataCache->getItem( $this->mCode, 'fallback' );
}
258
/**
 * Return the 'bookstoreList' item stored for this language in the
 * localisation cache.
 *
 * @return array
 */
function getBookstoreList() {
	$list = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $list;
}
266
/**
 * Return the namespace names for this language, keyed by namespace ID and
 * sorted by ID. The result is computed once and cached on the object.
 *
 * Names from $wgExtraNamespaces take precedence over localised names,
 * which take precedence over the canonical ones; the project namespaces
 * come from $wgMetaNamespace / $wgMetaNamespaceTalk.
 *
 * @return array
 */
function getNamespaces() {
	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	if ( !is_null( $this->namespaceNames ) ) {
		return $this->namespaceNames;
	}

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$canonical = MWNamespace::getCanonicalNamespaces();

	// Array union keeps the left-most value for a duplicate key.
	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $canonical;

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	// Without an explicit talk name, expand the variable in the localised one.
	$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk
		? $wgMetaNamespaceTalk
		: $this->fixVariableInNamespace( $this->namespaceNames[NS_PROJECT_TALK] );

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( array_keys( $this->namespaceNames ) as $nsId ) {
		if ( !isset( $canonical[$nsId] ) ) {
			unset( $this->namespaceNames[$nsId] );
		}
	}

	# The mixing above may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	return $this->namespaceNames;
}
301
/**
 * Same as getNamespaces(), but with every '_' in the names replaced by a
 * space; handy for text that is shown to the user, e.g. in select forms.
 *
 * @return array
 */
function getFormattedNamespaces() {
	$formatted = array();
	foreach ( $this->getNamespaces() as $nsId => $name ) {
		$formatted[$nsId] = strtr( $name, '_', ' ' );
	}
	return $formatted;
}
317
/**
 * Look up a namespace name by its ID.
 *
 * Example: $wgContLang->getNsText( NS_MEDIAWIKI ) gives 'MediaWiki'.
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}
	return false;
}
332
/**
 * Same as getNsText(), but with '_' replaced by a space; useful for
 * producing output.
 *
 * @param $index Int: namespace index
 * @return string
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
344
/**
 * Return the gender-specific alias of a namespace when one is defined,
 * otherwise the plain namespace name from getNsText().
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key (male, female... )
 * @return String
 * @since 1.18
 */
function getGenderNsText( $index, $gender ) {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}
	return $this->getNsText( $index );
}
356
/**
 * Whether this language distinguishes genders, for example in namespace
 * names. True when at least one gender alias is defined.
 *
 * @return bool
 * @since 1.18
 */
function needsGenderDistinction() {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	$aliasCount = count( $genderAliases );
	return $aliasCount > 0;
}
367
/**
 * Get a namespace key by value, case insensitive.
 * Only consults the table built by getNamespaceIds() for the current
 * language, not the canonical names defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$needle = $this->lc( $text );
	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}
	return false;
}
381
/**
 * Return the namespace aliases of this language as a map from alias name
 * to namespace ID, including every gender-specific alias. The result is
 * computed once and cached on the object.
 *
 * @return array
 */
function getNamespaceAliases() {
	if ( !is_null( $this->namespaceAliases ) ) {
		return $this->namespaceAliases;
	}

	$aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
	if ( !$aliases ) {
		$aliases = array();
	} else {
		// Project-talk aliases have their variable expanded: the raw key is
		// removed and the expanded name is added in its place.
		$projectTalkNames = array_keys( $aliases, NS_PROJECT_TALK, true );
		foreach ( $projectTalkNames as $rawName ) {
			unset( $aliases[$rawName] );
			$aliases[$this->fixVariableInNamespace( $rawName )] = NS_PROJECT_TALK;
		}
	}

	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	foreach ( $genderAliases as $nsId => $forms ) {
		foreach ( $forms as $form ) {
			$aliases[$form] = $nsId;
		}
	}

	$this->namespaceAliases = $aliases;
	return $this->namespaceAliases;
}
408
/**
 * Return a map from lower-cased namespace name to namespace ID, built from
 * the namespace names, then the language's aliases, then
 * $wgNamespaceAliases (later sources overwrite earlier ones). The result is
 * computed once and cached on the object.
 *
 * @return array
 */
function getNamespaceIds() {
	global $wgNamespaceAliases;

	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	foreach ( $this->getNamespaces() as $nsId => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $nsId;
	}

	foreach ( $this->getNamespaceAliases() as $alias => $nsId ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
	}

	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $nsId ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
		}
	}

	return $this->mNamespaceIds;
}
431
432
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names are checked first and therefore override the names defined for
 * the current language.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$needle = $this->lc( $text );

	$canonicalIndex = MWNamespace::getCanonicalIndex( $needle );
	if ( $canonicalIndex !== null ) {
		return $canonicalIndex;
	}

	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}
	return false;
}
448
/**
 * Short name of a language variant, as used for language conversion links.
 * Read from the message "variantname-" followed by the code.
 *
 * @param $code String
 * @return string
 */
function getVariantname( $code ) {
	$messageKey = "variantname-$code";
	return $this->getMessageFromDB( $messageKey );
}
458
/**
 * Build "special-namespace-name:page-name" for a special page, using the
 * first alias registered for $name when there is one.
 *
 * @param $name String
 * @return string
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
466
/**
 * Return the five quickbar setting labels, in the order none, fixedleft,
 * fixedright, floatingleft, floatingright.
 *
 * @return array
 */
function getQuickbarSettings() {
	$messageKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright'
	);
	$labels = array();
	foreach ( $messageKeys as $messageKey ) {
		$labels[] = $this->getMessage( $messageKey );
	}
	return $labels;
}
476
/**
 * Return the 'mathNames' item stored for this language in the
 * localisation cache.
 */
function getMathNames() {
	$names = self::$dataCache->getItem( $this->mCode, 'mathNames' );
	return $names;
}
480
/**
 * Return the 'datePreferences' item stored for this language in the
 * localisation cache.
 */
function getDatePreferences() {
	$preferences = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $preferences;
}
484
/**
 * Return the 'dateFormats' item stored for this language in the
 * localisation cache.
 */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
488
/**
 * Return the default date format of this language. The special stored
 * value 'dmy or mdy' is resolved through $wgAmericanDates: 'mdy' when it
 * is truthy, 'dmy' otherwise.
 */
function getDefaultDateFormat() {
	$stored = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $stored !== 'dmy or mdy' ) {
		return $stored;
	}

	global $wgAmericanDates;
	if ( $wgAmericanDates ) {
		return 'mdy';
	}
	return 'dmy';
}
498
/**
 * Return the 'datePreferenceMigrationMap' item stored for this language
 * in the localisation cache.
 */
function getDatePreferenceMigrationMap() {
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
502
/**
 * Return the entry for $image inside the 'imageFiles' item stored for
 * this language in the localisation cache.
 */
function getImageFile( $image ) {
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
506
/**
 * Return the 'defaultUserOptionOverrides' item stored for this language
 * in the localisation cache.
 */
function getDefaultUserOptionOverrides() {
	$overrides = self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
	return $overrides;
}
510
/**
 * Return the 'extraUserToggles' item stored for this language in the
 * localisation cache.
 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return $toggles;
}
514
/**
 * Return the text of the message "tog-" followed by $tog.
 */
function getUserToggle( $tog ) {
	$messageKey = "tog-$tog";
	return $this->getMessageFromDB( $messageKey );
}
518
/**
 * Get language names, indexed by code. Entries of $wgExtraLanguageNames
 * take precedence over those of $wgLanguageNames.
 *
 * If $customisedOnly is true, only codes that have a messages file in
 * $IP/languages/messages are returned. If that directory cannot be opened
 * the customised list is empty.
 *
 * @param $customisedOnly bool
 * @return array language code => language name
 */
public static function getLanguageNames( $customisedOnly = false ) {
	global $wgLanguageNames, $wgExtraLanguageNames;
	$allNames = $wgExtraLanguageNames + $wgLanguageNames;
	if ( !$customisedOnly ) {
		return $allNames;
	}

	global $IP;
	$names = array();
	$dir = opendir( "$IP/languages/messages" );
	if ( $dir === false ) {
		// Nothing to scan; do not hand a non-resource to readdir()/closedir().
		return $names;
	}
	while ( false !== ( $file = readdir( $dir ) ) ) {
		$code = self::getCodeFromFileName( $file, 'Messages' );
		if ( $code && isset( $allNames[$code] ) ) {
			$names[$code] = $allNames[$code];
		}
	}
	closedir( $dir );
	return $names;
}
542
/**
 * Get translated language names. This is done on a best-effort basis: the
 * 'LanguageGetTranslatedLanguageNames' hook may fill in names, and every
 * code it leaves out gets the name from Language::getLanguageNames().
 * The CLDR extension provides translated names.
 *
 * @param $code String Language code.
 * @return Array language code => language name
 * @since 1.18.0
 */
public static function getTranslatedLanguageNames( $code ) {
	$names = array();
	wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );

	foreach ( self::getLanguageNames() as $langCode => $langName ) {
		if ( isset( $names[$langCode] ) ) {
			continue;
		}
		$names[$langCode] = $langName;
	}

	return $names;
}
561
/**
 * Get a message from the MediaWiki namespace, fetched through wfMsgExt()
 * with the 'parsemag' option and this object as the language.
 *
 * @param $msg String: message name
 * @return string
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
571
/**
 * Return the name of the language with the given code, or an empty string
 * when the code is not among getLanguageNames().
 *
 * @param $code String
 * @return string
 */
function getLanguageName( $code ) {
	$names = self::getLanguageNames();
	return array_key_exists( $code, $names ) ? $names[$code] : '';
}
579
/**
 * Month name for a 1-based month number (1 selects 'january').
 */
function getMonthName( $key ) {
	$messageKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
583
/**
 * Genitive month name for a 1-based month number (1 selects 'january-gen').
 */
function getMonthNameGen( $key ) {
	$messageKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
587
/**
 * Abbreviated month name for a 1-based month number (1 selects 'jan').
 */
function getMonthAbbreviation( $key ) {
	$messageKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
591
/**
 * Weekday name for a 1-based weekday number (1 selects 'sunday').
 */
function getWeekdayName( $key ) {
	$messageKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
595
/**
 * Abbreviated weekday name for a 1-based weekday number (1 selects 'sun').
 */
function getWeekdayAbbreviation( $key ) {
	$messageKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
599
/**
 * Iranian calendar month name for a 1-based month number.
 */
function getIranianCalendarMonthName( $key ) {
	$messageKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
603
/**
 * Hebrew calendar month name for a 1-based month number (13 and 14 select
 * the 'm6a' and 'm6b' messages).
 */
function getHebrewCalendarMonthName( $key ) {
	$messageKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
607
/**
 * Genitive Hebrew calendar month name for a 1-based month number (13 and
 * 14 select the 'm6a-gen' and 'm6b-gen' messages).
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$messageKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
611
/**
 * Hijri calendar month name for a 1-based month number.
 */
function getHijriCalendarMonthName( $key ) {
	$messageKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
615
/**
 * Used by date() and time() to adjust the time output.
 *
 * The correction string is one of:
 *  - "ZoneInfo|offset|zone name": convert with the named time zone, or use
 *    the stored offset when the zone is unknown or unavailable
 *  - "Offset|minutes": shift by the given number of minutes
 *  - "System" or an empty string: shift by $wgLocalTZoffset minutes
 *  - "H:MM" or "H": an offset in hours and minutes, or whole hours
 *
 * @param $ts Int the time in date('YmdHis') format
 * @param $tz Mixed: adjust the time by this amount (default false, mean we
 *            get user timecorrection setting)
 * @return int
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( $tz === false ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	$data = explode( '|', $tz, 3 );

	if ( $data[0] == 'ZoneInfo' ) {
		if ( isset( $data[2] ) && function_exists( 'timezone_open' ) ) {
			// Unknown zone names make timezone_open() warn and return false.
			$userZone = @timezone_open( $data[2] );
			if ( $userZone !== false ) {
				$date = date_create( $ts, timezone_open( 'UTC' ) );
				date_timezone_set( $date, $userZone );
				return date_format( $date, 'YmdHis' );
			}
		}
		# Unrecognized timezone, default to 'Offset' with the stored offset.
		$data[0] = 'Offset';
	}

	$minDiff = 0;
	if ( $data[0] == 'System' || $tz == '' ) {
		# Global offset in minutes.
		if ( isset( $wgLocalTZoffset ) ) {
			$minDiff = $wgLocalTZoffset;
		}
	} elseif ( $data[0] == 'Offset' ) {
		$minDiff = isset( $data[1] ) ? intval( $data[1] ) : 0;
	} else {
		$parts = explode( ':', $tz );
		if ( count( $parts ) == 2 ) {
			$hours = intval( $parts[0] );
			$minutes = intval( $parts[1] );
			$minDiff = abs( $hours ) * 60 + $minutes;
			// intval( '-0' ) is 0, so for "-0:30" the sign has to be read
			// from the text itself.
			$negativeZero = ( $hours == 0 && strpos( ltrim( $parts[0] ), '-' ) === 0 );
			if ( $hours < 0 || $negativeZero ) {
				$minDiff = -$minDiff;
			}
		} else {
			$minDiff = intval( $parts[0] ) * 60;
		}
	}

	# No difference ? Return time unchanged
	if ( 0 == $minDiff ) {
		return $ts;
	}

	wfSuppressWarnings(); // E_STRICT system time bitching
	# Generate an adjusted date; take advantage of the fact that gmmktime
	# will normalize out-of-range values so we don't have to split $minDiff
	# into hours and minutes. The GMT variants are used so that the server
	# time zone's daylight-saving transitions cannot shift the result.
	$t = gmmktime(
		(int)substr( $ts, 8, 2 ), # Hours
		(int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
		(int)substr( $ts, 12, 2 ), # Seconds
		(int)substr( $ts, 4, 2 ), # Month
		(int)substr( $ts, 6, 2 ), # Day
		(int)substr( $ts, 0, 4 ) ); # Year

	$date = gmdate( 'YmdHis', $t );
	wfRestoreWarnings();

	return $date;
}
688
689 /**
690 * This is a workalike of PHP's date() function, but with better
691 * internationalisation, a reduced set of format characters, and a better
692 * escaping format.
693 *
694 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
695 * PHP manual for definitions. There are a number of extensions, which
696 * start with "x":
697 *
698 * xn Do not translate digits of the next numeric format character
699 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
700 * xr Use roman numerals for the next numeric format character
701 * xh Use hebrew numerals for the next numeric format character
702 * xx Literal x
703 * xg Genitive month name
704 *
705 * xij j (day number) in Iranian calendar
706 * xiF F (month name) in Iranian calendar
707 * xin n (month number) in Iranian calendar
708 * xiY Y (full year) in Iranian calendar
709 *
710 * xjj j (day number) in Hebrew calendar
711 * xjF F (month name) in Hebrew calendar
712 * xjt t (days in month) in Hebrew calendar
713 * xjx xg (genitive month name) in Hebrew calendar
714 * xjn n (month number) in Hebrew calendar
715 * xjY Y (full year) in Hebrew calendar
716 *
717 * xmj j (day number) in Hijri calendar
718 * xmF F (month name) in Hijri calendar
719 * xmn n (month number) in Hijri calendar
720 * xmY Y (full year) in Hijri calendar
721 *
722 * xkY Y (full year) in Thai solar calendar. Months and days are
723 * identical to the Gregorian calendar
724 * xoY Y (full year) in Minguo calendar or Juche year.
725 * Months and days are identical to the
726 * Gregorian calendar
727 * xtY Y (full year) in Japanese nengo. Months and days are
728 * identical to the Gregorian calendar
729 *
730 * Characters enclosed in double quotes will be considered literal (with
731 * the quotes themselves removed). Unmatched quotes will be considered
732 * literal quotes. Example:
733 *
734 * "The month is" F => The month is January
735 * i's" => 20'11"
736 *
737 * Backslash escaping is also supported.
738 *
739 * Input timestamp is assumed to be pre-normalized to the desired local
740 * time zone, if any.
741 *
742 * @param $format String
743 * @param $ts String: 14-character timestamp
744 * YYYYMMDDHHMMSS
745 * 01234567890123
746 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
747 */
function sprintfDate( $format, $ts ) {
	$s = '';
	// One-shot modifiers set by xn / xr / xh; consumed by the next numeric
	// field that uses them.
	$raw = false;
	$roman = false;
	$hebrewNum = false;
	// Sticky raw-digit flag flipped by xN.
	$rawToggle = false;
	// Conversions of $ts, computed on first use and reused afterwards.
	$unix = false;
	$iranian = false;
	$hebrew = false;
	$hijri = false;
	$thai = false;
	$minguo = false;
	$tenno = false;

	$formatLength = strlen( $format );
	$calendarPrefixes = array( 'xi', 'xj', 'xk', 'xm', 'xo', 'xt' );

	for ( $p = 0; $p < $formatLength; $p++ ) {
		// A numeric field sets $num; it is emitted after the switch so the
		// digit modifiers are applied in one place.
		$num = false;
		$code = $format[$p];
		if ( $code == 'x' && $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}

		// Calendar-specific codes are three characters long.
		if ( in_array( $code, $calendarPrefixes, true ) && $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}

		switch ( $code ) {
			case 'xx':
				$s .= 'x';
				break;
			case 'xn':
				$raw = true;
				break;
			case 'xN':
				$rawToggle = !$rawToggle;
				break;
			case 'xr':
				$roman = true;
				break;
			case 'xh':
				$hebrewNum = true;
				break;
			case 'xg':
				$s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
				break;
			case 'xjx':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
				break;
			case 'd':
				$num = substr( $ts, 6, 2 );
				break;
			case 'D':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
				break;
			case 'j':
				$num = intval( substr( $ts, 6, 2 ) );
				break;
			case 'xij':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[2];
				break;
			case 'xmj':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[2];
				break;
			case 'xjj':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[2];
				break;
			case 'l':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
				break;
			case 'N':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				// gmdate( 'w' ) is 0 for Sunday; this field counts it as 7.
				$w = gmdate( 'w', $unix );
				$num = $w ? $w : 7;
				break;
			case 'w':
			case 'z':
			case 'W':
			case 't':
			case 'L':
			case 'o':
				// These fields map directly onto the gmdate() character of
				// the same name. 'o' is read with gmdate() like the others
				// so that the ISO year always matches the ISO week ('W'),
				// whatever the server's default time zone is.
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = gmdate( $code, $unix );
				break;
			case 'F':
				$s .= $this->getMonthName( substr( $ts, 4, 2 ) );
				break;
			case 'xiF':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$s .= $this->getIranianCalendarMonthName( $iranian[1] );
				break;
			case 'xmF':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$s .= $this->getHijriCalendarMonthName( $hijri[1] );
				break;
			case 'xjF':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
				break;
			case 'm':
				$num = substr( $ts, 4, 2 );
				break;
			case 'M':
				$s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
				break;
			case 'n':
				$num = intval( substr( $ts, 4, 2 ) );
				break;
			case 'xin':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[1];
				break;
			case 'xmn':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[1];
				break;
			case 'xjn':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[1];
				break;
			case 'xjt':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[3];
				break;
			case 'Y':
				$num = substr( $ts, 0, 4 );
				break;
			case 'xiY':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[0];
				break;
			case 'xmY':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[0];
				break;
			case 'xjY':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[0];
				break;
			case 'xkY':
				if ( !$thai ) {
					$thai = self::tsToYear( $ts, 'thai' );
				}
				$num = $thai[0];
				break;
			case 'xoY':
				if ( !$minguo ) {
					$minguo = self::tsToYear( $ts, 'minguo' );
				}
				$num = $minguo[0];
				break;
			case 'xtY':
				if ( !$tenno ) {
					$tenno = self::tsToYear( $ts, 'tenno' );
				}
				$num = $tenno[0];
				break;
			case 'y':
				$num = substr( $ts, 2, 2 );
				break;
			case 'a':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
				break;
			case 'A':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
				break;
			case 'g':
				// 12-hour clock without padding: hours 0 and 12 both show as 12.
				$h = substr( $ts, 8, 2 );
				$num = $h % 12 ? $h % 12 : 12;
				break;
			case 'G':
				$num = intval( substr( $ts, 8, 2 ) );
				break;
			case 'h':
				// 12-hour clock, zero-padded to two digits.
				$h = substr( $ts, 8, 2 );
				$num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
				break;
			case 'H':
				$num = substr( $ts, 8, 2 );
				break;
			case 'i':
				$num = substr( $ts, 10, 2 );
				break;
			case 's':
				$num = substr( $ts, 12, 2 );
				break;
			case 'c':
			case 'r':
				// Complete date strings are produced by gmdate() and appended
				// as they are, without digit translation.
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$s .= gmdate( $code, $unix );
				break;
			case 'U':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = $unix;
				break;
			case '\\':
				# Backslash escaping
				if ( $p < $formatLength - 1 ) {
					$s .= $format[++$p];
				} else {
					$s .= '\\';
				}
				break;
			case '"':
				# Quoted literal
				if ( $p < $formatLength - 1 ) {
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote === false ) {
						# No terminating quote, assume literal "
						$s .= '"';
					} else {
						$s .= substr( $format, $p + 1, $endQuote - $p - 1 );
						$p = $endQuote;
					}
				} else {
					# Quote at end of string, assume literal "
					$s .= '"';
				}
				break;
			default:
				// Anything else is copied through; for an unknown x-code only
				// its last character is emitted.
				$s .= $format[$p];
		}

		if ( $num !== false ) {
			if ( $rawToggle || $raw ) {
				$s .= $num;
				$raw = false;
			} elseif ( $roman ) {
				$s .= self::romanNumeral( $num );
				$roman = false;
			} elseif ( $hebrewNum ) {
				$s .= self::hebrewNumeral( $num );
				$hebrewNum = false;
			} else {
				$s .= $this->formatNum( $num, true );
			}
		}
	}
	return $s;
}
1056
1057 private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1058 private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
/**
 * Convert a Gregorian timestamp to an Iranian calendar date.
 *
 * Algorithm by Roozbeh Pournader and Mohammad Toossi, originally written
 * in C and released under the terms of the GNU Lesser General Public
 * License. Conversion to PHP was performed by Niklas Laxström.
 *
 * Link: http://www.farsiweb.info/jalali/jalali.c
 *
 * @param $ts String: timestamp starting with YYYYMMDD
 * @return array ( year, month, day ) in the Iranian calendar
 */
private static function tsToIranian( $ts ) {
	// Years since 1600, and zero-based month and day of month
	$gYear = substr( $ts, 0, 4 ) - 1600;
	$gMonth = substr( $ts, 4, 2 ) - 1;
	$gDay = substr( $ts, 6, 2 ) - 1;

	# Days passed from the beginning (including leap years)
	$gDays = 365 * $gYear
		+ floor( ( $gYear + 3 ) / 4 )
		- floor( ( $gYear + 99 ) / 100 )
		+ floor( ( $gYear + 399 ) / 400 );

	// Add the days of the months already completed this year
	for ( $month = 0; $month < $gMonth; $month++ ) {
		$gDays += self::$GREG_DAYS[$month];
	}

	// One extra day once February of a leap year has passed
	$isLeapYear = ( $gYear % 4 === 0 && $gYear % 100 !== 0 ) || ( $gYear % 400 == 0 );
	if ( $gMonth > 1 && $isLeapYear ) {
		$gDays++;
	}

	// Days passed in the current month
	$gDays += $gDay;

	$jDays = $gDays - 79;

	// Whole blocks of 12053 days, each counted as 33 years below
	$blocks = floor( $jDays / 12053 );
	$jDays %= 12053;

	$jYear = 979 + 33 * $blocks + 4 * floor( $jDays / 1461 );
	$jDays %= 1461;

	if ( $jDays >= 366 ) {
		$jYear += floor( ( $jDays - 1 ) / 365 );
		$jDays = floor( ( $jDays - 1 ) % 365 );
	}

	// Walk through the months until the remaining days fit into one
	$monthIndex = 0;
	while ( $monthIndex < 11 && $jDays >= self::$IRANIAN_DAYS[$monthIndex] ) {
		$jDays -= self::$IRANIAN_DAYS[$monthIndex];
		$monthIndex++;
	}

	$jMonth = $monthIndex + 1;
	$jDay = $jDays + 1;

	return array( $jYear, $jMonth, $jDay );
}
1114
/**
 * Convert a Gregorian timestamp to a Hijri date.
 *
 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
 *
 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return Array: Hijri year, month and day of month
 */
private static function tsToHijri( $ts ) {
	$gYear = substr( $ts, 0, 4 );
	$gMonth = substr( $ts, 4, 2 );
	$gDay = substr( $ts, 6, 2 );

	# Dates after 14 October 1582 use the first day-number formula,
	# everything earlier uses the second one
	$afterOct1582 = ( $gYear > 1582 )
		|| ( ( $gYear == 1582 ) && ( $gMonth > 10 ) )
		|| ( ( $gYear == 1582 ) && ( $gMonth == 10 ) && ( $gDay > 14 ) );

	if ( $afterOct1582 ) {
		# -1 for January and February, 0 for every other month
		$monthShift = (int)( ( $gMonth - 14 ) / 12 );
		$dayNumber = (int)( ( 1461 * ( $gYear + 4800 + $monthShift ) ) / 4 )
			+ (int)( ( 367 * ( $gMonth - 2 - 12 * $monthShift ) ) / 12 )
			- (int)( ( 3 * (int)( ( $gYear + 4900 + $monthShift ) / 100 ) ) / 4 )
			+ $gDay - 32075;
	} else {
		$dayNumber = 367 * $gYear
			- (int)( ( 7 * ( $gYear + 5001 + (int)( ( $gMonth - 9 ) / 7 ) ) ) / 4 )
			+ (int)( ( 275 * $gMonth ) / 9 )
			+ $gDay + 1729777;
	}

	# Split the day count into blocks of 10631 days and a remainder
	$rest = $dayNumber - 1948440 + 10632;
	$blocks = (int)( ( $rest - 1 ) / 10631 );
	$rest = $rest - 10631 * $blocks + 354;

	# Year within the current block
	$yearInBlock = ( (int)( ( 10985 - $rest ) / 5316 ) ) * ( (int)( ( 50 * $rest ) / 17719 ) )
		+ ( (int)( $rest / 5670 ) ) * ( (int)( ( 43 * $rest ) / 15238 ) );

	# Day within that year
	$rest = $rest
		- ( (int)( ( 30 - $yearInBlock ) / 15 ) ) * ( (int)( ( 17719 * $yearInBlock ) / 50 ) )
		- ( (int)( $yearInBlock / 16 ) ) * ( (int)( ( 15238 * $yearInBlock ) / 43 ) )
		+ 29;

	$hMonth = (int)( ( 24 * $rest ) / 709 );
	$hDay = $rest - (int)( ( 709 * $hMonth ) / 24 );
	$hYear = 30 * $blocks + $yearInBlock - 30;

	return array( $hYear, $hMonth, $hDay );
}
1157
/**
 * Convert a Gregorian timestamp to a Hebrew date.
 *
 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
 * to translate the relevant functions into PHP and release them under
 * GNU GPL.
 *
 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
 * and Adar II is 14. In a non-leap year, Adar is 6.
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return Array: Hebrew year, month, day of month, and the length in days
 *   of the month that was examined last
 */
private static function tsToHebrew( $ts ) {
	# Split the timestamp
	$gYear = substr( $ts, 0, 4 );
	$gMonth = substr( $ts, 4, 2 );
	$gDay = substr( $ts, 6, 2 );

	$hYear = $gYear + 3760;

	# Renumber the months so that September = 1 and August = 12;
	# September to December are counted towards the following year
	$gMonth += 4;
	if ( $gMonth > 12 ) {
		$gMonth -= 12;
		$gYear++;
		$hYear++;
	}

	# Day number counted from 1 September
	$dayNo = $gDay;
	for ( $m = 1; $m < $gMonth; $m++ ) {
		switch ( $m ) {
			case 6:
				# February, 29 days in a Gregorian leap year
				$febIsLong = $gYear % 400 == 0 || ( $gYear % 4 == 0 && $gYear % 100 > 0 );
				$dayNo += $febIsLong ? 29 : 28;
				break;
			case 1:
			case 3:
			case 8:
			case 10:
				# September, November, April, June
				$dayNo += 30;
				break;
			default:
				$dayNo += 31;
		}
	}

	# Start of the Hebrew year, and of the one following it
	$yearStart = self::hebrewYearStart( $hYear );
	if ( $dayNo <= $yearStart ) {
		# The date lies before the new year, so it belongs to the previous
		# Hebrew year, which ends where this one starts
		$nextYearStart = $yearStart;
		$gYear--;
		$hYear--;
		# Count the days from the previous 1 September instead
		$dayNo += 365;
		if ( ( $gYear % 400 == 0 ) || ( $gYear % 100 != 0 && $gYear % 4 == 0 ) ) {
			$dayNo++;
		}
		$yearStart = self::hebrewYearStart( $hYear );
	} else {
		$nextYearStart = self::hebrewYearStart( $hYear + 1 );
	}

	$hDayOfYear = $dayNo - $yearStart;

	# Add 12 (or 13 for Gregorian leap years) to the difference between the
	# two year starts, so that what remains depends on the year type only
	$gregIsLeap = ( $gYear % 400 == 0 ) || ( $gYear % 100 != 0 && $gYear % 4 == 0 );
	$typeDiff = $nextYearStart - $yearStart + ( $gregIsLeap ? 13 : 12 );

	# 0 = incomplete year, 1 = regular year, 2 = complete year. Taken modulo
	# 30 because a leap year adds the 30 days of Adar I
	$yearPattern = $typeDiff % 30;
	$hIsLeap = $typeDiff >= 30;

	# Step through the months until the remaining days fit into one
	$hDay = $hDayOfYear;
	$hMonth = 1;
	$monthLength = 0;
	while ( $hMonth <= 12 ) {
		if ( $hIsLeap && $hMonth == 6 ) {
			# Leap year: Adar I with 30 days, then Adar II with 29 days
			$monthLength = 30;
			if ( $hDay <= $monthLength ) {
				$hMonth = 13;
			} else {
				$hDay -= $monthLength;
				$monthLength = 29;
				if ( $hDay <= $monthLength ) {
					$hMonth = 14;
				}
			}
		} elseif ( $hMonth == 2 && $yearPattern == 2 ) {
			# Cheshvan in a complete year
			$monthLength = 30;
		} elseif ( $hMonth == 3 && $yearPattern == 0 ) {
			# Kislev in an incomplete year
			$monthLength = 29;
		} else {
			# Odd months have 30 days, even ones 29
			$monthLength = 30 - ( $hMonth - 1 ) % 2;
		}

		if ( $hDay <= $monthLength ) {
			# Found the month
			break;
		}
		$hDay -= $monthLength;
		$hMonth++;
	}

	return array( $hYear, $hMonth, $hDay, $monthLength );
}
1300
/**
 * Calculate where a Hebrew year starts, as days since 1 September.
 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
 * Used for Hebrew date.
 *
 * @param $year Integer: Hebrew year
 * @return Integer
 */
private static function hebrewYearStart( $year ) {
	$prevYear = $year - 1;

	$a = intval( ( 12 * $prevYear + 17 ) % 19 );
	$b = intval( $prevYear % 4 );
	$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * $prevYear;

	# Separate $m into an integer part ($Mar) and the remainder; negative
	# values are shifted by one before truncating and shifted back afterwards
	if ( $m < 0 ) {
		$m--;
	}
	$Mar = intval( $m );
	if ( $m < 0 ) {
		$m++;
	}
	$m -= $Mar;

	# Postpone the start depending on $c and on the remainder
	$c = intval( ( $Mar + 3 * $prevYear + 5 * $b + 5 ) % 7 );
	if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
		$Mar += 1;
	} elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
		$Mar += 2;
	} elseif ( $c == 2 || $c == 4 || $c == 6 ) {
		$Mar += 1;
	}

	$offsetYear = $year - 3761;
	$Mar += intval( $offsetYear / 100 ) - intval( $offsetYear / 400 ) - 24;

	return $Mar;
}
1331
/**
 * Convert the year of a Gregorian timestamp to the Thai solar, Minguo,
 * Juche or Japanese nengō numbering. Month and day are passed through.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * @param $ts String: 14-character timestamp
 * @param $cName String: calendar name ('thai', 'minguo', 'juche' or
 *   'tenno'); any other value leaves the year unchanged
 * @return Array: converted year, month, day
 */
private static function tsToYear( $ts, $cName ) {
	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );

	if ( strcmp( $cName, 'thai' ) == 0 ) {
		# Thai solar years run 543 ahead of the Gregorian ones
		$converted = $gy + 543;
	} elseif ( strcmp( $cName, 'minguo' ) == 0 || strcmp( $cName, 'juche' ) == 0 ) {
		# Minguo and Juche years run 1911 behind the Gregorian ones
		$converted = $gy - 1911;
	} elseif ( strcmp( $cName, 'tenno' ) == 0 ) {
		# Nengō: find the era the date falls in, then number the year
		# from the first year of that era
		if (
			( $gy < 1912 ) ||
			( ( $gy == 1912 ) && ( $gm < 7 ) ) ||
			( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) )
		) {
			# Meiji period
			$eraName = '明治';
			$eraStart = 1868;
		} elseif (
			( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
			( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
			( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
			( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
			( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
		) {
			# Taishō period
			$eraName = '大正';
			$eraStart = 1912;
		} elseif (
			( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
			( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
			( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
		) {
			# Shōwa period
			$eraName = '昭和';
			$eraStart = 1926;
		} else {
			# Heisei period
			$eraName = '平成';
			$eraStart = 1989;
		}

		# The first year of an era is written with 元 instead of a number
		$eraYear = $gy - $eraStart + 1;
		$converted = $eraName . ( $eraYear == 1 ? '元' : $eraYear );
	} else {
		$converted = $gy;
	}

	return array( $converted, $gm, $gd );
}
1415
/**
 * Roman number formatting up to 3000
 *
 * @param $num Mixed: number to format, converted with intval()
 * @return Mixed: the Roman numeral as a string, or the integer itself when
 *   it is not in the range 1..3000
 */
static function romanNumeral( $num ) {
	# One row per decimal place (ones, tens, hundreds, thousands),
	# indexed by the digit at that place
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 3000 ) {
		return $num;
	}

	# Emit the digits from the most significant place downwards
	$places = array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 );
	$roman = '';
	foreach ( $places as $row => $unit ) {
		if ( $num >= $unit ) {
			$roman .= $table[$row][intval( $num / $unit )];
		}
		$num %= $unit;
	}
	return $roman;
}
1441
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * A one-letter number gets a geresh (') appended, a longer one gets
 * gershayim (") before its last letter, and a leading thousands letter is
 * followed by a geresh. Round thousands are spelled out with the word for
 * "thousand(s)".
 *
 * @param $num Mixed: number to format, converted with intval()
 * @return Mixed: the Hebrew numeral as a string, or the integer itself when
 *   it is not in the range 1..9999
 */
static function hebrewNumeral( $num ) {
	# One row per decimal place (ones, tens, hundreds, thousands),
	# indexed by the digit at that place
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	# Round thousands consist of a single letter plus the thousands geresh.
	# The punctuation code below works on byte offsets and expects the string
	# to end in a complete two-byte letter, so it would cut that letter in
	# half and return invalid UTF-8. Spell these numbers out instead.
	if ( $num == 1000 ) {
		return "א' אלף";
	} elseif ( $num % 1000 == 0 ) {
		return $table[0][intval( $num / 1000 )] . "' אלפים";
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				# 15 and 16 are written as 9+6 and 9+7
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					# Geresh after the thousands letter
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	# Every Hebrew letter takes two bytes in UTF-8, so a length of 2 means
	# a single letter and the last two bytes are always the last letter
	if ( strlen( $s ) == 2 ) {
		$str = $s . "'";
	} else {
		$str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
		$str .= substr( $s, strlen( $s ) - 2, 2 );
	}

	# Use the final form of the last letter where one exists
	$start = substr( $str, 0, strlen( $str ) - 2 );
	$end = substr( $str, strlen( $str ) - 2 );
	switch( $end ) {
		case 'כ':
			$str = $start . 'ך';
			break;
		case 'מ':
			$str = $start . 'ם';
			break;
		case 'נ':
			$str = $start . 'ן';
			break;
		case 'פ':
			$str = $start . 'ף';
			break;
		case 'צ':
			$str = $start . 'ץ';
			break;
	}
	return $str;
}
1500
/**
 * This is meant to be used by time(), date(), and timeanddate() to get
 * the date preference they're supposed to use, it should be used in
 * all children.
 *
 *<code>
 * function timeanddate([...], $format = true) {
 * 	$datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return string: the preference, or 'default' when it is empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		# An explicit format was passed in
		$preference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$preference = $wgUser->getDatePreference();
	} else {
		$preference = (string)User::getDefaultOption( 'date' );
	}

	return $preference == '' ? 'default' : $preference;
}
1538
/**
 * Get a format string for a given type and preference.
 *
 * The 'default' preference, and any preference without a "$pref $type"
 * entry in the 'dateFormats' data, resolves to getDefaultDateFormat().
 * Results are remembered in $this->dateFormatStrings under the preference
 * that was actually used.
 *
 * @param $type May be date, time or both
 * @param $pref The format name as it appears in Messages*.php
 */
function getDateFormatString( $type, $pref ) {
	if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
		return $this->dateFormatStrings[$type][$pref];
	}

	$formatString = null;
	if ( $pref != 'default' ) {
		$formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}
	if ( is_null( $formatString ) ) {
		# Nothing usable for this preference, use the language default
		$pref = $this->getDefaultDateFormat();
		$formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}

	$this->dateFormatStrings[$type][$pref] = $formatString;
	return $formatString;
}
1560
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'date', $preference );
	return $this->sprintfDate( $formatString, $mwTimestamp );
}
1579
/**
 * Format the time part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'time', $preference );
	return $this->sprintfDate( $formatString, $mwTimestamp );
}
1598
/**
 * Format both the time and the date of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'both', $preference );
	return $this->sprintfDate( $formatString, $mwTimestamp );
}
1618
/**
 * Fetch a single entry of this language's 'messages' data.
 *
 * @param $key String: message key
 * @return Mixed: whatever the data cache holds for that key
 */
function getMessage( $key ) {
	$message = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
	return $message;
}
1622
/**
 * Fetch the complete 'messages' data of this language.
 *
 * @return Mixed: whatever the data cache holds for the 'messages' item
 */
function getAllMessages() {
	$messages = self::$dataCache->getItem( $this->mCode, 'messages' );
	return $messages;
}
1626
/**
 * This is a wrapper for iconv in all languages except esperanto,
 * which does some nasty x-conversions beforehand.
 *
 * Even with //IGNORE iconv can whine about illegal characters in the
 * *input* string, so warnings are suppressed around the call.
 * REF: http://bugs.php.net/bug.php?id=37166
 * REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
 *
 * @param $in String: source encoding
 * @param $out String: target encoding
 * @param $string String: text to convert
 * @return Mixed: the result of PHP's iconv()
 */
function iconv( $in, $out, $string ) {
	wfSuppressWarnings();
	$converted = iconv( $in, "{$out}//IGNORE", $string );
	wfRestoreWarnings();
	return $converted;
}
1640
1641 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
/**
 * Regex callback: capitalise the first letter of the first capture group.
 *
 * @param $matches Array: match data from preg_replace_callback()
 * @return String
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
1645
/**
 * Regex callback: upper-case the whole match with mb_strtoupper().
 *
 * @param $matches Array: match data from preg_replace_callback()
 * @return String
 */
function ucwordbreaksCallbackMB( $matches ) {
	$wholeMatch = $matches[0];
	return mb_strtoupper( $wholeMatch );
}
1649
/**
 * Regex callback: translate the first capture group with the first of the
 * maps returned by getCaseMaps() (the upper-case map).
 *
 * @param $matches Array: match data from preg_replace_callback()
 * @return String
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[0] );
}
1654
/**
 * Regex callback: translate the first capture group with the second of the
 * maps returned by getCaseMaps() (the lower-case map).
 *
 * @param $matches Array: match data from preg_replace_callback()
 * @return String
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[1] );
}
1659
/**
 * Regex callback: upper-case the whole match with mb_strtoupper().
 *
 * @param $matches Array: match data from preg_replace_callback()
 * @return String
 */
function ucwordsCallbackMB( $matches ) {
	$wholeMatch = $matches[0];
	return mb_strtoupper( $wholeMatch );
}
1663
/**
 * Regex callback: translate the whole match with the first of the maps
 * returned by getCaseMaps() (the upper-case map).
 *
 * @param $matches Array: match data from preg_replace_callback()
 * @return String
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[0], $caseMaps[0] );
}
1668
/**
 * Make a string's first character uppercase
 *
 * @param $str String
 * @return String
 */
function ucfirst( $str ) {
	$firstByte = ord( $str );
	if ( $firstByte >= 128 ) {
		// first byte is outside ASCII: use the multibyte-aware logic
		return $this->uc( $str, true );
	}
	if ( $firstByte >= 96 ) {
		// use PHP's ucfirst()
		return ucfirst( $str );
	}
	// first byte is below the lower-case range, nothing to do
	return $str;
}
1683
/**
 * Convert a string to uppercase
 *
 * @param $str String
 * @param $first Bool: only convert the first character
 * @return String
 */
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// no high bytes: PHP's byte-wise functions are enough
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return mb_strtoupper( $str );
		}
		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// no mbstring: convert letter by letter through ucCallback()
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/{$anchor}([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
1711
/**
 * Make a string's first character lowercase
 *
 * @param $str String
 * @return String
 */
function lcfirst( $str ) {
	$firstByte = ord( $str );
	if ( !$firstByte ) {
		// empty string, or one starting with a NUL byte
		return strval( $str );
	}
	if ( $firstByte >= 128 ) {
		// first byte is outside ASCII: use the multibyte-aware logic
		return $this->lc( $str, true );
	}
	if ( $firstByte > 96 ) {
		// first byte is above the upper-case range, nothing to do
		return $str;
	}
	$str[0] = strtolower( $str[0] );
	return $str;
}
1725
/**
 * Convert a string to lowercase
 *
 * @param $str String
 * @param $first Bool: only convert the first character
 * @return String
 */
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// no high bytes: PHP's byte-wise functions are enough
		if ( $first ) {
			return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
		}
		return strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return mb_strtolower( $str );
		}
		return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// no mbstring: convert letter by letter through lcCallback()
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/{$anchor}([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
1750
/**
 * Check whether a string contains any byte in the range 0x80-0xff.
 *
 * @param $str String
 * @return Bool
 */
function isMultibyte( $str ) {
	$hasHighByte = preg_match( '/[\x80-\xff]/', $str );
	return (bool)$hasHighByte;
}
1754
/**
 * Lower-case a string, then capitalise the first letter of each word
 * (i.e. at the start of the string and after each space).
 *
 * @param $str String
 * @return String
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// method used to capitalize a single match
	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
1780
/**
 * Capitalize words at word breaks
 *
 * @param $str String
 * @return String
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// method used to capitalize a single match
	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $callback ), $str );
}
1813
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s String
 * @return String: the result of uc()
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
1828
/**
 * Return the string unchanged when it is plain ASCII or matches the UTF-8
 * pattern below; otherwise convert it from the language's fallback 8-bit
 * encoding to UTF-8.
 *
 * @param $s String
 * @return String
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# Only strings with high bytes that do not form UTF-8 sequences need
	# to be converted
	$hasHighBytes = preg_match( '/[\x80-\xff]/', $s );
	if (
		$hasHighBytes &&
		!preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s )
	) {
		return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
	}

	return $s;
}
1847
/**
 * Get the 'fallback8bitEncoding' item of this language's data.
 *
 * @return Mixed: whatever the data cache holds for that item
 */
function fallback8bitEncoding() {
	$encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
	return $encoding;
}
1851
/**
 * Most writing systems use whitespace to break up words.
 * Some languages such as Chinese don't conventionally do this,
 * which requires special handling when breaking up words for
 * searching etc.
 *
 * @return Bool: always true in this class
 */
function hasWordBreaks() {
	$usesWhitespace = true;
	return $usesWhitespace;
}
1861
/**
 * Some languages such as Chinese require word segmentation,
 * Specify such segmentation when overridden in derived class.
 * This implementation returns its input unchanged.
 *
 * @param $string String
 * @return String
 */
function segmentByWord( $string ) {
	$segmented = $string;
	return $segmented;
}
1872
/**
 * Some languages have special punctuation need to be normalized.
 * Make such changes here. This implementation only applies
 * convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
1883
/**
 * Convert double-width roman characters to single-width.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only the digits and the unaccented upper- and lower-case letters are
 * converted. The full-width forms are built from their code points rather
 * than written as literals, so that the table cannot be damaged by tools
 * that normalise full-width characters to ASCII.
 *
 * @param $string String
 * @return String
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfChars = str_split( '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' );
		$fullChars = array();
		foreach ( $halfChars as $char ) {
			# The full-width form of an ASCII character lies 0xFEE0 above it;
			# every such code point takes three bytes in UTF-8
			$code = ord( $char ) + 0xFEE0;
			$fullChars[] = chr( 0xE0 | ( $code >> 12 ) )
				. chr( 0x80 | ( ( $code >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $code & 0x3F ) );
		}
		$half = $halfChars;
		$full = $fullChars;
	}

	return str_replace( $full, $half, $string );
}
1902
/**
 * Surround every match of the pattern's first capture group with spaces,
 * then collapse runs of spaces into a single space.
 *
 * @param $string String
 * @param $pattern String: regular expression with a capture group
 * @return String
 */
protected static function insertSpace( $string, $pattern ) {
	$padded = preg_replace( $pattern, " $1 ", $string );
	return preg_replace( '/ +/', ' ', $padded );
}
1908
/**
 * Some languages, e.g. Chinese, need to do a conversion in order for
 * search results to be displayed correctly. This implementation returns
 * the terms unchanged.
 *
 * @param $termsArray Array
 * @return Array
 */
function convertForSearchResult( $termsArray ) {
	$converted = $termsArray;
	return $converted;
}
1914
/**
 * Get the first character of a string.
 *
 * A Hangul syllable (U+AC00..U+D7A3) is replaced by a jamo chosen from the
 * code point range the syllable falls in.
 *
 * @param $s string
 * @return string: empty when the string does not start with a sequence
 *   matched by the UTF-8 pattern below
 */
function firstChar( $s ) {
	# Upper bound (exclusive) of each syllable range => jamo returned for it
	static $jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	if ( strlen( $char ) != 3 ) {
		# Hangul syllables are three bytes long, this cannot be one
		return $char;
	}

	// Break down Hangul syllables to grab the first jamo
	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}
	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}
	return "\xe3\x85\x8e";
}
1972
/**
 * Some languages may have an alternate char encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 * If this language is used as the primary content language,
 * an override to the defaults can be set here on startup.
 *
 * This implementation does nothing.
 */
function initEncoding() {
}
1979
/**
 * For some languages we'll want to explicitly specify which characters
 * make it into the edit box raw or are converted in some way or another.
 * Note that if wgOutputEncoding is different from wgInputEncoding, this
 * text will be further converted to wgOutputEncoding.
 *
 * The text is converted from UTF-8 to $wgEditEncoding unless that setting
 * is empty or 'UTF-8'.
 *
 * @param $s String
 * @return String
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$staysUtf8 = ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	if ( !$staysUtf8 ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}
	return $s;
}
1994
/**
 * Counterpart of recodeForEdit(): convert text from $wgEditEncoding to
 * UTF-8, unless that setting is empty or 'UTF-8'.
 *
 * @param $s String
 * @return String
 */
function recodeInput( $s ) {
	global $wgEditEncoding;

	# An empty setting means UTF-8
	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

	return ( $enc == 'UTF-8' ) ? $s : $this->iconv( $enc, 'UTF-8', $s );
}
2009
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only. Here the
 * Arabic and Malayalam pair files are applied only when
 * $wgAllUnicodeFixes is set.
 *
 * @param $s String
 * @return String
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}
	return $s;
}
2027
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param $file String: name of the pair file
 * @param $string String: text to transform
 * @return String
 * @throws MWException when the file cannot be loaded
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	$data = wfGetPrecompiledData( $file );
	if ( $data === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}

	# Keep the replacer so the file is loaded only once per object
	$replacer = new ReplacementArray( $data );
	$this->transformData[$file] = $replacer;
	return $replacer->replace( $string );
}
2046
/**
 * For right-to-left language support
 *
 * @return bool: the 'rtl' item of this language's data
 */
function isRTL() {
	$rtl = self::$dataCache->getItem( $this->mCode, 'rtl' );
	return $rtl;
}
2055
/**
 * Return the correct HTML 'dir' attribute value for this language.
 * @return String: 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}
	return 'ltr';
}
2063
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
2075
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
2087
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction
 *
 * @return string: U+200F for right-to-left languages, U+200E otherwise,
 *   encoded as UTF-8
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
2096
/**
 * Get the 'capitalizeAllNouns' item of this language's data.
 *
 * @return Mixed: whatever the data cache holds for that item
 */
function capitalizeAllNouns() {
	$flag = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
	return $flag;
}
2100
/**
 * An arrow, depending on the language direction
 *
 * @return string: a left arrow for right-to-left languages, a right arrow
 *   otherwise
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
2109
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
 *
 * @return bool: the 'linkPrefixExtension' item of this language's data
 */
function linkPrefixExtension() {
	$enabled = self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
	return $enabled;
}
2118
/**
 * Get the 'magicWords' item of this language's data.
 *
 * @return Mixed: whatever the data cache holds for that item
 */
function getMagicWords() {
	$magicWords = self::$dataCache->getItem( $this->mCode, 'magicWords' );
	return $magicWords;
}
2122
/**
 * Run the 'LanguageGetMagic' hook on $this->mMagicExtensions, at most
 * once per object.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		# Set the flag first, as in the original order of operations
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( 'LanguageGetMagic' );
	}
}
2132
/**
 * Fill a MagicWord object with data from here.
 *
 * Entries in $this->mMagicExtensions take precedence over those from
 * getMagicWords(). The first element of an entry is stored as the case
 * sensitivity flag, the remaining ones as the synonyms. When no array
 * entry is found, a line is written with error_log() and the object is
 * left untouched.
 *
 * @param $mw MagicWord: object to fill, looked up by its mId
 */
function getMagic( $mw ) {
	$this->doMagicHook();

	$id = $mw->mId;
	if ( isset( $this->mMagicExtensions[$id] ) ) {
		$rawEntry = $this->mMagicExtensions[$id];
	} else {
		$magicWords = $this->getMagicWords();
		$rawEntry = isset( $magicWords[$id] ) ? $magicWords[$id] : false;
	}

	if ( is_array( $rawEntry ) ) {
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
	} else {
		error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
	}
}
2155
/**
 * Add magic words to the extension array.
 *
 * @param $newWords array keyed by language code; each value is an array of
 *   magic word entries that is merged into $this->mMagicExtensions
 */
function addMagicWordsByLang( $newWords ) {
	// Collect this language followed by its fallbacks, stopping on a
	// false-ish code or on a code that was already seen.
	$chain = array();
	for ( $lang = $this->getCode();
		$lang && !in_array( $lang, $chain );
		$lang = self::getFallbackFor( $lang )
	) {
		$chain[] = $lang;
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	// Walk from the end of the chain towards this language. The array union
	// keeps the left operand's keys, so entries merged later win.
	foreach ( array_reverse( $chain ) as $lang ) {
		if ( isset( $newWords[$lang] ) ) {
			$this->mMagicExtensions = $newWords[$lang] + $this->mMagicExtensions;
		}
	}
}
2176
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * The array is built on first use from the 'specialPageAliases' item of
 * the localisation data cache, passed through the
 * 'LanguageGetSpecialPageAliases' hook, and then kept on the object.
 *
 * @return mixed the cached alias data
 */
function getSpecialPageAliases() {
	if ( $this->mExtendedSpecialPageAliases === null ) {
		// First call: load the base data, then let hooks extend it in place.
		$this->mExtendedSpecialPageAliases =
			self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
		wfRunHooks(
			'LanguageGetSpecialPageAliases',
			array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
		);
	}

	return $this->mExtendedSpecialPageAliases;
}
2193
/**
 * Wrap text in an <em> element. Kept as a method because italic is
 * unsuitable for some languages.
 *
 * @param $text String: the text to be emphasized.
 * @return string
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
2203
/**
 * Format a number for display in this language.
 *
 * By default numbers come out in plain en_US style, e.g. 293,291.235.
 * That is not suitable for every language: some use their own digits,
 * others swap the roles of comma and dot ("293.291,235").
 *
 * Steps, in order:
 *  - unless $nocommafy is set, group the digits with commafy() and apply
 *    separatorTransformTable() if it is non-empty;
 *  - if $wgTranslateNumerals is set, apply digitTransformTable() if it is
 *    non-empty.
 *
 * Example call:
 * <code>
 * wfMsg( 'message', $wgLang->formatNum( $num ) )
 * </code>
 *
 * @todo check if it's viable to use localeconv() for the decimal
 *       separator thing.
 * @param $number Mixed: the string to be formatted, should be an integer
 *        or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates
 * @return string
 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	// Gather the translation tables to apply, in application order.
	$tables = array();
	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$tables[] = $this->separatorTransformTable();
	}
	if ( $wgTranslateNumerals ) {
		$tables[] = $this->digitTransformTable();
	}

	foreach ( $tables as $table ) {
		if ( $table ) {
			$number = strtr( $number, $table );
		}
	}

	return $number;
}
2247
/**
 * Undo the formatting applied by formatNum().
 *
 * The digit table and then the separator table are applied in reverse
 * (flipped) direction, and finally every ',' is removed.
 *
 * @param $number String: formatted number
 * @return string
 */
function parseFormattedNumber( $number ) {
	$tables = array(
		$this->digitTransformTable(),
		$this->separatorTransformTable(),
	);
	foreach ( $tables as $table ) {
		if ( $table ) {
			$number = strtr( $number, array_flip( $table ) );
		}
	}

	// Drop the grouping commas.
	return strtr( $number, array( ',' => '' ) );
}
2262
/**
 * Adds commas to a given number, grouping the integer digits in threes.
 *
 * The input is processed reversed so that groups are counted from the
 * right; the negative lookahead skips digit runs that are followed (in the
 * reversed string) by a '.', i.e. the fractional part.
 *
 * @param $_ mixed
 * @return string
 */
function commafy( $_ ) {
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
2272
/**
 * Look up the 'digitTransformTable' item for this language code in the
 * localisation data cache.
 *
 * @return mixed the cached value; formatNum() passes it to strtr() when
 *   it is non-empty
 */
function digitTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
	return $table;
}
2276
/**
 * Look up the 'separatorTransformTable' item for this language code in the
 * localisation data cache.
 *
 * @return mixed the cached value; formatNum() passes it to strtr() when
 *   it is non-empty
 */
function separatorTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
	return $table;
}
2280
/**
 * Join a list of strings into one string using the 'comma-separator'
 * message between items, except that the last two items are joined with
 * the 'and' message followed by the 'word-separator' message.
 *
 * An empty list yields ''; a single item is returned as is.
 *
 * @param $l Array
 * @return string
 */
function listToText( $l ) {
	$last = count( $l ) - 1;

	if ( $last == 1 ) {
		// Exactly two items: only the "and" joiner is needed.
		return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
	}

	// Build the text from the last item backwards.
	$text = '';
	for ( $i = $last; $i >= 0; $i-- ) {
		if ( $i == $last ) {
			$text = $l[$i];
		} elseif ( $i == $last - 1 ) {
			$text = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $text;
		} else {
			$text = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $text;
		}
	}
	return $text;
}
2307
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 *
 * @param $list array of strings to put in a comma list
 * @return string
 */
function commaList( $list ) {
	$separator = wfMsgExt(
		'comma-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the legacy ( array, separator ) argument order of
	// implode() is not accepted by newer PHP versions.
	return implode( $separator, $list );
}
2323
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 *
 * @param $list array of strings to put in a semicolon list
 * @return string
 */
function semicolonList( $list ) {
	$separator = wfMsgExt(
		'semicolon-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the legacy ( array, separator ) argument order of
	// implode() is not accepted by newer PHP versions.
	return implode( $separator, $list );
}
2339
/**
 * Same as commaList, but separate it with the pipe instead, using the
 * local pipe-separator message.
 *
 * @param $list array of strings to put in a pipe list
 * @return string
 */
function pipeList( $list ) {
	$separator = wfMsgExt(
		'pipe-separator',
		array( 'escapenoentities', 'language' => $this )
	);
	// Separator first: the legacy ( array, separator ) argument order of
	// implode() is not accepted by newer PHP versions.
	return implode( $separator, $list );
}
2354
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * Database columns can have limited byte lengths; with multi-byte
 * character sets only whole characters may be kept, otherwise broken
 * characters can be passed on to the user.
 *
 * A negative $length truncates from the beginning of the string instead
 * of the end. A $length of 0 returns just the ellipsis. If the truncated
 * result including the ellipsis would not be shorter than the input, the
 * input is returned unchanged (bug 22181).
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (excluding ellipses)
 * @param $ellipsis String to append to the truncated text; the default
 *   '...' is replaced by the localized 'ellipsis' message
 * @return string
 */
function truncate( $string, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Cases where nothing needs to be cut
	if ( $length == 0 ) {
		return $ellipsis;
	}
	if ( strlen( $string ) <= abs( $length ) ) {
		return $string;
	}

	if ( $length > 0 ) {
		// Keep the head: xyz...
		$truncated = $this->removeBadCharLast( substr( $string, 0, $length ) ) . $ellipsis;
	} else {
		// Keep the tail: ...xyz
		$truncated = $ellipsis . $this->removeBadCharFirst( substr( $string, $length ) );
	}

	# Only use the truncated form when it is really shorter
	return ( strlen( $truncated ) < strlen( $string ) ) ? $truncated : $string;
}
2398
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * An empty string is returned unchanged.
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharLast( $string ) {
	$len = strlen( $string );
	if ( $len === 0 ) {
		// Nothing to inspect; avoids reading an invalid string offset.
		return $string;
	}

	$char = ord( $string[$len - 1] );
	$m = array();
	if ( $char >= 0xc0 ) {
		# We got the first byte only of a multibyte char; remove it.
		$string = (string)substr( $string, 0, -1 );
	} elseif ( $char >= 0x80 &&
		# The "s" modifier lets "." match newlines, so the prefix group
		# also covers multi-line strings.
		preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
			'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
	{
		# We chopped in the middle of a character; remove it
		$string = $m[1];
	}
	return $string;
}
2421
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * An empty string is returned unchanged.
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharFirst( $string ) {
	if ( strlen( $string ) === 0 ) {
		// Nothing to inspect; avoids reading an invalid string offset.
		return $string;
	}

	$char = ord( $string[0] );
	if ( $char >= 0x80 && $char < 0xc0 ) {
		# The string starts with continuation bytes, so we chopped in the
		# middle of a character; remove the whole run
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
2437
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML)
 *
 * Note: tries to fix broken HTML with MWTidy
 *
 * The text is scanned byte by byte. Bytes inside tags do not count towards
 * the length and an entity counts as one displayed character. Tags still
 * open at the truncation point are closed in the result. If adding the
 * ellipsis would not make the displayed text shorter, the text is
 * returned untruncated.
 *
 * @param string $text HTML string to truncate
 * @param int $length (zero/positive) Maximum length (excluding ellipses)
 * @param string $ellipsis String to append to the truncated text; the
 *   default '...' is replaced by the localized 'ellipsis' message
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}
	# Check if there is no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML
	}
	$text = MWTidy::tidy( $text ); // fix tags
	$displayLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = '';
	$openTags = array(); // open tag stack
	$textLen = strlen( $text );
	for ( $pos = 0; $pos < $textLen; ++$pos ) {
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for slash)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$displayLen++; // entity is one displayed char
				}
			} else {
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$displayLen++; // this char is displayed
					// Add on the other display text after this...
					$skipped = $this->truncate_skip(
						$ret, $text, "<>&", $pos + 1, $length - $displayLen );
					$displayLen += $skipped;
					$pos += $skipped;
				}
			}
		}
		# Consider truncation once the display length has reached the maximum.
		# Double-check that we're not in the middle of a bracket/entity...
		if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless
				# the ellipsis actually makes the string longer.
				$pOpenTags = $openTags; // save state
				$pRet = $ret; // save state
			} elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
				# Ellipsis won't make string longer/equal, the truncation point was OK.
				$openTags = $pOpenTags; // reload state
				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
	}
	if ( $displayLen == 0 ) {
		return ''; // no text shown, nothing to format
	}
	// Close the last tag if left unclosed by bad HTML.
	// Argument order is ( &$tag, $tagType, $lastCh, &$openTags ), the same
	// as for the two calls inside the loop.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
2546
/**
 * truncateHtml() helper function
 * like strcspn() but adds the skipped chars to $ret
 *
 * @param String &$ret Result string the skipped bytes are appended to
 * @param String $text Text being scanned
 * @param String $search Bytes that stop the scan
 * @param int $start Offset in $text to start scanning at
 * @param int $len Length argument handed to strcspn() unchanged
 * @return int Number of bytes skipped (0 when $start is past the end)
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
	if ( $start >= strlen( $text ) ) {
		return 0; // nothing left to scan
	}
	$skipCount = strcspn( $text, $search, $start, $len );
	$ret .= substr( $text, $start, $skipCount );
	return $skipCount;
}
2557
/**
 * truncateHtml() helper function
 * (a) push or pop $tag from $openTags as needed
 * (b) clear $tag value
 *
 * Nothing happens when $tag is empty after trimming leading whitespace.
 *
 * @param String &$tag Current HTML tag name we are looking at
 * @param int $tagType (0-open tag, 1-close tag)
 * @param char $lastCh Character before the '>' that ended this tag
 * @param array &$openTags Open tag stack (not accounting for $tag)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return;
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		// Opening tag that is not self-closing ("<br/>" style)
		$openTags[] = $tag;
	} elseif ( $tagType == 1
		&& $openTags
		&& $tag == $openTags[count( $openTags ) - 1]
	) {
		// Closing tag matching the innermost open tag
		array_pop( $openTags );
	}
	$tag = '';
}
2580
/**
 * Grammatical transformations, needed for inflected languages
 * Invoked by putting {{grammar:case|word}} in a message
 *
 * This base implementation only consults $wgGrammarForms, indexed by
 * language code, case and word; without a matching entry the word is
 * returned unchanged.
 *
 * @param $word string
 * @param $case string
 * @return string
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	$code = $this->getCode();
	return isset( $wgGrammarForms[$code][$case][$word] )
		? $wgGrammarForms[$code][$case][$word]
		: $word;
}
2596
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If second or third parameter are not specified, masculine is used.
 * These details may be overridden per language.
 *
 * @param $gender string 'male' and 'female' select the first and second
 *   form; any other value selects the third form, or the first when there
 *   is no third
 * @param $forms array of forms: masculine, feminine, neutral
 * @return string '' when $forms is empty
 */
function gender( $gender, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}
	// Make sure there are at least two forms to pick from.
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $gender === 'male' ) {
		$index = 0;
	} elseif ( $gender === 'female' ) {
		$index = 1;
	} else {
		$index = isset( $forms[2] ) ? 2 : 0;
	}
	return $forms[$index];
}
2618
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 forms of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * This base implementation returns the first form when $count equals 1
 * and the second form otherwise.
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language,
 *   '' when $forms is empty
 */
function convertPlural( $count, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $count == 1 ) {
		return $forms[0];
	}
	return $forms[1];
}
2642
/**
 * Pads the forms given to convertPlural to the requested amount of forms
 * by repeatedly appending a copy of the element at index (size - 1).
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	for ( $have = count( $forms ); $have < $count; $have++ ) {
		$forms[] = $forms[$have - 1];
	}
	return $forms;
}
2657
/**
 * For translating of expiry times
 *
 * The 'ipboptions' message is read as a comma-separated list of
 * "label:value" options. The label of the first option whose value equals
 * $str is returned, trimmed and HTML-escaped. If the message is '-' or no
 * option matches, $str is returned unchanged.
 *
 * @param $str String: the validated block time in English
 * @return Somehow translated block time
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$options = $this->getMessageFromDB( 'ipboptions' );

	if ( $options != '-' ) {
		foreach ( explode( ',', $options ) as $option ) {
			if ( strpos( $option, ':' ) !== false ) {
				list( $show, $value ) = explode( ':', $option );
				if ( strcmp( $str, $value ) == 0 ) {
					return htmlspecialchars( trim( $show ) );
				}
			}
		}
	}

	return $str;
}
2683
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation returns the text unchanged.
 *
 * @param $text String
 * @return String
 */
function segmentForDiff( $text ) {
	return $text;
}
2694
/**
 * Counterpart of segmentForDiff(): unsegment to show the result.
 * This base implementation returns the text unchanged.
 *
 * @param $text String
 * @return String
 */
function unsegmentForDiff( $text ) {
	return $text;
}
2704
/**
 * Convert text to all supported variants; delegates to the converter
 * object in $this->mConverter.
 *
 * @param $text String
 * @return mixed result of the converter's autoConvertToAllVariants()
 */
function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
2709
/**
 * Convert text to different variants of a language; delegates to the
 * converter object in $this->mConverter.
 *
 * @param $text String
 * @return mixed result of the converter's convert()
 */
function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
2714
/**
 * Convert a Title object to a string in the preferred variant; delegates
 * to the converter object in $this->mConverter.
 *
 * @param $title Title object
 * @return mixed result of the converter's convertTitle()
 */
function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
2719
/**
 * Check if this is a language with variants, i.e. whether getVariants()
 * lists more than one entry.
 *
 * @return bool
 */
function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
2724
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion;
 * delegates to the converter object in $this->mConverter.
 *
 * @param $text String
 * @return mixed result of the converter's armourMath()
 */
function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
2729
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title;
 *   handed on to convert() as a second argument
 * @return string
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
2740
/**
 * Convert a category key; delegates to the converter object in
 * $this->mConverter.
 *
 * @param $key String
 * @return mixed result of the converter's convertCategoryKey()
 */
function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
2744
/**
 * Get the list of variants supported by this language, as reported by
 * the converter object. See the sample implementation in LanguageZh.php
 *
 * @return array an array of language codes
 */
function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
2754
/**
 * Get the preferred variant as reported by the converter object in
 * $this->mConverter.
 *
 * @return mixed result of the converter's getPreferredVariant()
 */
function getPreferredVariant() {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
2758
/**
 * Get the default variant as reported by the converter object in
 * $this->mConverter.
 *
 * @return mixed result of the converter's getDefaultVariant()
 */
function getDefaultVariant() {
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
2762
/**
 * Get the URL variant as reported by the converter object in
 * $this->mConverter.
 *
 * @return mixed result of the converter's getURLVariant()
 */
function getURLVariant() {
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
2766
/**
 * If a language supports multiple variants, a link that does not exist
 * in one variant may exist in another one. This function asks the
 * converter object to look for it. See e.g. LanguageZh.php
 *
 * @param $link String: the name of the link (passed by reference)
 * @param $nt Mixed: the title object of the link (passed by reference)
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *      we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
2782
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * Delegates to the converter object in $this->mConverter.
 *
 * @deprecated Use autoConvertToAllVariants()
 * @param $text String
 * @return mixed result of the converter's convertLinkToAllVariants()
 */
function convertLinkToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
2793
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value comes from the
 * converter object in $this->mConverter.
 *
 * @return string
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
2803
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter object in $this->mConverter.
 *
 * @return string
 */
function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
2814
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser. Delegates to the converter object in
 * $this->mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse passed through to the converter unchanged
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
2826
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * The value is the 'linkTrail' item of this language code in the
 * localisation data cache.
 *
 * @return string
 */
function linkTrail() {
	$trail = self::$dataCache->getItem( $this->mCode, 'linkTrail' );
	return $trail;
}
2836
/**
 * Get the language object, which is this object itself.
 *
 * @return Language $this
 */
function getLangObj() {
	return $this;
}
2840
/**
 * Get the RFC 3066 code for this language object
 *
 * @return mixed the value stored in $this->mCode
 */
function getCode() {
	return $this->mCode;
}
2847
/**
 * Set the language code of this object; it is stored in $this->mCode
 * without validation.
 *
 * @param $code String: language code
 */
function setCode( $code ) {
	$this->mCode = $code;
}
2851
/**
 * Get the name of a file for a certain language code
 *
 * The code is mangled by upper-casing its first character and replacing
 * every '-' with '_'.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws MWException if the code fails Language::isValidCode() or
 *   contains a slash, backslash or NUL byte
 */
static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
	// Protect against path traversal
	$safe = Language::isValidCode( $code );
	if ( $safe ) {
		$safe = ( strcspn( $code, "/\\\000" ) === strlen( $code ) );
	}
	if ( !$safe ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	$mangledCode = str_replace( '-', '_', ucfirst( $code ) );
	return $prefix . $mangledCode . $suffix;
}
2869
/**
 * Get the language code from a file name. Inverse of getFileName()
 *
 * The part between $prefix and $suffix must be an upper-case ASCII letter
 * followed by lower-case ASCII letters or underscores; it is lower-cased
 * and its underscores are turned into '-'.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return Language code, or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';

	$m = null;
	preg_match( $pattern, $filename, $m );
	if ( count( $m ) ) {
		return str_replace( '_', '-', strtolower( $m[1] ) );
	}
	return false;
}
2886
/**
 * Get the path of the messages file for a language code, built with
 * getFileName() below $IP/languages/messages.
 *
 * @param $code string Language code
 * @return string
 */
static function getMessagesFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
2891
/**
 * Get the path of the language class file for a language code, built
 * with getFileName() below $IP/languages/classes.
 *
 * @param $code string Language code
 * @return string
 */
static function getClassFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
2896
/**
 * Get the fallback for a given language
 *
 * @param $code string Language code
 * @return mixed false for 'en', otherwise the 'fallback' item of the
 *   localisation cache for $code
 */
static function getFallbackFor( $code ) {
	if ( $code !== 'en' ) {
		return self::getLocalisationCache()->getItem( $code, 'fallback' );
	}
	// Shortcut: 'en' is answered without consulting the cache
	return false;
}
2908
/**
 * Get all messages for a given language
 * WARNING: this may take a long time
 *
 * @param $code string Language code
 * @return mixed the 'messages' item of the localisation cache for $code
 */
static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
2916
/**
 * Get a message for a given language
 *
 * @param $key string Message key
 * @param $code string Language code
 * @return mixed the sub-item $key of the 'messages' item in the
 *   localisation cache for $code
 */
static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
2923
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * '$1' is replaced with $wgMetaNamespace, {{grammar:case|word}} constructs
 * are resolved through replaceGrammarInNamespace(), and spaces are turned
 * into underscores. Names without '$1' are returned untouched.
 *
 * @param $talk String: namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback uses plain $this: objects are passed by handle, so the
	# PHP 4 style &$this reference is not needed.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
2940
/**
 * preg_replace_callback() handler used by fixVariableInNamespace().
 *
 * @param $m array of regex matches: $m[1] is the grammatical case,
 *   $m[2] the word
 * @return string result of convertGrammar() on the trimmed word and case
 */
function replaceGrammarInNamespace( $m ) {
	$word = trim( $m[2] );
	$case = trim( $m[1] );
	return $this->convertGrammar( $word, $case );
}
2944
/**
 * Get the upper-case and lower-case character maps.
 *
 * The maps are read from the precompiled 'Utf8Case.ser' data on the first
 * call and kept in static variables for later calls.
 *
 * @return array ( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException if the precompiled data cannot be loaded
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before leaving through the exception
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );
	return array( $wikiUpperChars, $wikiLowerChars );
}
2962
/**
 * Format a time period given in seconds using the localized
 * 'hours-abbrev', 'minutes-abbrev' and 'seconds-abbrev' messages.
 *
 * - below 10 seconds (after rounding to tenths): seconds with one decimal
 * - below 60 seconds (after rounding): whole seconds
 * - below 3600 seconds (after rounding): minutes and seconds
 * - otherwise: hours, minutes and seconds
 *
 * @param $seconds int|float
 * @return string
 */
function formatTimePeriod( $seconds ) {
	if ( round( $seconds * 10 ) < 100 ) {
		$tenths = sprintf( "%.1f", round( $seconds * 10 ) / 10 );
		return $this->formatNum( $tenths ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	if ( round( $seconds ) < 60 ) {
		return $this->formatNum( round( $seconds ) ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	if ( round( $seconds ) < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		// Rounding can produce 60 seconds; carry into the minutes.
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		return $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
			$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	$hours = floor( $seconds / 3600 );
	$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
	$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
	// Carry rounded-up seconds into minutes, and minutes into hours.
	if ( $secondsPart == 60 ) {
		$secondsPart = 0;
		$minutes++;
	}
	if ( $minutes == 60 ) {
		$minutes = 0;
		$hours++;
	}
	return $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ' .
		$this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
		$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
}
2994
/**
 * Format a bitrate for output, using an appropriate unit
 * (bps, kbps, Mbps or Gbps) according to the magnitude in question.
 *
 * Values of zero or below are shown in bps. Mantissas below 10 keep one
 * decimal place, larger ones are rounded to whole numbers. Rates beyond
 * the Gbps range are still expressed in Gbps, and rates between 0 and 1
 * in bps.
 *
 * @param $bps int|float Bits per second
 * @return string
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		return $this->formatNum( $bps ) . $units[0];
	}

	// Keep the index inside $units: log10() gives a negative result for
	// rates below 1 and more than 3 * 3 for rates of 1e12 and above.
	$unitIndex = (int)floor( log10( $bps ) / 3 );
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );

	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
3009
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question
 *
 * The size is divided by 1024 while it is greater than 1024, at most
 * three times. Bytes and kilobytes are rounded to whole numbers,
 * megabytes and gigabytes to two decimal places. The number replaces
 * '$1' in the matching 'size-*' message.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	$messages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );

	$unit = 0;
	while ( $unit < 3 && $size > 1024 ) {
		$size = $size / 1024;
		$unit++;
	}

	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter
	$round = ( $unit >= 2 ) ? 2 : 0;
	$size = round( $size, $round );

	$text = $this->getMessageFromDB( $messages[$unit] );
	return str_replace( '$1', $this->formatNum( $size ), $text );
}
3042
/**
 * Get the conversion rule title, if any, as reported by the converter
 * object in $this->mConverter.
 *
 * @return mixed result of the converter's getConvRuleTitle()
 */
function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
3049 }