6b0ba13c077bf0c481e23e82181b3b77589f3ead
[lhc/web/wiklou.git] / languages / Language.php
1 <?php
2 /**
3 * Internationalisation code
4 *
5 * @file
6 * @ingroup Language
7 */
8
9 /**
10 * @defgroup Language Language
11 */
12
// Refuse to run when this file is requested directly instead of being loaded
// by MediaWiki.
if ( defined( 'MEDIAWIKI' ) === false ) {
	print "This file is part of MediaWiki, it is not a valid entry point.\n";
	exit( 1 );
}

// Read language names. The global declaration keeps $wgLanguageNames bound to
// the global scope even if this file is itself included from inside a function.
global $wgLanguageNames;
require_once dirname( __FILE__ ) . '/Names.php';

// When the mbstring functions are available, make UTF-8 their default encoding.
if ( function_exists( 'mb_strtoupper' ) === true ) {
	mb_internal_encoding( 'UTF-8' );
}
25
/**
 * A fake language converter.
 *
 * Every method either hands its input back unchanged or reports only the
 * code of the language object this converter was built for.
 *
 * @ingroup Language
 */
class FakeConverter {
	/** Language object passed to the constructor */
	public $mLang;

	/**
	 * @param $langobj Object providing getCode()
	 */
	public function __construct( $langobj ) {
		$this->mLang = $langobj;
	}

	/**
	 * @param $text String
	 * @return Array with a single entry: language code => $text
	 */
	public function autoConvertToAllVariants( $text ) {
		$code = $this->mLang->getCode();
		return array( $code => $text );
	}

	/** @return mixed $t, unchanged */
	public function convert( $t ) {
		return $t;
	}

	/**
	 * @param $t Object providing getPrefixedText()
	 * @return mixed whatever $t->getPrefixedText() returns
	 */
	public function convertTitle( $t ) {
		return $t->getPrefixedText();
	}

	/** @return Array containing only the language's own code */
	public function getVariants() {
		$code = $this->mLang->getCode();
		return array( $code );
	}

	/** @return mixed the language's own code */
	public function getPreferredVariant() {
		return $this->mLang->getCode();
	}

	/** @return mixed the language's own code */
	public function getDefaultVariant() {
		return $this->mLang->getCode();
	}

	/** @return String always empty */
	public function getURLVariant() {
		return '';
	}

	/** @return bool always false */
	public function getConvRuleTitle() {
		return false;
	}

	/** Does nothing; both reference arguments are left untouched. */
	public function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
	}

	/** @return String always empty */
	public function getExtraHashOptions() {
		return '';
	}

	/** @return String always empty */
	public function getParsedTitle() {
		return '';
	}

	/** @return mixed $text, unchanged */
	public function markNoConversion( $text, $noParse = false ) {
		return $text;
	}

	/** @return mixed $key, unchanged */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/** @return Array same result as autoConvertToAllVariants( $text ) */
	public function convertLinkToAllVariants( $text ) {
		return $this->autoConvertToAllVariants( $text );
	}

	/** @return mixed $text, unchanged */
	public function armourMath( $text ) {
		return $text;
	}
}
50
51 /**
52 * Internationalisation code
53 * @ingroup Language
54 */
55 class Language {
56 var $mConverter, $mVariants, $mCode, $mLoaded = false;
57 var $mMagicExtensions = array(), $mMagicHookDone = false;
58
59 var $mNamespaceIds, $namespaceNames, $namespaceAliases;
60 var $dateFormatStrings = array();
61 var $mExtendedSpecialPageAliases;
62
63 /**
64 * ReplacementArray object caches
65 */
66 var $transformData = array();
67
68 /**
69 * @var LocalisationCache
70 */
71 static public $dataCache;
72
73 static public $mLangObjCache = array();
74
75 static public $mWeekdayMsgs = array(
76 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
77 'friday', 'saturday'
78 );
79
80 static public $mWeekdayAbbrevMsgs = array(
81 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
82 );
83
84 static public $mMonthMsgs = array(
85 'january', 'february', 'march', 'april', 'may_long', 'june',
86 'july', 'august', 'september', 'october', 'november',
87 'december'
88 );
89 static public $mMonthGenMsgs = array(
90 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
91 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
92 'december-gen'
93 );
94 static public $mMonthAbbrevMsgs = array(
95 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
96 'sep', 'oct', 'nov', 'dec'
97 );
98
99 static public $mIranianCalendarMonthMsgs = array(
100 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
101 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
102 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
103 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
104 );
105
106 static public $mHebrewCalendarMonthMsgs = array(
107 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
108 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
109 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
110 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
111 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
112 );
113
114 static public $mHebrewCalendarMonthGenMsgs = array(
115 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
116 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
117 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
118 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
119 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
120 );
121
122 static public $mHijriCalendarMonthMsgs = array(
123 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
124 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
125 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
126 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
127 );
128
/**
 * Get a cached language object for a given language code, creating and
 * caching it on first request.
 *
 * @param $code String
 * @return Language
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	// Keep the cache small: once it holds more than ten objects, start over.
	if ( count( self::$mLangObjCache ) > 10 ) {
		self::$mLangObjCache = array();
	}

	$lang = self::newFromCode( $code );
	self::$mLangObjCache[$code] = $lang;
	return $lang;
}
144
/**
 * Create a language object for a given language code.
 *
 * Codes that cannot map to a class file get a plain Language object with the
 * code set on it. Otherwise the class Language<Code> is used when it exists;
 * when it does not, the object is built from the fallback code and then
 * relabelled with the requested code.
 *
 * @param $code String
 * @return Language
 * @throws MWException on an invalid code or a fallback loop
 */
protected static function newFromCode( $code ) {
	global $IP;
	static $recursionLevel = 0;

	// Protect against path traversal below
	$validForm = Language::isValidCode( $code );
	if ( !$validForm || strcspn( $code, ":/\\\000" ) !== strlen( $code ) ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	if ( !Language::isValidBuiltInCode( $code ) ) {
		// It's not possible to customise this code with class files, so
		// just return a Language object. This is to support uselang= hacks.
		$plain = new Language;
		$plain->setCode( $code );
		return $plain;
	}

	$class = 'Language';
	if ( $code != 'en' ) {
		$class .= str_replace( '-', '_', ucfirst( $code ) );
		if ( !defined( 'MW_COMPILED' ) ) {
			// Preload base classes to work around APC/PHP5 bug:
			// the .deps.php file first, then the class file itself.
			foreach ( array( "$class.deps.php", "$class.php" ) as $fileName ) {
				$path = "$IP/languages/classes/$fileName";
				if ( file_exists( $path ) ) {
					include_once( $path );
				}
			}
		}
	}

	if ( $recursionLevel > 5 ) {
		throw new MWException( "Language fallback loop detected when creating class $class\n" );
	}

	if ( MWInit::classExists( $class ) ) {
		return new $class;
	}

	// No dedicated class: build the fallback language and relabel it.
	$fallback = Language::getFallbackFor( $code );
	++$recursionLevel;
	$lang = Language::newFromCode( $fallback );
	--$recursionLevel;
	$lang->setCode( $code );
	return $lang;
}
199
/**
 * Returns true if a language code string is of a valid form, whether or
 * not it exists. This includes codes which are used solely for
 * customisation via the MediaWiki namespace.
 *
 * A code is rejected when it contains a colon, slash, backslash or NUL
 * byte, or when it matches Title::getTitleInvalidRegex().
 *
 * @param $code String
 * @return bool
 */
public static function isValidCode( $code ) {
	if ( strcspn( $code, ":/\\\000" ) !== strlen( $code ) ) {
		return false;
	}
	return !preg_match( Title::getTitleInvalidRegex(), $code );
}
212
/**
 * Returns true if a language code is of a valid form for the purposes of
 * internal customisation of MediaWiki, via Messages*.php.
 *
 * Only ASCII letters, digits and hyphens are accepted (case-insensitive).
 * The result is cast to a real boolean so that it matches the documented
 * contract and isValidCode(); preg_match() itself returns 1, 0 or false.
 *
 * @param $code String
 * @return bool
 */
public static function isValidBuiltInCode( $code ) {
	return (bool)preg_match( '/^[a-z0-9-]*$/i', $code );
}
220
/**
 * Get the LocalisationCache instance, creating it on first use from
 * $wgLocalisationCacheConf (whose 'class' entry names the class to
 * instantiate).
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	global $wgLocalisationCacheConf;

	if ( !is_null( self::$dataCache ) ) {
		return self::$dataCache;
	}

	$cacheClass = $wgLocalisationCacheConf['class'];
	self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	return self::$dataCache;
}
234
/**
 * Set up the default (fake) converter, derive the language code from the
 * class name and make sure the localisation cache exists.
 */
function __construct() {
	$this->mConverter = new FakeConverter( $this );

	// Set the code to the name of the descendant
	$className = get_class( $this );
	if ( $className == 'Language' ) {
		$code = 'en';
	} else {
		// Drop the 8-character "Language" prefix, e.g. Language_zh_hans
		$suffix = substr( $className, 8 );
		$code = str_replace( '_', '-', strtolower( $suffix ) );
	}
	$this->mCode = $code;

	self::getLocalisationCache();
}
245
/**
 * Reduce memory usage by unsetting every property of the object when it
 * is destroyed.
 */
function __destruct() {
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
254
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 */
function initContLang() {
	// Intentionally empty in the base class.
}
260
/**
 * Get the 'fallback' localisation item for this language.
 *
 * @return mixed false when this language's code is 'en', otherwise the
 *   cached 'fallback' item
 */
function getFallbackLanguageCode() {
	if ( $this->mCode !== 'en' ) {
		return self::$dataCache->getItem( $this->mCode, 'fallback' );
	}
	return false;
}
268
/**
 * Exports $wgBookstoreListEn: returns the 'bookstoreList' localisation
 * item for this language.
 *
 * @return array
 */
function getBookstoreList() {
	$list = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $list;
}
276
/**
 * Get the namespace names for this language, indexed by namespace ID.
 *
 * The list is built once and cached on the object. $wgExtraNamespaces
 * takes precedence over the localised names, which take precedence over
 * the canonical names; the project namespaces come from $wgMetaNamespace
 * and $wgMetaNamespaceTalk.
 *
 * @return array
 */
function getNamespaces() {
	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	if ( !is_null( $this->namespaceNames ) ) {
		return $this->namespaceNames;
	}

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$canonical = MWNamespace::getCanonicalNamespaces();

	// Array union: the leftmost array wins for each namespace ID
	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $canonical;

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	if ( $wgMetaNamespaceTalk ) {
		$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
	} else {
		// No explicit talk name configured: expand the localised pattern
		$pattern = $this->namespaceNames[NS_PROJECT_TALK];
		$this->namespaceNames[NS_PROJECT_TALK] = $this->fixVariableInNamespace( $pattern );
	}

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( array_keys( $this->namespaceNames ) as $nsId ) {
		if ( !isset( $canonical[$nsId] ) ) {
			unset( $this->namespaceNames[$nsId] );
		}
	}

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	return $this->namespaceNames;
}
311
/**
 * Same as getNamespaces(), but with every '_' in the names replaced by
 * ' '. Useful for producing output to be displayed, e.g. in <select>
 * forms.
 *
 * @return array
 */
function getFormattedNamespaces() {
	$formatted = array();
	foreach ( $this->getNamespaces() as $nsId => $name ) {
		$formatted[$nsId] = strtr( $name, '_', ' ' );
	}
	return $formatted;
}
327
/**
 * Get a namespace name by its index.
 * <code>
 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 * echo $mw_ns; // prints 'MediaWiki'
 * </code>
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}
	return false;
}
342
/**
 * Same as getNsText(), but with '_' changed to ' ' in the result; useful
 * for producing output.
 *
 * @param $index Int: the array key of the namespace to return
 * @return string the result of passing getNsText( $index ) through strtr()
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
354
/**
 * Returns gender-dependent namespace alias if available, falling back to
 * the regular namespace name from getNsText().
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key (male, female... )
 * @return String
 * @since 1.18
 */
function getGenderNsText( $index, $gender ) {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}
	return $this->getNsText( $index );
}
366
/**
 * Whether this language distinguishes genders, for example in
 * namespaces. True when the 'namespaceGenderAliases' localisation item
 * has at least one entry.
 *
 * @return bool
 * @since 1.18
 */
function needsGenderDistinction() {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	$aliasCount = count( $genderAliases );
	return $aliasCount > 0;
}
377
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$needle = $this->lc( $text );
	$knownIds = $this->getNamespaceIds();
	if ( isset( $knownIds[$needle] ) ) {
		return $knownIds[$needle];
	}
	return false;
}
391
/**
 * Get the namespace aliases of this language as a map of alias name to
 * namespace index. The result is built once and cached on the object.
 *
 * Aliases of the project talk namespace are passed through
 * fixVariableInNamespace(), and every gender-specific namespace form is
 * added as an alias as well.
 *
 * @return array
 */
function getNamespaceAliases() {
	if ( !is_null( $this->namespaceAliases ) ) {
		return $this->namespaceAliases;
	}

	$aliasMap = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
	if ( $aliasMap ) {
		foreach ( $aliasMap as $alias => $nsId ) {
			if ( $nsId !== NS_PROJECT_TALK ) {
				continue;
			}
			// Re-key the entry under its expanded name
			unset( $aliasMap[$alias] );
			$aliasMap[$this->fixVariableInNamespace( $alias )] = $nsId;
		}
	} else {
		$aliasMap = array();
	}

	$genderForms = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	foreach ( $genderForms as $nsId => $forms ) {
		foreach ( $forms as $alias ) {
			$aliasMap[$alias] = $nsId;
		}
	}

	$this->namespaceAliases = $aliasMap;
	return $this->namespaceAliases;
}
418
/**
 * Get a map from lower-cased namespace name or alias to namespace index.
 * Built once and cached on the object. Later sources overwrite earlier
 * ones: localised names, then language aliases, then $wgNamespaceAliases.
 *
 * @return array
 */
function getNamespaceIds() {
	global $wgNamespaceAliases;

	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	foreach ( $this->getNamespaces() as $nsId => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $nsId;
	}

	foreach ( $this->getNamespaceAliases() as $alias => $nsId ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
	}

	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $nsId ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $nsId;
		}
	}

	return $this->mNamespaceIds;
}
441
442
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$lctext = $this->lc( $text );

	// Canonical names are checked first
	$canonicalIndex = MWNamespace::getCanonicalIndex( $lctext );
	if ( $canonicalIndex !== null ) {
		return $canonicalIndex;
	}

	$knownIds = $this->getNamespaceIds();
	if ( isset( $knownIds[$lctext] ) ) {
		return $knownIds[$lctext];
	}
	return false;
}
458
/**
 * Short names for language variants used for language conversion links.
 * Looks up the message "variantname-<code>".
 *
 * @param $code String
 * @return string
 */
function getVariantname( $code ) {
	$messageKey = "variantname-$code";
	return $this->getMessageFromDB( $messageKey );
}
468
/**
 * Build "<Special namespace name>:<page name>" for a special page, using
 * the first alias from getSpecialPageAliases() when one exists for $name.
 *
 * @param $name String
 * @return string
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
476
/**
 * Get the five quickbar setting labels, in the order none, fixed left,
 * fixed right, floating left, floating right.
 *
 * @return array
 */
function getQuickbarSettings() {
	$messageKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright'
	);

	$labels = array();
	foreach ( $messageKeys as $messageKey ) {
		$labels[] = $this->getMessage( $messageKey );
	}
	return $labels;
}
486
/**
 * @return mixed the 'datePreferences' localisation item for this language
 */
function getDatePreferences() {
	$preferences = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $preferences;
}
490
/**
 * @return mixed the 'dateFormats' localisation item for this language
 */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
494
/**
 * Get the default date format of this language. The special value
 * 'dmy or mdy' is resolved through $wgAmericanDates.
 *
 * @return mixed the 'defaultDateFormat' item, or 'mdy' / 'dmy'
 */
function getDefaultDateFormat() {
	$format = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $format !== 'dmy or mdy' ) {
		return $format;
	}

	global $wgAmericanDates;
	if ( $wgAmericanDates ) {
		return 'mdy';
	}
	return 'dmy';
}
504
/**
 * @return mixed the 'datePreferenceMigrationMap' localisation item for
 *   this language
 */
function getDatePreferenceMigrationMap() {
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
508
/**
 * @param $image mixed key within the 'imageFiles' localisation item
 * @return mixed the matching 'imageFiles' sub-item for this language
 */
function getImageFile( $image ) {
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
512
/**
 * @return mixed the 'defaultUserOptionOverrides' localisation item for
 *   this language
 */
function getDefaultUserOptionOverrides() {
	$overrides = self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
	return $overrides;
}
516
/**
 * @return mixed the 'extraUserToggles' localisation item for this language
 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return $toggles;
}
520
/**
 * Get the message "tog-<name>" for a user toggle.
 *
 * @param $tog String: toggle name
 * @return string
 */
function getUserToggle( $tog ) {
	$messageKey = "tog-$tog";
	return $this->getMessageFromDB( $messageKey );
}
524
/**
 * Get language names, indexed by code.
 * If $customisedOnly is true, only returns codes with a messages file.
 *
 * Names from $wgExtraLanguageNames take precedence over the core names
 * loaded from languages/Names.php.
 *
 * @param $customisedOnly bool
 * @return array language code => language name; empty when
 *   $customisedOnly is set and the messages directory cannot be opened
 */
public static function getLanguageNames( $customisedOnly = false ) {
	global $wgExtraLanguageNames;
	static $coreLanguageNames;

	if ( $coreLanguageNames === null ) {
		// The included file is what assigns $coreLanguageNames
		include( MWInit::compiledPath( 'languages/Names.php' ) );
	}

	$allNames = $wgExtraLanguageNames + $coreLanguageNames;
	if ( !$customisedOnly ) {
		return $allNames;
	}

	global $IP;
	$names = array();
	$dir = opendir( "$IP/languages/messages" );
	if ( $dir === false ) {
		// opendir() has already raised a warning. Passing false on to
		// readdir()/closedir() would never terminate cleanly, so report
		// that no customised languages were found.
		return $names;
	}

	while ( false !== ( $file = readdir( $dir ) ) ) {
		$code = self::getCodeFromFileName( $file, 'Messages' );
		if ( $code && isset( $allNames[$code] ) ) {
			$names[$code] = $allNames[$code];
		}
	}
	closedir( $dir );
	return $names;
}
554
/**
 * Get translated language names. This is done on best effort and
 * by default this is exactly the same as Language::getLanguageNames.
 * The CLDR extension provides translated names.
 *
 * Handlers of the 'LanguageGetTranslatedLanguageNames' hook fill in the
 * names first; any code they left unset gets its getLanguageNames() name.
 *
 * @param $code String Language code.
 * @return Array language code => language name
 * @since 1.18.0
 */
public static function getTranslatedLanguageNames( $code ) {
	$names = array();
	wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );

	foreach ( self::getLanguageNames() as $langCode => $langName ) {
		if ( isset( $names[$langCode] ) ) {
			continue;
		}
		$names[$langCode] = $langName;
	}

	return $names;
}
573
/**
 * Get a message from the MediaWiki namespace, via wfMsgExt() with the
 * 'parsemag' option and this object as the language.
 *
 * @param $msg String: message name
 * @return string
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
583
/**
 * Get the name of a language from getLanguageNames().
 *
 * @param $code String: language code
 * @return string the name, or '' when the code is not listed
 */
function getLanguageName( $code ) {
	$names = self::getLanguageNames();
	return array_key_exists( $code, $names ) ? $names[$code] : '';
}
591
/**
 * @param $key Int: month number, 1-based
 * @return string message for that month's full name
 */
function getMonthName( $key ) {
	$messageKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
595
/**
 * @return array month names indexed 1..12, with an empty string at index 0
 */
function getMonthNamesArray() {
	$monthNames = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$monthNames[] = $this->getMonthName( $month );
	}
	return $monthNames;
}
603
/**
 * @param $key Int: month number, 1-based
 * @return string message for that month's genitive name
 */
function getMonthNameGen( $key ) {
	$messageKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
607
/**
 * @param $key Int: month number, 1-based
 * @return string message for that month's abbreviation
 */
function getMonthAbbreviation( $key ) {
	$messageKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
611
/**
 * @return array month abbreviations indexed 1..12, with an empty string
 *   at index 0
 */
function getMonthAbbreviationsArray() {
	$abbreviations = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$abbreviations[] = $this->getMonthAbbreviation( $month );
	}
	return $abbreviations;
}
619
/**
 * @param $key Int: weekday number, 1-based with 1 = Sunday
 * @return string message for that weekday's full name
 */
function getWeekdayName( $key ) {
	$messageKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
623
/**
 * @param $key Int: weekday number, 1-based with 1 = Sunday
 * @return string message for that weekday's abbreviation
 */
function getWeekdayAbbreviation( $key ) {
	$messageKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
627
/**
 * @param $key Int: Iranian calendar month number, 1-based
 * @return string message for that month's name
 */
function getIranianCalendarMonthName( $key ) {
	$messageKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
631
/**
 * @param $key Int: 1-based index into the Hebrew calendar month messages
 * @return string message for that month's name
 */
function getHebrewCalendarMonthName( $key ) {
	$messageKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
635
/**
 * @param $key Int: 1-based index into the Hebrew calendar genitive month
 *   messages
 * @return string message for that month's genitive name
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$messageKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
639
/**
 * @param $key Int: Hijri calendar month number, 1-based
 * @return string message for that month's name
 */
function getHijriCalendarMonthName( $key ) {
	$messageKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
643
/**
 * Used by date() and time() to adjust the time output.
 *
 * The correction is a string in one of these forms:
 *  - "ZoneInfo|<offset>|<zone name>": convert using the named time zone,
 *    or fall back to the stored offset if the zone cannot be opened
 *  - "System|..." or an empty string: use $wgLocalTZoffset (minutes)
 *  - "Offset|<minutes>": shift by that many minutes
 *  - "H:MM" or "H": shift by hours (and minutes)
 *
 * @param $ts Int the time in date('YmdHis') format
 * @param $tz Mixed: adjust the time by this amount (default false, mean we
 *            get user timecorrection setting)
 * @return int
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( $tz === false ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	$data = explode( '|', $tz, 3 );

	if ( $data[0] == 'ZoneInfo' ) {
		// Open the zone once, and only when a zone name is actually present
		$userTZ = false;
		if ( isset( $data[2] ) && function_exists( 'timezone_open' ) ) {
			wfSuppressWarnings();
			$userTZ = timezone_open( $data[2] );
			wfRestoreWarnings();
		}
		if ( $userTZ !== false ) {
			$date = date_create( $ts, timezone_open( 'UTC' ) );
			date_timezone_set( $date, $userTZ );
			return date_format( $date, 'YmdHis' );
		}
		# Unrecognized timezone, default to 'Offset' with the stored offset.
		$data[0] = 'Offset';
	}

	$minDiff = 0;
	if ( $data[0] == 'System' || $tz == '' ) {
		# Global offset in minutes.
		if ( isset( $wgLocalTZoffset ) ) {
			$minDiff = $wgLocalTZoffset;
		}
	} elseif ( $data[0] == 'Offset' ) {
		// A value without an offset part means no correction
		$minDiff = isset( $data[1] ) ? intval( $data[1] ) : 0;
	} else {
		$data = explode( ':', $tz );
		if ( count( $data ) == 2 ) {
			$hours = intval( $data[0] );
			$minutes = intval( $data[1] );
			$minDiff = abs( $hours ) * 60 + $minutes;
			// intval( '-0' ) is 0, so the sign of offsets such as "-0:30"
			// has to be read from the text itself.
			if ( $hours < 0 || strpos( ltrim( $data[0] ), '-' ) === 0 ) {
				$minDiff = -$minDiff;
			}
		} else {
			$minDiff = intval( $data[0] ) * 60;
		}
	}

	# No difference ? Return time unchanged
	if ( 0 == $minDiff ) {
		return $ts;
	}

	wfSuppressWarnings(); // E_STRICT system time bitching
	# Generate an adjusted date; take advantage of the fact that mktime
	# will normalize out-of-range values so we don't have to split $minDiff
	# into hours and minutes.
	$t = mktime(
		(int)substr( $ts, 8, 2 ), # Hours
		(int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
		(int)substr( $ts, 12, 2 ), # Seconds
		(int)substr( $ts, 4, 2 ), # Month
		(int)substr( $ts, 6, 2 ), # Day
		(int)substr( $ts, 0, 4 ) ); # Year

	$date = date( 'YmdHis', $t );
	wfRestoreWarnings();

	return $date;
}
716
/**
 * This is a workalike of PHP's date() function, but with better
 * internationalisation, a reduced set of format characters, and a better
 * escaping format.
 *
 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
 * PHP manual for definitions. There are a number of extensions, which
 * start with "x":
 *
 *    xn   Do not translate digits of the next numeric format character
 *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 *    xr   Use roman numerals for the next numeric format character
 *    xh   Use hebrew numerals for the next numeric format character
 *    xx   Literal x
 *    xg   Genitive month name
 *
 *    xij  j (day number) in Iranian calendar
 *    xiF  F (month name) in Iranian calendar
 *    xin  n (month number) in Iranian calendar
 *    xiY  Y (full year) in Iranian calendar
 *
 *    xjj  j (day number) in Hebrew calendar
 *    xjF  F (month name) in Hebrew calendar
 *    xjt  t (days in month) in Hebrew calendar
 *    xjx  xg (genitive month name) in Hebrew calendar
 *    xjn  n (month number) in Hebrew calendar
 *    xjY  Y (full year) in Hebrew calendar
 *
 *    xmj  j (day number) in Hijri calendar
 *    xmF  F (month name) in Hijri calendar
 *    xmn  n (month number) in Hijri calendar
 *    xmY  Y (full year) in Hijri calendar
 *
 *    xkY  Y (full year) in Thai solar calendar. Months and days are
 *         identical to the Gregorian calendar
 *    xoY  Y (full year) in Minguo calendar or Juche year.
 *         Months and days are identical to the
 *         Gregorian calendar
 *    xtY  Y (full year) in Japanese nengo. Months and days are
 *         identical to the Gregorian calendar
 *
 * Characters enclosed in double quotes will be considered literal (with
 * the quotes themselves removed). Unmatched quotes will be considered
 * literal quotes. Example:
 *
 * "The month is" F       => The month is January
 * i's"                   => 20'11"
 *
 * Backslash escaping is also supported.
 *
 * Input timestamp is assumed to be pre-normalized to the desired local
 * time zone, if any. For that reason every format character that goes
 * through a Unix timestamp is rendered with gmdate(), so the fields of
 * $ts come back unchanged regardless of the server time zone.
 *
 * @param $format String
 * @param $ts String: 14-character timestamp
 *      YYYYMMDDHHMMSS
 *      01234567890123
 * @return string
 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 */
function sprintfDate( $format, $ts ) {
	$s = '';

	// Digit-rendering flags for the next numeric format character
	$raw = false;
	$roman = false;
	$hebrewNum = false;
	$rawToggle = false;

	// Lazily computed conversions of $ts, shared by all format characters
	$unix = false;
	$iranian = false;
	$hebrew = false;
	$hijri = false;
	$thai = false;
	$minguo = false;
	$tenno = false;

	$formatLen = strlen( $format );
	$calendarPrefixes = array( 'xi', 'xj', 'xk', 'xm', 'xo', 'xt' );

	for ( $p = 0; $p < $formatLen; $p++ ) {
		// $num stays false unless this format character yields a number
		$num = false;
		$code = $format[$p];
		if ( $code == 'x' && $p < $formatLen - 1 ) {
			$code .= $format[++$p];
		}

		// Calendar-specific codes are three characters long
		if ( in_array( $code, $calendarPrefixes, true ) && $p < $formatLen - 1 ) {
			$code .= $format[++$p];
		}

		switch ( $code ) {
			case 'xx':
				$s .= 'x';
				break;
			case 'xn':
				$raw = true;
				break;
			case 'xN':
				$rawToggle = !$rawToggle;
				break;
			case 'xr':
				$roman = true;
				break;
			case 'xh':
				$hebrewNum = true;
				break;
			case 'xg':
				$s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
				break;
			case 'xjx':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
				break;
			case 'd':
				$num = substr( $ts, 6, 2 );
				break;
			case 'D':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
				break;
			case 'j':
				$num = intval( substr( $ts, 6, 2 ) );
				break;
			case 'xij':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[2];
				break;
			case 'xmj':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[2];
				break;
			case 'xjj':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[2];
				break;
			case 'l':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
				break;
			case 'N':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				// ISO weekday: Sunday is 7 rather than 0
				$w = gmdate( 'w', $unix );
				$num = $w ? $w : 7;
				break;
			case 'w':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = gmdate( 'w', $unix );
				break;
			case 'z':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = gmdate( 'z', $unix );
				break;
			case 'W':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = gmdate( 'W', $unix );
				break;
			case 'F':
				$s .= $this->getMonthName( substr( $ts, 4, 2 ) );
				break;
			case 'xiF':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$s .= $this->getIranianCalendarMonthName( $iranian[1] );
				break;
			case 'xmF':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$s .= $this->getHijriCalendarMonthName( $hijri[1] );
				break;
			case 'xjF':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
				break;
			case 'm':
				$num = substr( $ts, 4, 2 );
				break;
			case 'M':
				$s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
				break;
			case 'n':
				$num = intval( substr( $ts, 4, 2 ) );
				break;
			case 'xin':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[1];
				break;
			case 'xmn':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[1];
				break;
			case 'xjn':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[1];
				break;
			case 't':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = gmdate( 't', $unix );
				break;
			case 'xjt':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[3];
				break;
			case 'L':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = gmdate( 'L', $unix );
				break;
			case 'o':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				// gmdate(), like every other Unix-based code here: date()
				// would shift the ISO year by the server time zone and
				// could disagree with 'W' around the turn of the year.
				$num = gmdate( 'o', $unix );
				break;
			case 'Y':
				$num = substr( $ts, 0, 4 );
				break;
			case 'xiY':
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[0];
				break;
			case 'xmY':
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[0];
				break;
			case 'xjY':
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[0];
				break;
			case 'xkY':
				if ( !$thai ) {
					$thai = self::tsToYear( $ts, 'thai' );
				}
				$num = $thai[0];
				break;
			case 'xoY':
				if ( !$minguo ) {
					$minguo = self::tsToYear( $ts, 'minguo' );
				}
				$num = $minguo[0];
				break;
			case 'xtY':
				if ( !$tenno ) {
					$tenno = self::tsToYear( $ts, 'tenno' );
				}
				$num = $tenno[0];
				break;
			case 'y':
				$num = substr( $ts, 2, 2 );
				break;
			case 'a':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
				break;
			case 'A':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
				break;
			case 'g':
				// 12-hour clock without leading zero; hour 0 and 12 show as 12
				$h = substr( $ts, 8, 2 );
				$num = $h % 12 ? $h % 12 : 12;
				break;
			case 'G':
				$num = intval( substr( $ts, 8, 2 ) );
				break;
			case 'h':
				// 12-hour clock with leading zero
				$h = substr( $ts, 8, 2 );
				$num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
				break;
			case 'H':
				$num = substr( $ts, 8, 2 );
				break;
			case 'i':
				$num = substr( $ts, 10, 2 );
				break;
			case 's':
				$num = substr( $ts, 12, 2 );
				break;
			case 'c':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$s .= gmdate( 'c', $unix );
				break;
			case 'r':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$s .= gmdate( 'r', $unix );
				break;
			case 'U':
				if ( !$unix ) {
					$unix = wfTimestamp( TS_UNIX, $ts );
				}
				$num = $unix;
				break;
			case '\\':
				# Backslash escaping
				if ( $p < $formatLen - 1 ) {
					$s .= $format[++$p];
				} else {
					$s .= '\\';
				}
				break;
			case '"':
				# Quoted literal
				if ( $p < $formatLen - 1 ) {
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote === false ) {
						# No terminating quote, assume literal "
						$s .= '"';
					} else {
						$s .= substr( $format, $p + 1, $endQuote - $p - 1 );
						$p = $endQuote;
					}
				} else {
					# Quote at end of string, assume literal "
					$s .= '"';
				}
				break;
			default:
				// Anything unrecognised is copied through as-is
				$s .= $format[$p];
		}

		if ( $num !== false ) {
			// The one-shot flags (xn, xr, xh) are consumed by the number
			// they apply to; xN stays in effect until toggled again.
			if ( $rawToggle || $raw ) {
				$s .= $num;
				$raw = false;
			} elseif ( $roman ) {
				$s .= self::romanNumeral( $num );
				$roman = false;
			} elseif ( $hebrewNum ) {
				$s .= self::hebrewNumeral( $num );
				$hebrewNum = false;
			} else {
				$s .= $this->formatNum( $num, true );
			}
		}
	}
	return $s;
}
1084
1085 private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
1086 private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
/**
 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 * Gregorian dates to Iranian dates. Originally written in C, it
 * is released under the terms of GNU Lesser General Public
 * License. Conversion to PHP was performed by Niklas Laxström.
 *
 * Link: http://www.farsiweb.info/jalali/jalali.c
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return array ( Iranian year, month number, day number )
 */
private static function tsToIranian( $ts ) {
	// Work with a year counted from 1600 and zero-based month and day
	$gregYear = substr( $ts, 0, 4 ) - 1600;
	$gregMonth = substr( $ts, 4, 2 ) - 1;
	$gregDay = (int)substr( $ts, 6, 2 ) - 1;

	# Days passed from the beginning (including leap years)
	$gregDayNo = 365 * $gregYear
		+ floor( ( $gregYear + 3 ) / 4 )
		- floor( ( $gregYear + 99 ) / 100 )
		+ floor( ( $gregYear + 399 ) / 400 );

	// Add days of the past months of this year
	for ( $month = 0; $month < $gregMonth; $month++ ) {
		$gregDayNo += self::$GREG_DAYS[$month];
	}

	// Leap years: one extra day once February has passed
	$isLeapYear = ( $gregYear % 4 === 0 && $gregYear % 100 !== 0 )
		|| ( $gregYear % 400 == 0 );
	if ( $gregMonth > 1 && $isLeapYear ) {
		$gregDayNo++;
	}

	// Days passed in current month
	$gregDayNo += $gregDay;

	$iranDayNo = $gregDayNo - 79;

	// Whole 12053-day blocks, each counted as 33 Iranian years below
	$blocks = floor( $iranDayNo / 12053 );
	$iranDayNo %= 12053;

	// Then whole 1461-day groups of four years
	$iranYear = 979 + 33 * $blocks + 4 * floor( $iranDayNo / 1461 );
	$iranDayNo %= 1461;

	if ( $iranDayNo >= 366 ) {
		$iranYear += floor( ( $iranDayNo - 1 ) / 365 );
		$iranDayNo = floor( ( $iranDayNo - 1 ) % 365 );
	}

	// Walk through the months until the remaining days fit into one
	$monthIndex = 0;
	while ( $monthIndex < 11 && $iranDayNo >= self::$IRANIAN_DAYS[$monthIndex] ) {
		$iranDayNo -= self::$IRANIAN_DAYS[$monthIndex];
		$monthIndex++;
	}

	return array( $iranYear, $monthIndex + 1, $iranDayNo + 1 );
}
1142
/**
 * Convert a Gregorian date to a Hijri date using a fixed arithmetic rule.
 *
 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
 *
 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return Array: Hijri year, month, day
 */
private static function tsToHijri( $ts ) {
	$gYear = substr( $ts, 0, 4 );
	$gMonth = substr( $ts, 4, 2 );
	$gDay = substr( $ts, 6, 2 );

	# Dates after 14 October 1582 use the Gregorian day-number formula,
	# earlier dates use the other one
	$afterSwitch = ( $gYear > 1582 )
		|| ( ( $gYear == 1582 ) && ( $gMonth > 10 ) )
		|| ( ( $gYear == 1582 ) && ( $gMonth == 10 ) && ( $gDay > 14 ) );

	if ( $afterSwitch ) {
		# -1 for January and February, 0 otherwise
		$monthShift = (int)( ( $gMonth - 14 ) / 12 );
		$dayNumber = (int)( ( 1461 * ( $gYear + 4800 + $monthShift ) ) / 4 ) +
			(int)( ( 367 * ( $gMonth - 2 - 12 * ( $monthShift ) ) ) / 12 ) -
			(int)( ( 3 * (int)( ( ( $gYear + 4900 + $monthShift ) / 100 ) ) ) / 4 ) +
			$gDay - 32075;
	} else {
		$dayNumber = 367 * $gYear -
			(int)( ( 7 * ( $gYear + 5001 + (int)( ( $gMonth - 9 ) / 7 ) ) ) / 4 ) +
			(int)( ( 275 * $gMonth ) / 9 ) + $gDay + 1729777;
	}

	# Reduce the day number to a position inside a 10631-day (30-year) cycle
	$rest = $dayNumber - 1948440 + 10632;
	$cycles = (int)( ( $rest - 1 ) / 10631 );
	$rest = $rest - 10631 * $cycles + 354;

	# Year inside the cycle
	$yearInCycle = ( (int)( ( 10985 - $rest ) / 5316 ) ) * ( (int)( ( 50 * $rest ) / 17719 ) )
		+ ( (int)( $rest / 5670 ) ) * ( (int)( ( 43 * $rest ) / 15238 ) );

	# Day inside that year
	$rest = $rest
		- ( (int)( ( 30 - $yearInCycle ) / 15 ) ) * ( (int)( ( 17719 * $yearInCycle ) / 50 ) )
		- ( (int)( $yearInCycle / 16 ) ) * ( (int)( ( 15238 * $yearInCycle ) / 43 ) )
		+ 29;

	$hMonth = (int)( ( 24 * $rest ) / 709 );
	$hDay = $rest - (int)( ( 709 * $hMonth ) / 24 );
	$hYear = 30 * $cycles + $yearInCycle - 30;

	return array( $hYear, $hMonth, $hDay );
}
1185
/**
 * Convert a Gregorian date to a Hebrew date.
 *
 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
 * to translate the relevant functions into PHP and release them under
 * GNU GPL.
 *
 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
 * and Adar II is 14. In a non-leap year, Adar is 6.
 *
 * @param $ts String: timestamp whose first eight characters are YYYYMMDD
 * @return Array: Hebrew year, Hebrew month, day of the month, and the
 *   number of days in that month
 */
private static function tsToHebrew( $ts ) {
	# Split the timestamp
	$gregYear = substr( $ts, 0, 4 );
	$gregMonth = substr( $ts, 4, 2 );
	$gregDay = substr( $ts, 6, 2 );

	# First guess at the Hebrew year
	$hebrewYear = $gregYear + 3760;

	# Renumber the months so that September = 1 and August = 12;
	# September to December then belong to the following "year"
	$gregMonth += 4;
	if ( $gregMonth > 12 ) {
		$gregMonth -= 12;
		$gregYear++;
		$hebrewYear++;
	}

	# Day number counted from 1 September
	$dayOfYear = $gregDay;
	for ( $m = 1; $m < $gregMonth; $m++ ) {
		switch ( $m ) {
			case 6:
				# February, with the leap day where applicable
				$dayOfYear += 28;
				if ( $gregYear % 400 == 0 || ( $gregYear % 4 == 0 && $gregYear % 100 > 0 ) ) {
					$dayOfYear++;
				}
				break;
			case 1:
			case 3:
			case 8:
			case 10:
				# September, November, April, June
				$dayOfYear += 30;
				break;
			default:
				$dayOfYear += 31;
		}
	}

	# Where the guessed Hebrew year begins, in days since 1 September
	$yearStart = self::hebrewYearStart( $hebrewYear );

	if ( $dayOfYear > $yearStart ) {
		# The guess was right; look up where the following year begins
		$nextYearStart = self::hebrewYearStart( $hebrewYear + 1 );
	} else {
		# The date falls before the guessed year starts, so it belongs to
		# the previous Hebrew year and the guessed start is "next year"
		$nextYearStart = $yearStart;
		$gregYear--;
		$hebrewYear--;
		# Re-base the day count on the previous 1 September
		$dayOfYear += 365;
		if ( ( $gregYear % 400 == 0 ) || ( $gregYear % 100 != 0 && $gregYear % 4 == 0 ) ) {
			$dayOfYear++;
		}
		$yearStart = self::hebrewYearStart( $hebrewYear );
	}

	# Day number inside the Hebrew year
	$hebrewDayOfYear = $dayOfYear - $yearStart;

	# Difference between the two year starts, plus 12 (13 in a Gregorian
	# leap year) so that only the Hebrew year type remains:
	# Hebrew years have at least 353 days versus 365/366
	$gregIsLeap = ( $gregYear % 400 == 0 ) || ( $gregYear % 100 != 0 && $gregYear % 4 == 0 );
	$diff = $nextYearStart - $yearStart + ( $gregIsLeap ? 13 : 12 );

	# 0 = incomplete year, 1 = regular year, 2 = complete year.
	# Taken mod 30 so the 30 days of Adar I in leap years drop out.
	$yearPattern = $diff % 30;
	$isLeap = $diff >= 30;

	# Walk through the months, subtracting each month's length, until the
	# remaining day count fits into the current month
	$hebrewDay = $hebrewDayOfYear;
	$hebrewMonth = 1;
	$days = 0;
	while ( $hebrewMonth <= 12 ) {
		if ( $isLeap && $hebrewMonth == 6 ) {
			# Leap year: Adar I (30 days) followed by Adar II (29 days)
			$days = 30;
			if ( $hebrewDay <= $days ) {
				$hebrewMonth = 13;
			} else {
				$hebrewDay -= $days;
				$days = 29;
				if ( $hebrewDay <= $days ) {
					$hebrewMonth = 14;
				}
			}
		} elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
			# Cheshvan has 30 days in a complete year
			$days = 30;
		} elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
			# Kislev has 29 days in an incomplete year
			$days = 29;
		} else {
			# Otherwise odd months have 30 days and even months 29
			$days = 30 - ( $hebrewMonth - 1 ) % 2;
		}

		if ( $hebrewDay <= $days ) {
			# Found the month
			break;
		}
		$hebrewDay -= $days;
		$hebrewMonth++;
	}

	return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
}
1328
/**
 * Compute where a Hebrew year starts, expressed as days since 1 September.
 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
 * Used for Hebrew date.
 *
 * @param $year Integer: Hebrew year
 * @return Integer: day offset of the year start, counted from 1 September
 */
private static function hebrewYearStart( $year ) {
	$cyclePos = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
	$quadPos = intval( ( $year - 1 ) % 4 );
	$moment = 32.044093161144 + 1.5542417966212 * $cyclePos + $quadPos / 4.0 - 0.0031777940220923 * ( $year - 1 );

	# Split $moment into a whole-day part and a fraction; for negative
	# values the whole part is taken one lower so the fraction stays >= 0
	if ( $moment < 0 ) {
		$moment--;
	}
	$dayNumber = intval( $moment );
	if ( $moment < 0 ) {
		$moment++;
	}
	$moment -= $dayNumber;

	# Postponement rules, keyed on a weekday-like remainder
	$weekPos = intval( ( $dayNumber + 3 * ( $year - 1 ) + 5 * $quadPos + 5 ) % 7 );
	if ( $weekPos == 0 && $cyclePos > 11 && $moment >= 0.89772376543210 ) {
		$dayNumber++;
	} elseif ( $weekPos == 1 && $cyclePos > 6 && $moment >= 0.63287037037037 ) {
		$dayNumber += 2;
	} elseif ( $weekPos == 2 || $weekPos == 4 || $weekPos == 6 ) {
		$dayNumber++;
	}

	# Century correction
	$ceYear = $year - 3761;
	$dayNumber += intval( $ceYear / 100 ) - intval( $ceYear / 400 ) - 24;

	return $dayNumber;
}
1359
/**
 * Convert a Gregorian date to a calendar that only renumbers the year:
 * Thai solar, Minguo, Juche, or Japanese nengō (era) years.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * @param $ts String: 14-character timestamp
 * @param $cName String: calendar name: 'thai', 'minguo', 'juche' or
 *   'tenno'; any other value leaves the year unchanged
 * @return Array: converted year, month, day. Month and day are the
 *   two-character substrings of $ts. For 'tenno' the year is a string made
 *   of the era name followed by the era year ('元' for the first year).
 */
private static function tsToYear( $ts, $cName ) {
	# Era table for 'tenno', newest first: first day (YYYYMMDD), first
	# Gregorian year, era name. Anything earlier than the last entry is
	# treated as Meiji.
	static $eras = array(
		array( 20190501, 2019, '令和' ), # Reiwa
		array( 19890108, 1989, '平成' ), # Heisei
		array( 19261226, 1926, '昭和' ), # Shōwa
		array( 19120731, 1912, '大正' ), # Taishō
	);

	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );

	if ( !strcmp( $cName, 'thai' ) ) {
		# Thai solar dates
		# Add 543 years to the Gregorian calendar
		# Months and days are identical
		$gy_offset = $gy + 543;
	} elseif ( !strcmp( $cName, 'minguo' ) || !strcmp( $cName, 'juche' ) ) {
		# Minguo and Juche dates
		# Deduct 1911 years from the Gregorian calendar
		# Months and days are identical
		$gy_offset = $gy - 1911;
	} elseif ( !strcmp( $cName, 'tenno' ) ) {
		# Nengō dates from the Meiji period onwards
		# Months and days are identical
		$ymd = (int)substr( $ts, 0, 8 );

		$eraName = '明治'; # Meiji
		$eraFirstYear = 1868;
		foreach ( $eras as $era ) {
			if ( $ymd >= $era[0] ) {
				$eraFirstYear = $era[1];
				$eraName = $era[2];
				break;
			}
		}

		# The first year of an era is written 元 (gannen) instead of 1
		$gy_gannen = $gy - $eraFirstYear + 1;
		$gy_offset = $eraName . ( $gy_gannen == 1 ? '元' : $gy_gannen );
	} else {
		$gy_offset = $gy;
	}

	return array( $gy_offset, $gm, $gd );
}
1443
/**
 * Format a number as a Roman numeral. Values outside 1..3000 are returned
 * as the plain integer instead.
 *
 * @param $num Mixed: number to format (converted with intval())
 * @return Mixed: Roman numeral string, or the integer when out of range
 */
static function romanNumeral( $num ) {
	# One row per decimal position: units, tens, hundreds, thousands
	static $numerals = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$remaining = intval( $num );
	if ( $remaining <= 0 || $remaining > 3000 ) {
		return $remaining;
	}

	# Emit the digits from the thousands down to the units
	$roman = '';
	$divisor = 1000;
	$position = 3;
	while ( $position >= 0 ) {
		if ( $remaining >= $divisor ) {
			$roman .= $numerals[$position][floor( $remaining / $divisor )];
		}
		$remaining = $remaining % $divisor;
		$divisor /= 10;
		$position--;
	}
	return $roman;
}
1469
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * Values outside 1..9999 are returned as the plain integer. A single
 * letter gets a trailing geresh ('), several letters get gershayim (")
 * before the last letter, and the thousands letter is followed by a
 * geresh. A trailing kaf, mem, nun, pe or tsadi in a multi-letter number
 * is written in its final form.
 *
 * @param $num Mixed: number to format (converted with intval())
 * @return Mixed: formatted string, or the integer when out of range
 */
static function hebrewNumeral( $num ) {
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	# Round thousands consist of the thousands letter and its geresh only
	# (3 bytes). The byte-based punctuation logic below would cut that
	# 2-byte letter in half and produce invalid UTF-8, so spell these out.
	if ( $num == 1000 ) {
		return "א' אלף";
	} elseif ( $num % 1000 == 0 ) {
		return $table[0][$num / 1000] . "' אלפים";
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				# Written as 9+6 and 9+7 rather than 10+5 and 10+6
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	# Every Hebrew letter used here is two bytes in UTF-8, so "the last two
	# bytes" means "the last letter"
	if ( strlen( $s ) == 2 ) {
		# A single letter: append geresh
		$str = $s . "'";
	} else {
		# Several letters: insert gershayim before the last one
		$str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
		$str .= substr( $s, strlen( $s ) - 2, 2 );
	}

	# Use the final form of the last letter where one exists
	$start = substr( $str, 0, strlen( $str ) - 2 );
	$end = substr( $str, strlen( $str ) - 2 );
	switch( $end ) {
		case 'כ':
			$str = $start . 'ך';
			break;
		case 'מ':
			$str = $start . 'ם';
			break;
		case 'נ':
			$str = $start . 'ן';
			break;
		case 'פ':
			$str = $start . 'ף';
			break;
		case 'צ':
			$str = $start . 'ץ';
			break;
	}
	return $str;
}
1528
/**
 * Work out which date format preference time(), date() and timeanddate()
 * should use. Child classes that override those methods should call this
 * as well.
 *
 *<code>
 * function timeanddate([...], $format = true) {
 * 	$datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return string the preference name, or 'default' when it is empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		# An explicit format was passed in
		$chosen = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$chosen = $wgUser->getDatePreference();
	} else {
		$chosen = (string)User::getDefaultOption( 'date' );
	}

	return $chosen == '' ? 'default' : $chosen;
}
1566
/**
 * Get a format string for a given type and preference
 *
 * The pseudo-preference 'default' is resolved through
 * getDefaultDateFormat(), and a preference with no entry for $type falls
 * back to the default format. Results are memoised in
 * $this->dateFormatStrings under both the requested and the resolved
 * preference name, so that repeated 'default' or fallback requests do not
 * query the data cache again.
 *
 * @param $type string May be date, time or both
 * @param $pref string The format name as it appears in Messages*.php
 *
 * @return string the format string, or null if the data cache has none
 */
function getDateFormatString( $type, $pref ) {
	# Remember the key the caller asked for; $pref may be replaced below
	$requested = $pref;
	if ( isset( $this->dateFormatStrings[$type][$requested] ) ) {
		return $this->dateFormatStrings[$type][$requested];
	}

	if ( $pref == 'default' ) {
		$pref = $this->getDefaultDateFormat();
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	} else {
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		if ( is_null( $df ) ) {
			# Unknown preference for this type: use the default format
			$pref = $this->getDefaultDateFormat();
			$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		}
	}

	$this->dateFormatStrings[$type][$pref] = $df;
	$this->dateFormatStrings[$type][$requested] = $df;
	return $df;
}
1590
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'date', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
1609
/**
 * Format the time-of-day part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'time', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
1628
/**
 * Format both the date and the time of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$pattern = $this->getDateFormatString( 'both', $preference );
	return $this->sprintfDate( $pattern, $mwTimestamp );
}
1648
/**
 * Look up a single entry of this language's 'messages' item in the
 * data cache.
 *
 * @param $key string sub-item key
 * @return mixed whatever the data cache holds for that key
 */
function getMessage( $key ) {
	$message = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
	return $message;
}
1652
/**
 * Fetch this language's complete 'messages' item from the data cache.
 *
 * @return mixed the cached item
 */
function getAllMessages() {
	$messages = self::$dataCache->getItem( $this->mCode, 'messages' );
	return $messages;
}
1656
/**
 * Wrapper around PHP's iconv() that drops unconvertible characters.
 *
 * This is a plain wrapper in all languages except Esperanto, which does
 * some nasty x-conversions beforehand.
 *
 * @param $in string source encoding
 * @param $out string target encoding ('//IGNORE' is appended)
 * @param $string string text to convert
 * @return mixed result of iconv()
 */
function iconv( $in, $out, $string ) {
	$target = $out . '//IGNORE';

	# Even with //IGNORE iconv can whine about illegal characters in the
	# *input* string, so warnings are silenced around the call.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	wfSuppressWarnings();
	$converted = iconv( $in, $target, $string );
	wfRestoreWarnings();

	return $converted;
}
1670
// callback functions for uc(), lc(), ucwords(), ucwordbreaks()

/**
 * preg_replace_callback() handler: upper-case the first character of
 * capture group 1.
 *
 * @param $matches array regex match data
 * @return string
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
1675
/**
 * preg_replace_callback() handler: upper-case the whole match with
 * mb_strtoupper().
 *
 * @param $matches array regex match data
 * @return string
 */
function ucwordbreaksCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
1679
/**
 * preg_replace_callback() handler: map capture group 1 through the first
 * table returned by getCaseMaps().
 *
 * @param $matches array regex match data
 * @return string
 */
function ucCallback( $matches ) {
	list( $upperMap ) = self::getCaseMaps();
	$fragment = $matches[1];
	return strtr( $fragment, $upperMap );
}
1684
/**
 * preg_replace_callback() handler: map capture group 1 through the second
 * table returned by getCaseMaps().
 *
 * @param $matches array regex match data
 * @return string
 */
function lcCallback( $matches ) {
	list( , $lowerMap ) = self::getCaseMaps();
	$fragment = $matches[1];
	return strtr( $fragment, $lowerMap );
}
1689
/**
 * preg_replace_callback() handler: upper-case the whole match with
 * mb_strtoupper().
 *
 * @param $matches array regex match data
 * @return string
 */
function ucwordsCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
1693
/**
 * preg_replace_callback() handler: map the whole match through the first
 * table returned by getCaseMaps().
 *
 * @param $matches array regex match data
 * @return string
 */
function ucwordsCallbackWiki( $matches ) {
	list( $upperMap ) = self::getCaseMaps();
	$whole = $matches[0];
	return strtr( $whole, $upperMap );
}
1698
/**
 * Make a string's first character uppercase
 *
 * @param $str string
 * @return string
 */
function ucfirst( $str ) {
	$firstByte = ord( $str );

	if ( $firstByte >= 128 ) {
		// Multibyte lead byte: needs the more complex logic in uc()
		return $this->uc( $str, true );
	}
	if ( $firstByte >= 96 ) {
		// Plain ASCII from '`' upwards: PHP's own ucfirst() is enough
		return ucfirst( $str );
	}
	// Everything below 96 (including A-Z and the empty string) is left alone
	return $str;
}
1713
/**
 * Convert a string to uppercase
 *
 * @param $str string
 * @param $first bool when true, only the first character is converted
 * @return string
 */
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// Pure ASCII: the byte-oriented PHP functions are sufficient
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return mb_strtoupper( $str );
		}
		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: fall back to the case tables via ucCallback()
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
1741
/**
 * Make a string's first character lowercase
 *
 * @param $str string
 * @return string
 */
function lcfirst( $str ) {
	$firstByte = ord( $str );

	if ( $firstByte >= 128 ) {
		// Multibyte lead byte: delegate to lc()
		return $this->lc( $str, true );
	}
	if ( $firstByte > 96 ) {
		// Above '`': returned unchanged
		return $str;
	}
	if ( !$firstByte ) {
		// Empty input or a leading NUL byte
		return strval( $str );
	}

	$str[0] = strtolower( $str[0] );
	return $str;
}
1755
/**
 * Convert a string to lowercase
 *
 * @param $str string
 * @param $first bool when true, only the first character is converted
 * @return string
 */
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// Pure ASCII: the byte-oriented PHP functions are sufficient
		if ( $first ) {
			return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
		}
		return strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return mb_strtolower( $str );
		}
		return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: fall back to the case tables via lcCallback()
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
1780
/**
 * Tell whether a string contains any byte in the range 0x80-0xff.
 *
 * @param $str string
 * @return bool
 */
function isMultibyte( $str ) {
	$hasHighByte = preg_match( '/[\x80-\xff]/', $str );
	return (bool)$hasHighByte;
}
1784
/**
 * Lower-case a string, then capitalise the first letter of every word,
 * where words are separated by a single space.
 *
 * @param $str string
 * @return string
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// matches the first letter of the string and of each word after a space
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// pick the callback that capitalises a single match
	$handler = function_exists( 'mb_strtoupper' ) ? 'ucwordsCallbackMB' : 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $handler ), $str );
}
1810
/**
 * Capitalize words at word breaks
 *
 * @param $str string
 * @return string
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		// ASCII only: \b works, so capitalise each word as matched
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// first letter of the string, or first letter after a word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$handler = function_exists( 'mb_strtoupper' ) ? 'ucwordbreaksCallbackMB' : 'ucwordsCallbackWiki';

	return preg_replace_callback( $replaceRegexp, array( $this, $handler ), $str );
}
1843
/**
 * Return a case-folded representation of $s
 *
 * The result is such that caseFold($s1)==caseFold($s2) whenever $s1 and
 * $s2 differ only in the case of their characters. The returned value
 * does not have to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s string
 * @return string currently the upper-cased form produced by uc()
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
1858
/**
 * Make sure a title string is UTF-8: strings that are pure ASCII or
 * already look like UTF-8 are returned unchanged, anything else is
 * converted from the language's fallback 8-bit encoding.
 *
 * @param $s string
 * @return string
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# No high bytes at all: nothing to check
	if ( !preg_match( '/[\x80-\xff]/', $s ) ) {
		return $s;
	}

	# Entire string made of 1- to 4-byte UTF-8 shaped sequences
	$looksLikeUtf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
	if ( $looksLikeUtf8 ) {
		return $s;
	}

	# Non-UTF-8 URL: recode from the legacy encoding
	return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
}
1877
/**
 * Fetch this language's 'fallback8bitEncoding' item from the data cache.
 * checkTitleEncoding() passes the result to iconv() as the source
 * encoding name.
 *
 * @return string
 */
function fallback8bitEncoding() {
	$encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
	return $encoding;
}
1884
/**
 * Whether this language separates words with whitespace.
 *
 * Most writing systems do. Some languages such as Chinese don't
 * conventionally do this, which requires special handling when breaking
 * up words for searching etc.
 *
 * @return bool always true in this base class
 */
function hasWordBreaks() {
	return true;
}
1894
/**
 * Hook for word segmentation. Some languages such as Chinese require it;
 * derived classes specify the segmentation by overriding this method.
 * The base implementation returns its input unchanged.
 *
 * @param $string String
 * @return String
 */
function segmentByWord( $string ) {
	return $string;
}
1905
/**
 * Normalize a string for searching. Some languages have special
 * punctuation that needs to be normalized; the base implementation only
 * applies convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
1916
/**
 * Convert double-width (full-width) roman digits and letters to their
 * single-width ASCII equivalents.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only 0-9, A-Z and a-z are converted. The full-width forms are generated
 * from their code points instead of being typed as a literal, so the
 * table cannot be damaged by a tool that normalises or re-encodes this
 * source file.
 *
 * @param $string String: UTF-8 text
 * @return String
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$full = array();
		$half = array();
		for ( $i = 0, $len = strlen( $halfWidth ); $i < $len; $i++ ) {
			# The full-width form of an ASCII character sits at a fixed
			# offset of 0xFEE0 (U+FF10 for '0', U+FF21 for 'A', ...)
			$code = ord( $halfWidth[$i] ) + 0xFEE0;
			# Three-byte UTF-8 encoding of that code point
			$full[] = chr( 0xE0 | ( $code >> 12 ) )
				. chr( 0x80 | ( ( $code >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $code & 0x3F ) );
			$half[] = $halfWidth[$i];
		}
	}

	$string = str_replace( $full, $half, $string );
	return $string;
}
1935
/**
 * Surround every match of $pattern with spaces, then collapse runs of
 * spaces into a single space.
 *
 * @param $string string text to process
 * @param $pattern string PCRE pattern; its first capture group is kept
 * @return string
 */
protected static function insertSpace( $string, $pattern ) {
	$spaced = preg_replace( $pattern, " $1 ", $string );
	return preg_replace( '/ +/', ' ', $spaced );
}
1941
/**
 * Hook for converting search terms before display. Some languages, e.g.
 * Chinese, need to do a conversion in order for search results to be
 * displayed correctly; the base implementation returns the terms as-is.
 *
 * @param $termsArray array
 * @return array
 */
function convertForSearchResult( $termsArray ) {
	return $termsArray;
}
1947
/**
 * Get the first character of a string.
 *
 * For a Hangul syllable (U+AC00..U+D7A3) the first jamo is returned
 * instead of the syllable itself.
 *
 * @param $s string
 * @return string the first character, or '' if $s does not start with a
 *   sequence shaped like a 1- to 4-byte UTF-8 character
 */
function firstChar( $s ) {
	# Exclusive upper code point bound of each syllable group, mapped to
	# the jamo returned for that group; must stay in ascending order
	static $jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	# Hangul syllables are three bytes long; anything else is final
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		# Not a Hangul syllable
		return $char;
	}

	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}
	# Remaining syllables up to U+D7A3
	return "\xe3\x85\x8e";
}
2005
/**
 * Startup hook for alternate character encodings.
 *
 * Some languages may have an alternate char encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 * If this language is used as the primary content language,
 * an override to the defaults can be set here on startup.
 * The base implementation does nothing.
 */
function initEncoding() {
}
2012
/**
 * Recode text that is about to be shown in the edit box.
 *
 * For some languages we'll want to explicitly specify which characters
 * make it into the edit box raw or are converted in some way or another.
 * Note that if wgOutputEncoding is different from wgInputEncoding, this
 * text will be further converted to wgOutputEncoding.
 *
 * @param $s string UTF-8 text
 * @return string $s itself when $wgEditEncoding is empty or 'UTF-8',
 *   otherwise $s converted to $wgEditEncoding
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$keepAsIs = ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	return $keepAsIs ? $s : $this->iconv( 'UTF-8', $wgEditEncoding, $s );
}
2027
/**
 * Recode submitted text back to UTF-8; the counterpart of
 * recodeForEdit().
 *
 * @param $s string text in $wgEditEncoding
 * @return string $s itself when $wgEditEncoding is empty or 'UTF-8',
 *   otherwise $s converted from $wgEditEncoding to UTF-8
 */
function recodeInput( $s ) {
	global $wgEditEncoding;

	# An unset edit encoding means UTF-8
	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

	return ( $enc == 'UTF-8' ) ? $s : $this->iconv( $enc, 'UTF-8', $s );
}
2042
/**
 * Convert a UTF-8 string to normal form C. When $wgAllUnicodeFixes is
 * set, the Arabic and Malayalam pair files are applied as well, which
 * converts certain backwards-compatible sequences to the modern Unicode
 * equivalent.
 *
 * This is language-specific for performance reasons only.
 *
 * @param $s string
 * @return string
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}
	return $s;
}
2060
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param $file string name of the precompiled data file
 * @param $string string text to transform
 * @return string
 * @throws MWException if the data file cannot be loaded
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	$data = wfGetPrecompiledData( $file );
	if ( $data === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}

	# Keep the replacer on the object so the file is only loaded once
	$replacer = new ReplacementArray( $data );
	$this->transformData[$file] = $replacer;
	return $replacer->replace( $string );
}
2079
/**
 * For right-to-left language support: fetch this language's 'rtl' item
 * from the data cache.
 *
 * @return bool
 */
function isRTL() {
	$rtl = self::$dataCache->getItem( $this->mCode, 'rtl' );
	return $rtl;
}
2088
/**
 * Return the correct HTML 'dir' attribute value for this language.
 * @return String 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}
	return 'ltr';
}
2096
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
2108
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
2120
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction
 *
 * @return string U+200F for right-to-left languages, U+200E otherwise,
 *   UTF-8 encoded
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
2129
/**
 * Fetch this language's 'capitalizeAllNouns' item from the data cache.
 *
 * @return mixed NOTE(review): previously documented as array; the item
 *   name suggests a boolean flag. Confirm against the data cache contents.
 */
function capitalizeAllNouns() {
	$setting = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
	return $setting;
}
2136
/**
 * An arrow, depending on the language direction
 *
 * @return string a left-pointing arrow for right-to-left languages, a
 *   right-pointing arrow otherwise
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
2145
2146 /**
2147 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
2148 *
2149 * @return bool
2150 */
function linkPrefixExtension() {
	// Read the 'linkPrefixExtension' localisation item for this language code.
	$enabled = self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
	return $enabled;
}
2154
2155 /**
2156 * @return array
2157 */
function getMagicWords() {
	// Read the 'magicWords' localisation item for this language code.
	$words = self::$dataCache->getItem( $this->mCode, 'magicWords' );
	return $words;
}
2161
/**
 * Run the LanguageGetMagic hook at most once per Language instance,
 * letting handlers fill $this->mMagicExtensions.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		// The flag is raised before the hook runs, so a second call never re-runs it.
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( 'LanguageGetMagic' );
	}
}
2171
2172 # Fill a MagicWord object with data from here
function getMagic( $mw ) {
	$this->doMagicHook();

	// Hook-supplied entries take precedence over the language's own magic words.
	$id = $mw->mId;
	$rawEntry = false;
	if ( isset( $this->mMagicExtensions[$id] ) ) {
		$rawEntry = $this->mMagicExtensions[$id];
	} else {
		$magicWords = $this->getMagicWords();
		if ( isset( $magicWords[$id] ) ) {
			$rawEntry = $magicWords[$id];
		}
	}

	if ( is_array( $rawEntry ) ) {
		// First element is the case-sensitivity flag, the rest are the synonyms.
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
		return;
	}

	error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
}
2194
2195 /**
2196 * Add magic words to the extension array
2197 */
function addMagicWordsByLang( $newWords ) {
	// Follow the fallback chain from this language until it ends or starts repeating.
	$chain = array();
	for ( $lang = $this->getCode();
		$lang && !in_array( $lang, $chain );
		$lang = self::getFallbackFor( $lang )
	) {
		$chain[] = $lang;
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	// Apply the least specific language first; with the array union below,
	// entries of languages applied later take precedence.
	foreach ( array_reverse( $chain ) as $lang ) {
		if ( isset( $newWords[$lang] ) ) {
			$this->mMagicExtensions = $newWords[$lang] + $this->mMagicExtensions;
		}
	}
}
2215
2216 /**
2217 * Get special page names, as an associative array
2218 * case folded alias => real name
2219 */
function getSpecialPageAliases() {
	// Already built on an earlier call: reuse it, loading may be slow.
	if ( $this->mExtendedSpecialPageAliases !== null ) {
		return $this->mExtendedSpecialPageAliases;
	}

	// Start from the localisation cache, then let hook handlers modify the array in place.
	$this->mExtendedSpecialPageAliases =
		self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
	wfRunHooks(
		'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
	);

	return $this->mExtendedSpecialPageAliases;
}
2232
2233 /**
2234 * Italic is unsuitable for some languages
2235 *
2236 * @param $text String: the text to be emphasized.
2237 * @return string
2238 */
function emphasize( $text ) {
	// Default emphasis markup: wrap the text in an <em> element.
	return '<em>' . $text . '</em>';
}
2242
2243 /**
2244 * Normally we output all numbers in plain en_US style, that is
2245 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
2246 * point twohundredthirtyfive. However this is not suitable for all
2247 * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
2248 * Icelandic just want to use commas instead of dots, and dots instead
2249 * of commas like "293.291,235".
2250 *
2251 * An example of this function being called:
2252 * <code>
2253 * wfMsg( 'message', $wgLang->formatNum( $num ) )
2254 * </code>
2255 *
2256 * See LanguageGu.php for the Gujarati implementation and
2257 * $separatorTransformTable on MessageIs.php for
2258 * the , => . and . => , implementation.
2259 *
2260 * @todo check if it's viable to use localeconv() for the decimal
2261 * separator thing.
2262 * @param $number Mixed: the string to be formatted, should be an integer
2263 * or a floating point number.
2264 * @param $nocommafy Bool: set to true for special numbers like dates
2265 * @return string
2266 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	// Step 1 (skippable): group the digits, then swap separators per language.
	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	// Step 2 (site-configurable): replace the digits with the language's own numerals.
	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
2286
/**
 * Undo the transformations of formatNum(): map localised digits and
 * separators back through the flipped tables, then strip the commas.
 *
 * @param $number String: formatted number
 * @return string
 */
function parseFormattedNumber( $number ) {
	$digits = $this->digitTransformTable();
	if ( $digits ) {
		$number = strtr( $number, array_flip( $digits ) );
	}

	$separators = $this->separatorTransformTable();
	if ( $separators ) {
		$number = strtr( $number, array_flip( $separators ) );
	}

	// Whatever commas remain are thousands separators; drop them.
	return str_replace( ',', '', $number );
}
2301
2302 /**
2303 * Adds commas to a given number
2304 *
2305 * @param $_ mixed
2306 * @return string
2307 */
function commafy( $_ ) {
	// Work on the reversed string so digit groups can be counted from the right.
	// The negative lookahead skips digit runs that are followed by a '.', i.e.
	// (in reversed order) the digits after the decimal point.
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
2311
function digitTransformTable() {
	// Read the 'digitTransformTable' localisation item for this language code.
	$table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
	return $table;
}
2315
function separatorTransformTable() {
	// Read the 'separatorTransformTable' localisation item for this language code.
	$table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
	return $table;
}
2319
2320 /**
2321 * Take a list of strings and build a locale-friendly comma-separated
2322 * list, using the local comma-separator message.
2323 * The last two strings are chained with an "and".
2324 *
2325 * @param $l Array
2326 * @return string
2327 */
function listToText( $l ) {
	// Re-index first: the positional logic below needs keys 0..n-1, and the
	// caller's array may have gaps or string keys.
	$items = array_values( $l );
	$last = count( $items ) - 1;
	if ( $last < 0 ) {
		return ''; // empty list
	}

	$text = $items[$last];
	if ( $last === 0 ) {
		return $text; // single item, no separator messages needed
	}

	// The final two items are joined with "and" followed by the word separator.
	$and = $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' );
	$text = $items[$last - 1] . $and . $text;

	// Everything before them is prepended with the comma separator,
	// which is looked up once rather than on every iteration.
	if ( $last > 1 ) {
		$comma = $this->getMessageFromDB( 'comma-separator' );
		for ( $i = $last - 2; $i >= 0; $i-- ) {
			$text = $items[$i] . $comma . $text;
		}
	}

	return $text;
}
2346
2347 /**
2348 * Take a list of strings and build a locale-friendly comma-separated
2349 * list, using the local comma-separator message.
2350 * @param $list array of strings to put in a comma list
2351 * @return string
2352 */
function commaList( $list ) {
	$separator = wfMsgExt(
		'comma-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the legacy implode( $array, $separator ) argument order
	// is no longer accepted as of PHP 8.0.
	return implode( $separator, $list );
}
2362
2363 /**
2364 * Take a list of strings and build a locale-friendly semicolon-separated
2365 * list, using the local semicolon-separator message.
2366 * @param $list array of strings to put in a semicolon list
2367 * @return string
2368 */
function semicolonList( $list ) {
	$separator = wfMsgExt(
		'semicolon-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the legacy implode( $array, $separator ) argument order
	// is no longer accepted as of PHP 8.0.
	return implode( $separator, $list );
}
2378
2379 /**
2380 * Same as commaList, but separate it with the pipe instead.
2381 * @param $list array of strings to put in a pipe list
2382 * @return string
2383 */
function pipeList( $list ) {
	$separator = wfMsgExt(
		'pipe-separator',
		array( 'escapenoentities', 'language' => $this )
	);
	// Separator first: the legacy implode( $array, $separator ) argument order
	// is no longer accepted as of PHP 8.0.
	return implode( $separator, $list );
}
2393
2394 /**
2395 * Truncate a string to a specified length in bytes, appending an optional
2396 * string (e.g. for ellipses)
2397 *
2398 * The database offers limited byte lengths for some columns in the database;
2399 * multi-byte character sets mean we need to ensure that only whole characters
2400 * are included, otherwise broken characters can be passed to the user
2401 *
2402 * If $length is negative, the string will be truncated from the beginning
2403 *
2404 * @param $string String to truncate
2405 * @param $length Int: maximum length (including ellipses)
2406 * @param $ellipsis String to append to the truncated text
2407 * @param $adjustLength Boolean: Subtract length of ellipsis from $length.
2408 * $adjustLength was introduced in 1.18, before that behaved as if false.
2409 * @return string
2410 */
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
	# The default ellipsis is swapped for the localized one
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Trivial cases first
	if ( $length == 0 ) {
		return $ellipsis; // convention
	}
	$limit = abs( $length );
	if ( strlen( $string ) <= $limit ) {
		return $string; // already short enough
	}

	$ellipsisLen = strlen( $ellipsis );
	if ( $adjustLength && $ellipsisLen >= $limit ) {
		# No room for any text next to the ellipsis; this can be slightly unexpected
		$result = $ellipsis;
	} else {
		# Bytes reserved for the ellipsis (none unless $adjustLength)
		$reserved = $adjustLength ? $ellipsisLen : 0;
		if ( $length > 0 ) {
			# Keep the head: xyz...
			$head = substr( $string, 0, $length - $reserved );
			$result = $this->removeBadCharLast( $head ) . $ellipsis;
		} else {
			# Keep the tail: ...xyz
			$tail = substr( $string, $length + $reserved );
			$result = $ellipsis . $this->removeBadCharFirst( $tail );
		}
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
	# Still needed with $adjustLength: when the ellipsis alone is longer than
	# the limit, the original may be shorter than the "truncated" result.
	return strlen( $result ) < strlen( $string ) ? $result : $string;
}
2450
2451 /**
2452 * Remove bytes that represent an incomplete Unicode character
2453 * at the end of string (e.g. bytes of the char are missing)
2454 *
2455 * @param $string String
2456 * @return string
2457 */
protected function removeBadCharLast( $string ) {
	if ( $string != '' ) {
		$char = ord( $string[strlen( $string ) - 1] );
		$m = array();
		if ( $char >= 0xc0 ) {
			# We got the first byte only of a multibyte char; remove it.
			$string = substr( $string, 0, -1 );
		} elseif ( $char >= 0x80 &&
			# The /s modifier (PCRE_DOTALL) lets (.*) match across newlines;
			# without it a chopped character at the end of a multi-line string
			# was never detected.
			preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
				'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
		{
			# We chopped in the middle of a character; remove it
			$string = $m[1];
		}
	}
	return $string;
}
2475
2476 /**
2477 * Remove bytes that represent an incomplete Unicode character
2478 * at the start of string (e.g. bytes of the char are missing)
2479 *
2480 * @param $string String
2481 * @return string
2482 */
protected function removeBadCharFirst( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$lead = ord( $string[0] );
	if ( $lead >= 0x80 && $lead < 0xc0 ) {
		# The string starts in the middle of a character: drop every byte
		# in the 0x80-0xbf range found at the start.
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
2493
2494 /**
2495 * Truncate a string of valid HTML to a specified length in bytes,
2496 * appending an optional string (e.g. for ellipses), and return valid HTML
2497 *
2498 * This is only intended for styled/linked text, such as HTML with
2499 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
2500 * Also, this will not detect things like "display:none" CSS.
2501 *
2502 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
2503 *
2504 * @param string $text HTML string to truncate
2505 * @param int $length (zero/positive) Maximum length (including ellipses)
2506 * @param string $ellipsis String to append to the truncated text
2507 * @return string
2508 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}
	# Check if there is clearly no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format (convention)
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML (short-circuit)
	}

	$displayLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = $pRet = ''; // accumulated tag name, accumulated result string, saved result
	$openTags = array(); // open tag stack
	$pOpenTags = array(); // open tag stack saved at the candidate truncation point

	$textLen = strlen( $text );
	$neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
	for ( $pos = 0; true; ++$pos ) {
		# Consider truncation once the display length has reached the maximum.
		# Check that we're not in the middle of a bracket/entity...
		if ( $displayLen >= $neLength && $bracketState == 0 && $entityState == 0 ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless there turn out to
				# be so few remaining characters that truncation is not necessary.
				$pOpenTags = $openTags; // save state
				$pRet = $ret; // save state
			} elseif ( $displayLen > $length && $displayLen > strlen( $ellipsis ) ) {
				# String in fact does need truncation, the truncation point was OK.
				$openTags = $pOpenTags; // reload state
				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
		if ( $pos >= $textLen ) break; // extra iteration just for above checks

		# Read the next char...
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (next char decides open vs. close tag)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$displayLen++; // entity is one displayed char
				}
			} else {
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$displayLen++; // this char is displayed
					// Add the next $max display text chars after this in one swoop...
					$max = ( $testingEllipsis ? $length : $neLength ) - $displayLen;
					$skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
					$displayLen += $skipped;
					$pos += $skipped;
				}
			}
		}
	}
	if ( $displayLen == 0 ) {
		return ''; // no text shown, nothing to format
	}
	// Close the last tag if left unclosed by bad HTML.
	// Argument order follows truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags );
	// the tag type and the last character used to be passed the wrong way round.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
2609
2610 // truncateHtml() helper function
2611 // like strcspn() but adds the skipped chars to $ret
private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
	// $ret     (by reference) result string the skipped bytes are appended to
	// $text    string being scanned
	// $search  bytes that stop the scan (strcspn() mask)
	// $start   offset in $text where scanning begins
	// $len     maximum number of bytes to scan; null means "up to the end of $text"
	// Returns the number of bytes skipped (and appended to $ret).
	$skipCount = 0;
	if ( $start < strlen( $text ) ) {
		if ( $len === null ) {
			// No limit: leave the length argument out. A negative length does
			// NOT mean "unlimited" to strcspn(); it stops that many bytes
			// before the end, so -1 never examined the last byte.
			$skipCount = strcspn( $text, $search, $start );
		} else {
			// Negative limits are treated as zero (sanity)
			$skipCount = strcspn( $text, $search, $start, max( 0, $len ) );
		}
		$ret .= substr( $text, $start, $skipCount );
	}
	return $skipCount;
}
2625
2626 /**
2627 * truncateHtml() helper function
2628 * (a) push or pop $tag from $openTags as needed
2629 * (b) clear $tag value
2630 * @param String &$tag Current HTML tag name we are looking at
2631 * @param int $tagType (0-open tag, 1-close tag)
2632 * @param char $lastCh Character before the '>' that ended this tag
2633 * @param array &$openTags Open tag stack (not accounting for $tag)
2634 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return; // no tag name accumulated, nothing to record
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		// Opening tag that did not close itself ("<br/>" style): push it
		$openTags[] = $tag;
	} elseif ( $tagType == 1 ) {
		// Closing tag: pop only when it matches the innermost open tag
		$top = count( $openTags ) - 1;
		if ( $openTags && $tag == $openTags[$top] ) {
			array_pop( $openTags );
		}
	}
	$tag = ''; // reset the accumulator for the next tag
}
2648
2649 /**
2650 * Grammatical transformations, needed for inflected languages
2651 * Invoked by putting {{grammar:case|word}} in a message
2652 *
2653 * @param $word string
2654 * @param $case string
2655 * @return string
2656 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	// Only explicitly configured forms are applied; anything else is returned unchanged.
	$code = $this->getCode();
	if ( !isset( $wgGrammarForms[$code][$case][$word] ) ) {
		return $word;
	}
	return $wgGrammarForms[$code][$case][$word];
}
2664
2665 /**
2666 * Provides an alternative text depending on specified gender.
2667 * Usage {{gender:username|masculine|feminine|neutral}}.
2668 * username is optional, in which case the gender of current user is used,
2669 * but only in (some) interface messages; otherwise default gender is used.
2670 * If second or third parameter are not specified, masculine is used.
2671 * These details may be overridden per language.
2672 */
function gender( $gender, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	// Guarantee at least two forms (masculine, feminine).
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $gender === 'male' ) {
		$index = 0;
	} elseif ( $gender === 'female' ) {
		$index = 1;
	} else {
		// Any other value: third form when given, otherwise the first one
		$index = isset( $forms[2] ) ? 2 : 0;
	}
	return $forms[$index];
}
2686
2687 /**
2688 * Plural form transformations, needed for some languages.
2689 * For example, there are 3 forms of plural in Russian and Polish,
2690 * depending on "count mod 10". See [[w:Plural]]
2691 * For English it is pretty simple.
2692 *
2693 * Invoked by putting {{plural:count|wordform1|wordform2}}
2694 * or {{plural:count|wordform1|wordform2|wordform3}}
2695 *
2696 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
2697 *
2698 * @param $count Integer: non-localized number
2699 * @param $forms Array: different plural forms
2700 * @return string Correct form of plural for $count in this language
2701 */
function convertPlural( $count, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	// Guarantee two forms, then pick the first for a count of one and the second otherwise.
	$forms = $this->preConvertPlural( $forms, 2 );
	if ( $count == 1 ) {
		return $forms[0];
	}
	return $forms[1];
}
2710
2711 /**
2712 * Checks that convertPlural was given an array and pads it to requested
2713 * amount of forms by copying the last one.
2714 *
2715 * @param $count Integer: How many forms should there be at least
2716 * @param $forms Array of forms given to convertPlural
2717 * @return array Padded array of forms or an exception if not an array
2718 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	// Append copies of the element at the last position until $count forms exist.
	for ( $have = count( $forms ); $have < $count; $have++ ) {
		$forms[] = $forms[$have - 1];
	}
	return $forms;
}
2725
2726 /**
2727 * Maybe translate block durations. Note that this function is somewhat misnamed: it
2728 * deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
2729 * (which is an absolute timestamp).
2730 * @param $str String: the validated block duration in English
2731 * @return Somehow translated block duration
2732 * @see LanguageFi.php for example implementation
2733 */
function translateBlockExpiry( $str ) {
	$duration = SpecialBlock::getSuggestedDurations( $this );

	// Exact match against one of the suggested durations: show its label.
	foreach ( $duration as $label => $value ) {
		if ( strcmp( $str, $value ) == 0 ) {
			return htmlspecialchars( trim( $label ) );
		}
	}

	// The list usually carries only one spelling of "forever", so when the
	// input is one of the known synonyms, try each of them against the list.
	$indefs = array( 'infinite', 'infinity', 'indefinite' );
	if ( !in_array( $str, $indefs ) ) {
		return $str; // nothing matched, return the original string
	}
	foreach ( $indefs as $synonym ) {
		$label = array_search( $synonym, $duration, true );
		if ( $label !== false ) {
			return htmlspecialchars( trim( $label ) );
		}
	}

	// If all else fails, return the original string.
	return $str;
}
2756
2757 /**
2758 * languages like Chinese need to be segmented in order for the diff
2759 * to be of any use
2760 *
2761 * @param $text String
2762 * @return String
2763 */
function segmentForDiff( $text ) {
	// Base implementation: no segmentation, the text comes back untouched.
	$segmented = $text;
	return $segmented;
}
2767
2768 /**
2769 * and unsegment to show the result
2770 *
2771 * @param $text String
2772 * @return String
2773 */
function unsegmentForDiff( $text ) {
	// Base implementation: nothing was segmented, so nothing is undone.
	$unsegmented = $text;
	return $unsegmented;
}
2777
2778 # convert text to all supported variants
function autoConvertToAllVariants( $text ) {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
2782
2783 # convert text to different variants of a language.
function convert( $text ) {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
2787
2788 # Convert a Title object to a string in the preferred variant
function convertTitle( $title ) {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
2792
2793 # Check if this is a language with variants
function hasVariants() {
	// A language "has variants" when getVariants() reports more than one code.
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
2797
2798 # Put custom tags (e.g. -{ }-) around math to prevent conversion
function armourMath( $text ) {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
2802
2803 /**
2804 * Perform output conversion on a string, and encode for safe HTML output.
2805 * @param $text String text to be converted
2806 * @param $isTitle Bool whether this conversion is for the article title
2807 * @return string
2808 * @todo this should get integrated somewhere sane
2809 */
function convertHtml( $text, $isTitle = false ) {
	// Convert first, escape second, so the converted text is what gets HTML-encoded.
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
2813
2814 /**
2815 * @param $key string
2816 * @return string
2817 */
function convertCategoryKey( $key ) {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
2821
2822 /**
2823 * Get the list of variants supported by this language
2824 * see sample implementation in LanguageZh.php
2825 *
2826 * @return array an array of language codes
2827 */
function getVariants() {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->getVariants();
}
2831
2832 /**
2833 * @return string
2834 */
function getPreferredVariant() {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
2838
2839 /**
2840 * @return string
2841 */
function getDefaultVariant() {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
2845
2846 /**
2847 * @return string
2848 */
function getURLVariant() {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
2852
2853 /**
2854 * If a language supports multiple variants, it is
2855 * possible that non-existing link in one variant
2856 * actually exists in another variant. this function
2857 * tries to find it. See e.g. LanguageZh.php
2858 *
2859 * @param $link String: the name of the link
2860 * @param $nt Mixed: the title object of the link
2861 * @param $ignoreOtherCond Boolean: to disable other conditions when
2862 * we need to transclude a template or update a category's link
2863 * @return null the input parameters may be modified upon return
2864 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// $link and $nt are passed on by reference, so whatever the converter
	// changes is visible to our caller; nothing is returned.
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
2868
2869 /**
2870 * If a language supports multiple variants, converts text
2871 * into an array of all possible variants of the text:
2872 * 'variant' => text in that variant
2873 *
2874 * @deprecated since 1.17 Use autoConvertToAllVariants()
2875 */
function convertLinkToAllVariants( $text ) {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
2879
2880 /**
2881 * returns language specific options used by User::getPageRenderHash()
2882 * for example, the preferred language variant
2883 *
2884 * @return string
2885 */
function getExtraHashOptions() {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
2889
2890 /**
2891 * For languages that support multiple variants, the title of an
2892 * article may be displayed differently in different variants. this
2893 * function returns the appropriate title defined in the body of the article.
2894 *
2895 * @return string
2896 */
function getParsedTitle() {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
2900
2901 /**
2902 * Enclose a string with the "no conversion" tag. This is used by
2903 * various functions in the Parser
2904 *
2905 * @param $text String: text to be tagged for no conversion
2906 * @param $noParse
2907 * @return string the tagged text
2908 */
function markNoConversion( $text, $noParse = false ) {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
2912
2913 /**
2914 * A regular expression to match legal word-trailing characters
2915 * which should be merged onto a link of the form [[foo]]bar.
2916 *
2917 * @return string
2918 */
function linkTrail() {
	// Read the 'linkTrail' localisation item for this language code.
	$pattern = self::$dataCache->getItem( $this->mCode, 'linkTrail' );
	return $pattern;
}
2922
2923 /**
2924 * @return Language
2925 */
function getLangObj() {
	// This object is itself the language object.
	$language = $this;
	return $language;
}
2929
2930 /**
2931 * Get the RFC 3066 code for this language object
2932 */
function getCode() {
	// Plain accessor for the code stored by setCode().
	$code = $this->mCode;
	return $code;
}
2936
/**
 * Set the language code of this object; the value is stored as given.
 *
 * @param $code string
 */
function setCode( $code ) {
	$this->mCode = $code;
}
2940
2941 /**
2942 * Get the name of a file for a certain language code
2943 * @param $prefix string Prepend this to the filename
2944 * @param $code string Language code
2945 * @param $suffix string Append this to the filename
2946 * @return string $prefix . $mangledCode . $suffix
2947 */
static function getFileName( $prefix, $code, $suffix = '.php' ) {
	// $prefix used to carry a default value, but it sits before the required
	// $code, so no call could ever rely on it; PHP 8 reports such signatures
	// as deprecated. Every caller already has to pass both arguments.

	// Protect against path traversal
	if ( !Language::isValidCode( $code )
		|| strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
	{
		throw new MWException( "Invalid language code \"$code\"" );
	}

	// "zh-hans" becomes "Zh_hans": first letter upper-cased, dashes to underscores
	return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
}
2958
2959 /**
2960 * Get the language code from a file name. Inverse of getFileName()
2961 * @param $filename string $prefix . $languageCode . $suffix
2962 * @param $prefix string Prefix before the language code
2963 * @param $suffix string Suffix after the language code
2964 * @return string Language code, or false if $prefix or $suffix isn't found
2965 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	// The mangled code sits between the literal prefix and suffix:
	// one capital letter followed by lower-case letters or underscores.
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';

	$matches = null;
	preg_match( $pattern, $filename, $matches );
	if ( count( $matches ) === 0 ) {
		return false;
	}

	// Undo the mangling done by getFileName(): lower-case, underscores back to dashes
	$mangled = strtolower( $matches[1] );
	return str_replace( '_', '-', $mangled );
}
2975
2976 /**
2977 * @param $code string
2978 * @return string
2979 */
static function getMessagesFileName( $code ) {
	global $IP;
	// Messages files live under languages/messages/ in the install path.
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
2984
2985 /**
2986 * @param $code string
2987 * @return string
2988 */
static function getClassFileName( $code ) {
	global $IP;
	// Language class files live under languages/classes/ in the install path.
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
2993
2994 /**
2995 * Get the fallback for a given language
2996 *
2997 * @return false|string
2998 */
static function getFallbackFor( $code ) {
	if ( $code !== 'en' ) {
		return self::getLocalisationCache()->getItem( $code, 'fallback' );
	}
	// Shortcut: 'en' never has a fallback, so the cache is not consulted.
	return false;
}
3007
3008 /**
3009 * Get all messages for a given language
3010 * WARNING: this may take a long time
3011 */
static function getMessagesFor( $code ) {
	// The whole 'messages' item of the given language, straight from the localisation cache.
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
3015
3016 /**
3017 * Get a message for a given language
3018 */
static function getMessageFor( $key, $code ) {
	// A single entry of the 'messages' item, fetched as a sub-item of the localisation cache.
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
3022
/**
 * Expand the "$1" placeholder of a namespace name with $wgMetaNamespace,
 * apply any {{grammar:case|word}} constructs, and turn spaces into underscores.
 * Names without "$1" are returned untouched.
 *
 * @param $talk String: namespace name, possibly containing "$1"
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback is a plain array( $this, ... ) callable: taking a reference
	# to $this is a PHP 4 leftover and is not needed for objects.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
3039
/**
 * Callback for the {{grammar:...|...}} pattern used by fixVariableInNamespace().
 *
 * @param $m Array: regex match; $m[1] is passed on as the case, $m[2] as the word
 * @return string
 */
function replaceGrammarInNamespace( $m ) {
	$case = trim( $m[1] );
	$word = trim( $m[2] );
	return $this->convertGrammar( $word, $case );
}
3043
/**
 * Load the upper/lower case mapping tables from Utf8Case.ser.
 * The tables are kept in static variables, so the file is read once per request.
 *
 * @return array ( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException when the precompiled file is missing
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before bailing out, so the
		// wfProfileIn() above is not left without its wfProfileOut().
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );
	return array( $wikiUpperChars, $wikiLowerChars );
}
3061
3062 /**
3063 * Decode an expiry (block, protection, etc) which has come from the DB
3064 *
3065 * @param $expiry String: Database expiry String
3066 * @param $format Bool|Int true to process using language functions, or TS_ constant
3067 * to return the expiry in a given timestamp
3068 * @return String
3069 */
public function formatExpiry( $expiry, $format = true ) {
	// Both values are looked up on the first call only and then reused.
	static $infinity, $infinityMsg;
	if ( $infinity === null ) {
		$infinityMsg = wfMessage( 'infiniteblock' );
		$infinity = wfGetDB( DB_SLAVE )->getInfinity();
	}

	// An empty expiry is treated like the database's infinity value.
	$isInfinite = ( $expiry == '' || $expiry == $infinity );

	if ( $format === true ) {
		// Language-formatted output
		return $isInfinite ? $infinityMsg : $this->timeanddate( $expiry );
	}
	// Otherwise $format is handed to wfTimestamp() as the target format
	return $isInfinite ? $infinity : wfTimestamp( $format, $expiry );
}
3087
3088 /**
3089 * @todo Document
3090 * @param $seconds String
3091 * @return string
3092 */
function formatTimePeriod( $seconds ) {
	// Below ten seconds: show one decimal place.
	if ( round( $seconds * 10 ) < 100 ) {
		$tenths = round( $seconds * 10 ) / 10;
		return $this->formatNum( sprintf( "%.1f", $tenths ) ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	// Below one minute: whole seconds.
	if ( round( $seconds ) < 60 ) {
		return $this->formatNum( round( $seconds ) ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	// Below one hour: minutes and seconds.
	if ( round( $seconds ) < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		if ( $secondsPart == 60 ) {
			// Rounding pushed the seconds up to a full minute; carry it over
			$secondsPart = 0;
			$minutes++;
		}
		return $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
			$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	// One hour or more: hours, minutes and seconds.
	$hours = floor( $seconds / 3600 );
	$remainder = $seconds - $hours * 3600;
	$minutes = floor( $remainder / 60 );
	$secondsPart = round( $remainder - $minutes * 60 );
	if ( $secondsPart == 60 ) {
		// Carry rounded-up seconds into the minutes...
		$secondsPart = 0;
		$minutes++;
	}
	if ( $minutes == 60 ) {
		// ...and a full sixty minutes into the hours
		$minutes = 0;
		$hours++;
	}
	return $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ' .
		$this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
		$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
}
3124
/**
 * Format a bit rate for output, using the largest fitting unit
 * (bps, kbps, Mbps or Gbps; powers of 1000).
 *
 * @param $bps Number: bits per second
 * @return string
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		return $this->formatNum( $bps ) . $units[0];
	}

	// Clamp the unit index to the table: rates below 1 bps would otherwise
	// give -1 and rates of 1000 Gbps or more would run past 'Gbps'.
	$unitIndex = (int)floor( log10( $bps ) / 3 );
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );

	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		// Single-digit values keep one decimal place
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
3139
3140 /**
3141 * Format a size in bytes for output, using an appropriate
3142 * unit (B, KB, MB or GB) according to the magnitude in question
3143 *
3144 * @param $size Size to format
3145 * @return string Plain text (not HTML)
3146 */
function formatSize( $size ) {
	// Message keys ordered by magnitude; each step up divides the size by 1024.
	$messages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );

	$step = 0;
	while ( $step < 3 && $size > 1024 ) {
		$size = $size / 1024;
		$step++;
	}

	// Bytes and kilobytes are shown without decimals, MB and bigger with two.
	$round = ( $step >= 2 ) ? 2 : 0;
	$size = round( $size, $round );

	$text = $this->getMessageFromDB( $messages[$step] );
	return str_replace( '$1', $this->formatNum( $size ), $text );
}
3172
3173 /**
3174 * Get the conversion rule title, if any.
3175 */
function getConvRuleTitle() {
	// Delegated to the converter object attached to this language.
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
3179 }