Kill some more long deprecated unused functions
[lhc/web/wiklou.git] / languages / Language.php
1 <?php
2 /**
3 * Internationalisation code
4 *
5 * @file
6 * @ingroup Language
7 */
8
9 /**
10 * @defgroup Language Language
11 */
12
// This file only makes sense when loaded by MediaWiki itself.
if ( !defined( 'MEDIAWIKI' ) ) {
	print "This file is part of MediaWiki, it is not a valid entry point.\n";
	exit( 1 );
}

# Read language names
global $wgLanguageNames;
require_once( dirname( __FILE__ ) . '/Names.php' );

/**
 * Input and output encodings are always UTF-8; the two globals are kept
 * only for backwards compatibility.
 */
global $wgInputEncoding, $wgOutputEncoding;
$wgInputEncoding = 'UTF-8';
$wgOutputEncoding = 'UTF-8';

// Only touch mbstring when the extension is actually available
if ( function_exists( 'mb_strtoupper' ) ) {
	mb_internal_encoding( 'UTF-8' );
}
33
/**
 * A fake language converter.
 *
 * Every method either hands its input back unchanged or returns a trivial
 * default derived from the code of the wrapped language object.
 *
 * @ingroup Language
 */
class FakeConverter {
	/** Language object this converter was created for */
	public $mLang;

	public function __construct( $langobj ) {
		$this->mLang = $langobj;
	}

	/** The only "variant" is the language itself, mapped to the untouched text */
	public function autoConvertToAllVariants( $text ) {
		$code = $this->mLang->getCode();
		return array( $code => $text );
	}

	public function convert( $t ) {
		return $t;
	}

	/** $t must offer getPrefixedText() */
	public function convertTitle( $t ) {
		return $t->getPrefixedText();
	}

	public function getVariants() {
		$code = $this->mLang->getCode();
		return array( $code );
	}

	public function getPreferredVariant() {
		return $this->mLang->getCode();
	}

	public function getDefaultVariant() {
		return $this->mLang->getCode();
	}

	public function getURLVariant() {
		return '';
	}

	public function getConvRuleTitle() {
		return false;
	}

	/** Deliberately does nothing: both reference arguments are left as they are */
	public function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
	}

	public function getExtraHashOptions() {
		return '';
	}

	public function getParsedTitle() {
		return '';
	}

	public function markNoConversion( $text, $noParse = false ) {
		return $text;
	}

	public function convertCategoryKey( $key ) {
		return $key;
	}

	public function convertLinkToAllVariants( $text ) {
		return $this->autoConvertToAllVariants( $text );
	}

	public function armourMath( $text ) {
		return $text;
	}
}
58
59 /**
60 * Internationalisation code
61 * @ingroup Language
62 */
63 class Language {
public $mConverter;
public $mVariants;
public $mCode;
public $mLoaded = false;
public $mMagicExtensions = array();
public $mMagicHookDone = false;

public $mNamespaceIds;
public $namespaceNames;
public $namespaceAliases;
public $dateFormatStrings = array();
public $mExtendedSpecialPageAliases;

/**
 * ReplacementArray object caches
 */
public $transformData = array();

/**
 * @var LocalisationCache
 */
public static $dataCache;

/** Language objects already built by factory(), keyed by language code */
public static $mLangObjCache = array();

/** Message keys of the weekday names, Sunday first */
public static $mWeekdayMsgs = array(
	'sunday',
	'monday',
	'tuesday',
	'wednesday',
	'thursday',
	'friday',
	'saturday'
);

/** Message keys of the abbreviated weekday names, Sunday first */
public static $mWeekdayAbbrevMsgs = array(
	'sun',
	'mon',
	'tue',
	'wed',
	'thu',
	'fri',
	'sat'
);

/** Message keys of the month names */
public static $mMonthMsgs = array(
	'january', 'february', 'march', 'april',
	'may_long', 'june', 'july', 'august',
	'september', 'october', 'november', 'december'
);

/** Message keys of the genitive month names */
public static $mMonthGenMsgs = array(
	'january-gen', 'february-gen', 'march-gen', 'april-gen',
	'may-gen', 'june-gen', 'july-gen', 'august-gen',
	'september-gen', 'october-gen', 'november-gen', 'december-gen'
);

/** Message keys of the abbreviated month names */
public static $mMonthAbbrevMsgs = array(
	'jan', 'feb', 'mar', 'apr', 'may', 'jun',
	'jul', 'aug', 'sep', 'oct', 'nov', 'dec'
);

/** Message keys of the Iranian calendar month names */
public static $mIranianCalendarMonthMsgs = array(
	'iranian-calendar-m1', 'iranian-calendar-m2',
	'iranian-calendar-m3', 'iranian-calendar-m4',
	'iranian-calendar-m5', 'iranian-calendar-m6',
	'iranian-calendar-m7', 'iranian-calendar-m8',
	'iranian-calendar-m9', 'iranian-calendar-m10',
	'iranian-calendar-m11', 'iranian-calendar-m12'
);

/** Message keys of the Hebrew calendar month names (m6a/m6b come last) */
public static $mHebrewCalendarMonthMsgs = array(
	'hebrew-calendar-m1', 'hebrew-calendar-m2',
	'hebrew-calendar-m3', 'hebrew-calendar-m4',
	'hebrew-calendar-m5', 'hebrew-calendar-m6',
	'hebrew-calendar-m7', 'hebrew-calendar-m8',
	'hebrew-calendar-m9', 'hebrew-calendar-m10',
	'hebrew-calendar-m11', 'hebrew-calendar-m12',
	'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
);

/** Message keys of the genitive Hebrew calendar month names */
public static $mHebrewCalendarMonthGenMsgs = array(
	'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen',
	'hebrew-calendar-m3-gen', 'hebrew-calendar-m4-gen',
	'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
	'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen',
	'hebrew-calendar-m9-gen', 'hebrew-calendar-m10-gen',
	'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
	'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
);

/** Message keys of the Hijri calendar month names */
public static $mHijriCalendarMonthMsgs = array(
	'hijri-calendar-m1', 'hijri-calendar-m2',
	'hijri-calendar-m3', 'hijri-calendar-m4',
	'hijri-calendar-m5', 'hijri-calendar-m6',
	'hijri-calendar-m7', 'hijri-calendar-m8',
	'hijri-calendar-m9', 'hijri-calendar-m10',
	'hijri-calendar-m11', 'hijri-calendar-m12'
);

136
/**
 * Get a cached language object for a given language code, building it
 * with newFromCode() on first use.
 *
 * The cache is emptied before an insertion whenever it already holds
 * more than ten objects.
 *
 * @param $code String
 * @return Language
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	if ( count( self::$mLangObjCache ) > 10 ) {
		// Don't keep a billion objects around, that's stupid.
		self::$mLangObjCache = array();
	}

	$lang = self::newFromCode( $code );
	self::$mLangObjCache[$code] = $lang;
	return $lang;
}
152
/**
 * Create a language object for a given language code.
 *
 * Codes that cannot have a class file get a plain Language object with the
 * code set. Otherwise the matching Language* class is used if it exists;
 * failing that, the object of the fallback language is created and
 * relabelled with the requested code.
 *
 * @param $code String
 * @return Language
 * @throws MWException if the code is invalid or the fallback chain is
 *   deeper than five levels
 */
protected static function newFromCode( $code ) {
	global $IP;
	static $recursionLevel = 0;

	// Protect against path traversal below
	if ( !Language::isValidCode( $code )
		|| strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
	{
		throw new MWException( "Invalid language code \"$code\"" );
	}

	if ( !Language::isValidBuiltInCode( $code ) ) {
		// It's not possible to customise this code with class files, so
		// just return a Language object. This is to support uselang= hacks.
		$lang = new Language;
		$lang->setCode( $code );
		return $lang;
	}

	if ( $code == 'en' ) {
		$class = 'Language';
	} else {
		$class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
		if ( !defined( 'MW_COMPILED' ) ) {
			// Preload base classes to work around APC/PHP5 bug
			if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
				include_once( "$IP/languages/classes/$class.deps.php" );
			}
			if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
				include_once( "$IP/languages/classes/$class.php" );
			}
		}
	}

	if ( $recursionLevel > 5 ) {
		throw new MWException( "Language fallback loop detected when creating class $class\n" );
	}

	if ( MWInit::classExists( $class ) ) {
		return new $class;
	}

	$fallback = Language::getFallbackFor( $code );
	++$recursionLevel;
	try {
		$lang = Language::newFromCode( $fallback );
	} catch ( Exception $e ) {
		// The counter is static: without this it would stay raised after a
		// failure and later calls could report a bogus fallback loop.
		--$recursionLevel;
		throw $e;
	}
	--$recursionLevel;
	$lang->setCode( $code );
	return $lang;
}
207
/**
 * Returns true if a language code string is of a valid form, whether or
 * not it exists. This includes codes which are used solely for
 * customisation via the MediaWiki namespace.
 *
 * A code is rejected when it contains ':', '/', a backslash or a NUL
 * byte, or when it matches Title::getTitleInvalidRegex().
 *
 * @param $code String
 * @return bool
 */
public static function isValidCode( $code ) {
	if ( strcspn( $code, ":/\\\000" ) !== strlen( $code ) ) {
		return false;
	}
	return !preg_match( Title::getTitleInvalidRegex(), $code );
}
218
/**
 * Returns true if a language code is of a valid form for the purposes of
 * internal customisation of MediaWiki, via Messages*.php.
 *
 * Only ASCII letters, digits and hyphens are accepted.
 *
 * @param $code String
 * @return bool
 */
public static function isValidBuiltInCode( $code ) {
	// preg_match() returns an int (or false on error); hand back the
	// boolean the documentation promises.
	return (bool)preg_match( '/^[a-z0-9-]*$/i', $code );
}
226
/**
 * Get the LocalisationCache instance, creating it on first use from
 * $wgLocalisationCacheConf (its 'class' entry names the class to build).
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	global $wgLocalisationCacheConf;

	if ( self::$dataCache === null ) {
		$cacheClass = $wgLocalisationCacheConf['class'];
		self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	}
	return self::$dataCache;
}
240
/**
 * Installs a FakeConverter, derives the language code from the class
 * name and makes sure the localisation cache exists.
 */
function __construct() {
	$this->mConverter = new FakeConverter( $this );

	// Set the code to the name of the descendant: strip the 8-character
	// "Language" prefix, lower-case the rest and turn "_" back into "-".
	$className = get_class( $this );
	$this->mCode = ( $className == 'Language' )
		? 'en'
		: str_replace( '_', '-', strtolower( substr( $className, 8 ) ) );

	self::getLocalisationCache();
}
251
/**
 * Reduce memory usage by unsetting every property of the object.
 */
function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $name ) {
		unset( $this->$name );
	}
}
260
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 */
function initContLang() {
	// Nothing to do in the base class.
}
266
/**
 * Get the 'fallback' localisation item for this language.
 *
 * @return mixed false for 'en', otherwise whatever the cache stores
 */
function getFallbackLanguageCode() {
	return ( $this->mCode === 'en' )
		? false
		: self::$dataCache->getItem( $this->mCode, 'fallback' );
}
274
/**
 * Exports $wgBookstoreListEn: the 'bookstoreList' localisation item.
 *
 * @return array
 */
function getBookstoreList() {
	$list = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $list;
}
282
/**
 * Get the namespace names of this language, indexed and sorted by
 * namespace ID. The list is computed once and kept on the object.
 *
 * @return array
 */
function getNamespaces() {
	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	if ( $this->namespaceNames !== null ) {
		return $this->namespaceNames;
	}

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$validNamespaces = MWNamespace::getCanonicalNamespaces();

	// Array union keeps the first value seen for a key: extra namespaces
	// win over localised names, which win over canonical names.
	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk
		? $wgMetaNamespaceTalk
		: $this->fixVariableInNamespace( $this->namespaceNames[NS_PROJECT_TALK] );

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( array_keys( $this->namespaceNames ) as $id ) {
		if ( !isset( $validNamespaces[$id] ) ) {
			unset( $this->namespaceNames[$id] );
		}
	}

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	return $this->namespaceNames;
}
317
/**
 * Same as getNamespaces(), but with every '_' in the names replaced by
 * ' '. Useful for output to be displayed, e.g. in <select> forms.
 *
 * @return array
 */
function getFormattedNamespaces() {
	$formatted = array();
	foreach ( $this->getNamespaces() as $id => $name ) {
		$formatted[$id] = strtr( $name, '_', ' ' );
	}
	return $formatted;
}
333
/**
 * Get a namespace value by key
 * <code>
 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 * echo $mw_ns; // prints 'MediaWiki'
 * </code>
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}
	return false;
}
348
/**
 * Same as getNsText(), but with '_' changed to ' '. Useful for
 * producing output.
 *
 * @param $index Int: namespace index
 * @return string
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
360
/**
 * Returns gender-dependent namespace alias if available, and the plain
 * getNsText() value otherwise.
 *
 * @param $index Int: namespace index
 * @param $gender String: gender key (male, female... )
 * @return String
 * @since 1.18
 */
function getGenderNsText( $index, $gender ) {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}
	return $this->getNsText( $index );
}
372
/**
 * Whether this language distinguishes genders, for example in
 * namespaces: true when it has any 'namespaceGenderAliases' entries.
 *
 * @return bool
 * @since 1.18
 */
function needsGenderDistinction() {
	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	$aliasCount = count( $genderAliases );
	return $aliasCount > 0;
}
383
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$key = $this->lc( $text );
	$ids = $this->getNamespaceIds();
	if ( !isset( $ids[$key] ) ) {
		return false;
	}
	return $ids[$key];
}
397
/**
 * Get the namespace aliases of this language as alias => namespace ID,
 * including all gender-specific aliases. Computed once per object.
 *
 * @return array
 */
function getNamespaceAliases() {
	if ( $this->namespaceAliases !== null ) {
		return $this->namespaceAliases;
	}

	$aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
	if ( $aliases ) {
		foreach ( $aliases as $name => $index ) {
			if ( $index !== NS_PROJECT_TALK ) {
				continue;
			}
			// Re-key project talk aliases through fixVariableInNamespace()
			unset( $aliases[$name] );
			$aliases[$this->fixVariableInNamespace( $name )] = $index;
		}
	} else {
		$aliases = array();
	}

	$genderAliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	foreach ( $genderAliases as $index => $forms ) {
		foreach ( $forms as $alias ) {
			$aliases[$alias] = $index;
		}
	}

	$this->namespaceAliases = $aliases;
	return $aliases;
}
424
/**
 * Get a map of lower-cased namespace name or alias => namespace ID.
 *
 * Later sources overwrite earlier ones: localised names, then the
 * language's aliases, then $wgNamespaceAliases. Computed once per object.
 *
 * @return array
 */
function getNamespaceIds() {
	global $wgNamespaceAliases;

	if ( $this->mNamespaceIds !== null ) {
		return $this->mNamespaceIds;
	}

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	foreach ( $this->getNamespaces() as $id => $nsName ) {
		$this->mNamespaceIds[$this->lc( $nsName )] = $id;
	}

	foreach ( $this->getNamespaceAliases() as $alias => $id ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $id;
	}

	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $id ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $id;
		}
	}

	return $this->mNamespaceIds;
}
447
448
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$lctext = $this->lc( $text );

	$canonical = MWNamespace::getCanonicalIndex( $lctext );
	if ( $canonical !== null ) {
		return $canonical;
	}

	$ids = $this->getNamespaceIds();
	if ( isset( $ids[$lctext] ) ) {
		return $ids[$lctext];
	}
	return false;
}
464
/**
 * Short names for language variants used for language conversion links:
 * the "variantname-<code>" message.
 *
 * @param $code String
 * @return string
 */
function getVariantname( $code ) {
	$msgKey = "variantname-$code";
	return $this->getMessageFromDB( $msgKey );
}
474
/**
 * Build "<Special namespace name>:<page name>", using the first special
 * page alias registered under $name when there is one.
 *
 * @param $name String
 * @return string
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$localName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
482
/**
 * Get the five quickbar setting labels, in a fixed order.
 *
 * @return array
 */
function getQuickbarSettings() {
	$msgKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright'
	);

	$settings = array();
	foreach ( $msgKeys as $msgKey ) {
		$settings[] = $this->getMessage( $msgKey );
	}
	return $settings;
}
492
/**
 * Get the 'datePreferences' localisation item for this language.
 */
function getDatePreferences() {
	$preferences = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $preferences;
}
496
/**
 * Get the 'dateFormats' localisation item for this language.
 */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
500
/**
 * Get the default date format of this language. The special value
 * 'dmy or mdy' is resolved through $wgAmericanDates.
 */
function getDefaultDateFormat() {
	global $wgAmericanDates;

	$format = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $format !== 'dmy or mdy' ) {
		return $format;
	}
	return $wgAmericanDates ? 'mdy' : 'dmy';
}
510
/**
 * Get the 'datePreferenceMigrationMap' localisation item for this language.
 */
function getDatePreferenceMigrationMap() {
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
514
/**
 * Get the entry for $image from the 'imageFiles' localisation item.
 *
 * @param $image String: key inside 'imageFiles'
 */
function getImageFile( $image ) {
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
518
/**
 * Get the 'defaultUserOptionOverrides' localisation item for this language.
 */
function getDefaultUserOptionOverrides() {
	$overrides = self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
	return $overrides;
}
522
/**
 * Get the 'extraUserToggles' localisation item for this language.
 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return $toggles;
}
526
/**
 * Get the "tog-<name>" message for a user toggle.
 *
 * @param $tog String: toggle name
 * @return string
 */
function getUserToggle( $tog ) {
	$msgKey = "tog-$tog";
	return $this->getMessageFromDB( $msgKey );
}
530
/**
 * Get language names, indexed by code.
 * If $customisedOnly is true, only returns codes with a messages file.
 *
 * @param $customisedOnly bool
 * @return array Language code => language name. Empty when
 *   $customisedOnly is set and the messages directory cannot be opened.
 */
public static function getLanguageNames( $customisedOnly = false ) {
	global $wgExtraLanguageNames, $IP;
	static $coreLanguageNames;

	if ( $coreLanguageNames === null ) {
		// NOTE(review): the included file is expected to assign
		// $coreLanguageNames in this scope - confirm against Names.php.
		include( MWInit::compiledPath( 'languages/Names.php' ) );
	}

	$allNames = $wgExtraLanguageNames + $coreLanguageNames;
	if ( !$customisedOnly ) {
		return $allNames;
	}

	$names = array();
	$dir = opendir( "$IP/languages/messages" );
	if ( $dir === false ) {
		// readdir() must not be called on a failed handle: its result is
		// never === false there, so the loop below would not terminate.
		return $names;
	}

	while ( ( $file = readdir( $dir ) ) !== false ) {
		$code = self::getCodeFromFileName( $file, 'Messages' );
		if ( $code && isset( $allNames[$code] ) ) {
			$names[$code] = $allNames[$code];
		}
	}
	closedir( $dir );

	return $names;
}
560
/**
 * Get translated language names. This is done on best effort and
 * by default this is exactly the same as Language::getLanguageNames.
 * The CLDR extension provides translated names.
 *
 * Names supplied by the 'LanguageGetTranslatedLanguageNames' hook are
 * kept; every code the hook did not set is filled in from
 * getLanguageNames().
 *
 * @param $code String Language code.
 * @return Array language code => language name
 * @since 1.18.0
 */
public static function getTranslatedLanguageNames( $code ) {
	$names = array();
	wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $code ) );

	foreach ( self::getLanguageNames() as $langCode => $langName ) {
		if ( !isset( $names[$langCode] ) ) {
			$names[$langCode] = $langName;
		}
	}

	return $names;
}
579
/**
 * Get a message from the MediaWiki namespace, via wfMsgExt() with the
 * 'parsemag' option and this object as the language.
 *
 * @param $msg String: message name
 * @return string
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
589
/**
 * Get the name of a language from getLanguageNames().
 *
 * @param $code String: language code
 * @return string The name, or '' when the code is not listed
 */
function getLanguageName( $code ) {
	$names = self::getLanguageNames();
	return array_key_exists( $code, $names ) ? $names[$code] : '';
}
597
/**
 * Get the name of a month.
 *
 * @param $key Int: month number, 1-based
 * @return string
 */
function getMonthName( $key ) {
	$msgKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
601
/**
 * Get all twelve month names, indexed by month number. Index 0 holds
 * an empty string so that the indexes line up.
 *
 * @return array
 */
function getMonthNamesArray() {
	$monthNames = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$monthNames[] = $this->getMonthName( $month );
	}
	return $monthNames;
}
609
/**
 * Get the genitive name of a month.
 *
 * @param $key Int: month number, 1-based
 * @return string
 */
function getMonthNameGen( $key ) {
	$msgKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
613
/**
 * Get the abbreviated name of a month.
 *
 * @param $key Int: month number, 1-based
 * @return string
 */
function getMonthAbbreviation( $key ) {
	$msgKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
617
/**
 * Get all twelve month abbreviations, indexed by month number. Index 0
 * holds an empty string so that the indexes line up.
 *
 * @return array
 */
function getMonthAbbreviationsArray() {
	$abbreviations = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$abbreviations[] = $this->getMonthAbbreviation( $month );
	}
	return $abbreviations;
}
625
/**
 * Get the name of a weekday.
 *
 * @param $key Int: weekday number, 1-based with 1 = Sunday
 * @return string
 */
function getWeekdayName( $key ) {
	$msgKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
629
/**
 * Get the abbreviated name of a weekday.
 *
 * @param $key Int: weekday number, 1-based with 1 = Sunday
 * @return string
 */
function getWeekdayAbbreviation( $key ) {
	$msgKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
633
/**
 * Get the name of a month of the Iranian calendar.
 *
 * @param $key Int: month number, 1-based
 * @return string
 */
function getIranianCalendarMonthName( $key ) {
	$msgKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
637
/**
 * Get the name of a month of the Hebrew calendar.
 *
 * @param $key Int: 1-based index into $mHebrewCalendarMonthMsgs
 * @return string
 */
function getHebrewCalendarMonthName( $key ) {
	$msgKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
641
/**
 * Get the genitive name of a month of the Hebrew calendar.
 *
 * @param $key Int: 1-based index into $mHebrewCalendarMonthGenMsgs
 * @return string
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$msgKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
645
/**
 * Get the name of a month of the Hijri calendar.
 *
 * @param $key Int: month number, 1-based
 * @return string
 */
function getHijriCalendarMonthName( $key ) {
	$msgKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
649
/**
 * Used by date() and time() to adjust the time output.
 *
 * Recognised forms of the correction string:
 *  - "ZoneInfo|<offset>|<zone name>": convert with the named timezone,
 *    falling back to the stored offset if the zone cannot be opened
 *  - "System|..." or an empty string: use $wgLocalTZoffset, if set
 *  - "Offset|<minutes>": offset in minutes
 *  - "<hours>:<minutes>" or "<hours>": legacy plain offsets
 *
 * @param $ts Int the time in date('YmdHis') format
 * @param $tz Mixed: adjust the time by this amount (default false, mean we
 *            get user timecorrection setting)
 * @return int
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( $tz === false ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	$data = explode( '|', $tz, 3 );

	if ( $data[0] == 'ZoneInfo' ) {
		// The zone name is the third field; a truncated string has none
		if ( isset( $data[2] ) && function_exists( 'timezone_open' ) ) {
			// Open the zone once and reuse the object for the conversion
			$userTZ = @timezone_open( $data[2] );
			if ( $userTZ !== false ) {
				$date = date_create( $ts, timezone_open( 'UTC' ) );
				date_timezone_set( $date, $userTZ );
				return date_format( $date, 'YmdHis' );
			}
		}
		# Unrecognized timezone, default to 'Offset' with the stored offset.
		$data[0] = 'Offset';
	}

	$minDiff = 0;
	if ( $data[0] == 'System' || $tz == '' ) {
		#  Global offset in minutes.
		if ( isset( $wgLocalTZoffset ) ) {
			$minDiff = $wgLocalTZoffset;
		}
	} elseif ( $data[0] == 'Offset' ) {
		// A missing offset field counts as zero
		$minDiff = isset( $data[1] ) ? intval( $data[1] ) : 0;
	} else {
		$data = explode( ':', $tz );
		if ( count( $data ) == 2 ) {
			$hours = intval( $data[0] );
			$minutes = intval( $data[1] );
			$minDiff = abs( $hours ) * 60 + $minutes;
			// intval( '-0' ) is 0, so for "-0:30" the sign has to be
			// read from the text rather than from the parsed hours.
			$negative = $hours < 0
				|| ( $hours === 0 && strpos( ltrim( $data[0] ), '-' ) === 0 );
			if ( $negative ) {
				$minDiff = -$minDiff;
			}
		} else {
			$minDiff = intval( $data[0] ) * 60;
		}
	}

	# No difference ? Return time unchanged
	if ( 0 == $minDiff ) {
		return $ts;
	}

	wfSuppressWarnings(); // E_STRICT system time bitching
	# Generate an adjusted date; take advantage of the fact that mktime
	# will normalize out-of-range values so we don't have to split $minDiff
	# into hours and minutes.
	$t = mktime(
		(int)substr( $ts, 8, 2 ), # Hours
		(int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
		(int)substr( $ts, 12, 2 ), # Seconds
		(int)substr( $ts, 4, 2 ), # Month
		(int)substr( $ts, 6, 2 ), # Day
		(int)substr( $ts, 0, 4 ) ); # Year

	$date = date( 'YmdHis', $t );
	wfRestoreWarnings();

	return $date;
}
722
/**
 * This is a workalike of PHP's date() function, but with better
 * internationalisation, a reduced set of format characters, and a better
 * escaping format.
 *
 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
 * PHP manual for definitions. There are a number of extensions, which
 * start with "x":
 *
 *    xn   Do not translate digits of the next numeric format character
 *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 *    xr   Use roman numerals for the next numeric format character
 *    xh   Use hebrew numerals for the next numeric format character
 *    xx   Literal x
 *    xg   Genitive month name
 *
 *    xij  j (day number) in Iranian calendar
 *    xiF  F (month name) in Iranian calendar
 *    xin  n (month number) in Iranian calendar
 *    xiY  Y (full year) in Iranian calendar
 *
 *    xjj  j (day number) in Hebrew calendar
 *    xjF  F (month name) in Hebrew calendar
 *    xjt  t (days in month) in Hebrew calendar
 *    xjx  xg (genitive month name) in Hebrew calendar
 *    xjn  n (month number) in Hebrew calendar
 *    xjY  Y (full year) in Hebrew calendar
 *
 *    xmj  j (day number) in Hijri calendar
 *    xmF  F (month name) in Hijri calendar
 *    xmn  n (month number) in Hijri calendar
 *    xmY  Y (full year) in Hijri calendar
 *
 *    xkY  Y (full year) in Thai solar calendar. Months and days are
 *                       identical to the Gregorian calendar
 *    xoY  Y (full year) in Minguo calendar or Juche year.
 *                       Months and days are identical to the
 *                       Gregorian calendar
 *    xtY  Y (full year) in Japanese nengo. Months and days are
 *                       identical to the Gregorian calendar
 *
 * Characters enclosed in double quotes will be considered literal (with
 * the quotes themselves removed). Unmatched quotes will be considered
 * literal quotes. Example:
 *
 * "The month is" F       => The month is January
 * i's"                   => 20'11"
 *
 * Backslash escaping is also supported.
 *
 * Input timestamp is assumed to be pre-normalized to the desired local
 * time zone, if any. All formats derived from the Unix timestamp,
 * including "o", are therefore rendered with gmdate() so that the
 * server's own timezone never shifts the result.
 *
 * @param $format String
 * @param $ts String: 14-character timestamp
 *      YYYYMMDDHHMMSS
 *      01234567890123
 * @return string
 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 */
function sprintfDate( $format, $ts ) {
	$s = '';

	// Digit rendering flags set by xn / xN / xr / xh
	$raw = false;
	$rawToggle = false;
	$roman = false;
	$hebrewNum = false;

	// Conversions of $ts, each computed at most once and only when needed
	$unix = false;
	$iranian = false;
	$hebrew = false;
	$hijri = false;
	$thai = false;
	$minguo = false;
	$tenno = false;

	// Format codes grouped by the conversion they depend on
	static $unixCodes = array( 'D', 'l', 'N', 'w', 'z', 'W', 't', 'L', 'o', 'c', 'r', 'U' );
	static $iranianCodes = array( 'xij', 'xiF', 'xin', 'xiY' );
	static $hijriCodes = array( 'xmj', 'xmF', 'xmn', 'xmY' );
	static $hebrewCodes = array( 'xjx', 'xjj', 'xjF', 'xjn', 'xjt', 'xjY' );
	static $calendarPrefixes = array( 'xi', 'xj', 'xk', 'xm', 'xo', 'xt' );

	$formatLength = strlen( $format );

	for ( $p = 0; $p < $formatLength; $p++ ) {
		// Stays false unless this format character yields a number
		$num = false;

		// Read one code: a single character, "x?" or, for the calendar
		// prefixes, "x??"
		$code = $format[$p];
		if ( $code == 'x' && $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}
		if ( in_array( $code, $calendarPrefixes, true ) && $p < $formatLength - 1 ) {
			$code .= $format[++$p];
		}

		// Prepare whichever conversion this code relies on
		if ( in_array( $code, $unixCodes, true ) ) {
			if ( !$unix ) {
				$unix = wfTimestamp( TS_UNIX, $ts );
			}
		} elseif ( in_array( $code, $iranianCodes, true ) ) {
			if ( !$iranian ) {
				$iranian = self::tsToIranian( $ts );
			}
		} elseif ( in_array( $code, $hijriCodes, true ) ) {
			if ( !$hijri ) {
				$hijri = self::tsToHijri( $ts );
			}
		} elseif ( in_array( $code, $hebrewCodes, true ) ) {
			if ( !$hebrew ) {
				$hebrew = self::tsToHebrew( $ts );
			}
		}

		switch ( $code ) {
			case 'xx':
				$s .= 'x';
				break;
			case 'xn':
				$raw = true;
				break;
			case 'xN':
				$rawToggle = !$rawToggle;
				break;
			case 'xr':
				$roman = true;
				break;
			case 'xh':
				$hebrewNum = true;
				break;
			case 'xg':
				$s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
				break;
			case 'xjx':
				$s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
				break;
			case 'd':
				$num = substr( $ts, 6, 2 );
				break;
			case 'D':
				$s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
				break;
			case 'j':
				$num = intval( substr( $ts, 6, 2 ) );
				break;
			case 'xij':
				$num = $iranian[2];
				break;
			case 'xmj':
				$num = $hijri[2];
				break;
			case 'xjj':
				$num = $hebrew[2];
				break;
			case 'l':
				$s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
				break;
			case 'N':
				// ISO weekday: Sunday is 7 rather than 0
				$w = gmdate( 'w', $unix );
				$num = $w ? $w : 7;
				break;
			case 'w':
			case 'z':
			case 'W':
			case 't':
			case 'L':
			case 'o':
				// Same meaning as in PHP; $code is exactly the PHP format
				// character. gmdate() rather than date() keeps "o" (ISO
				// year) independent of the server timezone, like the rest.
				$num = gmdate( $code, $unix );
				break;
			case 'F':
				$s .= $this->getMonthName( substr( $ts, 4, 2 ) );
				break;
			case 'xiF':
				$s .= $this->getIranianCalendarMonthName( $iranian[1] );
				break;
			case 'xmF':
				$s .= $this->getHijriCalendarMonthName( $hijri[1] );
				break;
			case 'xjF':
				$s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
				break;
			case 'm':
				$num = substr( $ts, 4, 2 );
				break;
			case 'M':
				$s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
				break;
			case 'n':
				$num = intval( substr( $ts, 4, 2 ) );
				break;
			case 'xin':
				$num = $iranian[1];
				break;
			case 'xmn':
				$num = $hijri[1];
				break;
			case 'xjn':
				$num = $hebrew[1];
				break;
			case 'xjt':
				$num = $hebrew[3];
				break;
			case 'Y':
				$num = substr( $ts, 0, 4 );
				break;
			case 'xiY':
				$num = $iranian[0];
				break;
			case 'xmY':
				$num = $hijri[0];
				break;
			case 'xjY':
				$num = $hebrew[0];
				break;
			case 'xkY':
				if ( !$thai ) {
					$thai = self::tsToYear( $ts, 'thai' );
				}
				$num = $thai[0];
				break;
			case 'xoY':
				if ( !$minguo ) {
					$minguo = self::tsToYear( $ts, 'minguo' );
				}
				$num = $minguo[0];
				break;
			case 'xtY':
				if ( !$tenno ) {
					$tenno = self::tsToYear( $ts, 'tenno' );
				}
				$num = $tenno[0];
				break;
			case 'y':
				$num = substr( $ts, 2, 2 );
				break;
			case 'a':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
				break;
			case 'A':
				$s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
				break;
			case 'g':
				$h = substr( $ts, 8, 2 );
				$num = $h % 12 ? $h % 12 : 12;
				break;
			case 'G':
				$num = intval( substr( $ts, 8, 2 ) );
				break;
			case 'h':
				$h = substr( $ts, 8, 2 );
				$num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
				break;
			case 'H':
				$num = substr( $ts, 8, 2 );
				break;
			case 'i':
				$num = substr( $ts, 10, 2 );
				break;
			case 's':
				$num = substr( $ts, 12, 2 );
				break;
			case 'c':
			case 'r':
				// Full date strings are appended as-is, never digit-translated
				$s .= gmdate( $code, $unix );
				break;
			case 'U':
				$num = $unix;
				break;
			case '\\':
				# Backslash escaping
				if ( $p < $formatLength - 1 ) {
					$s .= $format[++$p];
				} else {
					$s .= '\\';
				}
				break;
			case '"':
				# Quoted literal
				if ( $p < $formatLength - 1 ) {
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote === false ) {
						# No terminating quote, assume literal "
						$s .= '"';
					} else {
						$s .= substr( $format, $p + 1, $endQuote - $p - 1 );
						$p = $endQuote;
					}
				} else {
					# Quote at end of string, assume literal "
					$s .= '"';
				}
				break;
			default:
				// Anything unrecognised is copied through; for an unknown
				// "x?" code that is the character following the x.
				$s .= $format[$p];
		}

		if ( $num !== false ) {
			// The one-shot flags (xn, xr, xh) are consumed by the number
			// they apply to; the xN toggle is left alone.
			if ( $rawToggle || $raw ) {
				$s .= $num;
				$raw = false;
			} elseif ( $roman ) {
				$s .= self::romanNumeral( $num );
				$roman = false;
			} elseif ( $hebrewNum ) {
				$s .= self::hebrewNumeral( $num );
				$hebrewNum = false;
			} else {
				$s .= $this->formatNum( $num, true );
			}
		}
	}

	return $s;
}

1090
/** Days per Gregorian month in a non-leap year */
private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
/** Days per Iranian month as used by tsToIranian() */
private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );

/**
 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 * Gregorian dates to Iranian dates. Originally written in C, it
 * is released under the terms of GNU Lesser General Public
 * License. Conversion to PHP was performed by Niklas Laxström.
 *
 * Link: http://www.farsiweb.info/jalali/jalali.c
 *
 * @param $ts String: timestamp starting with YYYYMMDD
 * @return array( year, month, day ) in the Iranian calendar
 */
private static function tsToIranian( $ts ) {
	// Work with zero-based month and day, and years counted from 1600
	$gy = substr( $ts, 0, 4 ) - 1600;
	$gm = substr( $ts, 4, 2 ) - 1;
	$gd = (int)substr( $ts, 6, 2 ) - 1;

	# Days passed from the beginning (including leap years)
	$gDayNo = 365 * $gy
		+ floor( ( $gy + 3 ) / 4 )
		- floor( ( $gy + 99 ) / 100 )
		+ floor( ( $gy + 399 ) / 400 );

	// Add days of the past months of this year
	for ( $month = 0; $month < $gm; $month++ ) {
		$gDayNo += self::$GREG_DAYS[$month];
	}

	// Leap years: one extra day once February has passed
	$isLeapYear = ( $gy % 4 === 0 && $gy % 100 !== 0 ) || ( $gy % 400 == 0 );
	if ( $gm > 1 && $isLeapYear ) {
		$gDayNo++;
	}

	// Days passed in current month
	$gDayNo += $gd;

	$jDayNo = $gDayNo - 79;

	// Whole blocks of 12053 days, each worth 33 Iranian years
	$jNp = floor( $jDayNo / 12053 );
	$jDayNo %= 12053;

	// Whole blocks of 1461 days, each worth 4 Iranian years
	$jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
	$jDayNo %= 1461;

	if ( $jDayNo >= 366 ) {
		$jy += floor( ( $jDayNo - 1 ) / 365 );
		$jDayNo = floor( ( $jDayNo - 1 ) % 365 );
	}

	// Walk through the months until the remaining days fit into one
	$monthIndex = 0;
	while ( $monthIndex < 11 && $jDayNo >= self::$IRANIAN_DAYS[$monthIndex] ) {
		$jDayNo -= self::$IRANIAN_DAYS[$monthIndex];
		$monthIndex++;
	}

	$jm = $monthIndex + 1;
	$jd = $jDayNo + 1;

	return array( $jy, $jm, $jd );
}
1148
/**
 * Convert the date part of a timestamp from the Gregorian to the Hijri
 * calendar.
 *
 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
 *
 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
 *
 * @param $ts String: timestamp beginning with the digits YYYYMMDD
 * @return Array: Hijri year, month and day
 */
private static function tsToHijri( $ts ) {
	$gregYear = substr( $ts, 0, 4 );
	$gregMonth = substr( $ts, 4, 2 );
	$gregDay = substr( $ts, 6, 2 );

	# Dates after 14 October 1582 use the first day-number formula,
	# everything earlier uses the second one
	$afterSwitch = ( $gregYear > 1582 )
		|| ( ( $gregYear == 1582 ) && ( $gregMonth > 10 ) )
		|| ( ( $gregYear == 1582 ) && ( $gregMonth == 10 ) && ( $gregDay > 14 ) );

	if ( $afterSwitch ) {
		# Truncated month shift, shared by all three terms below
		$monthShift = (int)( ( $gregMonth - 14 ) / 12 );
		$dayNumber = (int)( ( 1461 * ( $gregYear + 4800 + $monthShift ) ) / 4 )
			+ (int)( ( 367 * ( $gregMonth - 2 - 12 * $monthShift ) ) / 12 )
			- (int)( ( 3 * (int)( ( $gregYear + 4900 + $monthShift ) / 100 ) ) / 4 )
			+ $gregDay - 32075;
	} else {
		$dayNumber = 367 * $gregYear
			- (int)( ( 7 * ( $gregYear + 5001 + (int)( ( $gregMonth - 9 ) / 7 ) ) ) / 4 )
			+ (int)( ( 275 * $gregMonth ) / 9 )
			+ $gregDay + 1729777;
	}

	# Split the day number into 10631-day blocks (30 Hijri years each)
	$rest = $dayNumber - 1948440 + 10632;
	$blocks = (int)( ( $rest - 1 ) / 10631 );
	$rest = $rest - 10631 * $blocks + 354;

	# Year inside the current block
	$yearInBlock = (int)( ( 10985 - $rest ) / 5316 ) * (int)( ( 50 * $rest ) / 17719 )
		+ (int)( $rest / 5670 ) * (int)( ( 43 * $rest ) / 15238 );

	# Day inside that year
	$rest = $rest
		- (int)( ( 30 - $yearInBlock ) / 15 ) * (int)( ( 17719 * $yearInBlock ) / 50 )
		- (int)( $yearInBlock / 16 ) * (int)( ( 15238 * $yearInBlock ) / 43 )
		+ 29;

	$hijriMonth = (int)( ( 24 * $rest ) / 709 );
	$hijriDay = $rest - (int)( ( 709 * $hijriMonth ) / 24 );
	$hijriYear = 30 * $blocks + $yearInBlock - 30;

	return array( $hijriYear, $hijriMonth, $hijriDay );
}
1191
/**
 * Convert the date part of a timestamp from the Gregorian to the Hebrew
 * calendar.
 *
 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
 * to translate the relevant functions into PHP and release them under
 * GNU GPL.
 *
 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
 * and Adar II is 14. In a non-leap year, Adar is 6.
 *
 * @param $ts String: timestamp beginning with the digits YYYYMMDD
 * @return Array: Hebrew year, Hebrew month, day of the month, and the
 *   number of days in the month the search stopped at
 */
private static function tsToHebrew( $ts ) {
	# Gregorian components of the date
	$gregYear = substr( $ts, 0, 4 );
	$septMonth = substr( $ts, 4, 2 );
	$dayOfYear = substr( $ts, 6, 2 );

	# First guess at the Hebrew year
	$hebrewYear = $gregYear + 3760;

	# Renumber the month so that September = 1 and August = 12;
	# September to December roll over into the next year
	$septMonth += 4;
	if ( $septMonth > 12 ) {
		$septMonth -= 12;
		$gregYear++;
		$hebrewYear++;
	}

	# Day of the year, counted from 1 September: add every whole month passed
	for ( $m = 1; $m < $septMonth; $m++ ) {
		switch ( $m ) {
			case 6:
				# February, with its leap day where applicable
				$dayOfYear += 28;
				if ( $gregYear % 400 == 0 || ( $gregYear % 4 == 0 && $gregYear % 100 > 0 ) ) {
					$dayOfYear++;
				}
				break;
			case 1:
			case 3:
			case 8:
			case 10:
				# September, November, April and June
				$dayOfYear += 30;
				break;
			default:
				$dayOfYear += 31;
		}
	}

	# Start of the guessed Hebrew year, in days since 1 September
	$start = self::hebrewYearStart( $hebrewYear );

	if ( $dayOfYear > $start ) {
		# The guess was right; only the next year's start is still needed
		$nextStart = self::hebrewYearStart( $hebrewYear + 1 );
	} else {
		# The day falls before the start of the guessed year, so it belongs
		# to the previous one and the guessed start is the "next" start
		$nextStart = $start;
		$gregYear--;
		$hebrewYear--;
		# Count the days from the previous 1 September instead
		$dayOfYear += 365;
		if ( ( $gregYear % 400 == 0 ) || ( $gregYear % 100 != 0 && $gregYear % 4 == 0 ) ) {
			$dayOfYear++;
		}
		$start = self::hebrewYearStart( $hebrewYear );
	}

	# Day number inside the Hebrew year
	$hebrewDayOfYear = $dayOfYear - $start;

	# Difference between the two year starts. Adding 12 (or 13 in a
	# Gregorian leap year) removes the gap between the Hebrew and the
	# Gregorian year lengths, so only the Hebrew year type remains
	$gregLeap = ( $gregYear % 400 == 0 ) || ( $gregYear % 100 != 0 && $gregYear % 4 == 0 );
	$diff = $nextStart - $start + ( $gregLeap ? 13 : 12 );

	# 0 means an incomplete year, 1 a regular year, 2 a complete year.
	# Taken mod 30 so that the 30 days of Adar I in leap years drop out
	$yearPattern = $diff % 30;
	$isLeap = $diff >= 30;

	# Walk through the months, subtracting their lengths, until the day fits
	$hebrewDay = $hebrewDayOfYear;
	$hebrewMonth = 1;
	$days = 0;
	while ( $hebrewMonth <= 12 ) {
		if ( $isLeap && $hebrewMonth == 6 ) {
			# Leap year: Adar I (30 days) followed by Adar II (29 days)
			$days = 30;
			if ( $hebrewDay <= $days ) {
				$hebrewMonth = 13;
			} else {
				$hebrewDay -= $days;
				$days = 29;
				if ( $hebrewDay <= $days ) {
					$hebrewMonth = 14;
				}
			}
		} elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
			# Cheshvan in a complete year
			$days = 30;
		} elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
			# Kislev in an incomplete year
			$days = 29;
		} else {
			# Odd months have 30 days, even months 29
			$days = 30 - ( $hebrewMonth - 1 ) % 2;
		}

		if ( $hebrewDay <= $days ) {
			# The day is inside the current month
			break;
		}

		# Otherwise drop this month's days and try the next month
		$hebrewDay -= $days;
		$hebrewMonth++;
	}

	return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
}
1334
/**
 * Calculate the start of a Hebrew year, expressed as days since
 * 1 September.
 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
 * Used for Hebrew date.
 *
 * @param $year Integer: Hebrew year
 * @return Integer: start of the year in days since 1 September
 */
private static function hebrewYearStart( $year ) {
	$prevYear = $year - 1;
	$a = intval( ( 12 * $prevYear + 17 ) % 19 );
	$b = intval( $prevYear % 4 );
	$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * $prevYear;

	# Split $m into a whole part, rounded down also for negative values,
	# and the remaining fraction
	if ( $m < 0 ) {
		$m--;
	}
	$dayNo = intval( $m );
	if ( $m < 0 ) {
		$m++;
	}
	$m -= $dayNo;

	# Postponements, depending on the weekday-like value $c
	$c = intval( ( $dayNo + 3 * $prevYear + 5 * $b + 5 ) % 7 );
	switch ( $c ) {
		case 0:
			if ( $a > 11 && $m >= 0.89772376543210 ) {
				$dayNo++;
			}
			break;
		case 1:
			if ( $a > 6 && $m >= 0.63287037037037 ) {
				$dayNo += 2;
			}
			break;
		case 2:
		case 4:
		case 6:
			$dayNo++;
			break;
	}

	# Century correction
	$offsetYear = $year - 3761;
	$dayNo += intval( $offsetYear / 100 ) - intval( $offsetYear / 400 ) - 24;

	return $dayNo;
}
1365
/**
 * Convert the year of a Gregorian date for calendars whose months and
 * days are identical to the Gregorian ones: Thai solar, Minguo/Juche and
 * Japanese nengō (era) years.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * @param $ts String: 14-character timestamp
 * @param $cName String: calendar name ('thai', 'minguo', 'juche' or
 *   'tenno'); any other name leaves the year unchanged
 * @return Array: converted year, month, day
 */
private static function tsToYear( $ts, $cName ) {
	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );

	if ( strcmp( $cName, 'thai' ) == 0 ) {
		# Thai solar years run 543 years ahead of the Gregorian ones
		$convertedYear = $gy + 543;
	} elseif ( strcmp( $cName, 'minguo' ) == 0 || strcmp( $cName, 'juche' ) == 0 ) {
		# Minguo (and Juche) years: deduct 1911 from the Gregorian year
		$convertedYear = $gy - 1911;
	} elseif ( strcmp( $cName, 'tenno' ) == 0 ) {
		# Nengō years: find the era by comparing the date, written as the
		# number YYYYMMDD, with the first day of each following era
		$dateNumber = (int)( $gy . $gm . $gd );
		if ( $dateNumber < 19120731 ) {
			# Meiji period
			$eraName = '明治';
			$eraFirstYear = 1868;
		} elseif ( $dateNumber < 19261226 ) {
			# Taishō period
			$eraName = '大正';
			$eraFirstYear = 1912;
		} elseif ( $dateNumber < 19890108 ) {
			# Shōwa period
			$eraName = '昭和';
			$eraFirstYear = 1926;
		} else {
			# Heisei period
			$eraName = '平成';
			$eraFirstYear = 1989;
		}

		# The first year of an era is written with a special character
		$eraYear = $gy - $eraFirstYear + 1;
		if ( $eraYear == 1 ) {
			$convertedYear = $eraName . '元';
		} else {
			$convertedYear = $eraName . $eraYear;
		}
	} else {
		$convertedYear = $gy;
	}

	return array( $convertedYear, $gm, $gd );
}
1449
/**
 * Format a number as a Roman numeral.
 *
 * @param $num Mixed: number to format, converted with intval()
 * @return Mixed: the Roman numeral for 1 to 3000; for any other value
 *   the integer itself is returned unformatted
 */
static function romanNumeral( $num ) {
	# One row per decimal place (ones, tens, hundreds, thousands),
	# indexed by the digit at that place
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$value = intval( $num );
	if ( $value <= 0 || $value > 3000 ) {
		return $value;
	}

	$roman = '';
	$divisor = 1000;
	for ( $place = 3; $place >= 0; $place-- ) {
		# A zero digit maps to the empty string, so it can be appended blindly
		$digit = (int)floor( $value / $divisor );
		$roman .= $table[$place][$digit];
		$value = $value % $divisor;
		$divisor /= 10;
	}

	return $roman;
}
1475
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * Single-letter numbers get a geresh (') appended, longer ones get a
 * gershayim (") before the last letter, and a thousands letter is
 * followed by a geresh. Round thousands (1000, 2000, ...) are spelled
 * out with the Hebrew word for "thousand(s)", because the generic
 * byte-based punctuation below would otherwise cut the thousands letter
 * in half and return invalid UTF-8.
 *
 * @param $num Mixed: number to format, converted with intval()
 * @return Mixed: the Hebrew numeral for 1 to 9999; for any other value
 *   the integer itself is returned unformatted
 */
static function hebrewNumeral( $num ) {
	# One row per decimal place (ones, tens, hundreds, thousands),
	# indexed by the digit at that place
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);

	$num = intval( $num );
	if ( $num > 9999 || $num <= 0 ) {
		return $num;
	}

	# Round thousands have a special notation
	if ( $num == 1000 ) {
		return "א' אלף";
	} elseif ( $num % 1000 == 0 ) {
		return $table[3][intval( $num / 1000 )] . "' אלפים";
	}

	$s = '';
	for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				# 15 and 16 are special-cased as 9+6 and 9+7
				$s .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$s .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					# Separate the thousands letter with a geresh
					$s .= "'";
				}
			}
		}
		$num = $num % $pow10;
	}

	# Every Hebrew letter takes two bytes in UTF-8, so all offsets below
	# are byte offsets that step over whole letters
	if ( strlen( $s ) == 2 ) {
		# A single letter: append a geresh
		$str = $s . "'";
	} else {
		# Several letters: put a gershayim before the last letter
		$str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
		$str .= substr( $s, strlen( $s ) - 2, 2 );
	}

	# A trailing letter that has a final form is written in that form
	$start = substr( $str, 0, strlen( $str ) - 2 );
	$end = substr( $str, strlen( $str ) - 2 );
	switch( $end ) {
		case 'כ':
			$str = $start . 'ך';
			break;
		case 'מ':
			$str = $start . 'ם';
			break;
		case 'נ':
			$str = $start . 'ן';
			break;
		case 'פ':
			$str = $start . 'ף';
			break;
		case 'צ':
			$str = $start . 'ץ';
			break;
	}
	return $str;
}
1534
/**
 * Work out which date format preference to use. This is meant to be used
 * by time(), date(), and timeanddate(), and it should be used in all
 * children.
 *
 *<code>
 * function timeanddate([...], $format = true) {
 * 	$datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return string: the preference name, or 'default' when it is empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		# An explicit format was passed in
		$preference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$preference = $wgUser->getDatePreference();
	} else {
		$preference = (string)User::getDefaultOption( 'date' );
	}

	return $preference == '' ? 'default' : $preference;
}
1572
/**
 * Get a format string for a given type and preference
 *
 * Results are memoised in $this->dateFormatStrings. The preference
 * 'default' is resolved to the language's default date format before the
 * cache is consulted, and a preference that falls back to the default is
 * stored under its own name too, so repeated calls hit the cache instead
 * of querying the data cache every time.
 *
 * @param $type May be date, time or both
 * @param $pref The format name as it appears in Messages*.php
 * @return Mixed: the 'dateFormats' sub-item found for "$pref $type", or
 *   the one for the default date format if there is none
 */
function getDateFormatString( $type, $pref ) {
	$wasDefault = ( $pref == 'default' );
	if ( $wasDefault ) {
		$pref = $this->getDefaultDateFormat();
	}

	if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
		if ( !$wasDefault && is_null( $df ) ) {
			# No such format: fall back to the default date format and
			# remember the result under that name as well
			$fallback = $this->getDefaultDateFormat();
			$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$fallback $type" );
			$this->dateFormatStrings[$type][$fallback] = $df;
		}
		$this->dateFormatStrings[$type][$pref] = $df;
	}
	return $this->dateFormatStrings[$type][$pref];
}
1594
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference;
 *                passed on to dateFormat()
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'date', $preference );
	return $this->sprintfDate( $formatString, $mwTimestamp );
}
1613
/**
 * Format the time part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference;
 *                passed on to dateFormat()
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'time', $preference );
	return $this->sprintfDate( $formatString, $mwTimestamp );
}
1632
/**
 * Format both the date and the time of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true); passed on to dateFormat()
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'both', $preference );
	return $this->sprintfDate( $formatString, $mwTimestamp );
}
1652
/**
 * Look up a single entry of this language's 'messages' item in the
 * data cache.
 *
 * @param $key String: message key
 * @return Mixed: whatever the data cache holds for that key
 */
function getMessage( $key ) {
	$message = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
	return $message;
}
1656
/**
 * Fetch this language's whole 'messages' item from the data cache.
 *
 * @return Mixed: whatever the data cache holds for 'messages'
 */
function getAllMessages() {
	$messages = self::$dataCache->getItem( $this->mCode, 'messages' );
	return $messages;
}
1660
/**
 * Wrapper around PHP's iconv() that ignores illegal characters.
 *
 * This is a plain wrapper in all languages except esperanto, which does
 * some nasty x-conversions beforehand.
 *
 * @param $in String: source encoding
 * @param $out String: target encoding
 * @param $string String: text to convert
 * @return Mixed: the result of iconv()
 */
function iconv( $in, $out, $string ) {
	# Even with //IGNORE iconv can whine about illegal characters in
	# the *input* string, so those warnings are silenced as well.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	wfSuppressWarnings();
	$converted = iconv( $in, $out . '//IGNORE', $string );
	wfRestoreWarnings();

	return $converted;
}
1674
// Callback functions for uc(), lc(), ucwords(), ucwordbreaks()

/**
 * preg callback of ucwordbreaks(): capitalize the first letter of the
 * word captured in group 1.
 *
 * @param $matches Array: preg match
 * @return String
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
1679
/**
 * preg callback of ucwordbreaks(): uppercase the whole match using
 * mbstring.
 *
 * @param $matches Array: preg match
 * @return String
 */
function ucwordbreaksCallbackMB( $matches ) {
	$matched = $matches[0];
	return mb_strtoupper( $matched );
}
1683
/**
 * preg callback of uc(): translate capture group 1 with the first of
 * the case maps.
 *
 * @param $matches Array: preg match
 * @return String
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[0] );
}
1688
/**
 * preg callback of lc(): translate capture group 1 with the second of
 * the case maps.
 *
 * @param $matches Array: preg match
 * @return String
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[1] );
}
1693
/**
 * preg callback of ucwords(): uppercase the whole match using mbstring.
 *
 * @param $matches Array: preg match
 * @return String
 */
function ucwordsCallbackMB( $matches ) {
	$matched = $matches[0];
	return mb_strtoupper( $matched );
}
1697
/**
 * preg callback of ucwords() and ucwordbreaks(): translate the whole
 * match with the first of the case maps.
 *
 * @param $matches Array: preg match
 * @return String
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[0], $caseMaps[0] );
}
1702
/**
 * Make a string's first character uppercase
 *
 * @param $str String
 * @return String
 */
function ucfirst( $str ) {
	$firstByte = ord( $str );

	if ( $firstByte >= 128 ) {
		// Start of a multibyte character: needs the more complex logic
		return $this->uc( $str, true );
	}
	if ( $firstByte >= 96 ) {
		// The range holding the ASCII lowercase letters: use PHP's ucfirst()
		return ucfirst( $str );
	}

	// Anything below that cannot be an ASCII lowercase letter
	return $str;
}
1717
/**
 * Convert a string to uppercase
 *
 * @param $str String
 * @param $first Bool: if true, only the first character is converted
 * @return String
 */
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// No high bytes: PHP's byte-wise functions are sufficient
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return mb_strtoupper( $str );
		}
		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: translate the characters through ucCallback(),
	// anchored at the start when only the first one is wanted
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
1745
/**
 * Make a string's first character lowercase
 *
 * @param $str String
 * @return String
 */
function lcfirst( $str ) {
	$firstByte = ord( $str );

	if ( !$firstByte ) {
		// Empty string, or one starting with a NUL byte
		return strval( $str );
	}
	if ( $firstByte >= 128 ) {
		// Start of a multibyte character: needs the more complex logic
		return $this->lc( $str, true );
	}
	if ( $firstByte > 96 ) {
		// Above the ASCII uppercase letters: nothing to do
		return $str;
	}

	$str[0] = strtolower( $str[0] );
	return $str;
}
1759
/**
 * Convert a string to lowercase
 *
 * @param $str String
 * @param $first Bool: if true, only the first character is converted
 * @return String
 */
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		// No high bytes: PHP's byte-wise functions are sufficient
		if ( $first ) {
			return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
		}
		return strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return mb_strtolower( $str );
		}
		return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: translate the characters through lcCallback(),
	// anchored at the start when only the first one is wanted
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/$anchor([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
1784
/**
 * Check whether a string contains any byte in the range 0x80-0xff.
 *
 * @param $str String
 * @return bool
 */
function isMultibyte( $str ) {
	return preg_match( '/[\x80-\xff]/', $str ) ? true : false;
}
1788
/**
 * Lowercase a string and capitalize the first letter of every
 * space-separated word.
 *
 * @param $str String
 * @return String
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// Matches the first letter of the string and of each word after a space
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// Pick the callback used to capitalize a single match
	if ( function_exists( 'mb_strtoupper' ) ) {
		$callback = 'ucwordsCallbackMB';
	} else {
		$callback = 'ucwordsCallbackWiki';
	}

	return preg_replace_callback( $firstLetters, array( $this, $callback ), $str );
}
1814
/**
 * Capitalize words at word breaks
 *
 * @param $str String
 * @return String
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		// \b is good enough when there are no high bytes
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// Since \b doesn't work for UTF-8, the word break characters are
	// defined explicitly
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// Matches the first letter of the string and after each word break
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	if ( function_exists( 'mb_strtoupper' ) ) {
		$callback = 'ucwordbreaksCallbackMB';
	} else {
		$callback = 'ucwordsCallbackWiki';
	}

	return preg_replace_callback( $firstLetters, array( $this, $callback ), $str );
}
1847
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param $s String
 * @return String: currently the uppercased string
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
1862
/**
 * Make sure a string is UTF-8: text that contains high bytes but is
 * not well-formed UTF-8 is converted from the language's fallback
 * 8-bit encoding.
 *
 * @param $s String
 * @return String
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# Pure ASCII needs no check
	if ( !preg_match( '/[\x80-\xff]/', $s ) ) {
		return $s;
	}

	# Check for non-UTF-8 URLs
	$utf8Pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	if ( preg_match( $utf8Pattern, $s ) ) {
		return $s;
	}

	return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
}
1881
/**
 * Fetch this language's 'fallback8bitEncoding' item from the data cache.
 *
 * @return Mixed: whatever the data cache holds for that item
 */
function fallback8bitEncoding() {
	$encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
	return $encoding;
}
1885
/**
 * Whether this language's writing system breaks up words with
 * whitespace. Most do; some languages such as Chinese don't
 * conventionally do this, which requires special handling when
 * breaking up words for searching etc.
 *
 * @return bool: always true in this base implementation
 */
function hasWordBreaks() {
	$usesWhitespace = true;
	return $usesWhitespace;
}
1895
/**
 * Hook for word segmentation. Some languages such as Chinese require
 * it; such segmentation is specified by overriding this method in a
 * derived class. This base implementation returns the text unchanged.
 *
 * @param $string String
 * @return String
 */
function segmentByWord( $string ) {
	$segmented = $string;
	return $segmented;
}
1906
/**
 * Normalize a string for searching. Some languages have special
 * punctuation that needs to be normalized; such changes are made here.
 * This base implementation only runs convertDoubleWidth().
 *
 * @param $string String
 * @return String
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
1917
/**
 * Convert double-width (fullwidth) roman letters and digits to their
 * single-width ASCII equivalents.
 * range: ff00-ff5f ~= 0020-007f
 *
 * Only 0-9, A-Z and a-z are handled. The fullwidth forms are built from
 * their code points (ASCII code + 0xFEE0) instead of being typed in as
 * literals, so the table cannot be damaged by tools that normalize
 * fullwidth characters in the source file to ASCII.
 *
 * @param $string String: UTF-8 text
 * @return String
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$half = str_split( $halfWidth );

		$full = array();
		foreach ( $half as $char ) {
			# U+FF10..U+FF5A, each encoded as three UTF-8 bytes
			$codepoint = ord( $char ) + 0xFEE0;
			$full[] = chr( 0xE0 | ( $codepoint >> 12 ) )
				. chr( 0x80 | ( ( $codepoint >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $codepoint & 0x3F ) );
		}
	}

	return str_replace( $full, $half, $string );
}
1936
/**
 * Surround every match of a pattern's first capture group with spaces,
 * then collapse runs of spaces into a single space.
 *
 * @param $string String
 * @param $pattern String: regular expression with a capture group
 * @return String
 */
protected static function insertSpace( $string, $pattern ) {
	$spaced = preg_replace( $pattern, " $1 ", $string );
	return preg_replace( '/ +/', ' ', $spaced );
}
1942
/**
 * Hook for converting search terms. Some languages, e.g. Chinese, need
 * to do a conversion in order for search results to be displayed
 * correctly. This base implementation returns the terms unchanged.
 *
 * @param $termsArray Array
 * @return Array
 */
function convertForSearchResult( $termsArray ) {
	$converted = $termsArray;
	return $converted;
}
1948
/**
 * Get the first character of a string. For a Hangul syllable, the
 * syllable's leading jamo is returned instead.
 *
 * @param $s string
 * @return string: empty if $s does not start with a UTF-8 sequence
 *   accepted by the pattern below
 */
function firstChar( $s ) {
	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	if ( strlen( $char ) != 3 ) {
		// Only three-byte sequences are candidates for Hangul syllables
		return $char;
	}

	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		// Outside the range handled below
		return $char;
	}

	// Exclusive upper bound of each group of syllables => the jamo
	// returned for that group, in ascending order
	$jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);
	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}

	// Everything from 0xd558 up to the end of the range
	return "\xe3\x85\x8e";
}
2006
/**
 * Startup hook for languages with an alternate char encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 *
 * If this language is used as the primary content language, an override
 * to the defaults can be set here on startup. This base implementation
 * does nothing.
 */
function initEncoding() {
	# Intentionally empty
}
2013
/**
 * Recode text on its way into the edit box.
 *
 * For some languages we'll want to explicitly specify which characters
 * make it into the edit box raw or are converted in some way or another.
 * Note that if wgOutputEncoding is different from wgInputEncoding, this
 * text will be further converted to wgOutputEncoding.
 *
 * @param $s String: UTF-8 text
 * @return String: the text in $wgEditEncoding, or unchanged if that is
 *   empty or 'UTF-8'
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$needsRecoding = ( $wgEditEncoding != '' && $wgEditEncoding != 'UTF-8' );
	if ( $needsRecoding ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}
	return $s;
}
2028
/**
 * Recode text coming back from the edit box: the reverse of
 * recodeForEdit().
 *
 * @param $s String: text in $wgEditEncoding
 * @return String: the text converted to UTF-8, or unchanged if
 *   $wgEditEncoding is empty or 'UTF-8'
 */
function recodeInput( $s ) {
	global $wgEditEncoding;

	# An empty setting means UTF-8
	$sourceEncoding = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';
	if ( $sourceEncoding != 'UTF-8' ) {
		return $this->iconv( $sourceEncoding, 'UTF-8', $s );
	}
	return $s;
}
2043
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only.
 *
 * @param $s String
 * @return String
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( $wgAllUnicodeFixes ) {
		# Apply the Arabic fixes first, then the Malayalam ones
		foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
			$s = $this->transformUsingPairFile( $pairFile, $s );
		}
	}

	return $s;
}
2061
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param $file String: name of the data file
 * @param $string String: text to transform
 * @return String
 * @throws MWException if the data file cannot be loaded
 */
function transformUsingPairFile( $file, $string ) {
	if ( !isset( $this->transformData[$file] ) ) {
		# First use of this file: load it and keep the replacer around
		$pairs = wfGetPrecompiledData( $file );
		if ( $pairs === false ) {
			throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
		}
		$this->transformData[$file] = new ReplacementArray( $pairs );
	}

	$replacer = $this->transformData[$file];
	return $replacer->replace( $string );
}
2080
/**
 * For right-to-left language support: fetch this language's 'rtl' item
 * from the data cache.
 *
 * @return bool
 */
function isRTL() {
	$rtl = self::$dataCache->getItem( $this->mCode, 'rtl' );
	return $rtl;
}
2089
/**
 * Return the correct HTML 'dir' attribute value for this language.
 *
 * @return String: 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}
	return 'ltr';
}
2097
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
2109
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
2121
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction
 *
 * @return string: the UTF-8 bytes of U+200F for right-to-left
 *   languages, of U+200E otherwise
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
2130
/**
 * Fetch this language's 'capitalizeAllNouns' item from the data cache.
 *
 * @return Mixed: whatever the data cache holds for that item
 */
function capitalizeAllNouns() {
	$setting = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
	return $setting;
}
2134
/**
 * An arrow, depending on the language direction
 *
 * @return string: a leftwards arrow for right-to-left languages,
 *   a rightwards arrow otherwise
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
2143
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar".
 *
 * @return bool The 'linkPrefixExtension' item of the localisation data
 *   cache for this language's code
 */
function linkPrefixExtension() {
	$langCode = $this->mCode;
	return self::$dataCache->getItem( $langCode, 'linkPrefixExtension' );
}
2152
function getMagicWords() {
	// Plain lookup of the 'magicWords' localisation item for this language
	$langCode = $this->mCode;
	return self::$dataCache->getItem( $langCode, 'magicWords' );
}
2156
protected function doMagicHook() {
	// The LanguageGetMagic hook is run at most once per object
	if ( !$this->mMagicHookDone ) {
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		// mMagicExtensions is handed over by reference so handlers can add to it
		wfRunHooks(
			'LanguageGetMagic',
			array( &$this->mMagicExtensions, $this->getCode() )
		);
		wfProfileOut( 'LanguageGetMagic' );
	}
}
2166
/**
 * Fill a MagicWord object with data from here.
 *
 * Entries in mMagicExtensions take precedence over the entries returned by
 * getMagicWords(). When no array entry is found for the id, a message is
 * written with error_log() and $mw is left untouched.
 *
 * @param $mw Object whose mId is looked up and whose mCaseSensitive and
 *   mSynonyms are set from the entry found
 */
function getMagic( $mw ) {
	$this->doMagicHook();

	$id = $mw->mId;
	$rawEntry = false;
	if ( isset( $this->mMagicExtensions[$id] ) ) {
		$rawEntry = $this->mMagicExtensions[$id];
	} else {
		$magicWords = $this->getMagicWords();
		if ( isset( $magicWords[$id] ) ) {
			$rawEntry = $magicWords[$id];
		}
	}

	if ( is_array( $rawEntry ) ) {
		// Element 0 is the case-sensitivity flag, the rest are the synonyms
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
	} else {
		error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
	}
}
2189
/**
 * Add magic words to the extension array.
 *
 * Builds the fallback chain of this language (always ending in 'en') and
 * merges the matching entries of $newWords into mMagicExtensions, starting
 * with the most distant fallback so that entries for closer languages win.
 *
 * @param $newWords Array keyed by language code; each value is an array
 *   that is merged into mMagicExtensions with the + operator
 */
function addMagicWordsByLang( $newWords ) {
	// Walk the fallback chain, stopping on an empty code or a cycle
	$chain = array();
	$lang = $this->getCode();
	while ( $lang && !in_array( $lang, $chain ) ) {
		$chain[] = $lang;
		$lang = self::getFallbackFor( $lang );
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	// Iterate from the last fallback back to this language
	for ( $i = count( $chain ) - 1; $i >= 0; $i-- ) {
		$lang = $chain[$i];
		if ( isset( $newWords[$lang] ) ) {
			$this->mMagicExtensions = $newWords[$lang] + $this->mMagicExtensions;
		}
	}
}
2210
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 *
 * The result is cached on the object because loading it may be slow; the
 * LanguageGetSpecialPageAliases hook is run once, when the cache is filled.
 */
function getSpecialPageAliases() {
	if ( $this->mExtendedSpecialPageAliases !== null ) {
		return $this->mExtendedSpecialPageAliases;
	}

	// First call: load the aliases, then let hook handlers modify them
	$this->mExtendedSpecialPageAliases =
		self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
	wfRunHooks(
		'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
	);

	return $this->mExtendedSpecialPageAliases;
}
2227
/**
 * Wrap text in emphasis markup. Italic is unsuitable for some languages.
 *
 * @param $text String: the text to be emphasized; it is inserted as-is.
 * @return string $text surrounded by an <em> element
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
2237
/**
 * Format a number for display in this language.
 *
 * Numbers are normally output in plain en_US style, e.g. 293,291.235.
 * That is not suitable for all languages: some want their own digit
 * characters (e.g. ੨੯੩,੨੯੧.੨੩੫) and others just want commas and dots
 * swapped, like "293.291,235".
 *
 * Unless $nocommafy is set, the number is passed through commafy() and the
 * language's separatorTransformTable(). When $wgTranslateNumerals is true,
 * the language's digitTransformTable() is applied as well.
 *
 * An example of this function being called:
 * <code>
 * wfMsg( 'message', $wgLang->formatNum( $num ) )
 * </code>
 *
 * @todo check if it's viable to use localeconv() for the decimal
 *       separator thing.
 * @param $number Mixed: the string to be formatted, should be an integer
 *   or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates
 * @return string
 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
2281
function parseFormattedNumber( $number ) {
	// Undo the digit translation first, then the separator translation,
	// by applying each table with keys and values swapped
	$digits = $this->digitTransformTable();
	if ( $digits ) {
		$number = strtr( $number, array_flip( $digits ) );
	}

	$separators = $this->separatorTransformTable();
	if ( $separators ) {
		$number = strtr( $number, array_flip( $separators ) );
	}

	// Finally drop the grouping commas
	return str_replace( ',', '', $number );
}
2296
/**
 * Adds commas to a given number, grouping the integer part in threes.
 *
 * The work is done on the reversed string so that groups are counted from
 * the right; the lookahead keeps digits after a '.' from being grouped.
 *
 * @param $_ mixed
 * @return string
 */
function commafy( $_ ) {
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
2306
function digitTransformTable() {
	// Plain lookup of the 'digitTransformTable' localisation item for this language
	$langCode = $this->mCode;
	return self::$dataCache->getItem( $langCode, 'digitTransformTable' );
}
2310
function separatorTransformTable() {
	// Plain lookup of the 'separatorTransformTable' localisation item for this language
	$langCode = $this->mCode;
	return self::$dataCache->getItem( $langCode, 'separatorTransformTable' );
}
2314
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with the 'and' message followed by the
 * 'word-separator' message.
 *
 * @param $l Array, indexed 0..n-1
 * @return string Empty string for an empty list
 */
function listToText( $l ) {
	$last = count( $l ) - 1;
	if ( $last == 1 ) {
		return $l[0] . $this->getMessageFromDB( 'and' ) .
			$this->getMessageFromDB( 'word-separator' ) . $l[1];
	}

	// Build the text from the last item backwards
	$text = '';
	for ( $idx = $last; $idx >= 0; $idx-- ) {
		if ( $idx == $last ) {
			$text = $l[$idx];
			continue;
		}
		if ( $idx == $last - 1 ) {
			$text = $l[$idx] . $this->getMessageFromDB( 'and' ) .
				$this->getMessageFromDB( 'word-separator' ) . $text;
		} else {
			$text = $l[$idx] . $this->getMessageFromDB( 'comma-separator' ) . $text;
		}
	}
	return $text;
}
2341
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 *
 * @param $list array of strings to put in a comma list
 * @return string
 */
function commaList( $list ) {
	$separator = wfMsgExt(
		'comma-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the reversed (array, separator) order is a legacy
	// form that newer PHP versions no longer accept.
	return implode( $separator, $list );
}
2357
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 *
 * @param $list array of strings to put in a semicolon list
 * @return string
 */
function semicolonList( $list ) {
	$separator = wfMsgExt(
		'semicolon-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	// Separator first: the reversed (array, separator) order is a legacy
	// form that newer PHP versions no longer accept.
	return implode( $separator, $list );
}
2373
/**
 * Same as commaList, but separate it with the pipe-separator message instead.
 *
 * @param $list array of strings to put in a pipe list
 * @return string
 */
function pipeList( $list ) {
	$separator = wfMsgExt(
		'pipe-separator',
		array( 'escapenoentities', 'language' => $this )
	);
	// Separator first: the reversed (array, separator) order is a legacy
	// form that newer PHP versions no longer accept.
	return implode( $separator, $list );
}
2388
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns; with
 * multi-byte character sets only whole characters may be kept, otherwise
 * broken characters can be passed to the user.
 *
 * If $length is negative, the string is truncated from the beginning.
 * If $length is zero, the ellipsis alone is returned. The original string
 * is returned whenever the truncated result would not be shorter than it.
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (including ellipses)
 * @param $ellipsis String to append to the truncated text; the default
 *   '...' is replaced by the localized 'ellipsis' message
 * @param $adjustLength Boolean: Subtract length of ellipsis from $length.
 *	$adjustLength was introduced in 1.18, before that behaved as if false.
 * @return string
 */
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
	# Swap the default marker for the localized ellipsis
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Trivial cases first
	if ( $length == 0 ) {
		return $ellipsis; // convention
	}
	$limit = abs( $length );
	if ( strlen( $string ) <= $limit ) {
		return $string; // already short enough
	}

	$ellipsisLen = strlen( $ellipsis );
	if ( $adjustLength && $ellipsisLen >= $limit ) {
		# No room for any text next to the ellipsis
		$result = $ellipsis; // this can be slightly unexpected
	} else {
		$reserved = $adjustLength ? $ellipsisLen : 0;
		if ( $length > 0 ) {
			# Keep the head: xyz...
			$head = substr( $string, 0, $length - $reserved );
			$result = $this->removeBadCharLast( $head ) . $ellipsis;
		} else {
			# Keep the tail: ...xyz
			$tail = substr( $string, $length + $reserved );
			$result = $ellipsis . $this->removeBadCharFirst( $tail );
		}
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
	# This check is still needed with $adjustLength: when the ellipsis is longer
	# than the limit, the original string can be shorter than the result.
	return strlen( $result ) < strlen( $string ) ? $result : $string;
}
2445
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharLast( $string ) {
	if ( $string != '' ) {
		$char = ord( $string[strlen( $string ) - 1] );
		$m = array();
		if ( $char >= 0xc0 ) {
			# We got the first byte only of a multibyte char; remove it.
			$string = substr( $string, 0, -1 );
		} elseif ( $char >= 0x80 &&
			# The /s modifier (PCRE_DOTALL) lets (.*) span newlines; without
			# it a chopped character after a newline would never be matched.
			preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
				'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
		{
			# We chopped in the middle of a character; remove it
			$string = $m[1];
		}
	}
	return $string;
}
2470
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharFirst( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$firstByte = ord( $string[0] );
	$startsWithContinuation = ( $firstByte >= 0x80 && $firstByte < 0xc0 );
	if ( $startsWithContinuation ) {
		# The cut fell inside a character; drop all leading continuation bytes
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}
	return $string;
}
2488
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
 * Also, this will not detect things like "display:none" CSS.
 *
 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
 *
 * @param string $text HTML string to truncate
 * @param int $length (zero/positive) Maximum length (including ellipses)
 * @param string $ellipsis String to append to the truncated text; the
 *   default '...' is replaced by the localized 'ellipsis' message
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}
	# Check if there is clearly no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format (convention)
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML (short-circuit)
	}

	$displayLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = $pRet = ''; // accumulated tag name, accumulated result string
	$openTags = array(); // open tag stack
	$pOpenTags = array();

	$textLen = strlen( $text );
	$neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
	for ( $pos = 0; true; ++$pos ) {
		# Consider truncation once the display length has reached the maximum.
		# Check that we're not in the middle of a bracket/entity...
		if ( $displayLen >= $neLength && $bracketState == 0 && $entityState == 0 ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless there turn out to
				# be so few remaining characters that truncation is not necessary.
				$pOpenTags = $openTags; // save state
				$pRet = $ret; // save state
			} elseif ( $displayLen > $length && $displayLen > strlen( $ellipsis ) ) {
				# String in fact does need truncation, the truncation point was OK.
				$openTags = $pOpenTags; // reload state
				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
		if ( $pos >= $textLen ) break; // extra iteration just for above checks

		# Read the next char...
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for a slash next)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$displayLen++; // entity is one displayed char
				}
			} else {
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$displayLen++; // this char is displayed
					// Add the next $max display text chars after this in one swoop...
					$max = ( $testingEllipsis ? $length : $neLength ) - $displayLen;
					$skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
					$displayLen += $skipped;
					$pos += $skipped;
				}
			}
		}
	}
	if ( $displayLen == 0 ) {
		return ''; // no text shown, nothing to format
	}
	// Close the last tag if left unclosed by bad HTML.
	// Argument order is ( &$tag, $tagType, $lastCh, &$openTags ); the last
	// byte of the text plays the role of the character before the bracket.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
2604
/**
 * truncateHtml() helper function
 * like strcspn() but adds the skipped chars to $ret
 *
 * @param string &$ret Result string; the skipped bytes are appended to it
 * @param string $text Text being scanned
 * @param string $search Bytes that stop the scan
 * @param int $start Offset in $text at which the scan begins
 * @param int|null $len Maximum number of bytes to skip; null means no
 *   limit, negative values are treated as 0
 * @return int Number of bytes skipped (0 when $start is past the end)
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
	$skipCount = 0;
	if ( $start < strlen( $text ) ) {
		if ( $len === null ) {
			// No limit: leave the length argument out. A negative length
			// does not mean "unlimited" to strcspn(); it stops the scan
			// that many bytes before the end of the string.
			$skipCount = strcspn( $text, $search, $start );
		} else {
			$skipCount = strcspn( $text, $search, $start, max( 0, $len ) );
		}
		$ret .= substr( $text, $start, $skipCount );
	}
	return $skipCount;
}
2620
/**
 * truncateHtml() helper function
 * (a) push or pop $tag from $openTags as needed
 * (b) clear $tag value
 *
 * Nothing happens when $tag is empty after left-trimming.
 *
 * @param String &$tag Current HTML tag name we are looking at
 * @param int $tagType (0-open tag, 1-close tag)
 * @param char $lastCh Character before the '>' that ended this tag
 * @param array &$openTags Open tag stack (not accounting for $tag)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return;
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		// Opening tag that did not close itself
		$openTags[] = $tag;
	} elseif ( $tagType == 1 ) {
		// Closing tag: only pop when it matches the innermost open tag
		if ( $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
			array_pop( $openTags );
		}
	}
	$tag = '';
}
2643
/**
 * Grammatical transformations, needed for inflected languages
 * Invoked by putting {{grammar:case|word}} in a message
 *
 * This base implementation only consults $wgGrammarForms, indexed by
 * language code, then case, then word; the word is returned unchanged
 * when no such entry is set.
 *
 * @param $word string
 * @param $case string
 * @return string
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	$langCode = $this->getCode();
	if ( !isset( $wgGrammarForms[$langCode][$case][$word] ) ) {
		return $word;
	}
	return $wgGrammarForms[$langCode][$case][$word];
}
2659
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If the feminine form is missing the masculine one is used for it, and
 * if the neutral form is missing the masculine one is used as well.
 * These details may be overridden per language.
 *
 * @param $gender string 'male', 'female', or anything else for neutral
 * @param $forms Array of forms: masculine, feminine, neutral
 * @return string Empty string when $forms is empty
 */
function gender( $gender, $forms ) {
	if ( count( $forms ) == 0 ) {
		return '';
	}

	// Make sure at least the masculine and feminine slots exist
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $gender === 'male' ) {
		$chosen = $forms[0];
	} elseif ( $gender === 'female' ) {
		$chosen = $forms[1];
	} elseif ( isset( $forms[2] ) ) {
		$chosen = $forms[2];
	} else {
		$chosen = $forms[0];
	}
	return $chosen;
}
2681
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 forms of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * This base implementation picks the first form when $count == 1 and the
 * second form otherwise.
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language
 */
function convertPlural( $count, $forms ) {
	if ( count( $forms ) == 0 ) {
		return '';
	}

	$forms = $this->preConvertPlural( $forms, 2 );
	if ( $count == 1 ) {
		return $forms[0];
	}
	return $forms[1];
}
2705
/**
 * Pads the array of forms given to convertPlural to the requested
 * amount of forms by copying the last one.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	// Each pass appends a copy of the element at index (current size - 1)
	for ( $have = count( $forms ); $have < $count; $have++ ) {
		$forms[] = $forms[$have - 1];
	}
	return $forms;
}
2720
/**
 * Maybe translate block durations. Note that this function is somewhat misnamed: it
 * deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
 * (which is an absolute timestamp).
 *
 * The duration is looked up among the values returned by
 * SpecialBlock::getSuggestedDurations(); on a match the trimmed,
 * HTML-escaped key of that entry is returned, otherwise $str unchanged.
 *
 * @param $str String: the validated block duration in English
 * @return Somehow translated block duration
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$durations = SpecialBlock::getSuggestedDurations( $this );
	foreach ( $durations as $label => $duration ) {
		if ( strcmp( $str, $duration ) == 0 ) {
			return htmlspecialchars( trim( $label ) );
		}
	}
	return $str;
}
2737
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use.
 *
 * This base implementation returns the text unchanged.
 *
 * @param $text String
 * @return String
 */
function segmentForDiff( $text ) {
	return $text;
}
2748
/**
 * Counterpart of segmentForDiff(): unsegment to show the result.
 *
 * This base implementation returns the text unchanged.
 *
 * @param $text String
 * @return String
 */
function unsegmentForDiff( $text ) {
	return $text;
}
2758
/**
 * Convert text to all supported variants.
 * Delegates to the language converter held in mConverter.
 *
 * @param $text String
 */
function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
2763
/**
 * Convert text to different variants of a language.
 * Delegates to the language converter held in mConverter.
 *
 * @param $text String
 */
function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
2768
/**
 * Convert a Title object to a string in the preferred variant.
 * Delegates to the language converter held in mConverter.
 *
 * @param $title Title object
 */
function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
2773
/**
 * Check if this is a language with variants.
 *
 * @return bool True when getVariants() lists more than one variant
 */
function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
2778
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Delegates to the language converter held in mConverter.
 *
 * @param $text String
 */
function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
2783
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title;
 *   it is passed on to convert() as a second argument
 * @return string
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
2794
function convertCategoryKey( $key ) {
	// Delegates to the language converter held in mConverter
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
2798
/**
 * Get the list of variants supported by this language,
 * as reported by the language converter held in mConverter.
 * See sample implementation in LanguageZh.php
 *
 * @return array an array of language codes
 */
function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
2808
function getPreferredVariant() {
	// Delegates to the language converter held in mConverter
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
2812
function getDefaultVariant() {
	// Delegates to the language converter held in mConverter
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
2816
function getURLVariant() {
	// Delegates to the language converter held in mConverter
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
2820
/**
 * If a language supports multiple variants, it is
 * possible that a non-existing link in one variant
 * actually exists in another variant. This function
 * tries to find it. See e.g. LanguageZh.php
 *
 * The work is delegated to the language converter held in mConverter;
 * $link and $nt are handed over by reference.
 *
 * @param $link String: the name of the link
 * @param $nt Mixed: the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *	we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
2836
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * Delegates to the language converter held in mConverter.
 *
 * @deprecated Use autoConvertToAllVariants()
 */
function convertLinkToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
2847
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant.
 * Delegates to the language converter held in mConverter.
 *
 * @return string
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
2857
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the language converter held in mConverter.
 *
 * @return string
 */
function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
2868
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 * Delegates to the language converter held in mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse Passed through to the converter unchanged
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
2880
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * @return string The 'linkTrail' item of the localisation data cache
 *   for this language's code
 */
function linkTrail() {
	$langCode = $this->mCode;
	return self::$dataCache->getItem( $langCode, 'linkTrail' );
}
2890
/**
 * Get the language object itself.
 *
 * @return Language This very object
 */
function getLangObj() {
	return $this;
}
2894
/**
 * Get the RFC 3066 code for this language object
 *
 * @return string The value stored in mCode
 */
function getCode() {
	return $this->mCode;
}
2901
/**
 * Set the language code of this object.
 *
 * @param $code string New value for mCode; it is stored without validation
 */
function setCode( $code ) {
	$this->mCode = $code;
}
2905
/**
 * Get the name of a file for a certain language code
 *
 * The code is mangled by upper-casing its first character and replacing
 * every '-' with '_'.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws MWException when the code fails Language::isValidCode() or
 *   contains ':', '/', a backslash or a NUL byte
 */
static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
	// Protect against path traversal
	$isSafe = Language::isValidCode( $code )
		&& strcspn( $code, ":/\\\000" ) === strlen( $code );
	if ( !$isSafe ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	$mangledCode = str_replace( '-', '_', ucfirst( $code ) );
	return $prefix . $mangledCode . $suffix;
}
2923
/**
 * Get the language code from a file name. Inverse of getFileName()
 *
 * The code part must be one upper-case ASCII letter followed by one or
 * more lower-case ASCII letters or underscores.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return Language code, or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';

	$matches = null;
	preg_match( $pattern, $filename, $matches );
	if ( count( $matches ) == 0 ) {
		return false;
	}

	// Undo the mangling: lower-case everything and turn '_' back into '-'
	$mangledCode = strtolower( $matches[1] );
	return str_replace( '_', '-', $mangledCode );
}
2940
static function getMessagesFileName( $code ) {
	global $IP;
	// Messages files live under languages/messages in the install path
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
2945
static function getClassFileName( $code ) {
	global $IP;
	// Language classes live under languages/classes in the install path
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
2950
/**
 * Get the fallback for a given language
 *
 * @param $code string Language code
 * @return mixed false for 'en', otherwise the 'fallback' item of the
 *   localisation cache for $code
 */
static function getFallbackFor( $code ) {
	// Shortcut: 'en' is answered without consulting the cache
	if ( $code === 'en' ) {
		return false;
	}
	return self::getLocalisationCache()->getItem( $code, 'fallback' );
}
2962
/**
 * Get all messages for a given language
 * WARNING: this may take a long time
 *
 * @param $code string Language code
 * @return mixed The 'messages' item of the localisation cache for $code
 */
static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
2970
/**
 * Get a message for a given language
 *
 * @param $key string Message key
 * @param $code string Language code
 * @return mixed The $key sub-item of the 'messages' item of the
 *   localisation cache for $code
 */
static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
2977
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * '$1' is replaced by $wgMetaNamespace, {{grammar:case|word}} constructs
 * are resolved through convertGrammar(), and spaces are turned into
 * underscores. Names without '$1' are returned untouched.
 *
 * @param $talk string Namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback uses plain $this; taking it by reference is unnecessary.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
2994
function replaceGrammarInNamespace( $m ) {
	// preg_replace_callback() handler for fixVariableInNamespace():
	// group 1 is the grammatical case, group 2 the word
	$case = trim( $m[1] );
	$word = trim( $m[2] );
	return $this->convertGrammar( $word, $case );
}
2998
/**
 * Get the upper-case and lower-case character maps.
 *
 * The maps are read from the precompiled data file Utf8Case.ser on the
 * first call and kept in static variables afterwards.
 *
 * @return array Two elements: the 'wikiUpperChars' entry and the
 *   'wikiLowerChars' entry of the precompiled data
 * @throws MWException when Utf8Case.ser cannot be loaded
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before leaving through the exception
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );
	return array( $wikiUpperChars, $wikiLowerChars );
}
3016
/**
 * Decode an expiry (block, protection, etc) which has come from the DB
 *
 * An empty expiry, or one equal to the database's infinity value, is
 * treated as infinite. The infinity value and the 'infiniteblock' message
 * are fetched once and kept in static variables.
 *
 * @param $expiry String: Database expiry String
 * @param $format Bool|Int true to process using language functions, or TS_ constant
 *     to return the expiry in a given timestamp
 * @return String
 */
public function formatExpiry( $expiry, $format = true ) {
	static $infinity, $infinityMsg;
	if ( $infinity === null ) {
		$infinityMsg = wfMessage( 'infiniteblock' );
		$infinity = wfGetDB( DB_SLAVE )->getInfinity();
	}

	$useLanguage = ( $format === true );
	$isInfinite = ( $expiry == '' || $expiry == $infinity );

	if ( $isInfinite ) {
		return $useLanguage ? $infinityMsg : $infinity;
	}
	if ( $useLanguage ) {
		return $this->timeanddate( $expiry );
	}
	return wfTimestamp( $format, $expiry );
}
3042
/**
 * Format a time period given in seconds as abbreviated text.
 *
 * Below 10 seconds (after rounding to tenths) one decimal place is shown;
 * below a minute whole seconds; below an hour minutes and seconds;
 * otherwise hours, minutes and seconds. Numbers go through formatNum()
 * and the unit labels are the 'seconds-abbrev', 'minutes-abbrev' and
 * 'hours-abbrev' messages.
 *
 * @param $seconds String
 * @return string
 */
function formatTimePeriod( $seconds ) {
	$tenths = round( $seconds * 10 );
	if ( $tenths < 100 ) {
		return $this->formatNum( sprintf( "%.1f", $tenths / 10 ) ) .
			$this->getMessageFromDB( 'seconds-abbrev' );
	}

	$wholeSeconds = round( $seconds );
	if ( $wholeSeconds < 60 ) {
		return $this->formatNum( $wholeSeconds ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	if ( $wholeSeconds < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		// Rounding may produce a full minute; carry it over
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		return $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
			$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
	}

	$hours = floor( $seconds / 3600 );
	$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
	$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
	// Carry rounded-up seconds into minutes, then minutes into hours
	if ( $secondsPart == 60 ) {
		$secondsPart = 0;
		$minutes++;
	}
	if ( $minutes == 60 ) {
		$minutes = 0;
		$hours++;
	}
	return $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ' .
		$this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
		$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
}
3079
/**
 * Format a bitrate for output, using an appropriate unit
 * (bps, kbps, Mbps or Gbps) according to the magnitude in question.
 *
 * Units step by a factor of 1000. Mantissas below 10 keep one decimal
 * place, larger ones are rounded to whole numbers. Values beyond the
 * largest unit are expressed in Gbps, and positive values below 1 in bps.
 *
 * @param $bps Number: bits per second
 * @return string Number formatted with formatNum(), followed by the unit
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		return $this->formatNum( $bps ) . $units[0];
	}
	$unitIndex = (int)floor( log10( $bps ) / 3 );
	// Keep the index inside $units: it would be negative for 0 < $bps < 1
	// and past the end of the array for $bps >= 10^12.
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );
	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
3094
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question
 *
 * The size is divided by 1024 while it is greater than 1024, at most
 * three times. Bytes and kilobytes are rounded to whole numbers,
 * megabytes and gigabytes to two decimal places. The number replaces
 * '$1' in the matching 'size-*' message.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	$messages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );

	// Step up one unit at a time, stopping at gigabytes
	$unit = 0;
	while ( $unit < 3 && $size > 1024 ) {
		$size = $size / 1024;
		$unit++;
	}

	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter
	$precision = ( $unit >= 2 ) ? 2 : 0;

	$size = round( $size, $precision );
	$text = $this->getMessageFromDB( $messages[$unit] );
	return str_replace( '$1', $this->formatNum( $size ), $text );
}
3127
/**
 * Get the conversion rule title, if any.
 * Delegates to the language converter held in mConverter.
 */
function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
3134 }