Merge "Fix the bug for dates between 1912 and 1941 in Thai language"

author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>

Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)

committer Gerrit Code Review <gerrit@wikimedia.org>

Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)
diff --combined languages/Language.php

index deee2bc,0941e3d..8373ffc
--- 1/languages/Language.php
--- 2/languages/Language.php
+++ b/languages/Language.php
@@@ -154,9 -154,9 +154,9 @@@ class Language 
         /**
          * Unicode directional formatting characters, for embedBidi()
          */
- -      static private $lre = "\xE2\x80\xAA"; // U+202A LEFT-TO-RIGHT EMBEDDING
- -      static private $rle = "\xE2\x80\xAB"; // U+202B RIGHT-TO-LEFT EMBEDDING
- -      static private $pdf = "\xE2\x80\xAC"; // U+202C POP DIRECTIONAL FORMATTING
+ +      static private $lre = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING
+ +      static private $rle = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING
+ +      static private $pdf = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING
   
         /**
          * Directionality test regex for embedBidi(). Matches the first strong directionality codepoint:
@@@ -188,7 -188,9 +188,7 @@@
                 }
   
                 // get the language object to process
- -              $langObj = isset( self::$mLangObjCache[$code] )
- -                      ? self::$mLangObjCache[$code]
- -                      : self::newFromCode( $code );
+ +              $langObj = self::$mLangObjCache[$code] ?? self::newFromCode( $code );
   
                 // merge the language object in to get it up front in the cache
                 self::$mLangObjCache = array_merge( [ $code => $langObj ], self::$mLangObjCache );
@@@ -220,8 -222,7 +220,8 @@@
   
                 // Check if there is a language class for the code
                 $class = self::classFromCode( $code, $fallback );
- -              if ( class_exists( $class ) ) {
+ +              // LanguageCode does not inherit Language
+ +              if ( class_exists( $class ) && is_a( $class, 'Language', true ) ) {
                         $lang = new $class;
                         return $lang;
                 }
@@@ -272,7 -273,7 +272,7 @@@
          * language, script or variant codes actually exist in the repositories.
          *
          * Based on regexes by Mark Davis of the Unicode Consortium:
- -       * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
+ +       * https://www.unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
          *
          * @param string $code
          * @param bool $lenient Whether to allow '_' as separator. The default is only '-'.
@@@ -541,7 -542,7 +541,7 @@@
          */
         public function getNsText( $index ) {
                 $ns = $this->getNamespaces();
- -              return isset( $ns[$index] ) ? $ns[$index] : false;
+ +              return $ns[$index] ?? false;
         }
   
         /**
@@@ -576,7 -577,7 +576,7 @@@
                 $ns = $wgExtraGenderNamespaces +
                         (array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
   
- -              return isset( $ns[$index][$gender] ) ? $ns[$index][$gender] : $this->getNsText( $index );
+ +              return $ns[$index][$gender] ?? $this->getNsText( $index );
         }
   
         /**
@@@ -612,7 -613,7 +612,7 @@@
         function getLocalNsIndex( $text ) {
                 $lctext = $this->lc( $text );
                 $ids = $this->getNamespaceIds();
- -              return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
+ +              return $ids[$lctext] ?? false;
         }
   
         /**
@@@ -699,7 -700,7 +699,7 @@@
                         return $ns;
                 }
                 $ids = $this->getNamespaceIds();
- -              return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
+ +              return $ids[$lctext] ?? false;
         }
   
         /**
@@@ -791,12 -792,12 +791,12 @@@
         /**
          * Get an array of language names, indexed by code.
          * @param null|string $inLanguage Code of language in which to return the names
- -       *              Use null for autonyms (native names)
+ +       *              Use null for autonyms (native names)
          * @param string $include One of:
- -       *              'all' all available languages
- -       *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
- -       *              'mwfile' only if the language is in 'mw' *and* has a message file
- -       * @return array Language code => language name
+ +       *              'all' all available languages
+ +       *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
+ +       *              'mwfile' only if the language is in 'mw' *and* has a message file
+ +       * @return array Language code => language name (sorted by key)
          * @since 1.20
          */
         public static function fetchLanguageNames( $inLanguage = null, $include = 'mw' ) {
@@@ -822,7 -823,7 +822,7 @@@
          *              'all' all available languages
          *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
          *              'mwfile' only if the language is in 'mw' *and* has a message file
- -       * @return array Language code => language name
+ +       * @return array Language code => language name (sorted by key)
          */
         private static function fetchLanguageNamesUncached( $inLanguage = null, $include = 'mw' ) {
                 global $wgExtraLanguageNames, $wgUsePigLatinVariant;
@@@ -1090,7 -1091,7 +1090,7 @@@
          * @param string $ts 14-character timestamp
          *      YYYYMMDDHHMMSS
          *      01234567890123
- -       * @param DateTimeZone $zone Timezone of $ts
+ +       * @param DateTimeZone|null $zone Timezone of $ts
          * @param int &$ttl The amount of time (in seconds) the output may be cached for.
          * Only makes sense if $ts is the current time.
          * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
@@@ -1883,6 -1884,14 +1883,14 @@@
                         # Add 543 years to the Gregorian calendar
                         # Months and days are identical
                         $gy_offset = $gy + 543;
+                       # fix for dates between 1912 and 1941
+                       # https://en.wikipedia.org/?oldid=836596673#New_year
+                       if ( $gy >= 1912 && $gy <= 1940 ) {
+                               if ( $gm <= 3 ) {
+                                       $gy_offset--;
+                               }
+                               $gm = ( $gm - 3 ) % 12;
+                       }
                 } elseif ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
                         # Minguo dates
                         # Deduct 1911 years from the Gregorian calendar
@@@ -1950,8 -1959,8 +1958,8 @@@
          * Gets directionality of the first strongly directional codepoint, for embedBidi()
          *
          * This is the rule the BIDI algorithm uses to determine the directionality of
- -       * paragraphs ( http://unicode.org/reports/tr9/#The_Paragraph_Level ) and
- -       * FSI isolates ( http://unicode.org/reports/tr9/#Explicit_Directional_Isolates ).
+ +       * paragraphs ( https://www.unicode.org/reports/tr9/#The_Paragraph_Level ) and
+ +       * FSI isolates ( https://www.unicode.org/reports/tr9/#Explicit_Directional_Isolates ).
          *
          * TODO: Does not handle BIDI control characters inside the text.
          * TODO: Does not handle unallocated characters.
@@@ -2916,33 -2925,33 +2924,33 @@@
                         if ( $code < 0xac00 || 0xd7a4 <= $code ) {
                                 return $matches[1];
                         } elseif ( $code < 0xb098 ) {
- -                              return "\xe3\x84\xb1";
+ +                              return "\u{3131}";
                         } elseif ( $code < 0xb2e4 ) {
- -                              return "\xe3\x84\xb4";
+ +                              return "\u{3134}";
                         } elseif ( $code < 0xb77c ) {
- -                              return "\xe3\x84\xb7";
+ +                              return "\u{3137}";
                         } elseif ( $code < 0xb9c8 ) {
- -                              return "\xe3\x84\xb9";
+ +                              return "\u{3139}";
                         } elseif ( $code < 0xbc14 ) {
- -                              return "\xe3\x85\x81";
+ +                              return "\u{3141}";
                         } elseif ( $code < 0xc0ac ) {
- -                              return "\xe3\x85\x82";
+ +                              return "\u{3142}";
                         } elseif ( $code < 0xc544 ) {
- -                              return "\xe3\x85\x85";
+ +                              return "\u{3145}";
                         } elseif ( $code < 0xc790 ) {
- -                              return "\xe3\x85\x87";
+ +                              return "\u{3147}";
                         } elseif ( $code < 0xcc28 ) {
- -                              return "\xe3\x85\x88";
+ +                              return "\u{3148}";
                         } elseif ( $code < 0xce74 ) {
- -                              return "\xe3\x85\x8a";
+ +                              return "\u{314A}";
                         } elseif ( $code < 0xd0c0 ) {
- -                              return "\xe3\x85\x8b";
+ +                              return "\u{314B}";
                         } elseif ( $code < 0xd30c ) {
- -                              return "\xe3\x85\x8c";
+ +                              return "\u{314C}";
                         } elseif ( $code < 0xd558 ) {
- -                              return "\xe3\x85\x8d";
+ +                              return "\u{314D}";
                         } else {
- -                              return "\xe3\x85\x8e";
+ +                              return "\u{314E}";
                         }
                 } else {
                         return '';
@@@ -2953,7 -2962,6 +2961,7 @@@
          * @deprecated No-op since 1.28
          */
         function initEncoding() {
+ +              wfDeprecated( __METHOD__, '1.28' );
                 // No-op.
         }
   
@@@ -2963,7 -2971,6 +2971,7 @@@
          * @deprecated No-op since 1.28
          */
         function recodeForEdit( $s ) {
+ +              wfDeprecated( __METHOD__, '1.28' );
                 return $s;
         }
   
@@@ -2973,7 -2980,6 +2981,7 @@@
          * @deprecated No-op since 1.28
          */
         function recodeInput( $s ) {
+ +              wfDeprecated( __METHOD__, '1.28' );
                 return $s;
         }
   
@@@ -2992,8 -2998,8 +3000,8 @@@
                 global $wgAllUnicodeFixes;
                 $s = UtfNormal\Validator::cleanUp( $s );
                 if ( $wgAllUnicodeFixes ) {
- -                      $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s );
- -                      $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s );
+ +                      $s = $this->transformUsingPairFile( 'normalize-ar.php', $s );
+ +                      $s = $this->transformUsingPairFile( 'normalize-ml.php', $s );
                 }
   
                 return $s;
@@@ -3013,10 -3019,12 +3021,10 @@@
          * @throws MWException
          * @return string
          */
- -      function transformUsingPairFile( $file, $string ) {
+ +      protected function transformUsingPairFile( $file, $string ) {
                 if ( !isset( $this->transformData[$file] ) ) {
- -                      $data = wfGetPrecompiledData( $file );
- -                      if ( $data === false ) {
- -                              throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
- -                      }
+ +                      global $IP;
+ +                      $data = require "$IP/languages/data/{$file}";
                         $this->transformData[$file] = new ReplacementArray( $data );
                 }
                 return $this->transformData[$file]->replace( $string );
@@@ -3092,8 -3100,8 +3100,8 @@@
          * @return string
          */
         function getDirMark( $opposite = false ) {
- -              $lrm = "\xE2\x80\x8E"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
- -              $rlm = "\xE2\x80\x8F"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM
+ +              $lrm = "\u{200E}"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
+ +              $rlm = "\u{200F}"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM
                 if ( $opposite ) {
                         return $this->isRTL() ? $lrm : $rlm;
                 }
@@@ -3156,7 -3164,7 +3164,7 @@@
                         return;
                 }
                 $this->mMagicHookDone = true;
- -              Hooks::run( 'LanguageGetMagic', [ &$this->mMagicExtensions, $this->getCode() ] );
+ +              Hooks::run( 'LanguageGetMagic', [ &$this->mMagicExtensions, $this->getCode() ], '1.16' );
         }
   
         /**
@@@ -3212,7 -3220,7 +3220,7 @@@
                         $this->mExtendedSpecialPageAliases =
                                 self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
                         Hooks::run( 'LanguageGetSpecialPageAliases',
- -                              [ &$this->mExtendedSpecialPageAliases, $this->getCode() ] );
+ +                              [ &$this->mExtendedSpecialPageAliases, $this->getCode() ], '1.16' );
                 }
   
                 return $this->mExtendedSpecialPageAliases;
@@@ -3485,7 -3493,7 +3493,7 @@@
          * @param int $length Maximum length (including ellipsis)
          * @param string $ellipsis String to append to the truncated text
          * @param bool $adjustLength Subtract length of ellipsis from $length.
- -       *      $adjustLength was introduced in 1.18, before that behaved as if false.
+ +       *      $adjustLength was introduced in 1.18, before that behaved as if false.
          * @return string
          */
         function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
@@@ -3558,7 -3566,7 +3566,7 @@@
          * @return string
          */
         private function truncateInternal(
- -              $string, $length, $ellipsis = '...', $adjustLength = true, $measureLength, $getSubstring
+ +              $string, $length, $ellipsis, $adjustLength, $measureLength, $getSubstring
         ) {
                 if ( !is_callable( $measureLength ) || !is_callable( $getSubstring ) ) {
                         throw new InvalidArgumentException( 'Invalid callback provided' );
@@@ -3962,7 -3970,7 +3970,7 @@@
                 if ( $gender === 'female' ) {
                         return $forms[1];
                 }
- -              return isset( $forms[2] ) ? $forms[2] : $forms[0];
+ +              return $forms[2] ?? $forms[0];
         }
   
         /**
@@@ -4076,7 -4084,7 +4084,7 @@@
          * match up with it.
          *
          * @param string $str The validated block duration in English
- -       * @param User $user User object to use timezone from or null for $wgUser
+ +       * @param User|null $user User object to use timezone from or null for $wgUser
          * @param int $now Current timestamp, for formatting relative block durations
          * @return string Somehow translated block duration
          * @see LanguageFi.php for example implementation
@@@ -4325,18 -4333,13 +4333,18 @@@
          * the "raw" tag (-{R| }-) to prevent conversion.
          *
          * This function is called "markNoConversion" for historical
- -       * reasons.
+ +       * reasons *BUT DIFFERS SIGNIFICANTLY* from
+ +       * LanguageConverter::markNoConversion(), with which it is easily
+ +       * confused.
          *
          * @param string $text Text to be used for external link
          * @param bool $noParse Wrap it without confirming it's a real URL first
          * @return string The tagged text
+ +       * @deprecated since 1.32, use LanguageConverter::markNoConversion()
+ +       *  instead.
          */
         public function markNoConversion( $text, $noParse = false ) {
+ +              wfDeprecated( __METHOD__, '1.32' );
                 // Excluding protocal-relative URLs may avoid many false positives.
                 if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
                         return $this->mConverter->markNoConversion( $text );
@@@ -4400,7 -4403,7 +4408,7 @@@
          * @return bool
          */
         public function equals( Language $lang ) {
- -              return $lang->getCode() === $this->mCode;
+ +              return $lang === $this || $lang->getCode() === $this->mCode;
         }
   
         /**
@@@ -4480,7 -4483,7 +4488,7 @@@
          * @throws MWException
          * @return string $prefix . $mangledCode . $suffix
          */
- -      public static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
+ +      public static function getFileName( $prefix, $code, $suffix = '.php' ) {
                 if ( !self::isValidBuiltInCode( $code ) ) {
                         throw new MWException( "Invalid language code \"$code\"" );
                 }
diff --combined tests/phpunit/languages/LanguageTest.php

index 7e29c92,04ffdab..35bb1f0
--- 1/tests/phpunit/languages/LanguageTest.php
--- 2/tests/phpunit/languages/LanguageTest.php
+++ b/tests/phpunit/languages/LanguageTest.php
@@@ -1029,6 -1029,13 +1029,13 @@@ class LanguageTest extends LanguageClas
                                 '2555',
                                 'Thai year'
                         ],
+                       [
+                               'xkY',
+                               '19410101090705',
+                               '2484',
+                               '2484',
+                               'Thai year'
+                       ],
                         [
                                 'xoY',
                                 '20120102090705',
@@@ -1110,27 -1117,27 +1117,27 @@@
                                 "1 gigabyte"
                         ],
                         [
- -                              pow( 1024, 4 ),
+ +                              1024 ** 4,
                                 "1 TB",
                                 "1 terabyte"
                         ],
                         [
- -                              pow( 1024, 5 ),
+ +                              1024 ** 5,
                                 "1 PB",
                                 "1 petabyte"
                         ],
                         [
- -                              pow( 1024, 6 ),
+ +                              1024 ** 6,
                                 "1 EB",
                                 "1,024 exabyte"
                         ],
                         [
- -                              pow( 1024, 7 ),
+ +                              1024 ** 7,
                                 "1 ZB",
                                 "1 zetabyte"
                         ],
                         [
- -                              pow( 1024, 8 ),
+ +                              1024 ** 8,
                                 "1 YB",
                                 "1 yottabyte"
                         ],
@@@ -1173,37 -1180,37 +1180,37 @@@
                                 "1 megabit per second"
                         ],
                         [
- -                              pow( 10, 9 ),
+ +                              10 ** 9,
                                 "1 Gbps",
                                 "1 gigabit per second"
                         ],
                         [
- -                              pow( 10, 12 ),
+ +                              10 ** 12,
                                 "1 Tbps",
                                 "1 terabit per second"
                         ],
                         [
- -                              pow( 10, 15 ),
+ +                              10 ** 15,
                                 "1 Pbps",
                                 "1 petabit per second"
                         ],
                         [
- -                              pow( 10, 18 ),
+ +                              10 ** 18,
                                 "1 Ebps",
                                 "1 exabit per second"
                         ],
                         [
- -                              pow( 10, 21 ),
+ +                              10 ** 21,
                                 "1 Zbps",
                                 "1 zetabit per second"
                         ],
                         [
- -                              pow( 10, 24 ),
+ +                              10 ** 24,
                                 "1 Ybps",
                                 "1 yottabit per second"
                         ],
                         [
- -                              pow( 10, 27 ),
+ +                              10 ** 27,
                                 "1,000 Ybps",
                                 "1,000 yottabits per second"
                         ],
@@@ -1593,9 -1600,9 +1600,9 @@@
          * @covers Language::embedBidi()
          */
         public function testEmbedBidi() {
- -              $lre = "\xE2\x80\xAA"; // U+202A LEFT-TO-RIGHT EMBEDDING
- -              $rle = "\xE2\x80\xAB"; // U+202B RIGHT-TO-LEFT EMBEDDING
- -              $pdf = "\xE2\x80\xAC"; // U+202C POP DIRECTIONAL FORMATTING
+ +              $lre = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING
+ +              $rle = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING
+ +              $pdf = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING
                 $lang = $this->getLang();
                 $this->assertEquals(
                         '123',
author	jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
	Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)
committer	Gerrit Code Review <gerrit@wikimedia.org>
	Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)
		1	2
languages/Language.php	patch |	diff1 |	diff2 |	blob | history
tests/phpunit/languages/LanguageTest.php	patch |	diff1 |	diff2 |	blob | history