Merge "Fix the bug for dates between 1912 and 1941 in Thai language"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Tue, 10 Jul 2018 08:55:56 +0000 (08:55 +0000)
1  2 
languages/Language.php
tests/phpunit/languages/LanguageTest.php

diff --combined languages/Language.php
@@@ -154,9 -154,9 +154,9 @@@ class Language 
        /**
         * Unicode directional formatting characters, for embedBidi()
         */
 -      static private $lre = "\xE2\x80\xAA"; // U+202A LEFT-TO-RIGHT EMBEDDING
 -      static private $rle = "\xE2\x80\xAB"; // U+202B RIGHT-TO-LEFT EMBEDDING
 -      static private $pdf = "\xE2\x80\xAC"; // U+202C POP DIRECTIONAL FORMATTING
 +      static private $lre = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING
 +      static private $rle = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING
 +      static private $pdf = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING
  
        /**
         * Directionality test regex for embedBidi(). Matches the first strong directionality codepoint:
                }
  
                // get the language object to process
 -              $langObj = isset( self::$mLangObjCache[$code] )
 -                      ? self::$mLangObjCache[$code]
 -                      : self::newFromCode( $code );
 +              $langObj = self::$mLangObjCache[$code] ?? self::newFromCode( $code );
  
                // merge the language object in to get it up front in the cache
                self::$mLangObjCache = array_merge( [ $code => $langObj ], self::$mLangObjCache );
  
                // Check if there is a language class for the code
                $class = self::classFromCode( $code, $fallback );
 -              if ( class_exists( $class ) ) {
 +              // LanguageCode does not inherit Language
 +              if ( class_exists( $class ) && is_a( $class, 'Language', true ) ) {
                        $lang = new $class;
                        return $lang;
                }
         * language, script or variant codes actually exist in the repositories.
         *
         * Based on regexes by Mark Davis of the Unicode Consortium:
 -       * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
 +       * https://www.unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
         *
         * @param string $code
         * @param bool $lenient Whether to allow '_' as separator. The default is only '-'.
         */
        public function getNsText( $index ) {
                $ns = $this->getNamespaces();
 -              return isset( $ns[$index] ) ? $ns[$index] : false;
 +              return $ns[$index] ?? false;
        }
  
        /**
                $ns = $wgExtraGenderNamespaces +
                        (array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
  
 -              return isset( $ns[$index][$gender] ) ? $ns[$index][$gender] : $this->getNsText( $index );
 +              return $ns[$index][$gender] ?? $this->getNsText( $index );
        }
  
        /**
        function getLocalNsIndex( $text ) {
                $lctext = $this->lc( $text );
                $ids = $this->getNamespaceIds();
 -              return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 +              return $ids[$lctext] ?? false;
        }
  
        /**
                        return $ns;
                }
                $ids = $this->getNamespaceIds();
 -              return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 +              return $ids[$lctext] ?? false;
        }
  
        /**
        /**
         * Get an array of language names, indexed by code.
         * @param null|string $inLanguage Code of language in which to return the names
 -       *              Use null for autonyms (native names)
 +       *              Use null for autonyms (native names)
         * @param string $include One of:
 -       *              'all' all available languages
 -       *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
 -       *              'mwfile' only if the language is in 'mw' *and* has a message file
 -       * @return array Language code => language name
 +       *              'all' all available languages
 +       *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
 +       *              'mwfile' only if the language is in 'mw' *and* has a message file
 +       * @return array Language code => language name (sorted by key)
         * @since 1.20
         */
        public static function fetchLanguageNames( $inLanguage = null, $include = 'mw' ) {
         *              'all' all available languages
         *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
         *              'mwfile' only if the language is in 'mw' *and* has a message file
 -       * @return array Language code => language name
 +       * @return array Language code => language name (sorted by key)
         */
        private static function fetchLanguageNamesUncached( $inLanguage = null, $include = 'mw' ) {
                global $wgExtraLanguageNames, $wgUsePigLatinVariant;
         * @param string $ts 14-character timestamp
         *      YYYYMMDDHHMMSS
         *      01234567890123
 -       * @param DateTimeZone $zone Timezone of $ts
 +       * @param DateTimeZone|null $zone Timezone of $ts
         * @param int &$ttl The amount of time (in seconds) the output may be cached for.
         * Only makes sense if $ts is the current time.
         * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
                        # Add 543 years to the Gregorian calendar
                        # Months and days are identical
                        $gy_offset = $gy + 543;
+                       # fix for dates between 1912 and 1941
+                       # https://en.wikipedia.org/?oldid=836596673#New_year
+                       if ( $gy >= 1912 && $gy <= 1940 ) {
+                               if ( $gm <= 3 ) {
+                                       $gy_offset--;
+                               }
+                               $gm = ( $gm - 3 ) % 12;
+                       }
                } elseif ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
                        # Minguo dates
                        # Deduct 1911 years from the Gregorian calendar
         * Gets directionality of the first strongly directional codepoint, for embedBidi()
         *
         * This is the rule the BIDI algorithm uses to determine the directionality of
 -       * paragraphs ( http://unicode.org/reports/tr9/#The_Paragraph_Level ) and
 -       * FSI isolates ( http://unicode.org/reports/tr9/#Explicit_Directional_Isolates ).
 +       * paragraphs ( https://www.unicode.org/reports/tr9/#The_Paragraph_Level ) and
 +       * FSI isolates ( https://www.unicode.org/reports/tr9/#Explicit_Directional_Isolates ).
         *
         * TODO: Does not handle BIDI control characters inside the text.
         * TODO: Does not handle unallocated characters.
                        if ( $code < 0xac00 || 0xd7a4 <= $code ) {
                                return $matches[1];
                        } elseif ( $code < 0xb098 ) {
 -                              return "\xe3\x84\xb1";
 +                              return "\u{3131}";
                        } elseif ( $code < 0xb2e4 ) {
 -                              return "\xe3\x84\xb4";
 +                              return "\u{3134}";
                        } elseif ( $code < 0xb77c ) {
 -                              return "\xe3\x84\xb7";
 +                              return "\u{3137}";
                        } elseif ( $code < 0xb9c8 ) {
 -                              return "\xe3\x84\xb9";
 +                              return "\u{3139}";
                        } elseif ( $code < 0xbc14 ) {
 -                              return "\xe3\x85\x81";
 +                              return "\u{3141}";
                        } elseif ( $code < 0xc0ac ) {
 -                              return "\xe3\x85\x82";
 +                              return "\u{3142}";
                        } elseif ( $code < 0xc544 ) {
 -                              return "\xe3\x85\x85";
 +                              return "\u{3145}";
                        } elseif ( $code < 0xc790 ) {
 -                              return "\xe3\x85\x87";
 +                              return "\u{3147}";
                        } elseif ( $code < 0xcc28 ) {
 -                              return "\xe3\x85\x88";
 +                              return "\u{3148}";
                        } elseif ( $code < 0xce74 ) {
 -                              return "\xe3\x85\x8a";
 +                              return "\u{314A}";
                        } elseif ( $code < 0xd0c0 ) {
 -                              return "\xe3\x85\x8b";
 +                              return "\u{314B}";
                        } elseif ( $code < 0xd30c ) {
 -                              return "\xe3\x85\x8c";
 +                              return "\u{314C}";
                        } elseif ( $code < 0xd558 ) {
 -                              return "\xe3\x85\x8d";
 +                              return "\u{314D}";
                        } else {
 -                              return "\xe3\x85\x8e";
 +                              return "\u{314E}";
                        }
                } else {
                        return '';
         * @deprecated No-op since 1.28
         */
        function initEncoding() {
 +              wfDeprecated( __METHOD__, '1.28' );
                // No-op.
        }
  
         * @deprecated No-op since 1.28
         */
        function recodeForEdit( $s ) {
 +              wfDeprecated( __METHOD__, '1.28' );
                return $s;
        }
  
         * @deprecated No-op since 1.28
         */
        function recodeInput( $s ) {
 +              wfDeprecated( __METHOD__, '1.28' );
                return $s;
        }
  
                global $wgAllUnicodeFixes;
                $s = UtfNormal\Validator::cleanUp( $s );
                if ( $wgAllUnicodeFixes ) {
 -                      $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s );
 -                      $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s );
 +                      $s = $this->transformUsingPairFile( 'normalize-ar.php', $s );
 +                      $s = $this->transformUsingPairFile( 'normalize-ml.php', $s );
                }
  
                return $s;
         * @throws MWException
         * @return string
         */
 -      function transformUsingPairFile( $file, $string ) {
 +      protected function transformUsingPairFile( $file, $string ) {
                if ( !isset( $this->transformData[$file] ) ) {
 -                      $data = wfGetPrecompiledData( $file );
 -                      if ( $data === false ) {
 -                              throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
 -                      }
 +                      global $IP;
 +                      $data = require "$IP/languages/data/{$file}";
                        $this->transformData[$file] = new ReplacementArray( $data );
                }
                return $this->transformData[$file]->replace( $string );
         * @return string
         */
        function getDirMark( $opposite = false ) {
 -              $lrm = "\xE2\x80\x8E"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
 -              $rlm = "\xE2\x80\x8F"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM
 +              $lrm = "\u{200E}"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
 +              $rlm = "\u{200F}"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM
                if ( $opposite ) {
                        return $this->isRTL() ? $lrm : $rlm;
                }
                        return;
                }
                $this->mMagicHookDone = true;
 -              Hooks::run( 'LanguageGetMagic', [ &$this->mMagicExtensions, $this->getCode() ] );
 +              Hooks::run( 'LanguageGetMagic', [ &$this->mMagicExtensions, $this->getCode() ], '1.16' );
        }
  
        /**
                        $this->mExtendedSpecialPageAliases =
                                self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
                        Hooks::run( 'LanguageGetSpecialPageAliases',
 -                              [ &$this->mExtendedSpecialPageAliases, $this->getCode() ] );
 +                              [ &$this->mExtendedSpecialPageAliases, $this->getCode() ], '1.16' );
                }
  
                return $this->mExtendedSpecialPageAliases;
         * @param int $length Maximum length (including ellipsis)
         * @param string $ellipsis String to append to the truncated text
         * @param bool $adjustLength Subtract length of ellipsis from $length.
 -       *      $adjustLength was introduced in 1.18, before that behaved as if false.
 +       *      $adjustLength was introduced in 1.18, before that behaved as if false.
         * @return string
         */
        function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
         * @return string
         */
        private function truncateInternal(
 -              $string, $length, $ellipsis = '...', $adjustLength = true, $measureLength, $getSubstring
 +              $string, $length, $ellipsis, $adjustLength, $measureLength, $getSubstring
        ) {
                if ( !is_callable( $measureLength ) || !is_callable( $getSubstring ) ) {
                        throw new InvalidArgumentException( 'Invalid callback provided' );
                if ( $gender === 'female' ) {
                        return $forms[1];
                }
 -              return isset( $forms[2] ) ? $forms[2] : $forms[0];
 +              return $forms[2] ?? $forms[0];
        }
  
        /**
         * match up with it.
         *
         * @param string $str The validated block duration in English
 -       * @param User $user User object to use timezone from or null for $wgUser
 +       * @param User|null $user User object to use timezone from or null for $wgUser
         * @param int $now Current timestamp, for formatting relative block durations
         * @return string Somehow translated block duration
         * @see LanguageFi.php for example implementation
         * the "raw" tag (-{R| }-) to prevent conversion.
         *
         * This function is called "markNoConversion" for historical
 -       * reasons.
 +       * reasons *BUT DIFFERS SIGNIFICANTLY* from
 +       * LanguageConverter::markNoConversion(), with which it is easily
 +       * confused.
         *
         * @param string $text Text to be used for external link
         * @param bool $noParse Wrap it without confirming it's a real URL first
         * @return string The tagged text
 +       * @deprecated since 1.32, use LanguageConverter::markNoConversion()
 +       *  instead.
         */
        public function markNoConversion( $text, $noParse = false ) {
 +              wfDeprecated( __METHOD__, '1.32' );
                // Excluding protocol-relative URLs may avoid many false positives.
                if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
                        return $this->mConverter->markNoConversion( $text );
         * @return bool
         */
        public function equals( Language $lang ) {
 -              return $lang->getCode() === $this->mCode;
 +              return $lang === $this || $lang->getCode() === $this->mCode;
        }
  
        /**
         * @throws MWException
         * @return string $prefix . $mangledCode . $suffix
         */
 -      public static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
 +      public static function getFileName( $prefix, $code, $suffix = '.php' ) {
                if ( !self::isValidBuiltInCode( $code ) ) {
                        throw new MWException( "Invalid language code \"$code\"" );
                }
@@@ -1029,6 -1029,13 +1029,13 @@@ class LanguageTest extends LanguageClas
                                '2555',
                                'Thai year'
                        ],
+                       [
+                               'xkY',
+                               '19410101090705',
+                               '2484',
+                               '2484',
+                               'Thai year'
+                       ],
                        [
                                'xoY',
                                '20120102090705',
                                "1 gigabyte"
                        ],
                        [
 -                              pow( 1024, 4 ),
 +                              1024 ** 4,
                                "1 TB",
                                "1 terabyte"
                        ],
                        [
 -                              pow( 1024, 5 ),
 +                              1024 ** 5,
                                "1 PB",
                                "1 petabyte"
                        ],
                        [
 -                              pow( 1024, 6 ),
 +                              1024 ** 6,
                                "1 EB",
                                "1,024 exabyte"
                        ],
                        [
 -                              pow( 1024, 7 ),
 +                              1024 ** 7,
                                "1 ZB",
                                "1 zetabyte"
                        ],
                        [
 -                              pow( 1024, 8 ),
 +                              1024 ** 8,
                                "1 YB",
                                "1 yottabyte"
                        ],
                                "1 megabit per second"
                        ],
                        [
 -                              pow( 10, 9 ),
 +                              10 ** 9,
                                "1 Gbps",
                                "1 gigabit per second"
                        ],
                        [
 -                              pow( 10, 12 ),
 +                              10 ** 12,
                                "1 Tbps",
                                "1 terabit per second"
                        ],
                        [
 -                              pow( 10, 15 ),
 +                              10 ** 15,
                                "1 Pbps",
                                "1 petabit per second"
                        ],
                        [
 -                              pow( 10, 18 ),
 +                              10 ** 18,
                                "1 Ebps",
                                "1 exabit per second"
                        ],
                        [
 -                              pow( 10, 21 ),
 +                              10 ** 21,
                                "1 Zbps",
                                "1 zetabit per second"
                        ],
                        [
 -                              pow( 10, 24 ),
 +                              10 ** 24,
                                "1 Ybps",
                                "1 yottabit per second"
                        ],
                        [
 -                              pow( 10, 27 ),
 +                              10 ** 27,
                                "1,000 Ybps",
                                "1,000 yottabits per second"
                        ],
         * @covers Language::embedBidi()
         */
        public function testEmbedBidi() {
 -              $lre = "\xE2\x80\xAA"; // U+202A LEFT-TO-RIGHT EMBEDDING
 -              $rle = "\xE2\x80\xAB"; // U+202B RIGHT-TO-LEFT EMBEDDING
 -              $pdf = "\xE2\x80\xAC"; // U+202C POP DIRECTIONAL FORMATTING
 +              $lre = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING
 +              $rle = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING
 +              $pdf = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING
                $lang = $this->getLang();
                $this->assertEquals(
                        '123',