Merge "Accept BCP 47 codes in LanguageConverter rules"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 27 Nov 2018 18:49:25 +0000 (18:49 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Tue, 27 Nov 2018 18:49:25 +0000 (18:49 +0000)
languages/ConverterRule.php
languages/LanguageConverter.php
tests/parser/parserTests.txt

index 8be2d6a..4a330ad 100644 (file)
@@ -153,25 +153,27 @@ class ConverterRule {
                        $to = trim( $v[1] );
                        $v = trim( $v[0] );
                        $u = explode( '=>', $v, 2 );
+                       $vv = $this->mConverter->validateVariant( $v );
                        // if $to is empty (which is also used as $from in bidtable),
                        // strtr() could return a wrong result.
-                       if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) {
-                               $bidtable[$v] = $to;
+                       if ( count( $u ) == 1 && $to !== '' && $vv ) {
+                               $bidtable[$vv] = $to;
                        } elseif ( count( $u ) == 2 ) {
                                $from = trim( $u[0] );
                                $v = trim( $u[1] );
+                               $vv = $this->mConverter->validateVariant( $v );
                                // if $from is empty, strtr() could return a wrong result.
-                               if ( array_key_exists( $v, $unidtable )
-                                       && !is_array( $unidtable[$v] )
+                               if ( array_key_exists( $vv, $unidtable )
+                                       && !is_array( $unidtable[$vv] )
                                        && $from !== ''
-                                       && in_array( $v, $variants ) ) {
-                                       $unidtable[$v] = [ $from => $to ];
-                               } elseif ( $from !== '' && in_array( $v, $variants ) ) {
-                                       $unidtable[$v][$from] = $to;
+                                       && $vv ) {
+                                       $unidtable[$vv] = [ $from => $to ];
+                               } elseif ( $from !== '' && $vv ) {
+                                       $unidtable[$vv][$from] = $to;
                                }
                        }
                        // syntax error, pass
-                       if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
+                       if ( !isset( $this->mConverter->mVariantNames[$vv] ) ) {
                                $bidtable = [];
                                $unidtable = [];
                                break;
index 3c8d300..8fdf4f5 100644 (file)
@@ -1176,8 +1176,21 @@ class LanguageConverter {
                        //    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
                        //    [2] => ''
                        //  ]
-                       $pat = '/;\s*(?=';
+                       $expandedVariants = [];
                        foreach ( $this->mVariants as $variant ) {
+                               $expandedVariants[ $variant ] = 1;
+                               // Accept standard BCP 47 names for variants as well.
+                               $expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1;
+                       }
+                       // Accept old deprecated names for variants
+                       foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
+                               if ( isset( $expandedVariants[ $new ] ) ) {
+                                       $expandedVariants[ $old ] = 1;
+                               }
+                       }
+
+                       $pat = '/;\s*(?=';
+                       foreach ( $expandedVariants as $variant => $ignore ) {
                                // zh-hans:xxx;zh-hant:yyy
                                $pat .= $variant . '\s*:|';
                                // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
index b870efd..d65e49a 100644 (file)
@@ -22998,6 +22998,19 @@ language=zh variant=zh-tw
 <p><span typeof="mw:LanguageVariant" data-parsoid='{"tSp":[6]}' data-mw-variant='{"twoway":[{"l":"zh","t":"China"},{"l":"zh-tw","t":"Taiwan"}]}'></span>, not China</p>
 !! end
 
+!! test
+Explicit definition of language variant alternatives (BCP 47 codes)
+!! options
+language=zh variant=zh-tw
+!! wikitext
+-{zh:China;zh-Hant-TW:Taiwan}-, not China
+!! html/php
+<p>Taiwan, not China
+</p>
+!! html/parsoid
+<p><span typeof="mw:LanguageVariant" data-parsoid='{"tSp":[6]}' data-mw-variant='{"twoway":[{"l":"zh","t":"China"},{"l":"zh-Hant-TW","t":"Taiwan"}]}'></span>, not China</p>
+!! end
+
 !! test
 Filter syntax for language variants
 !! options