From f7bb180fef88665866ef3922e596edbf273ce78d Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Fri, 13 Jul 2018 15:40:20 -0400 Subject: [PATCH] Accept BCP 47 codes in LanguageConverter rules Facilitate a gradual migration away from non-standard MediaWiki language codes. This will ensure that (a) rules can be written with standard BCP 47 codes, and (b) rules written with existing nonstandard codes will continue to work once these are added to LanguageCode::$deprecatedLanguageCodeMapping. Change-Id: I3ba96faafaf40bd47fb5919621f7035f0431a698 --- languages/ConverterRule.php | 20 +++++++++++--------- languages/LanguageConverter.php | 15 ++++++++++++++- tests/parser/parserTests.txt | 13 +++++++++++++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/languages/ConverterRule.php b/languages/ConverterRule.php index dc61519c69..6ce1274be3 100644 --- a/languages/ConverterRule.php +++ b/languages/ConverterRule.php @@ -153,25 +153,27 @@ class ConverterRule { $to = trim( $v[1] ); $v = trim( $v[0] ); $u = explode( '=>', $v, 2 ); + $vv = $this->mConverter->validateVariant( $v ); // if $to is empty (which is also used as $from in bidtable), // strtr() could return a wrong result. - if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) { - $bidtable[$v] = $to; + if ( count( $u ) == 1 && $to !== '' && $vv ) { + $bidtable[$vv] = $to; } elseif ( count( $u ) == 2 ) { $from = trim( $u[0] ); $v = trim( $u[1] ); + $vv = $this->mConverter->validateVariant( $v ); // if $from is empty, strtr() could return a wrong result. 
- if ( array_key_exists( $v, $unidtable ) - && !is_array( $unidtable[$v] ) + if ( array_key_exists( $vv, $unidtable ) + && !is_array( $unidtable[$vv] ) && $from !== '' - && in_array( $v, $variants ) ) { - $unidtable[$v] = [ $from => $to ]; - } elseif ( $from !== '' && in_array( $v, $variants ) ) { - $unidtable[$v][$from] = $to; + && $vv ) { + $unidtable[$vv] = [ $from => $to ]; + } elseif ( $from !== '' && $vv ) { + $unidtable[$vv][$from] = $to; } } // syntax error, pass - if ( !isset( $this->mConverter->mVariantNames[$v] ) ) { + if ( !isset( $this->mConverter->mVariantNames[$vv] ) ) { $bidtable = []; $unidtable = []; break; diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index ea26c64dc7..137fe87d1e 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -1175,8 +1175,21 @@ class LanguageConverter { // [1] => 'zh-hant:yyy' // [2] => '' // ] - $pat = '/;\s*(?='; + $expandedVariants = []; foreach ( $this->mVariants as $variant ) { + $expandedVariants[ $variant ] = 1; + // Accept standard BCP 47 names for variants as well. + $expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1; + } + // Accept old deprecated names for variants + foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) { + if ( isset( $expandedVariants[ $new ] ) ) { + $expandedVariants[ $old ] = 1; + } + } + + $pat = '/;\s*(?='; + foreach ( $expandedVariants as $variant => $ignore ) { // zh-hans:xxx;zh-hant:yyy $pat .= $variant . '\s*:|'; // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index bbd9ecbe99..50c6a891b9 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -22434,6 +22434,19 @@ language=zh variant=zh-tw

, not China

!! end +!! test +Explicit definition of language variant alternatives (BCP 47 codes) +!! options +language=zh variant=zh-tw +!! wikitext +-{zh:China;zh-Hant-TW:Taiwan}-, not China +!! html/php +

<p>Taiwan, not China +</p>
+!! html/parsoid +

, not China

+!! end + !! test Filter syntax for language variants !! options -- 2.20.1