From 750b8f7c0417f0acd9645043e8f57cbf9719107d Mon Sep 17 00:00:00 2001
From: Tim Starling
Date: Tue, 19 Jan 2010 02:36:33 +0000
Subject: [PATCH] In LanguageConverter: * Rewrote convertArray() as an RD
parser (with inline tokenizer) as suggested on CR r60986. Fixes unclosed rule
issue (with parser test). Fixes O(N^2) timing. * Removed $this->mMarkup
abstraction. Life is complicated enough as it is. * Replaced a couple of
instances of explode() with StringUtils::explode(), limited element count in
a couple more.
In ConverterRule:
* Removed mConvTable initialisation from the constructor, unnecessary
* Optimised the "-{xxx}-" tight loop by replacing function calls such as count() and in_array() with language constructs such as isset(). Reduced execution time from 356us to 275us.
* Cached $varsep_pattern for further reduction to 243us.
* A couple more parseFlags() hacks brings it back to 230us.
* Split out $this->mVariantFlags from $this->mFlags. Rearranged flag detection into a foreach/switch to avoid unnecessary isset() calls. 189us.
* Added a special-case optimisation to generateConvTable() for the case where there are no tables defined inline in the article. 116us.
* Fixed bug from r37499: "!R || !N" is always true since they are mutually exclusive, "!R && !N" was intended (with parser test).
* Fixed E_NOTICE from "-{N|foo}-"
---
RELEASE-NOTES | 1 +
languages/LanguageConverter.php | 512 ++++++++++++++++--------------
languages/classes/LanguageGan.php | 6 +-
languages/classes/LanguageKk.php | 3 +-
languages/classes/LanguageSr.php | 3 +-
languages/classes/LanguageZh.php | 4 +-
languages/messages/MessagesEn.php | 1 +
maintenance/language/messages.inc | 1 +
maintenance/parserTests.txt | 22 ++
9 files changed, 304 insertions(+), 249 deletions(-)
diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 13fa883111..3efa86ff8c 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -721,6 +721,7 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
for image divs.
* (bug 22096) IE50Fixes.css and IE55Fixes.css have been dropped from the Monobook
and Chick skins
+* Fixed bug involving unclosed "-{" markup in the language converter
== API changes in 1.16 ==
diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php
index 8a4b711ac4..58e77c92c8 100644
--- a/languages/LanguageConverter.php
+++ b/languages/LanguageConverter.php
@@ -25,7 +25,6 @@ class LanguageConverter {
var $mManualLevel;
var $mCacheKey;
var $mLangObj;
- var $mMarkup;
var $mFlags;
var $mDescCodeSep = ':', $mDescVarSep = ';';
var $mUcfirst = false;
@@ -33,6 +32,8 @@ class LanguageConverter {
var $mURLVariant;
var $mUserVariant;
var $mHeaderVariant;
+ var $mMaxDepth = 10;
+ var $mVarSeparatorPattern;
const CACHE_VERSION_KEY = 'VERSION 6';
@@ -52,7 +53,6 @@ class LanguageConverter {
function __construct( $langobj, $maincode,
$variants = array(),
$variantfallbacks = array(),
- $markup = array(),
$flags = array(),
$manualLevel = array() ) {
$this->mLangObj = $langobj;
@@ -69,15 +69,6 @@ class LanguageConverter {
global $wgLanguageNames;
$this->mVariantNames = $wgLanguageNames;
$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
- $m = array(
- 'begin' => '-{',
- 'flagsep' => '|',
- 'unidsep' => '=>', // for unidirectional conversion
- 'codesep' => ':',
- 'varsep' => ';',
- 'end' => '}-'
- );
- $this->mMarkup = array_merge( $m, $markup );
$f = array(
// 'S' show converted text
// '+' add rules for alltext
@@ -124,7 +115,7 @@ class LanguageConverter {
* @public
*/
function getVariantFallbacks( $v ) {
- if ( array_key_exists( $v, $this->mVariantFallbacks ) ) {
+ if ( isset( $this->mVariantFallbacks[$v] ) ) {
return $this->mVariantFallbacks[$v];
}
return $this->mMainLanguageCode;
@@ -259,7 +250,7 @@ class LanguageConverter {
}
// explode by comma
- $result = explode( ',', strtolower( $acceptLanguage ) );
+ $result = StringUtils::explode( ',', strtolower( $acceptLanguage ) );
$languages = array();
foreach ( $result as $elem ) {
@@ -473,19 +464,22 @@ class LanguageConverter {
}
$ret = array();
- $tarray = explode( $this->mMarkup['begin'], $text );
- $tfirst = array_shift( $tarray );
-
- foreach ( $this->mVariants as $variant ) {
- $ret[$variant] = $this->translate( $tfirst, $variant );
- }
+ $tarray = StringUtils::explode( '-{', $text );
+ $first = true;
foreach ( $tarray as $txt ) {
- $marked = explode( $this->mMarkup['end'], $txt, 2 );
+ if ( $first ) {
+ $first = false;
+ foreach ( $this->mVariants as $variant ) {
+ $ret[$variant] = $this->translate( $txt, $variant );
+ }
+ continue;
+ }
+
+ $marked = explode( '}-', $txt, 2 );
foreach ( $this->mVariants as $variant ) {
- $ret[$variant] .= $this->mMarkup['begin'] . $marked[0] .
- $this->mMarkup['end'];
+ $ret[$variant] .= '-{' . $marked[0] . '}-';
if ( array_key_exists( 1, $marked ) ) {
$ret[$variant] .= $this->translate( $marked[1], $variant );
}
@@ -535,7 +529,7 @@ class LanguageConverter {
* @private
*/
function convertNamespace( $title, $variant ) {
- $splittitle = explode( ':', $title );
+ $splittitle = explode( ':', $title, 2 );
if ( count( $splittitle ) < 2 ) {
return $title;
}
@@ -546,73 +540,6 @@ class LanguageConverter {
return $ret;
}
- /**
- * Convert a text array.
- *
- * @param string $tarray text array to be converted
- * @param string $plang preferred variant
- * @return string converted text
- * @private
- */
- function convertArray( $tarray, $plang ) {
- $beginlen = strlen( $this->mMarkup['begin'] );
- $converted = '';
- $middle = '';
-
- foreach ( $tarray as $text ) {
- // for nested use
- if( $middle ) {
- $text = $middle . $text;
- $middle = '';
- }
-
- // find first and last begin markup(s)
- $firstbegin = strpos( $text, $this->mMarkup['begin'] );
- $lastbegin = strrpos( $text, $this->mMarkup['begin'] );
-
- // if $text contains no begin markup,
- // append $text to $converted and restore end markup
- if( $firstbegin === false ) {
- $converted .= $this->autoConvert( $text, $plang );
- $converted .= $this->mMarkup['end'];
- continue;
- }
-
- // split $text into $left and $right,
- // omit the begin markup in $right
- $left = substr( $text, 0, $firstbegin );
- $right = substr( $text, $lastbegin + $beginlen );
-
- // always convert $left and append it to $converted
- // for nested case, $left is blank but can also be converted
- $converted .= $this->autoConvert( $left, $plang );
-
- // parse and apply manual rule from $right
- $crule = new ConverterRule( $right, $this );
- $crule->parse( $plang );
- $right = $crule->getDisplay();
- $this->applyManualConv( $crule );
-
- // if $text contains only one begin markup,
- // append $left and $right to $converted.
- //
- // otherwise it's a nested use like "-{-{}-}-",
- // this should be handled properly.
- if( $firstbegin === $lastbegin ) {
- $converted .= $right;
- }
- else {
- // not omit the first begin markup
- $middle = substr( $text, $firstbegin, $lastbegin - $firstbegin );
- $middle .= $right;
- //print $middle;
- }
- }
- // Remove the last delimiter (wasn't real)
- $converted = substr( $converted, 0, - strlen( $this->mMarkup['end'] ) );
- return $converted;
- }
-
/**
* Convert text to different variants of a language. The automatic
* conversion is done in autoConvert(). Here we parse the text
@@ -632,12 +559,105 @@ class LanguageConverter {
global $wgDisableLangConversion;
if ( $wgDisableLangConversion ) return $text;
- $plang = $this->getPreferredVariant();
+ $variant = $this->getPreferredVariant();
+
+ return $this->recursiveConvertTopLevel( $text, $variant );
+ }
+
+ protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
+ $startPos = 0;
+ $out = '';
+ $length = strlen( $text );
+ while ( $startPos < $length ) {
+ $m = false;
+ $pos = strpos( $text, '-{', $startPos );
+
+ if ( $pos === false ) {
+ // No more markup, append final segment
+ $out .= $this->autoConvert( substr( $text, $startPos ), $variant );
+ $startPos = $length;
+ return $out;
+ }
+
+ // Markup found
+ // Append initial segment
+ $out .= $this->autoConvert( substr( $text, $startPos, $pos - $startPos ), $variant );
+
+ // Advance position
+ $startPos = $pos;
+
+ // Do recursive conversion
+ $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
+ }
+
+ return $out;
+ }
+
+ protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
+ // Quick sanity check (no function calls)
+ if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
+ throw new MWException( __METHOD__.': invalid input string' );
+ }
- $tarray = StringUtils::explode( $this->mMarkup['end'], $text );
- $converted = $this->convertArray( $tarray, $plang );
+ $startPos += 2;
+ $inner = '';
+ $warningDone = false;
+ $length = strlen( $text );
+
+ while ( $startPos < $length ) {
+ $m = false;
+ preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
+ if ( !$m ) {
+ // Unclosed rule
+ break;
+ }
+
+ $token = $m[0][0];
+ $pos = $m[0][1];
+
+ // Markup found
+ // Append initial segment
+ $inner .= substr( $text, $startPos, $pos - $startPos );
+
+ // Advance position
+ $startPos = $pos;
+
+ switch ( $token ) {
+ case '-{':
+ // Check max depth
+ if ( $depth >= $this->mMaxDepth ) {
+ $inner .= '-{';
+ if ( !$warningDone ) {
+ $inner .= '<span class="error">' .
+ wfMsgForContent( 'language-converter-depth-warning',
+ $this->mMaxDepth ) .
+ '</span>';
+ $warningDone = true;
+ }
+ $startPos += 2;
+ continue;
+ }
+ // Recursively parse another rule
+ $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
+ break;
+ case '}-':
+ // Apply the rule
+ $startPos += 2;
+ $rule = new ConverterRule( $inner, $this );
+ $rule->parse( $variant );
+ $this->applyManualConv( $rule );
+ return $rule->getDisplay();
+ default:
+ throw new MWException( __METHOD__.': invalid regex match' );
+ }
+ }
- return $converted;
+ // Unclosed rule
+ if ( $startPos < $length ) {
+ $inner .= substr( $text, $startPos );
+ }
+ $startPos = $length;
+ return '-{' . $this->autoConvert( $inner, $variant );
}
/**
@@ -840,14 +860,14 @@ class LanguageConverter {
// [[MediaWiki:conversiontable/zh-xx/...|...]]
$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
':Conversiontable';
- $subs = explode( '[[', $txt );
+ $subs = StringUtils::explode( '[[', $txt );
$sublinks = array();
foreach ( $subs as $sub ) {
$link = explode( ']]', $sub, 2 );
if ( count( $link ) != 2 ) {
continue;
}
- $b = explode( '|', $link[0] );
+ $b = explode( '|', $link[0], 2 );
$b = explode( '/', trim( $b[0] ), 3 );
if ( count( $b ) == 3 ) {
$sublink = $b[2];
@@ -862,16 +882,21 @@ class LanguageConverter {
// parse the mappings in this page
- $blocks = explode( $this->mMarkup['begin'], $txt );
- array_shift( $blocks );
+ $blocks = StringUtils::explode( '-{', $txt );
$ret = array();
+ $first = true;
foreach ( $blocks as $block ) {
- $mappings = explode( $this->mMarkup['end'], $block, 2 );
+ if ( $first ) {
+ // Skip the part before the first -{
+ $first = false;
+ continue;
+ }
+ $mappings = explode( '}-', $block, 2 );
$stripped = str_replace( array( "'", '"', '*', '#' ), '',
$mappings[0] );
- $table = explode( ';', $stripped );
+ $table = StringUtils::explode( ';', $stripped );
foreach ( $table as $t ) {
- $m = explode( '=>', $t );
+ $m = explode( '=>', $t, 3 );
if ( count( $m ) != 2 )
continue;
// trim any trailling comments starting with '//'
@@ -908,12 +933,11 @@ class LanguageConverter {
*/
function markNoConversion( $text, $noParse = false ) {
# don't mark if already marked
- if ( strpos( $text, $this->mMarkup['begin'] )
- || strpos( $text, $this->mMarkup['end'] ) ) {
+ if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) {
return $text;
}
- $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
+ $ret = "-{R|$text}-";
return $ret;
}
@@ -955,9 +979,38 @@ class LanguageConverter {
// we need to convert '-{' and '}-' to '-&#123;' and '&#125;-'
// to avoid a unwanted '}-' appeared after the math-image.
$text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
- $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
+ $ret = "-{R|$text}-";
return $ret;
}
+
+ /**
+ * Get the cached separator pattern for ConverterRule::parseRules()
+ */
+ function getVarSeparatorPattern() {
+ if ( is_null( $this->mVarSeparatorPattern ) ) {
+ // varsep_pattern for preg_split:
+ // text should be splited by ";" only if a valid variant
+ // name exist after the markup, for example:
+ // -{zh-hans:xxx;zh-hant:\
+ // yyy;}-
+ // we should split it as:
+ // array(
+ // [0] => 'zh-hans:xxx'
+ // [1] => 'zh-hant:yyy'
+ // [2] => ''
+ // )
+ $pat = '/;\s*(?=';
+ foreach ( $this->mVariants as $variant ) {
+ // zh-hans:xxx;zh-hant:yyy
+ $pat .= $variant . '\s*:|';
+ // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
+ $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
+ }
+ $pat .= '\s*$)/';
+ $this->mVarSeparatorPattern = $pat;
+ }
+ return $this->mVarSeparatorPattern;
+ }
}
/**
@@ -974,6 +1027,7 @@ class ConverterRule {
var $mRules = '';// string : the text of the rules
var $mRulesAction = 'none';
var $mFlags = array();
+ var $mVariantFlags = array();
var $mConvTable = array();
var $mBidtable = array();// array of the translation in each variant
var $mUnidtable = array();// array of the translation in each variant
@@ -988,9 +1042,6 @@ class ConverterRule {
function __construct( $text, $converter ) {
$this->mText = $text;
$this->mConverter = $converter;
- foreach ( $converter->mVariants as $v ) {
- $this->mConvTable[$v] = array();
- }
}
/**
@@ -1001,14 +1052,12 @@ class ConverterRule {
* @public
*/
function getTextInBidtable( $variants ) {
- if ( is_string( $variants ) ) {
- $variants = array( $variants );
- }
- if ( !is_array( $variants ) ) {
+ $variants = (array)$variants;
+ if ( !$variants ) {
return false;
}
foreach ( $variants as $variant ) {
- if ( array_key_exists( $variant, $this->mBidtable ) ) {
+ if ( isset( $this->mBidtable[$variant] ) ) {
return $this->mBidtable[$variant];
}
}
@@ -1021,74 +1070,60 @@ class ConverterRule {
*/
function parseFlags() {
$text = $this->mText;
- if ( strlen( $text ) < 2 ) {
- $this->mFlags = array( 'R' );
- $this->mRules = $text;
- return;
- }
-
$flags = array();
- $markup = $this->mConverter->mMarkup;
- $validFlags = $this->mConverter->mFlags;
- $variants = $this->mConverter->mVariants;
+ $variantFlags = array();
- $tt = explode( $markup['flagsep'], $text, 2 );
- if ( count( $tt ) == 2 ) {
- $f = explode( $markup['varsep'], $tt[0] );
+ $sepPos = strpos( $text, '|' );
+ if ( $sepPos !== false ) {
+ $validFlags = $this->mConverter->mFlags;
+ $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
foreach ( $f as $ff ) {
$ff = trim( $ff );
- if ( array_key_exists( $ff, $validFlags )
- && !in_array( $validFlags[$ff], $flags ) ) {
- $flags[] = $validFlags[$ff];
+ if ( isset( $validFlags[$ff] ) ) {
+ $flags[$validFlags[$ff]] = true;
}
}
- $rules = $tt[1];
- } else {
- $rules = $text;
- }
-
- // check flags
- if ( in_array( 'R', $flags ) ) {
- $flags = array( 'R' );// remove other flags
- } elseif ( in_array( 'N', $flags ) ) {
- $flags = array( 'N' );// remove other flags
- } elseif ( in_array( '-', $flags ) ) {
- $flags = array( '-' );// remove other flags
- } elseif ( count( $flags ) == 1 && $flags[0] == 'T' ) {
- $flags[] = 'H';
- } elseif ( in_array( 'H', $flags ) ) {
+ $text = strval( substr( $text, $sepPos + 1 ) );
+ }
+
+ if ( !$flags ) {
+ $flags['S'] = true;
+ } elseif ( isset( $flags['R'] ) ) {
+ $flags = array( 'R' => true );// remove other flags
+ } elseif ( isset( $flags['N'] ) ) {
+ $flags = array( 'N' => true );// remove other flags
+ } elseif ( isset( $flags['-'] ) ) {
+ $flags = array( '-' => true );// remove other flags
+ } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
+ $flags['H'] = true;
+ } elseif ( isset( $flags['H'] ) ) {
// replace A flag, and remove other flags except T
- $temp = array( '+', 'H' );
- if ( in_array( 'T', $flags ) ) {
- $temp[] = 'T';
+ $temp = array( '+' => true, 'H' => true );
+ if ( isset( $flags['T'] ) ) {
+ $temp['T'] = true;
}
- if ( in_array( 'D', $flags ) ) {
- $temp[] = 'D';
+ if ( isset( $flags['D'] ) ) {
+ $temp['D'] = true;
}
$flags = $temp;
} else {
- if ( in_array( 'A', $flags ) ) {
- $flags[] = '+';
- $flags[] = 'S';
- }
- if ( in_array( 'D', $flags ) ) {
- $flags = array_diff( $flags, array( 'S' ) );
+ if ( isset( $flags['A'] ) ) {
+ $flags['+'] = true;
+ $flags['S'] = true;
}
- $flags_temp = array();
- foreach ( $variants as $variant ) {
- // try to find flags like "zh-hans", "zh-hant"
- // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
- if ( in_array( $variant, $flags ) )
- $flags_temp[] = $variant;
+ if ( isset( $flags['D'] ) ) {
+ unset( $flags['S'] );
}
- if ( count( $flags_temp ) !== 0 ) {
- $flags = $flags_temp;
+ // try to find flags like "zh-hans", "zh-hant"
+ // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
+ $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
+ if ( $variantFlags ) {
+ $variantFlags = array_flip( $variantFlags );
+ $flags = array();
}
}
- if ( count( $flags ) == 0 ) {
- $flags = array( 'S' );
- }
- $this->mRules = $rules;
+ $this->mVariantFlags = $variantFlags;
+ $this->mRules = $text;
$this->mFlags = $flags;
}
@@ -1101,41 +1136,20 @@ class ConverterRule {
$flags = $this->mFlags;
$bidtable = array();
$unidtable = array();
- $markup = $this->mConverter->mMarkup;
$variants = $this->mConverter->mVariants;
-
- // varsep_pattern for preg_split:
- // text should be splited by ";" only if a valid variant
- // name exist after the markup, for example:
- // -{zh-hans:xxx;zh-hant:\
- // yyy;}-
- // we should split it as:
- // array(
- // [0] => 'zh-hans:xxx'
- // [1] => 'zh-hant:yyy'
- // [2] => ''
- // )
- $varsep_pattern = '/' . $markup['varsep'] . '\s*' . '(?=';
- foreach ( $variants as $variant ) {
- // zh-hans:xxx;zh-hant:yyy
- $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|';
- // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
- $varsep_pattern .= '[^;]*?' . $markup['unidsep'] . '\s*' . $variant
- . '\s*' . $markup['codesep'] . '|';
- }
- $varsep_pattern .= '\s*$)/';
+ $varsep_pattern = $this->mConverter->getVarSeparatorPattern();
$choice = preg_split( $varsep_pattern, $rules );
foreach ( $choice as $c ) {
- $v = explode( $markup['codesep'], $c, 2 );
+ $v = explode( ':', $c, 2 );
if ( count( $v ) != 2 ) {
// syntax error, skip
continue;
}
$to = trim( $v[1] );
$v = trim( $v[0] );
- $u = explode( $markup['unidsep'], $v, 2 );
+ $u = explode( '=>', $v, 2 );
// if $to is empty, strtr() could return a wrong result
if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) {
$bidtable[$v] = $to;
@@ -1152,7 +1166,7 @@ class ConverterRule {
}
}
// syntax error, pass
- if ( !array_key_exists( $v, $this->mConverter->mVariantNames ) ) {
+ if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
$bidtable = array();
$unidtable = array();
break;
@@ -1225,7 +1239,12 @@ class ConverterRule {
* @private
*/
function generateConvTable() {
- $flags = $this->mFlags;
+ // Special case optimisation
+ if ( !$this->mBidtable && !$this->mUnidtable ) {
+ $this->mConvTable = array();
+ return;
+ }
+
$bidtable = $this->mBidtable;
$unidtable = $this->mUnidtable;
$manLevel = $this->mConverter->mManualLevel;
@@ -1235,7 +1254,7 @@ class ConverterRule {
/* for bidirectional array
fill in the missing variants, if any,
with fallbacks */
- if ( !array_key_exists( $v, $bidtable ) ) {
+ if ( !isset( $bidtable[$v] ) ) {
$variantFallbacks =
$this->mConverter->getVariantFallbacks( $v );
$vf = $this->getTextInBidtable( $variantFallbacks );
@@ -1244,7 +1263,7 @@ class ConverterRule {
}
}
- if ( array_key_exists( $v, $bidtable ) ) {
+ if ( isset( $bidtable[$v] ) ) {
foreach ( $vmarked as $vo ) {
// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
// or -{H|zh:WordZh;zh-tw:WordTw}-
@@ -1261,11 +1280,14 @@ class ConverterRule {
$vmarked[] = $v;
}
/*for unidirectional array fill to convert tables */
- if ( ( $manLevel[$v] == 'bidirectional'
- || $manLevel[$v] == 'unidirectional' )
- && array_key_exists( $v, $unidtable ) ) {
- $ct = $this->mConvTable[$v];
- $this->mConvTable[$v] = array_merge( $ct, $unidtable[$v] );
+ if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
+ && isset( $unidtable[$v] ) )
+ {
+ if ( isset( $this->mConvTable[$v] ) ) {
+ $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
+ } else {
+ $this->mConvTable[$v] = $unidtable[$v];
+ }
}
}
}
@@ -1285,10 +1307,9 @@ class ConverterRule {
// convert to specified variant
// syntax: -{zh-hans;zh-hant[;...]|}-
- if ( count( array_diff( $flags, $variants ) ) == 0
- and count( $flags ) != 0 ) {
+ if ( $this->mVariantFlags ) {
// check if current variant in flags
- if ( in_array( $variant, $flags ) ) {
+ if ( isset( $this->mVariantFlags[$variant] ) ) {
// then convert to current language
$this->mRules = $this->mConverter->autoConvert( $this->mRules,
$variant );
@@ -1298,7 +1319,7 @@ class ConverterRule {
$this->mConverter->getVariantFallbacks( $variant );
foreach ( $variantFallbacks as $variantFallback ) {
// if current variant's fallback exist in flags
- if ( in_array( $variantFallback, $flags ) ) {
+ if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
// then convert to fallback language
$this->mRules =
$this->mConverter->autoConvert( $this->mRules,
@@ -1307,57 +1328,72 @@ class ConverterRule {
}
}
}
- $this->mFlags = $flags = array( 'R' );
+ $this->mFlags = $flags = array( 'R' => true );
}
- if ( !in_array( 'R', $flags ) || !in_array( 'N', $flags ) ) {
+ if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
// decode => HTML entities modified by Sanitizer::removeHTMLtags
$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
-
$this->parseRules();
}
$rules = $this->mRules;
- if ( count( $this->mBidtable ) == 0
- && count( $this->mUnidtable ) == 0 ) {
- if ( in_array( '+', $flags ) || in_array( '-', $flags ) ) {
+ if ( !$this->mBidtable && !$this->mUnidtable ) {
+ if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
// fill all variants if text in -{A/H/-|text} without rules
foreach ( $this->mConverter->mVariants as $v ) {
$this->mBidtable[$v] = $rules;
}
- } elseif ( !in_array( 'N', $flags ) && !in_array( 'T', $flags ) ) {
- $this->mFlags = $flags = array( 'R' );
+ } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
+ $this->mFlags = $flags = array( 'R' => true );
}
}
- if ( in_array( 'R', $flags ) ) {
- // if we don't do content convert, still strip the -{}- tags
- $this->mRuleDisplay = $rules;
- } elseif ( in_array( 'N', $flags ) ) {
- // proces N flag: output current variant name
- $this->mRuleDisplay =
- $this->mConverter->mVariantNames[ trim( $rules ) ];
- } elseif ( in_array( 'D', $flags ) ) {
- // proces D flag: output rules description
- $this->mRuleDisplay = $this->getRulesDesc();
- } elseif ( in_array( 'H', $flags ) || in_array( '-', $flags ) ) {
- // proces H,- flag or T only: output nothing
- $this->mRuleDisplay = '';
- } elseif ( in_array( 'S', $flags ) ) {
- $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
- } else {
- $this->mRuleDisplay = $this->mManualCodeError;
- }
- // process T flag
- if ( in_array( 'T', $flags ) ) {
- $this->mRuleTitle = $this->getRuleConvertedStr( $variant );
- }
-
- if ( in_array( '-', $flags ) ) {
- $this->mRulesAction = 'remove';
+ $this->mRuleDisplay = false;
+ foreach ( $flags as $flag => $unused ) {
+ switch ( $flag ) {
+ case 'R':
+ // if we don't do content convert, still strip the -{}- tags
+ $this->mRuleDisplay = $rules;
+ break;
+ case 'N':
+ // process N flag: output current variant name
+ $ruleVar = trim( $rules );
+ if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
+ $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
+ } else {
+ $this->mRuleDisplay = '';
+ }
+ break;
+ case 'D':
+ // process D flag: output rules description
+ $this->mRuleDisplay = $this->getRulesDesc();
+ break;
+ case 'H':
+ // process H,- flag or T only: output nothing
+ $this->mRuleDisplay = '';
+ break;
+ case '-':
+ $this->mRulesAction = 'remove';
+ $this->mRuleDisplay = '';
+ break;
+ case '+':
+ $this->mRulesAction = 'add';
+ $this->mRuleDisplay = '';
+ break;
+ case 'S':
+ $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
+ break;
+ case 'T':
+ $this->mRuleTitle = $this->getRuleConvertedStr( $variant );
+ $this->mRuleDisplay = '';
+ break;
+ default:
+ // ignore unknown flags (but see error case below)
+ }
}
- if ( in_array( '+', $flags ) ) {
- $this->mRulesAction = 'add';
+ if ( $this->mRuleDisplay === false ) {
+ $this->mRuleDisplay = $this->mManualCodeError;
}
$this->generateConvTable();
diff --git a/languages/classes/LanguageGan.php b/languages/classes/LanguageGan.php
index ff5b76a8da..f878cf3f2f 100644
--- a/languages/classes/LanguageGan.php
+++ b/languages/classes/LanguageGan.php
@@ -11,7 +11,6 @@ class GanConverter extends LanguageConverter {
function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
- $markup=array(),
$flags = array(),
$manualLevel = array() ) {
$this->mDescCodeSep = '：';
@@ -19,7 +18,6 @@ class GanConverter extends LanguageConverter {
parent::__construct($langobj, $maincode,
$variants,
$variantfallbacks,
- $markup,
$flags,
$manualLevel);
$names = array(
@@ -117,7 +115,7 @@ class LanguageGan extends LanguageZh {
$this->mConverter = new GanConverter( $this, 'gan',
$variants, $variantfallbacks,
- array(),array(),
+ array(),
$ml);
$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
@@ -149,4 +147,4 @@ class LanguageGan extends LanguageZh {
$ret = array_unique( explode('|', $terms) );
return $ret;
}
-}
\ No newline at end of file
+}
diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php
index 479f1e4c42..318b82a800 100644
--- a/languages/classes/LanguageKk.php
+++ b/languages/classes/LanguageKk.php
@@ -21,10 +21,9 @@ class KkConverter extends LanguageConverter {
function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
- $markup=array(),
$flags = array()) {
parent::__construct( $langobj, $maincode,
- $variants, $variantfallbacks, $markup, $flags );
+ $variants, $variantfallbacks, $flags );
// No point delaying this since they're in code.
// Waiting until loadDefaultTables() means they never get loaded
diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php
index 7a57e91943..693660fbbf 100644
--- a/languages/classes/LanguageSr.php
+++ b/languages/classes/LanguageSr.php
@@ -165,12 +165,11 @@ class LanguageSr extends LanguageSr_ec {
'sr-el' => 'sr',
);
- $marker = array();//don't mess with these, leave them as they are
$flags = array(
'S' => 'S', 'писмо' => 'S', 'pismo' => 'S',
'W' => 'W', 'реч' => 'W', 'reč' => 'W', 'ријеч' => 'W', 'riječ' => 'W'
);
- $this->mConverter = new SrConverter($this, 'sr', $variants, $variantfallbacks, $marker, $flags);
+ $this->mConverter = new SrConverter($this, 'sr', $variants, $variantfallbacks, $flags);
$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
}
diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php
index 490808bdaf..6a004a080d 100644
--- a/languages/classes/LanguageZh.php
+++ b/languages/classes/LanguageZh.php
@@ -11,7 +11,6 @@ class ZhConverter extends LanguageConverter {
function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
- $markup=array(),
$flags = array(),
$manualLevel = array() ) {
$this->mDescCodeSep = '：';
@@ -19,7 +18,6 @@ class ZhConverter extends LanguageConverter {
parent::__construct($langobj, $maincode,
$variants,
$variantfallbacks,
- $markup,
$flags,
$manualLevel);
$names = array(
@@ -153,7 +151,7 @@ class LanguageZh extends LanguageZh_hans {
$this->mConverter = new ZhConverter( $this, 'zh',
$variants, $variantfallbacks,
- array(),array(),
+ array(),
$ml);
$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
diff --git a/languages/messages/MessagesEn.php b/languages/messages/MessagesEn.php
index 452850741f..e816fd4ff8 100644
--- a/languages/messages/MessagesEn.php
+++ b/languages/messages/MessagesEn.php
@@ -1381,6 +1381,7 @@ These arguments have been omitted.",
'post-expand-template-argument-category' => 'Pages containing omitted template arguments',
'parser-template-loop-warning' => 'Template loop detected: [[$1]]',
'parser-template-recursion-depth-warning' => 'Template recursion depth limit exceeded ($1)',
+'language-converter-depth-warning' => 'Language converter depth limit exceeded ($1)',
# "Undo" feature
'undo-success' => 'The edit can be undone.
diff --git a/maintenance/language/messages.inc b/maintenance/language/messages.inc
index ef1d5b0894..a0718b401d 100644
--- a/maintenance/language/messages.inc
+++ b/maintenance/language/messages.inc
@@ -614,6 +614,7 @@ $wgMessageStructure = array(
'post-expand-template-argument-category',
'parser-template-loop-warning',
'parser-template-recursion-depth-warning',
+ 'language-converter-depth-warning',
),
'undo' => array(
'undo-success',
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt
index 4b45f5852c..d849f6bd15 100644
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -6982,6 +6982,28 @@ Fridrih IV je car.
!! end
+!! test
+Unclosed language converter markup "-{"
+!! options
+language=sr
+!! input
+-{T|hello
+!! result
+<p>-{T|hello
+</p>
+!! end
+
+!! test
+Don't convert raw rule "-{R|=&gt;}-" to "=>"
+!! options
+language=sr
+!! input
+-{R|=&gt;}-
+!! result
+<p>=&gt;
+</p>
+!!end
+
!!article
Template:Bullet
!!text
--
2.20.1