3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
23 * Parser for rules of language conversion, parse rules in -{ }- tag.
25 * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com>
// Raw text of the markup this rule was built from.
28 public $mText; // original text in -{text}-
// Converter whose variants, flags, names and fallbacks the methods below consult.
29 public $mConverter; // LanguageConverter object
// Text to show in place of the markup; assigned in parse() and returned by getDisplay().
// parse() temporarily sets it to false to detect "no flag produced any output".
30 public $mRuleDisplay = '';
// Result of getRuleConvertedTitle(), assigned in parse(); returned by getTitle().
31 public $mRuleTitle = false;
// Set by parseFlags() to the part of the text that follows the flag prefix.
32 public $mRules = ''; // string : the text of the rules
// Starts as 'none'; parse() may change it to 'remove' or 'add'. Returned by getRulesAction().
33 public $mRulesAction = 'none';
// Variant codes that were given as flags, stored as array keys (see parseFlags()).
35 public $mVariantFlags = [];
// Filled by generateConvTable(): variant code => [ source text => replacement text ].
36 public $mConvTable = [];
// Filled by parseRules(): variant code => text.
37 public $mBidtable = []; // array of the translation in each variant
// Filled by parseRules(): variant code => [ from => to ].
38 public $mUnidtable = []; // array of the translation in each variant
41 * @param string $text The text between -{ and }-
42 * @param LanguageConverter $converter
// Constructor: keeps a reference to the converter on $this->mConverter.
// NOTE(review): no assignment of $text is visible in this listing; lines of the
// body appear to be missing here, so confirm against the complete file.
44 public function __construct( $text, $converter ) {
46 $this->mConverter
= $converter;
50 * Find the first of the given variants that has an entry in the bidirectional table.
52 * @param array|string $variants Variant language code
53 * @return string Translated text
// Returns the $this->mBidtable entry of the first variant in $variants that has one.
55 public function getTextInBidtable( $variants ) {
// Cast so that a single variant code and a list of codes are handled the same way.
56 $variants = (array)$variants;
// Variants are tried in the order given; the first hit wins.
60 foreach ( $variants as $variant ) {
61 if ( isset( $this->mBidtable
[$variant] ) ) {
62 return $this->mBidtable
[$variant];
// NOTE(review): the not-found return is not visible in this listing. Callers
// compare the result with === false, so presumably false is returned; confirm.
69 * Parse flags with syntax -{FLAG| ... }-
// Splits the flag prefix from the rule text at the first '|', normalises the set
// of flags, and stores the results in $this->mVariantFlags, $this->mRules and
// $this->mFlags.
// NOTE(review): the initialisation of $text and $flags is not visible in this
// listing, and several branch bodies below are missing; confirm against the full file.
72 function parseFlags() {
// Only text that contains a '|' has a flag prefix.
77 $sepPos = strpos( $text, '|' );
78 if ( $sepPos !== false ) {
// The converter's mFlags is used as a lookup: token => flag name.
79 $validFlags = $this->mConverter
->mFlags
;
// Tokens before the '|' are separated by ';'.
80 $f = StringUtils
::explode( ';', substr( $text, 0, $sepPos ) );
81 foreach ( $f as $ff ) {
// Tokens that are not keys of the lookup are not recorded here.
83 if ( isset( $validFlags[$ff] ) ) {
84 $flags[$validFlags[$ff]] = true;
// Everything after the '|' becomes the rule text. strval() forces a string
// (presumably to cover substr() returning a non-string; confirm).
87 $text = strval( substr( $text, $sepPos +
1 ) );
// Precedence chain: R, then N, then '-' each discard every other flag.
// The first branch of this chain is not visible in this listing.
92 } elseif ( isset( $flags['R'] ) ) {
93 $flags = [ 'R' => true ];// remove other flags
94 } elseif ( isset( $flags['N'] ) ) {
95 $flags = [ 'N' => true ];// remove other flags
96 } elseif ( isset( $flags['-'] ) ) {
97 $flags = [ '-' => true ];// remove other flags
// T as the only flag: the body of this branch is not visible here.
98 } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
100 } elseif ( isset( $flags['H'] ) ) {
101 // replace A flag, and remove other flags except T
// H is rebuilt as the pair '+' and 'H'; what the T and D checks add is not visible.
102 $temp = [ '+' => true, 'H' => true ];
103 if ( isset( $flags['T'] ) ) {
106 if ( isset( $flags['D'] ) ) {
// Handling of A: body not visible in this listing.
111 if ( isset( $flags['A'] ) ) {
// When D is present, S is dropped.
115 if ( isset( $flags['D'] ) ) {
116 unset( $flags['S'] );
118 // try to find flags like "zh-hans", "zh-hant"
119 // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
// Flag names that are also variant codes of the converter.
120 $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter
->mVariants
);
121 if ( $variantFlags ) {
// Flip so the variant codes become keys; parse() tests them with isset().
122 $variantFlags = array_flip( $variantFlags );
// Publish the results on the object.
126 $this->mVariantFlags
= $variantFlags;
127 $this->mRules
= $text;
128 $this->mFlags
= $flags;
132 * Generate conversion table.
// Parses $this->mRules into two tables and stores them on the object:
// $this->mBidtable (variant => text) and $this->mUnidtable (variant => [ from => to ]).
// NOTE(review): the initialisation of $bidtable and $unidtable, and several
// statements inside the loop, are not visible in this listing.
135 function parseRules() {
136 $rules = $this->mRules
;
// NOTE(review): $variants is not used in any line visible here.
139 $variants = $this->mConverter
->mVariants
;
140 $varsep_pattern = $this->mConverter
->getVarSeparatorPattern();
142 // Split according to $varsep_pattern, but ignore semicolons from HTML entities
// The ';' that ends an entity-like "&...;" sequence is swapped for the byte \x01
// before splitting and swapped back afterwards, so it cannot act as a separator.
143 $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
144 $choice = preg_split( $varsep_pattern, $rules );
145 $choice = str_replace( "\x01", ';', $choice );
147 foreach ( $choice as $c ) {
// Each piece is split once at the first ':'.
148 $v = explode( ':', $c, 2 );
149 if ( count( $v ) != 2 ) {
150 // syntax error, skip
// NOTE(review): from here on $v is used as a string and $to is read, but the
// statements that set them are not visible in this listing; confirm upstream.
// A '=>' inside $v distinguishes the one-way form from the two-way form.
155 $u = explode( '=>', $v, 2 );
156 $vv = $this->mConverter
->validateVariant( $v );
157 // if $to is empty (which is also used as $from in bidtable),
158 // strtr() could return a wrong result.
// No '=>' present: two-way entry, keyed by the validated variant.
159 if ( count( $u ) == 1 && $to !== '' && $vv ) {
160 $bidtable[$vv] = $to;
161 } elseif ( count( $u ) == 2 ) {
// '=>' present: one-way entry; the left-hand side is the source text.
162 $from = trim( $u[0] );
164 $vv = $this->mConverter
->validateVariant( $v );
165 // if $from is empty, strtr() could return a wrong result.
// If the slot for this variant exists but is not an array, it is replaced by a
// fresh one-entry array; otherwise the pair is added to the existing array.
166 if ( array_key_exists( $vv, $unidtable )
167 && !is_array( $unidtable[$vv] )
170 $unidtable[$vv] = [ $from => $to ];
171 } elseif ( $from !== '' && $vv ) {
172 $unidtable[$vv][$from] = $to;
175 // syntax error, pass
// Variant without a display name: the body of this check is not visible here.
176 if ( !isset( $this->mConverter
->mVariantNames
[$vv] ) ) {
// Publish both tables.
182 $this->mBidtable
= $bidtable;
183 $this->mUnidtable
= $unidtable;
// Builds a textual description of the parsed rules by appending one fragment per
// table entry to $text.
// NOTE(review): the initialisation of $text and the return statement are not
// visible in this listing.
191 function getRulesDesc() {
// Separators come from the converter.
192 $codesep = $this->mConverter
->mDescCodeSep
;
193 $varsep = $this->mConverter
->mDescVarSep
;
// Two-way entries: variant name, code separator, text, variant separator.
195 foreach ( $this->mBidtable
as $k => $v ) {
196 $text .= $this->mConverter
->mVariantNames
[$k] . "$codesep$v$varsep";
// One-way entries: source text, arrow, variant name, code separator, target
// text, variant separator.
198 foreach ( $this->mUnidtable
as $k => $a ) {
199 foreach ( $a as $from => $to ) {
200 $text .= $from . '⇒' . $this->mConverter
->mVariantNames
[$k] .
201 "$codesep$to$varsep";
208 * Parse rules conversion.
211 * @param string $variant
// Picks the text to display for $variant from the parsed tables, trying several
// sources in turn.
// NOTE(review): the final return is not visible in this listing.
215 function getRuleConvertedStr( $variant ) {
216 $bidtable = $this->mBidtable
;
217 $unidtable = $this->mUnidtable
;
// With both tables empty, the raw rule text is returned.
219 if ( count( $bidtable ) +
count( $unidtable ) == 0 ) {
220 return $this->mRules
;
222 // display current variant in bidirectional array
223 $disp = $this->getTextInBidtable( $variant );
224 // or display current variant in fallbacks
// Each later step runs only while $disp is still exactly false.
225 if ( $disp === false ) {
226 $disp = $this->getTextInBidtable(
227 $this->mConverter
->getVariantFallbacks( $variant ) );
229 // or display current variant in unidirectional array
230 if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
// First target text listed for this variant.
231 $disp = array_values( $unidtable[$variant] )[0];
233 // or display first text under disable manual convert
234 if ( $disp === false && $this->mConverter
->mManualLevel
[$variant] == 'disable' ) {
// First two-way text if there is one, otherwise the first one-way target text.
235 if ( count( $bidtable ) > 0 ) {
236 $disp = array_values( $bidtable )[0];
238 $disp = array_values( array_values( $unidtable )[0] )[0];
246 * Similar to getRuleConvertedStr(), but this prefers to use original
247 * page title if $variant === $this->mConverter->mMainLanguageCode
248 * and may return false in this case (so this title conversion rule
249 * will be ignored and the original title is shown).
252 * @param string $variant The variant code to display page title in
253 * @return string|bool The converted title or false if just page name
// Title counterpart of getRuleConvertedStr(): treats the converter's main
// language code specially and otherwise delegates to getRuleConvertedStr().
255 function getRuleConvertedTitle( $variant ) {
256 if ( $variant === $this->mConverter
->mMainLanguageCode
) {
257 // If a string targeting exactly this variant is set,
258 // use it. Otherwise, just return false, so the real
259 // page name can be shown (and because variant === main,
260 // there'll be no further automatic conversion).
// Two-way table first; no fallback variants are consulted on this path.
261 $disp = $this->getTextInBidtable( $variant );
// Then the first one-way target text for this variant.
// NOTE(review): any guard between these two steps is not visible in this listing.
265 if ( array_key_exists( $variant, $this->mUnidtable
) ) {
266 $disp = array_values( $this->mUnidtable
[$variant] )[0];
268 // Assigned above or still false.
// Any other variant: same result as getRuleConvertedStr().
271 return $this->getRuleConvertedStr( $variant );
276 * Generate conversion table for all text.
// Fills $this->mConvTable (variant => [ source text => replacement ]) from the
// two-way and one-way tables, honouring the converter's per-variant manual level.
// NOTE(review): several statements are missing from this listing, including the
// handling of $vmarked and the assignment target of the fallback lookup.
279 function generateConvTable() {
280 // Special case optimisation
// No rules at all: the table is reset to an empty array.
281 if ( !$this->mBidtable
&& !$this->mUnidtable
) {
282 $this->mConvTable
= [];
286 $bidtable = $this->mBidtable
;
287 $unidtable = $this->mUnidtable
;
288 $manLevel = $this->mConverter
->mManualLevel
;
291 foreach ( $this->mConverter
->mVariants
as $v ) {
// NOTE(review): the block comment that opens on the next line has no visible
// terminator in this listing; its closing line appears to have been dropped.
292 /* for bidirectional array
293 fill in the missing variants, if any,
295 if ( !isset( $bidtable[$v] ) ) {
297 $this->mConverter
->getVariantFallbacks( $v );
298 $vf = $this->getTextInBidtable( $variantFallbacks );
304 if ( isset( $bidtable[$v] ) ) {
305 foreach ( $vmarked as $vo ) {
306 // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
307 // or -{H|zh:WordZh;zh-tw:WordTw}-
308 // or -{-|zh:WordZh;zh-tw:WordTw}-
309 // to introduce a custom mapping between
310 // words WordZh and WordTw in the whole text
311 if ( $manLevel[$v] == 'bidirectional' ) {
312 $this->mConvTable
[$v][$bidtable[$vo]] = $bidtable[$v];
314 if ( $manLevel[$vo] == 'bidirectional' ) {
315 $this->mConvTable
[$vo][$bidtable[$v]] = $bidtable[$vo];
320 /* for unidirectional array fill to convert tables */
// One-way rules apply when the variant's manual level is 'bidirectional' or
// 'unidirectional' and the variant has one-way entries.
321 if ( ( $manLevel[$v] == 'bidirectional' ||
$manLevel[$v] == 'unidirectional' )
322 && isset( $unidtable[$v] )
324 if ( isset( $this->mConvTable
[$v] ) ) {
// Array union keeps the left operand on key collisions, so a one-way rule
// overrides an existing entry for the same source text.
325 $this->mConvTable
[$v] = $unidtable[$v] +
$this->mConvTable
[$v];
327 $this->mConvTable
[$v] = $unidtable[$v];
334 * Parse rules and flags.
335 * @param string|null $variant Variant language code
// Entry point: works from $this->mFlags and $this->mRules to set
// $this->mRuleDisplay, $this->mRuleTitle and $this->mRulesAction, then calls
// generateConvTable().
// NOTE(review): many lines are missing from this listing (the guard around the
// default variant, the calls that fill mFlags and the tables, and the per-flag
// switch/case lines); confirm details against the complete file.
337 public function parse( $variant = null ) {
// Default variant comes from the converter; the condition guarding this
// assignment is not visible here.
339 $variant = $this->mConverter
->getPreferredVariant();
343 $flags = $this->mFlags
;
345 // convert to specified variant
346 // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
347 if ( $this->mVariantFlags
) {
348 // check if current variant in flags
349 if ( isset( $this->mVariantFlags
[$variant] ) ) {
350 // then convert <text to convert> to current language
351 $this->mRules
= $this->mConverter
->autoConvert( $this->mRules
,
354 // if current variant no in flags,
355 // then we check its fallback variants.
357 $this->mConverter
->getVariantFallbacks( $variant );
// Fallbacks are walked in order when the converter returned an array.
358 if ( is_array( $variantFallbacks ) ) {
359 foreach ( $variantFallbacks as $variantFallback ) {
360 // if current variant's fallback exist in flags
361 if ( isset( $this->mVariantFlags
[$variantFallback] ) ) {
362 // then convert <text to convert> to fallback language
364 $this->mConverter
->autoConvert( $this->mRules
,
// After variant-flag handling the flag set is reduced to R alone.
371 $this->mFlags
= $flags = [ 'R' => true ];
374 if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
375 // decode => HTML entities modified by Sanitizer::removeHTMLtags
// NOTE(review): as written, the search and the replacement strings are both
// '=>', so this call changes nothing. The comment above suggests the search
// string was meant to be the entity-escaped arrow ('=&gt;'); confirm upstream.
376 $this->mRules
= str_replace( '=>', '=>', $this->mRules
);
379 $rules = $this->mRules
;
// Neither table has entries.
381 if ( !$this->mBidtable
&& !$this->mUnidtable
) {
382 if ( isset( $flags['+'] ) ||
isset( $flags['-'] ) ) {
383 // fill all variants if text in -{A/H/-|text}- is non-empty but without rules
384 if ( $rules !== '' ) {
385 foreach ( $this->mConverter
->mVariants
as $v ) {
386 $this->mBidtable
[$v] = $rules;
// Without N or T the flag set is reduced to R alone.
389 } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
390 $this->mFlags
= $flags = [ 'R' => true ];
// false marks "nothing set yet"; it is tested after the loop.
394 $this->mRuleDisplay
= false;
// Only the keys of $flags are used. The lines that select a branch per flag are
// not visible below, so the flag for each unlabelled assignment cannot be read
// off this listing.
395 foreach ( $flags as $flag => $unused ) {
398 // if we don't do content convert, still strip the -{}- tags
399 $this->mRuleDisplay
= $rules;
402 // process N flag: output current variant name
// The trimmed rule text is looked up as a variant code; unknown codes give ''.
403 $ruleVar = trim( $rules );
404 $this->mRuleDisplay
= $this->mConverter
->mVariantNames
[$ruleVar] ??
'';
407 // process D flag: output rules description
408 $this->mRuleDisplay
= $this->getRulesDesc();
411 // process H,- flag or T only: output nothing
412 $this->mRuleDisplay
= '';
// Rules action 'remove', with nothing displayed.
415 $this->mRulesAction
= 'remove';
416 $this->mRuleDisplay
= '';
// Rules action 'add', with nothing displayed.
419 $this->mRulesAction
= 'add';
420 $this->mRuleDisplay
= '';
// Display the text chosen for the current variant.
423 $this->mRuleDisplay
= $this->getRuleConvertedStr( $variant );
// Record the converted title, with nothing displayed.
426 $this->mRuleTitle
= $this->getRuleConvertedTitle( $variant );
427 $this->mRuleDisplay
= '';
430 // ignore unknown flags (but see error case below)
// Still false after the loop: display an error span built from the
// 'converter-manual-rule-error' message, escaped, in the content language.
433 if ( $this->mRuleDisplay
=== false ) {
434 $this->mRuleDisplay
= '<span class="error">'
435 . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
439 $this->generateConvTable();
443 * Checks if there are conversion rules.
// True when $this->mRules is anything other than the empty string.
446 public function hasRules() {
447 return $this->mRules
!== '';
451 * Get display text on markup -{...}-
// Accessor for $this->mRuleDisplay, which parse() assigns.
454 public function getDisplay() {
455 return $this->mRuleDisplay
;
459 * Get converted title.
// Accessor for $this->mRuleTitle; the property defaults to false and parse() may
// assign the result of getRuleConvertedTitle() to it.
462 public function getTitle() {
463 return $this->mRuleTitle
;
467 * Return how to deal with the conversion rules.
// Accessor for $this->mRulesAction: 'none' by default, or 'remove' / 'add' when
// parse() sets it.
470 public function getRulesAction() {
471 return $this->mRulesAction
;
475 * Get conversion table. (bidirectional and unidirectional
// Accessor for $this->mConvTable, which generateConvTable() fills.
479 public function getConvTable() {
480 return $this->mConvTable
;
484 * Get conversion rules string.
// Accessor for $this->mRules, the rule text left after the flag prefix is removed.
487 public function getRules() {
488 return $this->mRules
;
492 * Get conversion flags.
// Accessor for $this->mFlags, which parseFlags() and parse() assign.
// NOTE(review): no declaration of $mFlags is visible among the properties in
// this listing; confirm it is declared in the complete file.
495 public function getFlags() {
496 return $this->mFlags
;