* Re-implement the message caches for r35821
[lhc/web/wiklou.git] / languages / LanguageConverter.php
1 <?php
2
3 /**
4 * @ingroup Language
5 *
6 * @author Zhengzhu Feng <zhengzhu@gmail.com>
7 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
8 * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com>
9 */
10
11 class LanguageConverter {
// variant chosen by getPreferredVariant(); '' until one has been cached
var $mPreferredVariant='';
// main language code, taken from the constructor's $maincode
var $mMainLanguageCode;
// supported variant codes, per-variant fallbacks (both from the constructor)
// and variant display names (copied from $wgLanguageNames)
var $mVariants, $mVariantFallbacks, $mVariantNames;
// set to true by loadTables() once loading has started
var $mTablesLoaded = false;
// conversion tables indexed by variant code, plus a CACHE_VERSION_KEY entry
var $mTables;
// title text produced by convertTitle() / the T flag; read by getParsedTitle()
var $mTitleDisplay='';
// cleared by convert() when the notitleconvert / nocontentconvert magic words match
var $mDoTitleConvert=true, $mDoContentConvert=true;
var $mManualLevel; // 'bidirectional' 'unidirectional' 'disable' for each variant
// text displayed by getRulesDisplay() when no known flag applies
var $mManualCodeError='<span style="color: red;">code error!</span>';
// set by applyManualFlag() when a T flag supplied the title; consumed by convertTitle()
var $mTitleFromFlag = false;
// memcached key for the conversion tables, built with wfMemcKey()
var $mCacheKey;
// language object handed to the constructor
var $mLangObj;
// markup tokens ('begin', 'flagsep', 'unidsep', 'codesep', 'varsep', 'end')
var $mMarkup;
// map from flag text found in the markup to the internal flag letter
var $mFlags;
// when true, parseCachedTable() also adds ucfirst'd copies of every mapping
var $mUcfirst = false;

// marker entry stored in mTables; loadTables() rebuilds the tables when it is absent
const CACHE_VERSION_KEY = 'VERSION 6';
29
/**
 * Constructor
 *
 * @param object $langobj the language object this converter belongs to
 * @param string $maincode the main language code of this language
 * @param array $variants the supported variants of this language
 * @param array $variantfallbacks the fallback language of each variant
 * @param array $markup array defining the markup used for manual conversion;
 *                      merged over the defaults listed below
 * @param array $flags array defining the custom strings that map to the flags;
 *                     merged over the defaults listed below
 * @param array $manualLevel manual conversion level for each variant
 *                     ('bidirectional', 'unidirectional' or 'disable');
 *                     variants not listed default to 'bidirectional'
 * @access public
 */
function __construct($langobj, $maincode,
		$variants=array(),
		$variantfallbacks=array(),
		$markup=array(),
		$flags = array(),
		$manualLevel = array() ) {
	global $wgLanguageNames;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = $variants;
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = $wgLanguageNames;
	$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

	$m = array(
		'begin'=>'-{',
		'flagsep'=>'|',
		'unidsep'=>'=>', //for unidirectional conversion
		'codesep'=>':',
		'varsep'=>';',
		'end'=>'}-'
	);
	$this->mMarkup = array_merge($m, $markup);

	$f = array(
		// 'S' show converted text
		// '+' add rules for alltext
		// 'E' the given flags are erroneous
		// the flags above are reserved for the program
		'A'=>'A', // add rule for convert code (all text convert)
		'T'=>'T', // title convert
		'R'=>'R', // raw content
		'D'=>'D', // convert description (subclass implement)
		'-'=>'-', // remove convert (not implemented)
		'H'=>'H', // add rule for convert code (but no display in placed code)
		'N'=>'N'  // current variant name
	);
	$this->mFlags = array_merge($f, $flags);

	// always leave mManualLevel as an array, even when there are no variants
	$this->mManualLevel = array();
	foreach( $this->mVariants as $v ) {
		$this->mManualLevel[$v] = array_key_exists($v, $manualLevel)
			? $manualLevel[$v]
			: 'bidirectional';
	}
}
81
/**
 * Get the variant codes this converter supports
 *
 * @return array the variant list that was passed to the constructor
 * @access public
 */
function getVariants() {
	$supported = $this->mVariants;
	return $supported;
}
88
/**
 * in case some variant is not defined in the markup, we need
 * to have some fallback. for example, in zh, normally people
 * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk.
 * when zh-sg is preferred but not defined, we will pick zh-hans
 * in this case. right now this is only used by zh.
 *
 * @param string $v the language code of the variant
 * @return mixed the fallback entry configured for $v, or the main
 *               language code when no fallback is configured
 * @private
 */
function getVariantFallbacks($v) {
	return isset( $this->mVariantFallbacks[$v] )
		? $this->mVariantFallbacks[$v]
		: $this->mMainLanguageCode;
}
106
/**
 * Look up the first of the given variant codes that has an entry in a
 * convert array
 *
 * @param mixed $variants a single variant code, or an array of codes
 *                        that are tried in order
 * @param array $carray convert array, indexed by variant code
 * @return mixed the entry of the first code found in $carray; false when
 *               none is found or $variants is neither string nor array
 * @private
 */
function getTextInCArray($variants,$carray){
	$candidates = is_string($variants) ? array($variants) : $variants;
	if( is_array($candidates) ) {
		foreach( $candidates as $code ) {
			if( array_key_exists($code, $carray) )
				return $carray[$code];
		}
	}
	return false;
}
126
/**
 * get preferred language variants.
 *
 * Sources are tried in this order; the first one that yields a supported
 * variant wins:
 *  1. the value cached in mPreferredVariant
 *  2. the 'variant' request parameter
 *  3. the script base name, when $wgVariantArticlePath is set
 *  4. the logged-in user's 'variant' option (only when $fromUser is true)
 *  5. $wgDefaultLanguageVariant
 *  6. the Accept-Language HTTP header (this result is not cached)
 *  7. the main language code
 *
 * @param boolean $fromUser Get it from $wgUser's preferences
 * @return string the preferred language code
 * @access public
 */
function getPreferredVariant( $fromUser = true ) {
	global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant;

	if( $this->mPreferredVariant )
		return $this->mPreferredVariant;

	// see if the preference is set in the request
	$req = $wgRequest->getText( 'variant' );
	if( in_array( $req, $this->mVariants ) ) {
		$this->mPreferredVariant = $req;
		return $req;
	}

	// check the syntax /code/ArticleTitle
	if( $wgVariantArticlePath != false && isset( $_SERVER['SCRIPT_NAME'] ) ) {
		// Note: SCRIPT_NAME probably won't hold the correct value if PHP is run as CGI
		// (it will hold path to php.cgi binary), and might not exist on some very old PHP installations
		$scriptBase = basename( $_SERVER['SCRIPT_NAME'] );
		if( in_array( $scriptBase, $this->mVariants ) ) {
			$this->mPreferredVariant = $scriptBase;
			return $this->mPreferredVariant;
		}
	}

	// get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if( $fromUser && $wgUser->isLoggedIn() ) {
		$userVariant = $wgUser->getOption( 'variant' );
		// only accept a stored preference that is still a supported variant;
		// otherwise fall through to the remaining sources
		if( in_array( $userVariant, $this->mVariants ) ) {
			$this->mPreferredVariant = $userVariant;
			return $this->mPreferredVariant;
		}
	}

	// see if default variant is globally set
	if( $wgDefaultLanguageVariant != false && in_array( $wgDefaultLanguageVariant, $this->mVariants ) ) {
		$this->mPreferredVariant = $wgDefaultLanguageVariant;
		return $this->mPreferredVariant;
	}

	// see if some supported language variant is set in the
	// http header, but we don't set the mPreferredVariant
	// variable in case this is called before the user's
	// preference is loaded
	if( array_key_exists( 'HTTP_ACCEPT_LANGUAGE', $_SERVER ) ) {
		$prefix = $this->mMainLanguageCode . '-';
		$header = str_replace( '_', '-', strtolower( $_SERVER['HTTP_ACCEPT_LANGUAGE'] ) );
		foreach( explode( ',', $header ) as $item ) {
			// drop the optional ";q=..." weight
			$parts = explode( ';', $item, 2 );
			$tag = trim( $parts[0] );
			// only tags of the form "<maincode>-..." can name a variant here
			if( strpos( $tag, $prefix ) !== 0 )
				continue;
			// try the full tag, then progressively strip trailing subtags
			// (e.g. "zh-hant-tw" -> "zh-hant"), stopping before the bare main code
			while( strlen( $tag ) > strlen( $prefix ) ) {
				if( in_array( $tag, $this->mVariants ) )
					return $tag;
				$tag = substr( $tag, 0, strrpos( $tag, '-' ) );
			}
		}
	}

	return $this->mMainLanguageCode;
}
194
/**
 * dictionary-based conversion
 *
 * The text is split on everything that must not be converted (html
 * markup, html entities, parser place holders, <code>/<script>/<pre>
 * elements); only the pieces in between are passed to translate().
 *
 * @param string $text the text to be converted
 * @param string $toVariant the target language code; the preferred
 *                          variant is used when omitted
 * @return string the converted text, or $text unchanged when $toVariant
 *                is not a supported variant
 * @private
 */
function autoConvert($text, $toVariant=false) {
	$fname="LanguageConverter::autoConvert";

	wfProfileIn( $fname );

	if(!$this->mTablesLoaded)
		$this->loadTables();

	if(!$toVariant)
		$toVariant = $this->getPreferredVariant();
	if(!in_array($toVariant, $this->mVariants)) {
		// keep profiling calls balanced on the early exit
		wfProfileOut( $fname );
		return $text;
	}

	/* we convert everything except:
	   1. html markups (anything between < and >)
	   2. html entities
	   3. place holders created by the parser
	*/
	global $wgParser;
	if (isset($wgParser) && $wgParser->UniqPrefix()!=''){
		// quote the prefix so it is always matched literally
		$marker = '|' . preg_quote( $wgParser->UniqPrefix(), '/' ) . '[\-a-zA-Z0-9]+';
	} else {
		$marker = "";
	}

	// this one is needed when the text is inside an html markup
	$htmlfix = '|<[^>]+$|^[^<>]*>';

	// disable convert to variants between <code></code> tags
	$codefix = '<code>.+?<\/code>|';
	// disable conversion of <script type="text/javascript"> ... </script>
	$scriptfix = '<script.*?>.*?<\/script>|';
	// disable conversion of <pre xxxx> ... </pre>
	$prefix = '<pre.*?>.*?<\/pre>|';

	// entity names and hex character references may contain upper case
	// letters (&OElig;, &#x4E2D;), so allow them after the first character
	$reg = '/'.$codefix . $scriptfix . $prefix . '<[^>]+>|&[a-zA-Z#][a-zA-Z0-9]+;' . $marker . $htmlfix . '/s';

	$matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
	if( $matches === false ) {
		// the pattern could not be applied; return the text unconverted
		// rather than losing it
		wfProfileOut( $fname );
		return $text;
	}

	// each entry is array( piece, offset ); whatever lies between two
	// pieces in $text is a delimiter and is copied through untouched
	$m = array_shift($matches);

	$ret = $this->translate($m[0], $toVariant);
	$mstart = $m[1]+strlen($m[0]);
	foreach($matches as $m) {
		$ret .= substr($text, $mstart, $m[1]-$mstart);
		$ret .= $this->translate($m[0], $toVariant);
		$mstart = $m[1] + strlen($m[0]);
	}
	wfProfileOut( $fname );
	return $ret;
}
253
/**
 * Translate a string to a variant
 * Doesn't process markup or do any of that other stuff, for that use convert()
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text; $text unchanged when no conversion
 *                table exists for $variant
 * @private
 */
function translate( $text, $variant ) {
	wfProfileIn( __METHOD__ );
	if( !$this->mTablesLoaded )
		$this->loadTables();
	// mTables also holds the CACHE_VERSION_KEY marker (a boolean), and an
	// unknown variant has no entry at all: only call replace() on real tables
	if( isset( $this->mTables[$variant] ) && is_object( $this->mTables[$variant] ) )
		$text = $this->mTables[$variant]->replace( $text );
	wfProfileOut( __METHOD__ );
	return $text;
}
271
/**
 * convert text to all supported variants
 *
 * @param string $text the text to be converted
 * @return array the translated text, indexed by variant code
 * @public
 */
function autoConvertToAllVariants($text) {
	$profileName = "LanguageConverter::autoConvertToAllVariants";
	wfProfileIn( $profileName );

	if( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	$converted = array();
	foreach( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	wfProfileOut( $profileName );
	return $converted;
}
293
/**
 * convert link text to all supported variants
 *
 * Text enclosed in the manual conversion markup is copied through
 * verbatim (markers included); everything else is translated.
 *
 * @param string $text the text to be converted
 * @return array the converted text, indexed by variant code
 * @public
 */
function convertLinkToAllVariants($text) {
	if( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	$open = $this->mMarkup['begin'];
	$close = $this->mMarkup['end'];

	$segments = explode( $open, $text );
	$leading = array_shift( $segments );

	// the part before the first marker is plain text
	$result = array();
	foreach( $this->mVariants as $code ) {
		$result[$code] = $this->translate( $leading, $code );
	}

	foreach( $segments as $segment ) {
		$pieces = explode( $close, $segment, 2 );
		$verbatim = $open . $pieces[0] . $close;
		$hasTail = array_key_exists( 1, $pieces );

		foreach( $this->mVariants as $code ) {
			$result[$code] .= $verbatim;
			if( $hasTail ) {
				$result[$code] .= $this->translate( $pieces[1], $code );
			}
		}
	}

	return $result;
}
325
326
/**
 * Convert text using a parser object for context
 *
 * Nothing is converted when the page is a conversion table in the
 * MediaWiki namespace or when $wgDisableLangConversion is set.
 * Otherwise the text is converted and the resulting title display
 * text is stored in the parser output.
 *
 * @param string $text the text to convert
 * @param object $parser the parser providing the title and the output object
 * @return string the converted (or untouched) text
 * @public
 */
function parserConvert( $text, &$parser ) {
	global $wgDisableLangConversion;

	/* don't do anything if this is the conversion table */
	$isConversionTable = $parser->getTitle()->getNamespace() == NS_MEDIAWIKI
		&& strpos( $parser->mTitle->getText(), "Conversiontable" ) !== false;

	if( $isConversionTable || $wgDisableLangConversion ) {
		return $text;
	}

	$converted = $this->convert( $text );
	$parser->mOutput->setTitleText( $this->mTitleDisplay );
	return $converted;
}
347
/**
 * Parse flags with syntax -{FLAG| ... }-
 *
 * @param string $marked the text found between the begin and end markers
 * @return array array( $rules, $flags ): the rule text with the flag
 *               part removed, and the normalised list of flags
 * @private
 */
function parseFlags($marked){
	// too short to carry any flag: treat as raw content
	if( strlen($marked) < 2 ) {
		return array( $marked, array('R') );
	}

	// split "flags|rules"; without a separator everything is rules
	$flags = array();
	$rules = $marked;
	$parts = explode( $this->mMarkup['flagsep'], $marked, 2 );
	if( count($parts) == 2 ) {
		$rules = $parts[1];
		foreach( explode( $this->mMarkup['varsep'], $parts[0] ) as $name ) {
			$name = trim($name);
			if( !array_key_exists( $name, $this->mFlags ) ) {
				continue;
			}
			$flag = $this->mFlags[$name];
			if( !in_array( $flag, $flags ) ) {
				$flags[] = $flag;
			}
		}
	}

	if( !in_array( 'R', $flags ) ) {
		//FIXME: may cause trouble here...
		//strip &nbsp; since it interferes with the parsing, plus,
		//all spaces should be stripped in this tag anyway.
		$rules = str_replace( array('&nbsp;', '=&gt;'), array('', '=>'), $rules );
	}

	// R, N and - each override every other flag, in that order of priority
	$exclusive = false;
	foreach( array('R', 'N', '-') as $single ) {
		if( in_array( $single, $flags ) ) {
			$flags = array( $single );
			$exclusive = true;
			break;
		}
	}

	if( !$exclusive ) {
		if( count($flags) == 1 && $flags[0] == 'T' ) {
			// a lone T flag also hides the rule text
			$flags[] = 'H';
		} elseif( in_array( 'H', $flags ) ) {
			// replace A flag, and remove other flags except T and D
			$kept = array('+', 'H');
			if( in_array( 'T', $flags ) ) {
				$kept[] = 'T';
			}
			if( in_array( 'D', $flags ) ) {
				$kept[] = 'D';
			}
			$flags = $kept;
		} else {
			if( in_array( 'A', $flags ) ) {
				$flags[] = '+';
				$flags[] = 'S';
			}
			if( in_array( 'D', $flags ) ) {
				$flags = array_diff( $flags, array('S') );
			}
		}
	}

	// no flag at all: show the converted text
	if( count($flags) == 0 ) {
		$flags = array('S');
	}
	return array( $rules, $flags );
}
409
/**
 * Build a textual description of a set of conversion rules
 *
 * @param array $bidtable bidirectional table: variant code => text
 * @param array $unidtable unidirectional table:
 *                         variant code => array( from => to )
 * @return string "name:text;" for each bidirectional entry followed by
 *                "from⇒name:to;" for each unidirectional entry
 * @private
 */
function getRulesDesc($bidtable,$unidtable){
	$pieces = array();
	foreach( $bidtable as $code => $converted ) {
		$pieces[] = $this->mVariantNames[$code].':'.$converted.';';
	}
	foreach( $unidtable as $code => $pairs ) {
		foreach( $pairs as $from => $to ) {
			$pieces[] = $from.'⇒'.$this->mVariantNames[$code].':'.$to.';';
		}
	}
	return implode( '', $pieces );
}
422
/**
 * parse the manually marked conversion rule
 *
 * Each rule is either "variant:text" (bidirectional) or
 * "from=>variant:text" (unidirectional), rules being separated by the
 * 'varsep' markup. Only the first 'codesep' splits a rule, so the text
 * itself may contain that separator (URLs, times, ...). Rules whose
 * variant code is not one of the supported variants are ignored.
 *
 * @param string $rules the text of the rules
 * @param array $flags unused
 * @return array array( $bidtable, $unidtable ) where $bidtable maps
 *               variant code => text and $unidtable maps
 *               variant code => array( from => to )
 * @private
 */
function getConvTableFromRules($rules,$flags=array()) {
	$bidtable = array();
	$unidtable = array();
	$choice = explode($this->mMarkup['varsep'], $rules );
	foreach($choice as $c) {
		// limit 2: everything after the first separator is the text
		$v = explode($this->mMarkup['codesep'], $c, 2);
		if(count($v) != 2)
			continue;// syntax error, skip
		$to = trim($v[1]);
		$v = trim($v[0]);
		$u = explode($this->mMarkup['unidsep'], $v);
		if(count($u) == 1) {
			// plain text that merely contains a separator is not a rule
			if( in_array($v, $this->mVariants) )
				$bidtable[$v] = $to;
		} elseif(count($u) == 2) {
			$from = trim($u[0]);
			$v = trim($u[1]);
			if( !in_array($v, $this->mVariants) )
				continue;
			if( !isset($unidtable[$v]) || !is_array($unidtable[$v]) )
				$unidtable[$v] = array();
			$unidtable[$v][$from] = $to;
		}
		// more than one unidirectional separator: syntax error, pass
	}
	return array($bidtable,$unidtable);
}
453
/**
 * get display text on markup -{...}-
 *
 * @param string $rules the original code
 * @param array $flags FLAGs
 * @param array $bidtable bidirectional convert table
 * @param array $unidtable unidirectional convert table
 * @param string $variant the current variant; the preferred variant is
 *                        used when omitted
 * @param bool $doConvert whether conversion should be done
 * @return mixed the text to display; false or '' when nothing suitable
 *               was found
 * @private
 */
function getRulesDisplay($rules,$flags,
		$bidtable,$unidtable,
		$variant=false,$doConvert=true){
	if(!$variant) $variant = $this->getPreferredVariant();
	// an unknown variant has no manual level; treat it as "not disabled"
	$is_mc_disable = isset($this->mManualLevel[$variant])
		&& $this->mManualLevel[$variant]=='disable';

	if( in_array('R',$flags) ) {
		// if we don't do content convert, still strip the -{}- tags
		$disp = $rules;
	} elseif ( in_array('N',$flags) ){
		// process N flag: output current variant name
		// (nothing for a code that has no known name)
		$code = trim($rules);
		$disp = isset($this->mVariantNames[$code]) ? $this->mVariantNames[$code] : '';
	} elseif ( in_array('D',$flags) ){
		// process D flag: output rules description
		$disp = $this->getRulesDesc($bidtable,$unidtable);
	} elseif ( in_array('H',$flags) || in_array('-',$flags) ) {
		// process H,- flag or T only: output nothing
		$disp = '';
	} elseif ( in_array('S',$flags) ){
		if( count($bidtable) + count($unidtable) == 0 ){
			$disp = $rules;
		} elseif ($doConvert){// the text converted
			// "missing" is tested by length so that the text "0" is
			// displayed instead of being treated as absent

			// display current variant in bidirectional array
			$disp = $this->getTextInCArray($variant,$bidtable);
			// or display current variant in fallbacks
			if( strlen( (string)$disp ) == 0 )
				$disp = $this->getTextInCArray($this->getVariantFallbacks($variant),$bidtable);
			// or display current variant in unidirectional array
			if( strlen( (string)$disp ) == 0 && array_key_exists($variant,$unidtable) ){
				$disp = array_values($unidtable[$variant]);
				$disp = $disp[0];
			}
			// or display first text under disabled manual convert
			if( strlen( (string)$disp ) == 0 && $is_mc_disable ) {
				if(count($bidtable)>0){
					$disp = array_values($bidtable);
					$disp = $disp[0];
				} else {
					$disp = array_values($unidtable);
					$disp = array_values($disp[0]);
					$disp = $disp[0];
				}
			}
		} else {// no convert
			$disp = $rules;
		}
	} elseif ( in_array('T',$flags) ) {
		// process T flag : output nothing
		$disp = '';
	} else {
		$disp = $this->mManualCodeError;
	}

	return $disp;
}
519
/**
 * Apply the side effects of a manual conversion rule: set the title
 * display text for the T flag, and add (+ flag) or remove (- flag) the
 * rule's pairs in the conversion tables.
 *
 * @param string $rules the original rule text
 * @param array $flags FLAGs
 * @param array $bidtable bidirectional convert table
 * @param array $unidtable unidirectional convert table
 * @param string $variant the current variant; the preferred variant is
 *                        used when omitted
 * @access private
 */
function applyManualFlag($rules,$flags,$bidtable,$unidtable,$variant=false){
	if(!$variant) $variant = $this->getPreferredVariant();

	$is_title_flag = in_array('T', $flags);
	// use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
	if($is_title_flag){
		$this->mTitleFromFlag = true;
		$this->mTitleDisplay = $this->getRulesDisplay($rules,array('S'),
							$bidtable,$unidtable,
							$variant,
							$this->mDoTitleConvert);
	}

	// 'disable', or a variant without a configured level: leave the tables alone
	$level = isset($this->mManualLevel[$variant]) ? $this->mManualLevel[$variant] : false;
	$is_bidMC = $level=='bidirectional';
	$is_unidMC = $level=='unidirectional';
	if( !$is_bidMC && !$is_unidMC ) return;

	$is_remove_flag = !$is_title_flag && in_array('-', $flags);
	$is_add_flag = !$is_remove_flag && in_array('+', $flags);
	if( !$is_add_flag && !$is_remove_flag ) return; // nothing to change

	// the tables may not be loaded yet, e.g. when content conversion is
	// switched off and autoConvert() has therefore never been called
	if( !$this->mTablesLoaded )
		$this->loadTables();

	// NOTE(review): unidirectional rules are only applied when the level is
	// 'unidirectional', never for 'bidirectional' - confirm this is intended
	$vmarked=array();
	foreach($this->mVariants as $v) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if($is_bidMC && !array_key_exists($v, $bidtable)) {
			$vf = $this->getTextInCArray($this->getVariantFallbacks($v),$bidtable);
			if($vf) $bidtable[$v] = $vf;
		}
		if($is_bidMC && array_key_exists($v,$bidtable)){
			foreach($vmarked as $vo){
				// use syntax:
				// -{A|zh:WordZh;zh-tw:WordTw}- or -{+|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if($is_add_flag){
					$this->mTables[$v]->setPair($bidtable[$vo], $bidtable[$v]);
					$this->mTables[$vo]->setPair($bidtable[$v], $bidtable[$vo]);
				}
				// use syntax -{-|zh:WordZh;zh-tw:WordTw}- to remove a conversion
				// words WordZh and WordTw in the whole text
				if($is_remove_flag){
					$this->mTables[$v]->removePair($bidtable[$vo]);
					$this->mTables[$vo]->removePair($bidtable[$v]);
				}
			}
			$vmarked[]=$v;
		}
		/* for unidirectional array
			fill to convert tables */
		if($is_unidMC && array_key_exists($v,$unidtable)){
			if($is_add_flag) $this->mTables[$v]->mergeArray($unidtable[$v]);
			if($is_remove_flag) $this->mTables[$v]->removeArray($unidtable[$v]);
		}
	}
}
579
/**
 * Parse rules and flags
 *
 * Builds the conversion tables from the rule text, computes the text to
 * display in place of the markup, and applies the rule's side effects.
 *
 * @param string $rules the rule text (flags already stripped)
 * @param array $flags FLAGs as returned by parseFlags()
 * @param string $variant the variant to convert to; the preferred
 *                        variant is used when omitted
 * @return mixed the text to display, as returned by getRulesDisplay()
 * @private
 */
function parseRules($rules,$flags,$variant=false){
	if(!$variant) $variant = $this->getPreferredVariant();

	list($bidtable,$unidtable) = $this->getConvTableFromRules($rules, $flags);
	// no usable rule at all: show the text as raw content,
	// unless the flag does not need a table
	if(count($bidtable)==0 && count($unidtable)==0
		&& !in_array('N',$flags) && !in_array('T',$flags) )
		$flags = array('R');
	$disp = $this->getRulesDisplay($rules,$flags,
					$bidtable,$unidtable,
					$variant,
					$this->mDoContentConvert);
	// use the same variant for the side effects as for the display
	$this->applyManualFlag($rules,$flags,$bidtable,$unidtable,$variant);

	return $disp;
}
599
/**
 * convert title
 *
 * @param string $text the title text
 * @return string the title to display
 * @private
 */
function convertTitle($text){
	// check for __NOTC__ tag
	if( !$this->mDoTitleConvert ) {
		$this->mTitleDisplay = $text;
		return $text;
	}

	// use the title from the T flag if any (the flag is consumed here)
	if( $this->mTitleFromFlag ) {
		$this->mTitleFromFlag = false;
		return $this->mTitleDisplay;
	}

	global $wgRequest;
	$redirectParam = $wgRequest->getText( 'redirect', 'yes' );
	$actionParam = $wgRequest->getText( 'action' );

	// convert unless redirect=no was requested or the page is being edited
	if( $redirectParam != 'no' && $actionParam != 'edit' ) {
		$this->mTitleDisplay = $this->convert( $text );
		return $this->mTitleDisplay;
	}
	return $text;
}
627
/**
 * convert text to different variants of a language. the automatic
 * conversion is done in autoConvert(). here we parse the text
 * marked with -{}-, which specifies special conversions of the
 * text that can not be accomplished in autoConvert()
 *
 * syntax of the markup:
 * -{code1:text1;code2:text2;...}-  or
 * -{flags|code1:text1;code2:text2;...}-  or
 * -{text}- in which case no conversion should take place for text
 *
 * @param string $text text to be converted
 * @param bool $isTitle whether this conversion is for the article title
 * @return string converted text
 * @access public
 */
function convert( $text , $isTitle=false) {

	// the two "no convert" magic words are removed from the text and
	// switch the corresponding kind of conversion off
	$mw =& MagicWord::get( 'notitleconvert' );
	if( $mw->matchAndRemove( $text ) ) {
		$this->mDoTitleConvert = false;
	}
	$mw =& MagicWord::get( 'nocontentconvert' );
	if( $mw->matchAndRemove( $text ) ) {
		$this->mDoContentConvert = false;
	}

	// no conversion if redirecting
	$mw =& MagicWord::get( 'redirect' );
	if( $mw->matchStart( $text ) ) {
		return $text;
	}

	// for title conversion
	if( $isTitle ) {
		return $this->convertTitle( $text );
	}

	$plang = $this->getPreferredVariant();
	$closeMark = $this->mMarkup['end'];

	// everything before the first begin marker is ordinary text
	$chunks = explode( $this->mMarkup['begin'], $text );
	$leading = array_shift( $chunks );
	$out = $this->mDoContentConvert
		? $this->autoConvert( $leading, $plang )
		: $leading;

	foreach( $chunks as $chunk ) {
		// $pieces[0] is the marked rule, $pieces[1] (if any) the text after it
		$pieces = explode( $closeMark, $chunk, 2 );

		// strip the flags from syntax like -{T| ... }-
		list( $rules, $flags ) = $this->parseFlags( $pieces[0] );
		$out .= $this->parseRules( $rules, $flags, $plang );

		if( !array_key_exists( 1, $pieces ) ) {
			continue;
		}
		$out .= $this->mDoContentConvert
			? $this->autoConvert( $pieces[1], $plang )
			: $pieces[1];
	}

	return $out;
}
688
/**
 * if a language supports multiple variants, it is
 * possible that non-existing link in one variant
 * actually exists in another variant. this function
 * tries to find it. See e.g. LanguageZh.php
 *
 * @param string $link the name of the link; replaced by the variant
 *                     text that was found, unless $wgDisableLangConversion
 *                     is set
 * @param mixed $nt the title object of the link; replaced by the title
 *                  of the existing variant, if any
 * @return null the input parameters may be modified upon return
 * @access public
 */
function findVariantLink( &$link, &$nt ) {
	global $wgDisableLangConversion;
	$linkBatch = new LinkBatch();

	$ns=NS_MAIN;

	if(is_object($nt))
		$ns = $nt->getNamespace();

	$variants = $this->autoConvertToAllVariants($link);
	if($variants == false) //give up
		return;

	// keep each candidate title together with the variant text it was
	// built from, so that the matching text can be handed back below
	$candidates = array();

	foreach( $variants as $v ) {
		if($v != $link){
			$varnt = Title::newFromText( $v, $ns );
			if(!is_null($varnt)){
				$linkBatch->addObj($varnt);
				$candidates[] = array( $v, $varnt );
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	foreach( $candidates as $candidate ) {
		list( $variantText, $varnt ) = $candidate;
		if( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			if( !$wgDisableLangConversion )
				$link = $variantText;
			break;
		}
	}
}
737
/**
 * returns language specific hash options
 *
 * @return string '!' followed by the preferred variant code
 * @access public
 */
function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
747
/**
 * get title text as defined in the body of the article text
 *
 * @return string the current value of mTitleDisplay
 * @access public
 */
function getParsedTitle() {
	$title = $this->mTitleDisplay;
	return $title;
}
756
/**
 * a write lock to the cache
 *
 * Tries up to 30 times, one second apart, to add the lock key
 * (with an expiry argument of 10) to $wgMemc.
 *
 * @return mixed the result of the successful add(), or of the last
 *               failed attempt
 * @private
 */
function lockCache() {
	global $wgMemc;
	$lockKey = $this->mCacheKey . "lock";
	$attempts = 0;
	while( $attempts < 30 ) {
		$acquired = $wgMemc->add( $lockKey, 1, 10 );
		if( $acquired ) {
			return $acquired;
		}
		sleep(1);
		$attempts++;
	}
	return $acquired;
}
772
/**
 * unlock cache: delete the lock key added by lockCache()
 *
 * @private
 */
function unlockCache() {
	global $wgMemc;
	$lockKey = $this->mCacheKey . "lock";
	$wgMemc->delete( $lockKey );
}
782
783
/**
 * Load default conversion tables
 * This method must be implemented in derived class; this base
 * implementation only reports the missing override through wfDie()
 *
 * @private
 */
function loadDefaultTables() {
	wfDie( "Must implement loadDefaultTables() method in class " . get_class($this) );
}
794
/**
 * load conversion tables either from the cache or the disk
 *
 * @param bool $fromcache whether the memcached copy may be used; when
 *                        false the tables are always rebuilt
 * @private
 */
function loadTables($fromcache=true) {
	global $wgMemc;

	if( $this->mTablesLoaded ) {
		return;
	}
	wfProfileIn( __METHOD__ );

	// the flag is raised before loading starts, so any loadTables() call
	// made while loading returns at the guard above
	$this->mTablesLoaded = true;
	$this->mTables = false;

	if( $fromcache ) {
		wfProfileIn( __METHOD__.'-cache' );
		$this->mTables = $wgMemc->get( $this->mCacheKey );
		wfProfileOut( __METHOD__.'-cache' );
	}

	$haveCurrentTables = $this->mTables
		&& isset( $this->mTables[self::CACHE_VERSION_KEY] );

	if( !$haveCurrentTables ) {
		wfProfileIn( __METHOD__.'-recache' );
		// not in cache, or we need a fresh reload.
		// we will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach( $this->mVariants as $code ) {
			$customised = $this->parseCachedTable( $code );
			$this->mTables[$code]->mergeArray( $customised );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// only write to the cache when the lock could be taken
		if( $this->lockCache() ) {
			$wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			$this->unlockCache();
		}
		wfProfileOut( __METHOD__.'-recache' );
	}

	wfProfileOut( __METHOD__ );
}
833
/**
 * Hook for post processing after conversion tables are loaded.
 * Called by loadTables() once the default and customised tables have
 * been merged; does nothing in this class.
 */
function postLoadTables() {
	// intentionally empty
}
839
/**
 * Reload the conversion tables, bypassing the cached copy
 *
 * @private
 */
function reloadTables() {
	// drop whatever is loaded at the moment
	if( $this->mTables ) {
		unset( $this->mTables );
	}
	// clear the flag so that loadTables() does not return early
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
851
852
/**
 * parse the conversion table stored in the cache
 *
 * the tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * to make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive=true
 *
 * @param string $code the variant code whose table is wanted
 * @param string $subpage name of the subpage to parse; '' for the main table
 * @param bool $recursive whether linked subpages are parsed as well
 * @return array the mappings found, from => to; empty when the page has
 *               already been parsed during this load
 */
function parseCachedTable($code, $subpage='', $recursive=true) {
	global $wgMessageCache;
	static $parsed = array();
	static $depth = 0;

	if(!is_object($wgMessageCache))
		return array();

	// the list of visited pages only guards against link loops within
	// one load; start afresh for every outermost call so that
	// reloadTables() really re-reads the pages
	if( $depth == 0 )
		$parsed = array();

	$key = 'Conversiontable/'.$code;
	if($subpage)
		$key .= '/' . $subpage;

	if(array_key_exists($key, $parsed))
		return array();
	$parsed[$key] = true;

	if ( !$subpage && strpos( $code, '/' ) === false ) {
		$txt = $wgMessageCache->get( 'Conversiontable', true, $code );
	} else {
		// subpages are read from their own page, named by $key
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$article = new Article( $title );
			$txt = $article->getContents();
		} else {
			$txt = '';
		}
	}

	// get all subpage links of the form
	// [[MediaWiki:conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText(NS_MEDIAWIKI) . ':Conversiontable';
	$subs = explode('[[', $txt);
	$sublinks = array();
	foreach( $subs as $sub ) {
		$link = explode(']]', $sub, 2);
		if(count($link) != 2)
			continue;
		$b = explode('|', $link[0]);
		$b = explode('/', trim($b[0]), 3);
		// a link without any '/' cannot name a conversion table page
		if(count($b) < 2)
			continue;
		if(count($b)==3)
			$sublink = $b[2];
		else
			$sublink = '';

		if($b[0] == $linkhead && $b[1] == $code) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = explode($this->mMarkup['begin'], $txt);
	array_shift($blocks);
	$ret = array();
	foreach($blocks as $block) {
		$mappings = explode($this->mMarkup['end'], $block, 2);
		$stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
		$table = explode( ';', $stripped );
		foreach( $table as $t ) {
			$m = explode( '=>', $t );
			if( count( $m ) != 2)
				continue;
			// trim any trailing comments starting with '//'
			$tt = explode('//', $m[1], 2);
			$ret[trim($m[0])] = trim($tt[0]);
		}
	}

	// recursively parse the subpages
	if($recursive) {
		$depth++;
		foreach($sublinks as $link) {
			$s = $this->parseCachedTable($code, $link, $recursive);
			// copy pair by pair: array_merge() would renumber mappings
			// whose source text is a number
			foreach($s as $from => $to) {
				$ret[$from] = $to;
			}
		}
		$depth--;
	}

	if ($this->mUcfirst) {
		foreach ($ret as $k => $v) {
			$ret[Language::ucfirst($k)] = Language::ucfirst($v);
		}
	}
	return $ret;
}
950
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser
 *
 * @param string $text text to be tagged for no conversion
 * @param bool $noParse unused
 * @return string the tagged text, or $text unchanged when it already
 *                contains a begin or end marker
 * @public
 */
function markNoConversion($text, $noParse=false) {
	# don't mark if already marked; compare against false explicitly
	# because a marker at the very start of the text is found at offset 0
	if( strpos($text, $this->mMarkup['begin']) !== false ||
		strpos($text, $this->mMarkup['end']) !== false )
		return $text;

	$ret = $this->mMarkup['begin'] .'R|'. $text . $this->mMarkup['end'];
	return $ret;
}
968
/**
 * convert the sorting key for category links. this should make different
 * keys that are variants of each other map to the same key.
 * This implementation returns the key unchanged.
 *
 * @param string $key the sorting key
 * @return string the key to sort by
 */
function convertCategoryKey( $key ) {
	$sortKey = $key;
	return $sortKey;
}
/**
 * hook to refresh the cache of conversion tables when
 * MediaWiki:conversiontable* is updated
 *
 * Only $article is used; the tables are reloaded when it is a page
 * "Conversiontable/<variant>[/...]" in the MediaWiki namespace and
 * <variant> is one of the supported variants.
 *
 * @return bool always true
 * @private
 */
function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section, $flags, $revision) {
	$titleobj = $article->getTitle();
	if( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return true;
	}

	$segments = explode( '/', $titleobj->getDBkey(), 3 );
	$isTablePage = count($segments) > 1
		&& $segments[0] == 'Conversiontable'
		&& in_array( $segments[1], $this->mVariants );
	if( $isTablePage ) {
		$this->reloadTables();
	}
	return true;
}
995
/**
 * Armour rendered math against conversion
 * Wrap math into rawoutput -{R| math }- syntax
 *
 * @param string $text the rendered math
 * @return string $text enclosed in the begin marker, 'R|' and the end marker
 * @public
 */
function armourMath($text){
	$open = $this->mMarkup['begin'];
	$close = $this->mMarkup['end'];
	return $open . 'R|' . $text . $close;
}
1005 }