* (bug 14604) Introduced the following features for the LanguageConverter: Multi...
[lhc/web/wiklou.git] / languages / LanguageConverter.php
1 <?php
2 //require_once( dirname(__FILE__).'/Names.php' );
3
4 /**
5 * @ingroup Language
6 *
7 * @author Zhengzhu Feng <zhengzhu@gmail.com>
8 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
9 * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com>
10 */
11
12 class LanguageConverter {
/** Cached preferred variant code; stays '' until getPreferredVariant() stores one. */
public $mPreferredVariant = '';

/** Main language code, as handed to the constructor. */
public $mMainLanguageCode;

/** List of variant codes supported by this converter. */
public $mVariants;

/** Map of variant code => fallback variant(s), see getVariantFallbacks(). */
public $mVariantFallbacks;

/** Copy of $wgLanguageNames, used to print variant names. */
public $mVariantNames;

/** Set as soon as loadTables() starts, so the tables are only loaded once. */
public $mTablesLoaded = false;

/** Conversion tables keyed by variant code (plus the cache version marker). */
public $mTables;

/** Title text to display, filled by convertTitle() or by a T-flagged rule. */
public $mTitleDisplay = '';

/** Cleared by convert() when the 'notitleconvert' magic word is found. */
public $mDoTitleConvert = true;

/** Cleared by convert() when the 'nocontentconvert' magic word is found. */
public $mDoContentConvert = true;

/** Per-variant manual conversion level: 'bidirectional', 'unidirectional' or 'disable'. */
public $mManualLevel;

/** HTML emitted by getRulesDisplay() when a rule carries no usable flag. */
public $mManualCodeError = '<span style="color: red;">code error!</span>';

/** True while a T-flagged rule has supplied the title; reset by convertTitle(). */
public $mTitleFromFlag = false;

/** Cache key of the conversion tables, built in the constructor. */
public $mCacheKey;

/** Language object passed to the constructor. */
public $mLangObj;

/** Markup tokens ('begin', 'flagsep', 'unidsep', 'codesep', 'varsep', 'end'). */
public $mMarkup;

/** Map of flag strings accepted in markup => internal flag. */
public $mFlags;

/** When true, parseCachedTable() also adds ucfirst'ed copies of every mapping. */
public $mUcfirst = false;

/** Key that marks cached tables as belonging to the current table format. */
const CACHE_VERSION_KEY = 'VERSION 6';
30
/**
 * Constructor
 *
 * @param object $langobj the language object this converter works for
 * @param string $maincode the main language code of this language
 * @param array $variants the supported variants of this language
 * @param array $variantfallbacks the fallback language of each variant
 * @param array $markup overrides for the markup used for manual conversion
 * @param array $flags overrides/additions for the strings that map to flags
 * @param array $manualLevel manual conversion level per variant code;
 *              variants not listed here default to 'bidirectional'
 * @access public
 */
function __construct($langobj, $maincode,
		$variants=array(),
		$variantfallbacks=array(),
		$markup=array(),
		$flags = array(),
		$manualLevel = array() ) {
	global $wgLanguageNames;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = $variants;
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = $wgLanguageNames;
	$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

	// default markup tokens; entries in $markup take precedence
	$defaultMarkup = array(
		'begin'   => '-{',
		'flagsep' => '|',
		'unidsep' => '=>', // for unidirectional conversion
		'codesep' => ':',
		'varsep'  => ';',
		'end'     => '}-'
	);
	$this->mMarkup = array_merge( $defaultMarkup, $markup );

	// Flags 'S' (show converted text), '+' (add rules for all text) and
	// 'E' (the given flags are in error) are reserved for the program and
	// are therefore not listed here.
	$defaultFlags = array(
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implement)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N'  // current variant name
	);
	$this->mFlags = array_merge( $defaultFlags, $flags );

	foreach( $this->mVariants as $code ) {
		if( array_key_exists( $code, $manualLevel ) ) {
			$this->mManualLevel[$code] = $manualLevel[$code];
		} else {
			$this->mManualLevel[$code] = 'bidirectional';
		}
	}
}
82
/**
 * Get the list of variant codes this converter supports.
 *
 * @return array the variant codes given to the constructor
 * @access public
 */
function getVariants() {
	$codes = $this->mVariants;
	return $codes;
}
89
/**
 * In case some variant is not defined in the markup, we need
 * to have some fallback. For example, in zh, normally people
 * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk.
 * When zh-sg is preferred but not defined, we will pick zh-hans
 * in this case. Right now this is only used by zh.
 *
 * @param string $v the language code of the variant
 * @return mixed the configured fallback entry for $v, or the main
 *         language code when no fallback is configured
 * @private
 */
function getVariantFallbacks($v) {
	return isset( $this->mVariantFallbacks[$v] )
		? $this->mVariantFallbacks[$v]
		: $this->mMainLanguageCode;
}
107
/**
 * Look up the first of the given variants that has an entry in a
 * convert array.
 *
 * @param mixed $variants a variant code, or an array of variant codes
 *              tried in order
 * @param array $carray convert array keyed by variant code
 * @return mixed the entry of the first matching variant, or false when
 *         none matches or $variants is neither a string nor an array
 * @private
 */
function getTextInCArray($variants,$carray){
	$candidates = is_string( $variants ) ? array( $variants ) : $variants;
	if( !is_array( $candidates ) ) {
		return false;
	}
	foreach( $candidates as $code ) {
		if( array_key_exists( $code, $carray ) ) {
			return $carray[$code];
		}
	}
	return false;
}
127
/**
 * Get the preferred language variant.
 *
 * Sources are consulted in this order: the value cached from an earlier
 * call, the 'variant' request parameter, the /code/ArticleTitle path
 * syntax, the preference of a logged-in user, the globally configured
 * default and finally the Accept-Language header. The header result is
 * returned without being cached.
 *
 * @param boolean $fromUser Get it from $wgUser's preferences
 * @return string the preferred language code
 * @access public
 */
function getPreferredVariant( $fromUser = true ) {
	global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant;

	// already resolved earlier
	if( $this->mPreferredVariant ) {
		return $this->mPreferredVariant;
	}

	// see if the preference is set in the request
	$requested = $wgRequest->getText( 'variant' );
	if( in_array( $requested, $this->mVariants ) ) {
		$this->mPreferredVariant = $requested;
		return $requested;
	}

	// check the syntax /code/ArticleTitle
	// Note: SCRIPT_NAME probably won't hold the correct value if PHP is
	// run as CGI (it will hold path to php.cgi binary), and might not
	// exist on some very old PHP installations
	if( $wgVariantArticlePath != false && isset( $_SERVER['SCRIPT_NAME'] ) ) {
		$scriptBase = basename( $_SERVER['SCRIPT_NAME'] );
		if( in_array( $scriptBase, $this->mVariants ) ) {
			$this->mPreferredVariant = $scriptBase;
			return $scriptBase;
		}
	}

	// get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if( $fromUser && $wgUser->isLoggedIn() ) {
		$this->mPreferredVariant = $wgUser->getOption( 'variant' );
		return $this->mPreferredVariant;
	}

	// see if default variant is globally set
	if( $wgDefaultLanguageVariant != false
		&& in_array( $wgDefaultLanguageVariant, $this->mVariants ) ) {
		$this->mPreferredVariant = $wgDefaultLanguageVariant;
		return $this->mPreferredVariant;
	}

	# FIXME rewrite code for parsing http header. The current code
	# is written specific for detecting zh- variants
	if( !$this->mPreferredVariant ) {
		// see if some supported language variant is set in the
		// http header, but we don't set the mPreferredVariant
		// variable in case this is called before the user's
		// preference is loaded
		$guess = $this->mMainLanguageCode;
		if( array_key_exists( 'HTTP_ACCEPT_LANGUAGE', $_SERVER ) ) {
			$accept = strtolower( $_SERVER["HTTP_ACCEPT_LANGUAGE"] );
			$accept = str_replace( '_', '-', $accept );
			$tail = strstr( $accept, $guess . '-' );
			if( $tail ) {
				$guess = substr( $tail, 0, 5 );
			}
		}
		// don't try to return bad variant
		if( in_array( $guess, $this->mVariants ) ) {
			return $guess;
		}
	}

	return $this->mMainLanguageCode;
}
195
/**
 * Dictionary-based conversion.
 *
 * The text is split on everything that must stay untouched (HTML tags,
 * HTML entities, <code> and <script> elements and the parser's place
 * holders); only the pieces in between are passed to translate().
 *
 * @param string $text the text to be converted
 * @param string $toVariant the target language code; the preferred
 *               variant is used when omitted
 * @return string the converted text, or $text unchanged when the target
 *         variant is not supported
 * @private
 */
function autoConvert($text, $toVariant=false) {
	$fname="LanguageConverter::autoConvert";

	wfProfileIn( $fname );

	if(!$this->mTablesLoaded)
		$this->loadTables();

	if(!$toVariant)
		$toVariant = $this->getPreferredVariant();
	if(!in_array($toVariant, $this->mVariants)) {
		// close the profiling section opened above before bailing out
		wfProfileOut( $fname );
		return $text;
	}

	/* we convert everything except:
	   1. html markups (anything between < and >)
	   2. html entities
	   3. place holders created by the parser
	*/
	global $wgParser;
	if (isset($wgParser) && $wgParser->UniqPrefix()!='')
		// quote the prefix so it is always matched literally
		$marker = '|' . preg_quote( $wgParser->UniqPrefix(), '/' ) . '[\-a-zA-Z0-9]+';
	else
		$marker = "";

	// this one is needed when the text is inside an html markup
	$htmlfix = '|<[^>]+$|^[^<>]*>';

	// disable convert to variants between <code></code> tags
	$codefix = '<code>.+?<\/code>|';
	// disable conversion of <script type="text/javascript"> ... </script>
	$scriptfix = '<script.*?>.*?<\/script>|';

	$reg = '/'.$codefix . $scriptfix . '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';

	// each entry is array( convertible text, byte offset in $text )
	$matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);

	$m = array_shift($matches);

	$ret = $this->translate($m[0], $toVariant);
	$mstart = $m[1]+strlen($m[0]);
	foreach($matches as $m) {
		// copy the protected span between two convertible pieces verbatim
		$ret .= substr($text, $mstart, $m[1]-$mstart);
		$ret .= $this->translate($m[0], $toVariant);
		$mstart = $m[1] + strlen($m[0]);
	}
	wfProfileOut( $fname );
	return $ret;
}
252
/**
 * Translate a string to a variant using that variant's conversion table.
 * Doesn't process markup or do any of that other stuff, for that use convert()
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text
 */
function translate( $text, $variant ) {
	wfProfileIn( __METHOD__ );
	if( !$this->mTablesLoaded ) {
		$this->loadTables();
	}
	$table = $this->mTables[$variant];
	$converted = $table->replace( $text );
	wfProfileOut( __METHOD__ );
	return $converted;
}
269
/**
 * Convert text to all supported variants.
 *
 * @param string $text the text to be converted
 * @return array map of variant code => converted text
 * @public
 */
function autoConvertToAllVariants($text) {
	$fname="LanguageConverter::autoConvertToAllVariants";
	wfProfileIn( $fname );

	if( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	$converted = array();
	foreach( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	wfProfileOut( $fname );
	return $converted;
}
291
/**
 * Convert link text to all supported variants. Anything enclosed in the
 * manual conversion markup is copied through untouched (markup included);
 * only the text around it is translated.
 *
 * @param string $text the text to be converted
 * @return array map of variant code => converted text
 * @public
 */
function convertLinkToAllVariants($text) {
	if( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	$open = $this->mMarkup['begin'];
	$close = $this->mMarkup['end'];

	$segments = explode( $open, $text );
	$lead = array_shift( $segments );

	// text before the first marker
	$out = array();
	foreach( $this->mVariants as $code ) {
		$out[$code] = $this->translate( $lead, $code );
	}

	foreach( $segments as $segment ) {
		$parts = explode( $close, $segment, 2 );
		$verbatim = $open . $parts[0] . $close;
		$hasTail = array_key_exists( 1, $parts );

		foreach( $this->mVariants as $code ) {
			$out[$code] .= $verbatim;
			if( $hasTail ) {
				$out[$code] .= $this->translate( $parts[1], $code );
			}
		}
	}

	return $out;
}
323
324
/**
 * Convert text using a parser object for context.
 *
 * Nothing is converted for MediaWiki-namespace pages whose title contains
 * "Conversiontable", or when $wgDisableLangConversion is set. Otherwise
 * the converted title is also handed to the parser output.
 *
 * @param string $text text to convert
 * @param object $parser parser providing the title and the output object
 * @return string the (possibly converted) text
 */
function parserConvert( $text, &$parser ) {
	global $wgDisableLangConversion;

	/* don't do anything if this is the conversion table */
	$isConversionTable = $parser->getTitle()->getNamespace() == NS_MEDIAWIKI
		&& strpos( $parser->mTitle->getText(), "Conversiontable" ) !== false;

	if( $isConversionTable || $wgDisableLangConversion ) {
		return $text;
	}

	$converted = $this->convert( $text );
	$parser->mOutput->setTitleText( $this->mTitleDisplay );
	return $converted;
}
344
/**
 * Parse flags with syntax -{FLAG| ... }-
 *
 * @param string $marked the text found between the begin and end markers
 * @return array array( rules text, list of normalised flags )
 */
function parseFlags($marked){
	// too short to carry a flag: treat it as raw content
	if( strlen( $marked ) < 2 ) {
		return array( $marked, array( 'R' ) );
	}

	$flags = array();
	$rules = $marked;

	// for multi-FLAGs
	$pieces = explode( $this->mMarkup['flagsep'], $marked, 2 );
	if( count( $pieces ) == 2 ) {
		$rules = $pieces[1];
		foreach( explode( $this->mMarkup['varsep'], $pieces[0] ) as $name ) {
			$name = trim( $name );
			if( !array_key_exists( $name, $this->mFlags ) ) {
				continue;
			}
			$flag = $this->mFlags[$name];
			if( !in_array( $flag, $flags ) ) {
				$flags[] = $flag;
			}
		}
	}

	if( !in_array( 'R', $flags ) ) {
		//FIXME: may cause trouble here...
		//strip &nbsp; since it interferes with the parsing, plus,
		//all spaces should be stripped in this tag anyway.
		$rules = str_replace( array( '&nbsp;', '=&gt;' ), array( '', '=>' ), $rules );
	}

	// check flags: R, N and - each override every other flag
	$exclusive = false;
	foreach( array( 'R', 'N', '-' ) as $sole ) {
		if( in_array( $sole, $flags ) ) {
			$flags = array( $sole );
			$exclusive = true;
			break;
		}
	}

	if( !$exclusive ) {
		if( count( $flags ) == 1 && $flags[0] == 'T' ) {
			$flags[] = 'H';
		} elseif( in_array( 'H', $flags ) ) {
			// replace A flag, and remove other flags except T (and D)
			$kept = array( '+', 'H' );
			if( in_array( 'T', $flags ) ) {
				$kept[] = 'T';
			}
			if( in_array( 'D', $flags ) ) {
				$kept[] = 'D';
			}
			$flags = $kept;
		} else {
			if( in_array( 'A', $flags ) ) {
				$flags[] = '+';
				$flags[] = 'S';
			}
			if( in_array( 'D', $flags ) ) {
				$flags = array_diff( $flags, array( 'S' ) );
			}
		}
	}

	// no flag at all: show the converted text
	if( count( $flags ) == 0 ) {
		$flags = array( 'S' );
	}

	return array( $rules, $flags );
}
407
/**
 * Build a human readable description of a set of conversion rules,
 * as displayed for the D flag.
 *
 * Bidirectional rules are rendered as "Name:text;", unidirectional ones
 * as "from⇒Name:to;". A variant code without a known name is shown by
 * its code.
 *
 * @param array $bidtable bidirectional table, variant code => text
 * @param array $unidtable unidirectional table,
 *              variant code => array( from => to )
 * @return string the description
 */
function getRulesDesc($bidtable,$unidtable){
	$text='';
	foreach($bidtable as $k => $v) {
		$name = isset($this->mVariantNames[$k]) ? $this->mVariantNames[$k] : $k;
		$text .= $name.':'.$v.';';
	}
	foreach($unidtable as $k => $a) {
		$name = isset($this->mVariantNames[$k]) ? $this->mVariantNames[$k] : $k;
		foreach($a as $from=>$to)
			// the arrow must be valid UTF-8 like the rest of the output
			$text .= $from.'⇒'.$name.':'.$to.';';
	}
	return $text;
}
417
/**
 * Parse the manually marked conversion rules.
 *
 * Each rule has the form "code:text" (bidirectional) or
 * "from=>code:text" (unidirectional); rules that do not split into
 * exactly two parts on the code separator are skipped.
 *
 * @param string $rules the text of the rules
 * @param array $flags not used by this implementation
 * @return array array( bidirectional table, unidirectional table )
 * @private
 */
function getConvTableFromRules($rules,$flags=array()) {
	$bidtable = array();
	$unidtable = array();

	foreach( explode( $this->mMarkup['varsep'], $rules ) as $rule ) {
		$halves = explode( $this->mMarkup['codesep'], $rule );
		if( count( $halves ) != 2 ) {
			continue; // syntax error, skip
		}
		$to = trim( $halves[1] );
		$target = trim( $halves[0] );

		$sides = explode( $this->mMarkup['unidsep'], $target );
		switch( count( $sides ) ) {
			case 1:
				// plain "code:text"
				$bidtable[$target] = $to;
				break;
			case 2:
				// "from=>code:text"
				$from = trim( $sides[0] );
				$code = trim( $sides[1] );
				if( !isset( $unidtable[$code] ) || !is_array( $unidtable[$code] ) ) {
					$unidtable[$code] = array();
				}
				$unidtable[$code][$from] = $to;
				break;
			default:
				// syntax error, pass
				break;
		}
	}

	return array( $bidtable, $unidtable );
}
448
/**
 * Get the display text for a piece of markup -{...}-
 *
 * @param string $rules the original code
 * @param array $flags FLAGs
 * @param array $bidtable bidirectional convert table
 * @param array $unidtable unidirectional convert table
 * @param string $variant the current variant; the preferred variant is
 *               used when omitted
 * @param bool $doConvert whether conversion should take place
 * @return mixed the text to display; false when the S flag finds no text
 *         for the variant
 * @private
 */
function getRulesDisplay($rules,$flags,
		$bidtable,$unidtable,
		$variant=false,$doConvert=true){
	if(!$variant) $variant = $this->getPreferredVariant();
	// a variant without a configured level is never "disabled"
	$is_mc_disable = isset($this->mManualLevel[$variant])
		&& $this->mManualLevel[$variant]=='disable';

	if( in_array('R',$flags) ) {
		// if we don't do content convert, still strip the -{}- tags
		$disp = $rules;
	} elseif ( in_array('N',$flags) ){
		// process N flag: output current variant name
		// (empty when the code has no known name)
		$code = trim($rules);
		$disp = isset($this->mVariantNames[$code]) ? $this->mVariantNames[$code] : '';
	} elseif ( in_array('D',$flags) ){
		// process D flag: output rules description
		$disp = $this->getRulesDesc($bidtable,$unidtable);
	} elseif ( in_array('H',$flags) || in_array('-',$flags) ) {
		// process H,- flag or T only: output nothing
		$disp = '';
	} elseif ( in_array('S',$flags) ){
		// the text converted
		if($doConvert){
			// display current variant in bidirectional array
			$disp = $this->getTextInCArray($variant,$bidtable);
			// or display current variant in fallbacks
			if(!$disp)
				$disp = $this->getTextInCArray($this->getVariantFallbacks($variant),$bidtable);
			// or display current variant in unidirectional array
			if(!$disp && array_key_exists($variant,$unidtable)){
				$targets = array_values($unidtable[$variant]);
				if(isset($targets[0]))
					$disp = $targets[0];
			}
			// or display first text under disable manual convert
			if(!$disp && $is_mc_disable) {
				if(count($bidtable)>0){
					$texts = array_values($bidtable);
					$disp = $texts[0];
				} else {
					// both tables may be empty here (e.g. a bare T rule),
					// so every level is checked before it is indexed
					$groups = array_values($unidtable);
					if(isset($groups[0]) && is_array($groups[0])) {
						$texts = array_values($groups[0]);
						if(isset($texts[0]))
							$disp = $texts[0];
					}
				}
			}
		} else {// no convert
			$disp = $rules;
		}
	} elseif ( in_array('T',$flags) ) {
		// process T flag : output nothing
		$disp = '';
	}
	else
		$disp= $this->mManualCodeError;

	return $disp;
}
513
/**
 * Apply the side effects of a manual conversion rule: record the title
 * given by a T flag and add rules to / remove rules from the conversion
 * tables for the '+' and '-' flags.
 *
 * @param array $flags FLAGs as returned by parseFlags()
 * @param array $bidtable bidirectional convert table
 * @param array $unidtable unidirectional convert table
 * @param string $variant the current variant; the preferred variant is
 *               used when omitted
 * @param string $rules the original rule text; it becomes the title of a
 *               T-flagged rule when title conversion is switched off
 * @private
 */
function applyManualFlag($flags,$bidtable,$unidtable,$variant=false,$rules=''){
	if(!$variant) $variant = $this->getPreferredVariant();

	$is_title_flag = in_array('T', $flags);
	// use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
	if($is_title_flag){
		$this->mTitleFromFlag = true;
		$this->mTitleDisplay = $this->getRulesDisplay($rules,array('S'),
								$bidtable,$unidtable,
								$variant,
								$this->mDoTitleConvert);
	}

	// a variant without a configured level is neither disabled,
	// bidirectional nor unidirectional
	$level = isset($this->mManualLevel[$variant]) ? $this->mManualLevel[$variant] : null;
	if($level=='disable') return;

	$is_remove_flag = !$is_title_flag && in_array('-', $flags);
	$is_add_flag = !$is_remove_flag && in_array('+', $flags);
	// the tables are only touched for the add and remove flags
	if(!$is_add_flag && !$is_remove_flag) return;

	$is_bidMC = $level=='bidirectional';
	$is_unidMC = $level=='unidirectional';
	$vmarked=array();

	// convert() skips autoConvert() when content conversion is off, so
	// the tables may not have been loaded yet
	if(!$this->mTablesLoaded)
		$this->loadTables();

	foreach($this->mVariants as $v) {
		/* for bidirectional array
		   fill in the missing variants, if any,
		   with fallbacks */
		if($is_bidMC && !array_key_exists($v, $bidtable)) {
			$vf = $this->getTextInCArray($this->getVariantFallbacks($v),$bidtable);
			if($vf) $bidtable[$v] = $vf;
		}
		if($is_bidMC && array_key_exists($v,$bidtable)){
			foreach($vmarked as $vo){
				// use syntax:
				// -{A|zh:WordZh;zh-tw:WordTw}- or -{+|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if($is_add_flag){
					$this->mTables[$v]->setPair($bidtable[$vo], $bidtable[$v]);
					$this->mTables[$vo]->setPair($bidtable[$v], $bidtable[$vo]);
				}
				// use syntax -{-|zh:WordZh;zh-tw:WordTw}- to remove a conversion
				// words WordZh and WordTw in the whole text
				if($is_remove_flag){
					$this->mTables[$v]->removePair($bidtable[$vo]);
					$this->mTables[$vo]->removePair($bidtable[$v]);
				}
			}
			$vmarked[]=$v;
		}
		/* for unidirectional array
		   fill to convert tables */
		if($is_unidMC && array_key_exists($v,$unidtable)){
			if($is_add_flag)$this->mTables[$v]->mergeArray($unidtable[$v]);
			if($is_remove_flag)$this->mTables[$v]->removeArray($unidtable[$v]);
		}
	}
}

/**
 * Parse rules and flags: compute the text to display for one piece of
 * manual markup and apply its side effects for the same variant.
 *
 * @param string $rules the rule text, without flags
 * @param array $flags FLAGs as returned by parseFlags()
 * @param string $variant the current variant; the preferred variant is
 *               used when omitted
 * @return mixed the text to display in place of the markup
 * @private
 */
function parseRules($rules,$flags,$variant=false){
	if(!$variant) $variant = $this->getPreferredVariant();

	list($bidtable,$unidtable) = $this->getConvTableFromRules($rules, $flags);
	// nothing parsable and no flag that works without rules: raw output
	if(count($bidtable)==0 && count($unidtable)==0
		&& !in_array('N',$flags) && !in_array('T',$flags) )
		$flags = array('R');
	$disp = $this->getRulesDisplay($rules,$flags,
					$bidtable,$unidtable,
					$variant,
					$this->mDoContentConvert);
	// use the same variant and rule text as for the display above
	$this->applyManualFlag($flags,$bidtable,$unidtable,$variant,$rules);

	return $disp;
}
590
/**
 * Convert a page title for display.
 *
 * The text is returned untouched when title conversion is disabled, when
 * the request carries redirect=no or when the page is being edited. A
 * title supplied by a T-flagged rule takes precedence over automatic
 * conversion and is consumed by this call.
 *
 * @param string $text the title text
 * @return string the title to display
 */
function convertTitle($text){
	global $wgRequest;

	// check for __NOTC__ tag
	if( !$this->mDoTitleConvert ) {
		$this->mTitleDisplay = $text;
		return $text;
	}

	// use the title from the T flag if any
	if( $this->mTitleFromFlag ) {
		$this->mTitleFromFlag = false;
		return $this->mTitleDisplay;
	}

	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if( $isredir != 'no' && $action != 'edit' ) {
		$this->mTitleDisplay = $this->convert( $text );
		return $this->mTitleDisplay;
	}

	return $text;
}
614
/**
 * Convert text to different variants of a language. The automatic
 * conversion is done in autoConvert(). Here we parse the text
 * marked with -{}-, which specifies special conversions of the
 * text that can not be accomplished in autoConvert().
 *
 * Syntax of the markup:
 * -{code1:text1;code2:text2;...}-  or
 * -{flags|code1:text1;code2:text2;...}-  or
 * -{text}- in which case no conversion should take place for text
 *
 * @param string $text text to be converted
 * @param bool $isTitle whether this conversion is for the article title
 * @return string converted text
 * @access public
 */
function convert( $text , $isTitle=false) {

	// the magic words switch conversion off and are removed from the text
	$noTitleWord =& MagicWord::get( 'notitleconvert' );
	if( $noTitleWord->matchAndRemove( $text ) ) {
		$this->mDoTitleConvert = false;
	}
	$noContentWord =& MagicWord::get( 'nocontentconvert' );
	if( $noContentWord->matchAndRemove( $text ) ) {
		$this->mDoContentConvert = false;
	}

	// no conversion if redirecting
	$redirectWord =& MagicWord::get( 'redirect' );
	if( $redirectWord->matchStart( $text ) ) {
		return $text;
	}

	// for title conversion
	if( $isTitle ) {
		return $this->convertTitle( $text );
	}

	$variant = $this->getPreferredVariant();
	$open = $this->mMarkup['begin'];
	$close = $this->mMarkup['end'];

	$chunks = explode( $open, $text );
	$head = array_shift( $chunks );
	$out = $this->mDoContentConvert
		? $this->autoConvert( $head, $variant )
		: $head;

	foreach( $chunks as $chunk ) {
		$parts = explode( $close, $chunk, 2 );

		// strip the flags from syntax like -{T| ... }-
		list( $rules, $flags ) = $this->parseFlags( $parts[0] );

		$out .= $this->parseRules( $rules, $flags, $variant );

		// no text after the end marker (or no end marker at all)
		if( !array_key_exists( 1, $parts ) ) {
			continue;
		}
		$out .= $this->mDoContentConvert
			? $this->autoConvert( $parts[1], $variant )
			: $parts[1];
	}

	return $out;
}
675
/**
 * If a language supports multiple variants, it is
 * possible that non-existing link in one variant
 * actually exists in another variant. This function
 * tries to find it. See e.g. LanguageZh.php
 *
 * When a variant of the link exists, $nt is replaced by its title object
 * and, unless language conversion is disabled, $link by the text of that
 * same variant.
 *
 * @param string $link the name of the link
 * @param mixed $nt the title object of the link
 * @return null the input parameters may be modified upon return
 * @access public
 */
function findVariantLink( &$link, &$nt ) {
	global $wgDisableLangConversion;
	$linkBatch = new LinkBatch();

	$ns=NS_MAIN;

	if(is_object($nt))
		$ns = $nt->getNamespace();

	$variants = $this->autoConvertToAllVariants($link);
	if($variants == false) //give up
		return;

	// pairs of array( variant text, title object ), so that the text
	// belonging to the title that is found can be handed back
	$candidates = array();

	foreach( $variants as $v ) {
		if($v != $link){
			$varnt = Title::newFromText( $v, $ns );
			if(!is_null($varnt)){
				$linkBatch->addObj($varnt);
				$candidates[] = array( $v, $varnt );
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	foreach( $candidates as $candidate ) {
		list( $vtext, $varnt ) = $candidate;
		if( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			if( !$wgDisableLangConversion )
				$link = $vtext;
			break;
		}
	}
}
724
/**
 * Returns language specific hash options: the preferred variant
 * prefixed with '!'.
 *
 * @return string
 * @access public
 */
function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
734
/**
 * Get title text as defined in the body of the article text.
 *
 * @return string the current value of the title display text
 * @access public
 */
function getParsedTitle() {
	$title = $this->mTitleDisplay;
	return $title;
}
743
/**
 * A write lock to the cache. Tries up to 30 times, one second apart,
 * to add the lock key (with an expiry argument of 10).
 *
 * @return mixed the result of the successful add(), or the last
 *         failed result when the lock could not be obtained
 * @private
 */
function lockCache() {
	global $wgMemc;

	$lockKey = $this->mCacheKey . "lock";
	$acquired = false;
	$attempt = 0;
	while( $attempt < 30 ) {
		$acquired = $wgMemc->add( $lockKey, 1, 10 );
		if( $acquired ) {
			return $acquired;
		}
		sleep( 1 );
		$attempt++;
	}
	return $acquired;
}
759
/**
 * Unlock cache: delete the lock key added by lockCache().
 *
 * @private
 */
function unlockCache() {
	global $wgMemc;

	$lockKey = $this->mCacheKey . "lock";
	$wgMemc->delete( $lockKey );
}
769
770
/**
 * Load default conversion tables.
 * This method must be implemented in derived class; the base
 * implementation only reports the missing override via wfDie().
 *
 * @private
 */
function loadDefaultTables() {
	wfDie( "Must implement loadDefaultTables() method in class " . get_class( $this ) );
}
781
/**
 * Load conversion tables either from the cache or the disk.
 *
 * Tables from the cache are only used when they carry the current
 * CACHE_VERSION_KEY; otherwise the default tables are loaded, merged
 * with the tables parsed by parseCachedTable(), and written back to the
 * cache.
 *
 * @param bool $fromcache whether the cache may be consulted
 * @private
 */
function loadTables($fromcache=true) {
	global $wgMemc;

	if( $this->mTablesLoaded ) {
		return;
	}
	wfProfileIn( __METHOD__ );

	// set the flag first so the loading below does not come back here
	$this->mTablesLoaded = true;
	$this->mTables = false;

	if( $fromcache ) {
		wfProfileIn( __METHOD__.'-cache' );
		$this->mTables = $wgMemc->get( $this->mCacheKey );
		wfProfileOut( __METHOD__.'-cache' );
	}

	$usable = $this->mTables && isset( $this->mTables[self::CACHE_VERSION_KEY] );
	if( !$usable ) {
		wfProfileIn( __METHOD__.'-recache' );
		// not in cache, or we need a fresh reload.
		// we will first load the default tables
		// then update them using things in MediaWiki:Zhconversiontable/*
		$this->loadDefaultTables();
		foreach( $this->mVariants as $code ) {
			$custom = $this->parseCachedTable( $code );
			$this->mTables[$code]->mergeArray( $custom );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// only write the cache while holding the lock
		if( $this->lockCache() ) {
			$wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			$this->unlockCache();
		}
		wfProfileOut( __METHOD__.'-recache' );
	}

	wfProfileOut( __METHOD__ );
}
820
/**
 * Hook for post processing after conversion tables are loaded.
 * The base implementation does nothing.
 */
function postLoadTables() {
	// intentionally empty
}
826
/**
 * Reload the conversion tables: drop the tables held in memory and load
 * them again without consulting the cache.
 *
 * @private
 */
function reloadTables() {
	if( $this->mTables ) {
		unset( $this->mTables );
	}

	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
838
839
/**
 * Parse the conversion table stored in the cache.
 *
 * The tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * To make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive=true.
 *
 * Every page is parsed at most once per request: asking for a page that
 * was already parsed returns an empty array.
 *
 * @param string $code variant code whose table is wanted
 * @param string $subpage subpage below Conversiontable/$code, if any
 * @param bool $recursive whether linked subpages are parsed as well
 * @return array map of word => replacement
 */
function parseCachedTable($code, $subpage='', $recursive=true) {
	global $wgMessageCache;
	static $parsed = array();

	if(!is_object($wgMessageCache))
		return array();

	$key = 'Conversiontable/'.$code;
	if($subpage)
		$key .= '/' . $subpage;

	if(array_key_exists($key, $parsed))
		return array();


	$txt = $wgMessageCache->get( $key, true, true, true );

	// get all subpage links of the form
	// [[MediaWiki:conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText(NS_MEDIAWIKI) . ':Conversiontable';
	$subs = explode('[[', $txt);
	$sublinks = array();
	foreach( $subs as $sub ) {
		$link = explode(']]', $sub, 2);
		if(count($link) != 2)
			continue;
		$b = explode('|', $link[0]);
		$b = explode('/', trim($b[0]), 3);
		// a target without any "/" has no variant part to compare
		if(count($b) < 2)
			continue;
		if(count($b)==3)
			$sublink = $b[2];
		else
			$sublink = '';

		if($b[0] == $linkhead && $b[1] == $code) {
			$sublinks[] = $sublink;
		}
	}


	// parse the mappings in this page
	$blocks = explode($this->mMarkup['begin'], $txt);
	array_shift($blocks);
	$ret = array();
	foreach($blocks as $block) {
		$mappings = explode($this->mMarkup['end'], $block, 2);
		// quotes and list markers are dropped before the rules are split
		$stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
		$table = explode( ';', $stripped );
		foreach( $table as $t ) {
			$m = explode( '=>', $t );
			if( count( $m ) != 2)
				continue;
			// trim any trailing comments starting with '//'
			$tt = explode('//', $m[1], 2);
			$ret[trim($m[0])] = trim($tt[0]);
		}
	}
	$parsed[$key] = true;


	// recursively parse the subpages
	if($recursive) {
		foreach($sublinks as $link) {
			$s = $this->parseCachedTable($code, $link, $recursive);
			$ret = array_merge($ret, $s);
		}
	}

	// also map the capitalised form of every word
	if ($this->mUcfirst) {
		foreach ($ret as $k => $v) {
			$ret[Language::ucfirst($k)] = Language::ucfirst($v);
		}
	}
	return $ret;
}
928
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 *
 * Text that already contains a begin or end marker, at any position
 * including the very start, is returned unchanged.
 *
 * @param string $text text to be tagged for no conversion
 * @param bool $noParse not used by this implementation
 * @return string the tagged text
 */
function markNoConversion($text, $noParse=false) {
	# don't mark if already marked
	# (strpos() returns 0 for a marker at the start, so compare with false)
	if(strpos($text, $this->mMarkup['begin']) !== false ||
	   strpos($text, $this->mMarkup['end']) !== false)
		return $text;

	$ret = $this->mMarkup['begin'] .'R|'. $text . $this->mMarkup['end'];
	return $ret;
}
945
/**
 * Convert the sorting key for category links. This should make different
 * keys that are variants of each other map to the same key. The base
 * implementation hands the key back unchanged.
 *
 * @param string $key the sorting key
 * @return string the key to sort by
 */
function convertCategoryKey( $key ) {
	$sortKey = $key;
	return $sortKey;
}
/**
 * Hook to refresh the cache of conversion tables when
 * MediaWiki:conversiontable* is updated. The tables are reloaded when a
 * saved MediaWiki-namespace page is named Conversiontable/<variant>[/...]
 * for one of the supported variants.
 *
 * @return bool always true
 * @private
 */
function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section, $flags, $revision) {
	$titleobj = $article->getTitle();
	if( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return true;
	}

	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if( count( $parts ) > 1
		&& $parts[0] == 'Conversiontable'
		&& in_array( $parts[1], $this->mVariants ) ) {
		$this->reloadTables();
	}

	return true;
}
972
/**
 * Armour rendered math against conversion.
 * Wrap math into rawoutput -{R| math }- syntax.
 *
 * @param string $text the rendered math
 * @return string $text enclosed in the begin marker, 'R|' and the end marker
 */
function armourMath($text){
	return $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
}
981 }