Cruft war continues: more uses of isAnon() and isLoggedIn()
[lhc/web/wiklou.git] / languages / LanguageZh.php
1 <?php
2 require_once( "LanguageZh_cn.php");
3 require_once( "LanguageZh_tw.php");
4 require_once( "LanguageZh_sg.php");
5 require_once( "LanguageZh_hk.php");
6
/**
 * ArticleSaveComplete hook: refresh the cached conversion tables whenever a
 * MediaWiki:Zhconversiontable/<variant>[/<subpage>] page is saved.
 *
 * Only $article is used; the remaining parameters are part of the hook
 * signature and are ignored.
 *
 * @param object $article  saved article; must provide getTitle()
 * @param object $user     unused
 * @param string $text     unused
 * @param string $summary  unused
 * @param bool   $isminor  unused
 * @param bool   $iswatch  unused
 * @param mixed  $section  unused
 * @return bool always true
 */
function zhOnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section) {
	global $wgContLang;

	$titleobj = $article->getTitle();
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI )
		return true;

	// The content language must be a LanguageZh for reloadTables() to exist.
	// is_a() compares class names case-insensitively, so this works whether
	// get_class() would report 'languagezh' (PHP 4) or 'LanguageZh' (PHP 5+),
	// and it simply yields false when $wgContLang is not an object.
	if ( !is_a( $wgContLang, 'LanguageZh' ) )
		return true;

	// DB key layout: Zhconversiontable/<variant>[/<subpage>]
	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) > 1
		&& $parts[0] == 'Zhconversiontable'
		&& in_array( $parts[1], array( 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' ), true )
	) {
		$wgContLang->reloadTables();
	}
	return true;
}

// Register the handler so it runs after every article save.
$wgHooks['ArticleSaveComplete'][] = 'zhOnArticleSaveComplete';
31
/**
 * Language class that handles both Traditional and Simplified Chinese.
 *
 * Text is converted between the variants zh-cn, zh-tw, zh-sg and zh-hk by
 * means of word mapping tables. The tables are built from
 * includes/ZhConversion.php, extended with rules read from the
 * MediaWiki:Zhconversiontable/<variant> messages, and kept in memcached.
 */
class LanguageZh extends LanguageZh_cn {

	var $mZhLanguageCode = false; // preferred variant, resolved lazily by getPreferredVariant()
	var $mTables = false; // the mapping tables, keyed by variant code
	var $mTablesLoaded = false;
	var $mCacheKey;
	var $mDoTitleConvert = true, $mDoContentConvert = true;
	var $mParsedPages = array(); // message keys already visited by parseCachedTable() during the current load

	function LanguageZh() {
		global $wgDBname;
		$this->mCacheKey = $wgDBname . ":zhtables";
	}

	/**
	 * Try to take the write lock guarding the cached tables.
	 *
	 * Makes up to 30 attempts to add() the lock key, sleeping one second
	 * after each failed attempt.
	 *
	 * @return mixed result of the last $wgMemc->add() call (truthy on success)
	 */
	function lockCache() {
		global $wgMemc;
		$success = false;
		for ( $i = 0; $i < 30; $i++ ) {
			$success = $wgMemc->add( $this->mCacheKey . "lock", 1, 10 );
			if ( $success )
				break;
			sleep( 1 );
		}
		return $success;
	}

	/**
	 * Release the write lock taken by lockCache().
	 */
	function unlockCache() {
		global $wgMemc;
		$wgMemc->delete( $this->mCacheKey . "lock" );
	}

	/**
	 * Combine two word tables; entries of $override win over those of $base.
	 *
	 * The array union operator is used instead of array_merge() because
	 * array_merge() renumbers numeric keys, which would corrupt rules whose
	 * source word is a number (PHP stores the key "1" as integer 1).
	 *
	 * @param array $base      table providing the defaults
	 * @param array $override  table whose entries take precedence
	 * @return array
	 */
	function mergeTables( $base, $override ) {
		if ( !is_array( $base ) )
			$base = array();
		if ( !is_array( $override ) )
			$override = array();
		return $override + $base;
	}

	/**
	 * Add mappings to the table of one variant and write all tables back
	 * to the cache (if the lock can be obtained).
	 *
	 * @param string $code   variant code, e.g. 'zh-cn'
	 * @param array  $table  additional word => word mappings
	 */
	function updateTable( $code, $table ) {
		global $wgMemc;
		if ( !$this->mTablesLoaded )
			$this->loadTables();

		// tolerate a code that has no table yet
		$current = isset( $this->mTables[$code] ) ? $this->mTables[$code] : array();
		$this->mTables[$code] = $this->mergeTables( $current, $table );

		if ( $this->lockCache() ) {
			$wgMemc->delete( $this->mCacheKey );
			$wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			$this->unlockCache();
		}
	}

	/**
	 * Discard the tables held in memory and rebuild them from the
	 * conversion file and the MediaWiki: messages, bypassing the cache.
	 */
	function reloadTables() {
		$this->mTables = false;
		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}

	/**
	 * Load the conversion tables, either from the cache or from disk.
	 *
	 * Does nothing when the tables are already loaded. When built from
	 * disk, the result is stored in the cache if the lock can be obtained.
	 *
	 * @param bool $fromcache  try the memcached copy first
	 */
	function loadTables( $fromcache = true ) {
		global $wgMemc;
		if ( $this->mTablesLoaded )
			return;
		$this->mTablesLoaded = true;

		if ( $fromcache ) {
			$this->mTables = $wgMemc->get( $this->mCacheKey );
			if ( !empty( $this->mTables ) ) // all done
				return;
		}

		// not in cache, or we need a fresh reload.
		// first load the tables from file, then update them using the
		// rules in MediaWiki:Zhconversiontable/*
		require( "includes/ZhConversion.php" );
		$this->mTables = array(
			'zh-cn' => $zh2CN,
			'zh-tw' => $zh2TW,
			'zh-sg' => $this->mergeTables( $zh2CN, $zh2SG ),
			'zh-hk' => $this->mergeTables( $zh2TW, $zh2HK )
		);

		// start with a clean visited set so that every page is parsed
		// again, even if it was already parsed earlier in this request
		$this->mParsedPages = array();
		foreach ( array( 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' ) as $code ) {
			$custom = $this->parseCachedTable( $code );
			$this->mTables[$code] = $this->mergeTables( $this->mTables[$code], $custom );
		}

		if ( $this->lockCache() ) {
			$wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			$this->unlockCache();
		}
	}

	/**
	 * Parse the conversion table stored in the message cache.
	 *
	 * The tables should be in blocks of the following form:
	 *
	 *     -{
	 *       word => word ;
	 *       word => word ;
	 *       ...
	 *     }-
	 *
	 * To make the tables more manageable, subpages are allowed. They are
	 * found through links of the form
	 * [[MediaWiki:Zhconversiontable/zh-xx/...|...]] and are parsed
	 * recursively if $recursive is true. A page is parsed at most once per
	 * table load; a repeated visit yields an empty array.
	 *
	 * @param string $code       variant code, e.g. 'zh-cn'
	 * @param string $subpage    subpage name below the variant page, if any
	 * @param bool   $recursive  follow links to subpages
	 * @return array word => word mappings
	 */
	function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		global $wgMessageCache;

		if ( !is_object( $wgMessageCache ) )
			return array();

		$key = 'zhconversiontable/' . $code;
		if ( $subpage )
			$key .= '/' . $subpage;

		if ( !is_array( $this->mParsedPages ) )
			$this->mParsedPages = array();
		if ( array_key_exists( $key, $this->mParsedPages ) )
			return array();
		// mark before recursing so that pages linking to each other terminate
		$this->mParsedPages[$key] = true;

		$txt = $wgMessageCache->get( $key, true, true, true );

		// parse the mappings in this page
		$ret = array();
		$blocks = explode( '-{', $txt );
		array_shift( $blocks ); // text before the first -{ holds no rules
		foreach ( $blocks as $block ) {
			$mappings = explode( '}-', $block, 2 );
			$stripped = str_replace( array( "'", '"', '*', '#' ), '', $mappings[0] );
			foreach ( explode( ';', $stripped ) as $entry ) {
				$m = explode( '=>', $entry );
				if ( count( $m ) != 2 )
					continue;
				$from = trim( $m[0] );
				// a rule without a source word can never match, and an
				// empty key is not a usable replacement pair for strtr()
				if ( $from === '' )
					continue;
				// trim any trailing comment starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[$from] = trim( $tt[0] );
			}
		}

		if ( !$recursive )
			return $ret;

		// collect all subpage links of the form
		// [[MediaWiki:Zhconversiontable/zh-xx/...|...]]
		$linkhead = $this->getNsText( NS_MEDIAWIKI ) . ':Zhconversiontable';
		$sublinks = array();
		foreach ( explode( '[[', $txt ) as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 )
				continue;
			$target = explode( '|', $link[0] );
			$parts = explode( '/', trim( $target[0] ), 3 );
			if ( count( $parts ) < 2 ) // no variant segment in the link
				continue;
			if ( $parts[0] == $linkhead && $parts[1] == $code )
				$sublinks[] = ( count( $parts ) == 3 ) ? $parts[2] : '';
		}

		// recursively parse the subpages; their rules override this page's
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			$ret = $this->mergeTables( $ret, $s );
		}
		return $ret;
	}

	/**
	 * Get the preferred language variant.
	 *
	 * Sources, in order: the value remembered from an earlier call, the
	 * 'variant' request parameter, the 'variant' option of a logged-in
	 * user, and the first "zh-xx" code in the Accept-Language header.
	 * Falls back to 'zh'.
	 *
	 * @return string
	 */
	function getPreferredVariant() {
		global $wgUser, $wgRequest;

		if ( $this->mZhLanguageCode )
			return $this->mZhLanguageCode;

		$variants = $this->getVariants();

		// see if the preference is set in the request
		$zhreq = $wgRequest->getText( 'variant' );
		if ( in_array( $zhreq, $variants ) ) {
			$this->mZhLanguageCode = $zhreq;
			return $zhreq;
		}

		// get language variant preference from logged in users
		if ( $wgUser->isLoggedIn() ) {
			$this->mZhLanguageCode = $wgUser->getOption( 'variant' );
		}

		if ( !$this->mZhLanguageCode ) {
			// see if some zh- variant is set in the http header
			$this->mZhLanguageCode = "zh";
			if ( array_key_exists( 'HTTP_ACCEPT_LANGUAGE', $_SERVER ) ) {
				$header = str_replace( '_', '-', strtolower( $_SERVER["HTTP_ACCEPT_LANGUAGE"] ) );
				$zh = strstr( $header, 'zh-' );
				if ( $zh ) {
					// the header is client-supplied: only accept codes we
					// know (rejects e.g. 'zh-ha' cut out of 'zh-hans')
					$candidate = substr( $zh, 0, 5 );
					if ( in_array( $candidate, $variants ) )
						$this->mZhLanguageCode = $candidate;
				}
			}
		}
		return $this->mZhLanguageCode;
	}

	/**
	 * Put a space in front of every multi-byte sequence (a byte in
	 * 0xC0-0xFF followed by any number of bytes in 0x80-0xBF).
	 * This should give much better diff info.
	 *
	 * @param string $text
	 * @return string
	 */
	function segmentForDiff( $text ) {
		// plain back-reference replacement; no need for the /e modifier
		return preg_replace( '/([\xc0-\xff][\x80-\xbf]*)/', ' $1', $text );
	}

	/**
	 * Undo segmentForDiff(): drop the space in front of every multi-byte
	 * sequence.
	 *
	 * @param string $text
	 * @return string
	 */
	function unsegmentForDiff( $text ) {
		return preg_replace( '/ ([\xc0-\xff][\x80-\xbf]*)/', '$1', $text );
	}

	/**
	 * Convert text with the mapping table of one variant.
	 *
	 * @param string      $text
	 * @param string|bool $toVariant  variant code; false selects the
	 *                                preferred variant
	 * @return string the text unchanged if there is no table for the variant
	 */
	function autoConvert( $text, $toVariant = false ) {
		$fname = "LanguageZh::autoConvert";
		wfProfileIn( $fname );

		if ( !$this->mTablesLoaded )
			$this->loadTables();

		if ( !$toVariant )
			$toVariant = $this->getPreferredVariant();

		// 'zh' and unknown codes have no table: leave the text alone
		$ret = $text;
		if ( is_array( $this->mTables ) && !empty( $this->mTables[$toVariant] ) )
			$ret = strtr( $text, $this->mTables[$toVariant] );

		wfProfileOut( $fname );
		return $ret;
	}

	/**
	 * Convert text to every variant that has a mapping table.
	 *
	 * Each variant is converted exactly as autoConvert() would do it; the
	 * zh-sg and zh-hk tables already include the zh-cn and zh-tw rules.
	 *
	 * @param string $text
	 * @return array variant code => converted text
	 */
	function autoConvertToAllVariants( $text ) {
		$fname = "LanguageZh::autoConvertToAllVariants";
		wfProfileIn( $fname );
		if ( !$this->mTablesLoaded )
			$this->loadTables();

		$ret = array();
		foreach ( array( 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' ) as $code ) {
			$ret[$code] = $this->autoConvert( $text, $code );
		}
		wfProfileOut( $fname );
		return $ret;
	}

	/**
	 * Resolve the inside of one -{...}- markup for the given variant.
	 *
	 * @param string      $markup    text between "-{" and "}-"
	 * @param string      $plang     preferred variant code
	 * @param string|bool $fallback  fallback variant code, or false
	 * @return string the text for $plang, else the text for $fallback,
	 *                else the markup content unchanged
	 */
	function pickMarkedVariant( $markup, $plang, $fallback ) {
		$choices = explode( ";", $markup );
		if ( !array_key_exists( 1, $choices ) ) {
			/* a single choice: -{text}- */
			return $choices[0];
		}

		$preferred = false;
		$secondary = false;
		foreach ( $choices as $c ) {
			// limit 2: the text itself may contain ':' (e.g. a URL)
			$v = explode( ":", $c, 2 );
			if ( !array_key_exists( 1, $v ) ) {
				// syntax error in the markup, give up
				break;
			}
			$code = trim( $v[0] );
			$content = trim( $v[1] );
			if ( $code == $plang ) {
				$preferred = $content;
				break;
			}
			if ( $fallback !== false && $code == $fallback )
				$secondary = $content;
		}

		// compare explicitly so that a text of "0" counts as a choice
		if ( $preferred !== false && $preferred !== '' )
			return $preferred;
		if ( $secondary !== false && $secondary !== '' )
			return $secondary;
		return $markup;
	}

	/**
	 * Convert text to the preferred variant.
	 *
	 * The automatic conversion is done in autoConvert(). Here we parse the
	 * text marked with -{}-, which specifies special conversions that can
	 * not be accomplished in autoConvert().
	 *
	 * Syntax of the markup:
	 *   -{code1:text1;code2:text2;...}-   or
	 *   -{text}-   in which case no conversion should take place for text
	 *
	 * The NOTITLECONVERT / NOCONTENTCONVERT magic words are removed from
	 * the text and switch off title / content conversion on this object.
	 *
	 * @param string $text
	 * @param bool   $isTitle  true if $text is a page title
	 * @return string
	 */
	function convert( $text, $isTitle = false ) {
		global $wgDisableLangConversion;
		if ( $wgDisableLangConversion )
			return $text;

		$mw =& MagicWord::get( MAG_NOTITLECONVERT );
		if ( $mw->matchAndRemove( $text ) )
			$this->mDoTitleConvert = false;

		$mw =& MagicWord::get( MAG_NOCONTENTCONVERT );
		if ( $mw->matchAndRemove( $text ) )
			$this->mDoContentConvert = false;

		// no conversion if redirecting
		$mw =& MagicWord::get( MAG_REDIRECT );
		if ( $mw->matchStart( $text ) )
			return $text;

		if ( $isTitle ) {
			if ( !$this->mDoTitleConvert )
				return $text;

			global $wgRequest;
			$isredir = $wgRequest->getText( 'redirect', 'yes' );
			$action = $wgRequest->getText( 'action' );
			if ( $isredir == 'no' || $action == 'edit' )
				return $text;
			return $this->autoConvert( $text );
		}

		if ( !$this->mDoContentConvert )
			return $text;

		$plang = $this->getPreferredVariant();
		$fallback = $this->getVariantFallback( $plang );

		$tarray = explode( "-{", $text );
		$tfirst = array_shift( $tarray );
		$text = $this->autoConvert( $tfirst );
		foreach ( $tarray as $txt ) {
			// limit 2: keep everything after the first "}-" in one piece,
			// otherwise text following a stray "}-" would be lost
			$marked = explode( "}-", $txt, 2 );
			$text .= $this->pickMarkedVariant( $marked[0], $plang, $fallback );
			if ( array_key_exists( 1, $marked ) )
				$text .= $this->autoConvert( $marked[1] );
		}

		return $text;
	}

	/**
	 * @return array all variant codes known to this language
	 */
	function getVariants() {
		return array( "zh", "zh-cn", "zh-tw", "zh-sg", "zh-hk" );
	}

	/**
	 * Get the variant to use when a markup offers no text for $v.
	 *
	 * @param string $v  variant code
	 * @return string|bool fallback variant code, false for unknown codes
	 */
	function getVariantFallback( $v ) {
		$fallbacks = array(
			'zh'    => 'zh-cn',
			'zh-cn' => 'zh-sg',
			'zh-sg' => 'zh-cn',
			'zh-tw' => 'zh-hk',
			'zh-hk' => 'zh-tw'
		);
		if ( is_string( $v ) && isset( $fallbacks[$v] ) )
			return $fallbacks[$v];
		return false;
	}

	/**
	 * Prepare a string for the search index (word segmentation).
	 *
	 * @param string $string
	 * @return string
	 */
	function stripForSearch( $string ) {
		$fname = "LanguageZh::stripForSearch";
		wfProfileIn( $fname );

		// eventually this should be a word segmentation
		// for now just treat each character as a word
		$t = preg_replace( '/([\xc0-\xff][\x80-\xbf]*)/', ' $1', $string );

		// always convert to zh-cn before indexing. it should be
		// better to use zh-cn for search, since conversion from
		// Traditional to Simplified is less ambiguous than the
		// other way around
		$t = $this->autoConvert( $t, 'zh-cn' );
		$t = LanguageUtf8::stripForSearch( $t );

		wfProfileOut( $fname );
		return $t;
	}

	/**
	 * Expand search terms to their forms in all variants.
	 *
	 * @param array $termsArray  search terms
	 * @return array the distinct terms found in any variant
	 */
	function convertForSearchResult( $termsArray ) {
		$terms = implode( '|', $termsArray );
		$terms = implode( '|', $this->autoConvertToAllVariants( $terms ) );
		$ret = array_unique( explode( '|', $terms ) );
		return $ret;
	}

	/**
	 * Look for an existing page under one of the variant forms of a link.
	 *
	 * If a variant form of $link names a page with an article ID above 0,
	 * $nt is set to that title; $link is set to the variant text as well
	 * unless conversion is disabled or the preferred variant is 'zh'.
	 * The operation is skipped once it has been called more than 51 times
	 * in total. Lookup results are remembered per variant text so that a
	 * repeated link resolves the same way without a new lookup.
	 *
	 * @param string $link  link text (in/out)
	 * @param object $nt    title object (in/out)
	 */
	function findVariantLink( &$link, &$nt ) {
		static $count = 0; // used to limit this operation
		static $cache = array(); // variant text => Title of an existing page, or false
		global $wgDisableLangConversion;

		$pref = $this->getPreferredVariant();
		if ( $count > 50 )
			return;
		$count++;

		$variants = $this->autoConvertToAllVariants( $link );
		if ( $variants == false ) // give up
			return;

		foreach ( $variants as $v ) {
			if ( !array_key_exists( $v, $cache ) ) {
				$varnt = Title::newFromText( $v );
				$cache[$v] = ( $varnt && $varnt->getArticleID() > 0 ) ? $varnt : false;
			}
			if ( $cache[$v] ) {
				$nt = $cache[$v];
				if ( !$wgDisableLangConversion && $pref != 'zh' )
					$link = $v;
				break;
			}
		}
	}

	/**
	 * @return string '!' followed by the preferred variant code
	 */
	function getExtraHashOptions() {
		return '!' . $this->getPreferredVariant();
	}

}
462 ?>