Added magic word MAG_NOTITLECONVERT to indicate that the title of the page do not...
[lhc/web/wiklou.git] / languages / LanguageZh.php
1 <?php
2 require_once( "LanguageZh_cn.php");
3 require_once( "LanguageZh_tw.php");
4 require_once( "LanguageZh_sg.php");
5 require_once( "LanguageZh_hk.php");
6
/**
 * ArticleSaveComplete hook handler: refresh the cached conversion tables when
 * one of the MediaWiki:Zhconversiontable/<variant> pages is saved.
 *
 * Only $article is inspected; the remaining parameters are accepted because
 * they are part of the hook signature and are otherwise unused here.
 *
 * @param object $article  saved article; must provide getTitle()
 * @param mixed  $user     unused
 * @param mixed  $text     unused
 * @param mixed  $summary  unused
 * @param mixed  $isminor  unused
 * @param mixed  $iswatch  unused
 * @param mixed  $section  unused
 * @return bool always true. NOTE(review): the hook runner presumably treats a
 *              non-true result as "abort"; confirm against Hooks.php.
 */
function zhOnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section) {
	$titleobj = $article->getTitle();
	if( $titleobj->getNamespace() != NS_MEDIAWIKI )
		return true;

	global $wgContLang; // should be a LanguageZh.
	// get_class() yields the lower-cased name on PHP 4 but the declared case
	// on PHP 5+, so normalise it before comparing.
	if( !is_object( $wgContLang ) || strtolower( get_class( $wgContLang ) ) != 'languagezh' )
		return true;

	// Expected key shape: "Zhconversiontable/<variant>". A key without a
	// slash yields a single element, hence the count() check.
	$t = explode( '/', $titleobj->getDBkey(), 2 );
	if( count( $t ) == 2
		&& $t[0] == 'Zhconversiontable'
		&& in_array( $t[1], array( 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' ) ) ) {
		$wgContLang->reloadTables();
	}

	// Every path reports success, including the one that reloaded the tables.
	return true;
}

// Register the handler under the ArticleSaveComplete hook name.
$wgHooks['ArticleSaveComplete'][] = 'zhOnArticleSaveComplete';
29
/**
 * Language class that handles both Traditional and Simplified Chinese.
 *
 * Text is converted between the variants zh-cn, zh-tw, zh-sg and zh-hk using
 * mapping tables read from includes/ZhConversion.php, extended by the
 * zhconversiontable/<variant> messages when a message cache is available.
 * zh-sg is produced by applying the zh-sg table on top of the zh-cn result,
 * zh-hk by applying the zh-hk table on top of the zh-tw result. The plain
 * "zh" variant means "no automatic conversion".
 */
class LanguageZh extends LanguageZh_cn {

	// preferred variant code, resolved lazily by getPreferredVariant()
	var $mZhLanguageCode=false;
	// the mapping tables, keyed by variant code
	var $mTables=false;
	// true once loadTables() has run
	var $mTablesLoaded = false;
	// memcached key under which the tables are stored
	var $mCacheKey;
	// cleared by convert() when the NOTITLECONVERT magic word is found
	var $mDoTitleConvert = true;

	/**
	 * Build the cache key from the database name.
	 */
	function __construct() {
		global $wgDBname;
		$this->mCacheKey = $wgDBname . ":zhtables";
	}

	/**
	 * PHP 4 style constructor, kept so that PHP 4 and any explicit
	 * LanguageZh() call keep working. PHP 8 no longer treats a method named
	 * after the class as the constructor, hence __construct() above.
	 */
	function LanguageZh() {
		$this->__construct();
	}

	/**
	 * Drop the cached tables and rebuild them.
	 */
	function reloadTables() {
		global $wgMemc;
		$wgMemc->delete($this->mCacheKey);
		$this->mTablesLoaded=false;
		$this->loadTables();
	}

	/**
	 * Load the conversion tables, either from the cache or from disk.
	 * Does nothing if they were already loaded on this object.
	 */
	function loadTables() {
		global $wgMemc;
		if( $this->mTablesLoaded )
			return;
		$this->mTablesLoaded = true;
		$this->mTables = $wgMemc->get( $this->mCacheKey );
		if( !empty( $this->mTables ) )
			return;

		global $wgMessageCache;
		// defines $zh2CN, $zh2TW, $zh2SG and $zh2HK in this scope
		require( "includes/ZhConversion.php" );
		$this->mTables = array(
			'zh-cn' => $zh2CN,
			'zh-tw' => $zh2TW,
			'zh-sg' => $zh2SG,
			'zh-hk' => $zh2HK
		);
		if( is_object( $wgMessageCache ) ) {
			foreach( array_keys( $this->mTables ) as $variant ) {
				$custom = $this->parseCachedTable(
					$wgMessageCache->get( 'zhconversiontable/' . $variant, true, true, true ) );
				// Array union rather than array_merge(): array_merge()
				// renumbers integer-like keys, so a rule such as
				// "2005 => ..." would end up under key 0. With the union
				// the custom rules still win over the built-in ones.
				$this->mTables[$variant] = $custom + $this->mTables[$variant];
			}
		}
		// 43200 is the expiry passed to the cache (presumably seconds, i.e. 12 hours)
		$wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
	}

	/**
	 * Parse a conversion table stored in the message cache.
	 *
	 * The table should be in the following format:
	 *
	 *   -{
	 *   word => word ;
	 *   word => word ;
	 *   ...
	 *   }-
	 *
	 * Only the first -{ ... }- section is read. '*' and '#' characters are
	 * removed before parsing, and entries that do not consist of exactly one
	 * "=>" pair are ignored.
	 *
	 * @param string $txt raw message text
	 * @return array map of source word => replacement word
	 */
	function parseCachedTable($txt) {
		/* $txt should be enclosed by -{ and }- */
		$parts = explode( '-{', $txt );
		if( count( $parts ) < 2 )
			return array();
		$body = explode( '}-', $parts[1] );

		$stripped = str_replace( array( '*', '#' ), '', $body[0] );
		$ret = array();
		foreach( explode( ';', $stripped ) as $rule ) {
			$m = explode( '=>', $rule );
			if( count( $m ) != 2 )
				continue;
			$from = trim( $m[0] );
			// An empty search key makes strtr() return false before PHP 8,
			// which would blank every converted text; skip such rules.
			if( $from === '' )
				continue;
			$ret[$from] = trim( $m[1] );
		}
		return $ret;
	}

	/**
	 * Get the preferred language variant.
	 *
	 * Checked in order: the value already resolved on this object, the
	 * "variant" request parameter, the "variant" option of a logged-in user,
	 * and finally the first "zh-xx" tag in the Accept-Language header.
	 * Falls back to "zh".
	 *
	 * @return string variant code
	 */
	function getPreferredVariant() {
		global $wgUser, $wgRequest;

		if($this->mZhLanguageCode)
			return $this->mZhLanguageCode;

		$variants = $this->getVariants();

		// see if the preference is set in the request
		$zhreq = $wgRequest->getText( 'variant' );
		if( in_array( $zhreq, $variants ) ) {
			$this->mZhLanguageCode = $zhreq;
			return $zhreq;
		}

		// get language variant preference from logged in users
		if($wgUser->getID()!=0) {
			$this->mZhLanguageCode = $wgUser->getOption('variant');
		}

		if( !$this->mZhLanguageCode ) {
			// see if some zh- variant is set in the http header
			$this->mZhLanguageCode="zh";
			// the header is absent for command line runs and some clients
			if( isset( $_SERVER["HTTP_ACCEPT_LANGUAGE"] ) ) {
				$header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
				$zh = strstr($header, 'zh-');
				if($zh) {
					// Only accept codes we can convert to; a 5 byte slice
					// of e.g. "zh-hans" or "zh-mo" is not a known variant.
					$candidate = substr($zh,0,5);
					if( in_array( $candidate, $variants ) )
						$this->mZhLanguageCode = $candidate;
				}
			}
		}
		return $this->mZhLanguageCode;
	}

	/**
	 * Insert a space before every multi-byte UTF-8 sequence;
	 * this should give much better diff info.
	 */
	function segmentForDiff( $text ) {
		// Plain backreference instead of the /e (eval) modifier, which was
		// removed in PHP 7; the result is the same.
		return preg_replace(
			'/([\xc0-\xff][\x80-\xbf]*)/',
			' $1', $text);
	}

	/**
	 * Undo segmentForDiff(): remove the space in front of every multi-byte
	 * UTF-8 sequence.
	 */
	function unsegmentForDiff( $text ) {
		return preg_replace(
			'/ ([\xc0-\xff][\x80-\xbf]*)/',
			'$1', $text);
	}

	/**
	 * Convert text to a variant using the mapping tables only.
	 *
	 * @param string $text
	 * @param string|false $toVariant target variant; false selects the
	 *        preferred variant
	 * @return string converted text; unchanged for any code other than
	 *         zh-cn, zh-tw, zh-sg and zh-hk
	 */
	function autoConvert($text, $toVariant=false) {
		$fname="LanguageZh::autoConvert";
		wfProfileIn( $fname );

		if(!$this->mTablesLoaded)
			$this->loadTables();

		if(!$toVariant)
			$toVariant = $this->getPreferredVariant();

		switch( $toVariant ) {
			case 'zh-cn':
			case 'zh-tw':
				$ret = strtr($text, $this->mTables[$toVariant]);
				break;
			case 'zh-sg':
				$ret = strtr(strtr($text, $this->mTables['zh-cn']), $this->mTables['zh-sg']);
				break;
			case 'zh-hk':
				$ret = strtr(strtr($text, $this->mTables['zh-tw']), $this->mTables['zh-hk']);
				break;
			default:
				$ret = $text;
		}
		wfProfileOut( $fname );
		return $ret;
	}

	/**
	 * Convert text to every convertible variant.
	 *
	 * @param string $text
	 * @return array converted text keyed by zh-cn, zh-tw, zh-sg, zh-hk
	 *         (in that order)
	 */
	function autoConvertToAllVariants($text) {
		$fname="LanguageZh::autoConvertToAllVariants";
		wfProfileIn( $fname );
		if( !$this->mTablesLoaded )
			$this->loadTables();

		$ret = array();
		$ret['zh-cn'] = strtr($text, $this->mTables['zh-cn']);
		$ret['zh-tw'] = strtr($text, $this->mTables['zh-tw']);
		// zh-sg and zh-hk are built on the zh-cn and zh-tw results, so reuse
		// them instead of running the large tables a second time
		$ret['zh-sg'] = strtr($ret['zh-cn'], $this->mTables['zh-sg']);
		$ret['zh-hk'] = strtr($ret['zh-tw'], $this->mTables['zh-hk']);
		wfProfileOut( $fname );
		return $ret;
	}

	/**
	 * Convert text to the preferred variant. The automatic conversion is
	 * done in autoConvert(); here we parse the text marked with -{}-, which
	 * specifies special conversions of the text that can not be
	 * accomplished in autoConvert().
	 *
	 * Syntax of the markup:
	 *   -{code1:text1;code2:text2;...}- or
	 *   -{text}- in which case no conversion takes place for text
	 *
	 * Side effect: if the NOTITLECONVERT magic word is found it is removed
	 * from the local copy of $text and title conversion is switched off on
	 * this object.
	 *
	 * @param string $text
	 * @param bool $isTitle true when $text is a page title
	 * @return string
	 */
	function convert( $text , $isTitle=false) {
		global $wgDisableLangConversion;
		if($wgDisableLangConversion)
			return $text;

		$mw =& MagicWord::get( MAG_NOTITLECONVERT );
		if( $mw->matchAndRemove( $text ) )
			$this->mDoTitleConvert = false;

		// no conversion if redirecting
		$mw =& MagicWord::get( MAG_REDIRECT );
		if( $mw->matchStart( $text ))
			return $text;

		if( $isTitle ) {
			if( !$this->mDoTitleConvert )
				return $text;

			global $wgRequest;
			$isredir = $wgRequest->getText( 'redirect', 'yes' );
			$action = $wgRequest->getText( 'action' );
			if ( $isredir == 'no' || $action == 'edit' )
				return $text;
			return $this->autoConvert($text);
		}

		$plang = $this->getPreferredVariant();
		$fallback = $this->getVariantFallback($plang);

		$tarray = explode("-{", $text);
		$tfirst = array_shift($tarray);
		$text = $this->autoConvert($tfirst);
		foreach($tarray as $txt) {
			// Limit of 2: everything after the first "}-" is ordinary text.
			// Without the limit, text following a second stray "}-" in the
			// same segment was silently dropped.
			$marked = explode("}-", $txt, 2);

			$choice = explode(";", $marked[0]);
			if(!array_key_exists(1, $choice)) {
				/* a single choice */
				$text .= $choice[0];
			} else {
				$choice1=false;
				$choice2=false;
				foreach($choice as $c) {
					$v = explode(":", $c);
					if(!array_key_exists(1, $v)) {
						//syntax error in the markup, give up
						break;
					}
					$code = trim($v[0]);
					$content = trim($v[1]);
					if($code == $plang) {
						$choice1 = $content;
						break;
					}
					// $fallback is false for unknown variants; guard so an
					// empty code does not loosely equal false
					if($fallback !== false && $code == $fallback)
						$choice2 = $content;
				}
				// Explicit tests instead of truthiness so that the literal
				// content "0" is accepted; empty content still falls through
				if ( $choice1 !== false && $choice1 !== '' )
					$text .= $choice1;
				elseif ( $choice2 !== false && $choice2 !== '' )
					$text .= $choice2;
				else
					$text .= $marked[0];
			}
			if(array_key_exists(1, $marked))
				$text .= $this->autoConvert($marked[1]);
		}

		return $text;
	}


	/**
	 * @return array all variant codes, including the unconverted "zh"
	 */
	function getVariants() {
		return array("zh", "zh-cn", "zh-tw", "zh-sg", "zh-hk");
	}

	/**
	 * Get the variant to fall back on when -{}- markup has no entry for $v.
	 *
	 * @param string $v variant code
	 * @return string|false fallback code, or false for an unknown variant
	 */
	function getVariantFallback($v) {
		switch ($v) {
			case 'zh': return 'zh-cn';
			case 'zh-cn': return 'zh-sg';
			case 'zh-sg': return 'zh-cn';
			case 'zh-tw': return 'zh-hk';
			case 'zh-hk': return 'zh-tw';
		}
		return false;
	}

	/**
	 * Prepare a string for the search index (word segmentation).
	 */
	function stripForSearch( $string ) {
		$fname="LanguageZh::stripForSearch";
		wfProfileIn( $fname );

		// eventually this should be a word segmentation
		// for now just treat each character as a word
		// (plain backreference; the /e modifier was removed in PHP 7)
		$t = preg_replace(
			'/([\xc0-\xff][\x80-\xbf]*)/',
			' $1', $string);

		//always convert to zh-cn before indexing. it should be
		//better to use zh-cn for search, since conversion from
		//Traditional to Simplified is less ambiguous than the
		//other way around

		$t = $this->autoConvert($t, 'zh-cn');
		$t = LanguageUtf8::stripForSearch( $t );
		wfProfileOut( $fname );
		return $t;

	}

	/**
	 * Expand search terms to their forms in every variant.
	 *
	 * @param array $termsArray search terms
	 * @return array unique terms across all variants
	 */
	function convertForSearchResult( $termsArray ) {
		$terms = implode( '|', $termsArray );
		$terms = implode( '|', $this->autoConvertToAllVariants( $terms ) );
		$ret = array_unique( explode('|', $terms) );
		return $ret;
	}

	/**
	 * Look for an existing page under one of the variant spellings of $link.
	 * When one is found, $link and $nt are overwritten with the variant text
	 * and its Title object; otherwise both are left untouched.
	 *
	 * @param string $link link text, updated in place
	 * @param object $nt title object, updated in place
	 */
	function findVariantLink( &$link, &$nt ) {
		static $count=0; //used to limit this operation
		static $cache=array(); //variant spellings already looked up
		global $wgDisableLangConversion;
		$pref = $this->getPreferredVariant();
		if( $wgDisableLangConversion || $pref == 'zh' || $count > 50)
			return;
		$count++;
		$variants = $this->autoConvertToAllVariants($link);
		if($variants == false) //give up
			return;
		foreach( $variants as $v ) {
			if(isset($cache[$v]))
				continue;
			$cache[$v] = 1;
			$varnt = Title::newFromText( $v );
			if( $varnt && $varnt->getArticleID() > 0 ) {
				$nt = $varnt;
				$link = $v;
				break;
			}
		}
	}

	/**
	 * @return string '!' followed by the preferred variant code
	 *         (presumably appended to a cache hash by the caller)
	 */
	function getExtraHashOptions() {
		$variant = $this->getPreferredVariant();
		return '!' . $variant ;
	}
}
357 ?>