842a4805a8de905e155b84574aec569b20ecd349
[lhc/web/wiklou.git] / languages / LanguageZh.php
1 <?php
2 require_once( "includes/ZhClient.php" );
3 require_once( "LanguageZh_cn.php");
4 require_once( "LanguageZh_tw.php");
5 require_once( "LanguageZh_sg.php");
6 require_once( "LanguageZh_hk.php");
7
8 /* class that handles both Traditional and Simplified Chinese
9 right now it only distinguish zh_cn and zh_tw (actuall, zh_cn and
10 non-zh_cn), will add support for zh_sg, zh_hk, etc, later.
11 */
/**
 * Language handler shared by the Chinese variants (zh-cn, zh-tw, zh-sg, zh-hk).
 *
 * Conversion between variants and word segmentation are delegated to a
 * ZhClient instance; a ZhClientFake is used instead when the zhdaemon is
 * disabled or cannot be reached.
 */
class LanguageZh extends LanguageZh_cn {

	/** Variant code resolved for the current request (e.g. "zh-cn"); false until resolved. */
	var $mZhLanguageCode = false;

	/** ZhClient or ZhClientFake used for conversion and segmentation; set by the constructor. */
	var $mZhClient = false;

	/**
	 * Constructor.
	 *
	 * Honours the logged-in user's 'nolangconversion' option by switching on
	 * the global $wgDisableLangConversion, resolves the preferred variant, and
	 * picks the conversion client: a real ZhClient when $wgUseZhdaemon is set
	 * and the connection succeeds, otherwise a ZhClientFake.
	 */
	function LanguageZh() {
		global $wgUseZhdaemon, $wgZhdaemonHost, $wgZhdaemonPort;
		global $wgDisableLangConversion, $wgUser;

		if( $wgUser->getID() != 0 ) {
			/* allow user to disable conversion */
			if( $wgDisableLangConversion == false &&
				$wgUser->getOption( 'nolangconversion' ) == 1 ) {
				$wgDisableLangConversion = true;
			}
		}

		$this->mZhLanguageCode = $this->getPreferredVariant();
		if( $wgUseZhdaemon ) {
			$this->mZhClient = new ZhClient( $wgZhdaemonHost, $wgZhdaemonPort );
			if( !$this->mZhClient->isconnected() ) {
				$this->mZhClient = false;
			}
		}
		// fallback to fake client
		if( $this->mZhClient == false ) {
			$this->mZhClient = new ZhClientFake();
		}
	}

	/**
	 * Get the preferred language variant for the current request.
	 *
	 * Logged-in users get their 'variant' option. Anonymous users get the
	 * first "zh-xx" code found in the Accept-Language header, or "zh-cn"
	 * when the header is missing or names no zh- variant. The result is
	 * cached in $mZhLanguageCode.
	 *
	 * @return string variant code (whatever getOption('variant') yields for
	 *                logged-in users)
	 */
	function getPreferredVariant() {
		global $wgUser;

		if( $this->mZhLanguageCode ) {
			return $this->mZhLanguageCode;
		}

		// get language variant preference for logged in users
		if( $wgUser->getID() != 0 ) {
			$this->mZhLanguageCode = $wgUser->getOption( 'variant' );
		}
		else {
			// see if some zh- variant is set in the http header
			$this->mZhLanguageCode = "zh-cn";
			// The header is absent for command-line runs and some clients;
			// check first so we do not raise an undefined-index notice.
			if( isset( $_SERVER["HTTP_ACCEPT_LANGUAGE"] ) ) {
				$header = str_replace( '_', '-', strtolower( $_SERVER["HTTP_ACCEPT_LANGUAGE"] ) );
				$zh = strstr( $header, 'zh-' );
				if( $zh ) {
					// NOTE(review): the first five bytes are taken verbatim and
					// not checked against getVariants(), so e.g. "zh-hant"
					// yields "zh-ha" - confirm how the client treats that.
					$this->mZhLanguageCode = substr( $zh, 0, 5 );
				}
			}
		}
		return $this->mZhLanguageCode;
	}

	/**
	 * Insert a space before every multi-byte UTF-8 character so that the
	 * diff engine sees each character as a separate word.
	 * This should give much better diff info.
	 *
	 * @param string $text
	 * @return string
	 */
	function segmentForDiff( $text ) {
		// Plain replacement string; the former /e (eval) modifier did nothing
		// more than this concatenation and is no longer supported by PHP.
		return preg_replace(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			' $1', $text );
	}

	/**
	 * Undo segmentForDiff(): drop the single space preceding each
	 * multi-byte UTF-8 character.
	 *
	 * @param string $text
	 * @return string
	 */
	function unsegmentForDiff( $text ) {
		return preg_replace(
			"/ ([\\xc0-\\xff][\\x80-\\xbf]*)/",
			'$1', $text );
	}

	/**
	 * Convert text to the given variant through the conversion client.
	 *
	 * @param string $text
	 * @param string|bool $toVariant target variant code; false means the
	 *                               preferred variant
	 * @return mixed result of the client's convert() call
	 */
	function autoConvert( $text, $toVariant = false ) {
		if( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
		}
		$fname = "zhautoConvert";
		wfProfileIn( $fname );
		$t = $this->mZhClient->convert( $text, $toVariant );
		wfProfileOut( $fname );
		return $t;
	}

	/**
	 * Convert text to all variants through the conversion client, falling
	 * back to Language::autoConvertToAllVariants() when the client returns
	 * a false-equivalent result.
	 *
	 * @param string $text
	 * @return mixed result of the client or of the fallback implementation
	 */
	function autoConvertToAllVariants( $text ) {
		$fname = "zhautoConvertToAll";
		wfProfileIn( $fname );
		$ret = $this->mZhClient->convertToAllVariants( $text );
		if( $ret == false ) { // fall back...
			$ret = Language::autoConvertToAllVariants( $text );
		}
		wfProfileOut( $fname );
		return $ret;
	}

	/**
	 * Convert a title to the preferred variant.
	 * Only titles having more than one character are converted.
	 *
	 * @param string $text
	 * @return mixed converted title, or $text unchanged for short titles
	 */
	function convertTitle( $text ) {
		$len = 0;
		if( function_exists( 'mb_strlen' ) ) {
			// Name the encoding explicitly so the count does not depend on
			// the mbstring internal-encoding setting.
			$len = mb_strlen( $text, 'UTF-8' );
		}
		else {
			// Without mbstring, estimate the length at three bytes per character.
			$len = strlen( $text ) / 3;
		}
		if( $len > 1 ) {
			return $this->autoConvert( $text );
		}
		return $text;
	}

	/**
	 * List the variant codes handled by this class.
	 *
	 * @return array
	 */
	function getVariants() {
		return array( "zh-cn", "zh-tw", "zh-sg", "zh-hk" );
	}

	/**
	 * Get the variant to fall back to for a given variant:
	 * zh-cn <-> zh-sg and zh-tw <-> zh-hk.
	 *
	 * @param string $v variant code
	 * @return string|bool fallback variant code, or false if $v is not recognised
	 */
	function getVariantFallback( $v ) {
		switch ( $v ) {
			case 'zh-cn': return 'zh-sg';
			case 'zh-sg': return 'zh-cn';
			case 'zh-tw': return 'zh-hk';
			case 'zh-hk': return 'zh-tw';
		}
		return false;
	}

	/**
	 * Prepare a string for the search index: word segmentation through the
	 * conversion client, conversion to zh-cn, then the LanguageUtf8
	 * stripForSearch() processing.
	 *
	 * @param string $string
	 * @return mixed result of LanguageUtf8::stripForSearch()
	 */
	function stripForSearch( $string ) {
		$fname = "zhsegment";
		wfProfileIn( $fname );
		// Always convert to zh-cn before indexing. It should be
		// better to use zh-cn for search, since conversion from
		// Traditional to Simplified is less ambiguous than the
		// other way around.
		$t = $this->mZhClient->segment( $string );
		$t = $this->autoConvert( $t, 'zh-cn' );
		$t = LanguageUtf8::stripForSearch( $t );
		wfProfileOut( $fname );
		return $t;
	}

	/**
	 * Expand search terms to every variant so that matches can be found
	 * regardless of the variant they are written in.
	 *
	 * The terms are joined with '|', converted to all variants in one call,
	 * then split again and de-duplicated.
	 *
	 * @param array $termsArray search terms
	 * @return array unique terms across all variants (keys as left by array_unique)
	 */
	function convertForSearchResult( $termsArray ) {
		$terms = implode( '|', $termsArray );
		$terms = implode( '|', $this->autoConvertToAllVariants( $terms ) );
		$ret = array_unique( explode( '|', $terms ) );
		return $ret;
	}

	/**
	 * Names of the extra options returned for hashing purposes.
	 * NOTE(review): presumably user options mixed into the page-cache key,
	 * since both affect rendered output - confirm against the caller.
	 *
	 * @return array
	 */
	function getExtraHashOptions() {
		return array( 'variant', 'nolangconversion' );
	}
}
154 ?>