3 * Client for querying zhdaemon
10 var $mHost, $mPort, $mFP, $mConnected;
17 function ZhClient($host, $port) {
20 $this->mConnected
= $this->connect();
24 * Check if connection to zhdaemon is successful
28 function isconnected() {
29 return $this->mConnected
;
33 * Establish connection
39 $this->mFP
= fsockopen($this->mHost
, $this->mPort
, $errno, $errstr, 30);
48 * Query the daemon and return the result
52 function query($request) {
53 if(!$this->mConnected
)
56 fwrite($this->mFP
, $request);
58 $result=fgets($this->mFP
, 1024);
60 list($status, $len) = explode(" ", $result);
61 if($status == 'ERROR') {
62 //$len is actually the error code...
63 print "zhdaemon error $len<br />\n";
68 while(!feof($this->mFP
) && $bytesread<$len) {
69 $str= fread($this->mFP
, $len-$bytesread);
70 $bytesread +
= strlen($str);
73 //data should be of length $len. otherwise something is wrong
74 if(strlen($data) != $len)
80 * Convert the input to a different language variant
82 * @param string $text input text
83 * @param string $tolang language variant
84 * @return string the converted text
87 function convert($text, $tolang) {
89 $q = "CONV $tolang $len\n$text";
90 $result = $this->query($q);
97 * Convert the input to all possible variants
99 * @param string $text input text
100 * @return array langcode => converted_string
103 function convertToAllVariants($text) {
104 $len = strlen($text);
105 $q = "CONV ALL $len\n$text";
106 $result = $this->query($q);
109 list($infoline, $data) = explode('|', $result, 2);
110 $info = explode(";", $infoline);
113 foreach($info as $variant) {
114 list($code, $len) = explode(' ', $variant);
115 $ret[strtolower($code)] = substr($data, $i, $len);
116 $r = $ret[strtolower($code)];
122 * Perform word segmentation
124 * @param string $text input text
125 * @return string segmented text
128 function segment($text) {
129 $len = strlen($text);
130 $q = "SEG $len\n$text";
131 $result = $this->query($q);
132 if(!$result) {// fallback to character based segmentation
133 $result = ZhClientFake
::segment($text);
139 * Close the connection
150 function ZhClientFake() {
151 global $wgMemc, $wgDBname;
152 $this->zh2TW
= $wgMemc->get($key1 = "$wgDBname:zhConvert:tw");
153 $this->zh2CN
= $wgMemc->get($key2 = "$wgDBname:zhConvert:cn");
154 $this->zh2SG
= $wgMemc->get($key3 = "$wgDBname:zhConvert:sg");
155 $this->zh2HK
= $wgMemc->get($key4 = "$wgDBname:zhConvert:hk");
156 if(empty($this->zh2TW
) ||
empty($this->zh2CN
) ||
empty($this->zh2SG
) ||
empty($this->zh2HK
)) {
157 require("includes/ZhConversion.php");
158 $this->zh2TW
= $zh2TW;
159 $this->zh2CN
= $zh2CN;
160 $this->zh2HK
= $zh2HK;
161 $this->zh2SG
= $zh2SG;
162 $wgMemc->set($key1, $this->zh2TW
);
163 $wgMemc->set($key2, $this->zh2CN
);
164 $wgMemc->set($key3, $this->zh2SG
);
165 $wgMemc->set($key4, $this->zh2HK
);
169 function isconnected() {
178 function zh2tw($text) {
179 return strtr($text, $this->zh2TW
);
187 function zh2cn($text) {
188 return strtr($text, $this->zh2CN
);
196 function zh2sg($text) {
197 return strtr(strtr($text, $this->zh2CN
), $this->zh2SG
);
205 function zh2hk($text) {
206 return strtr(strtr($text, $this->zh2TW
), $this->zh2HK
);
210 * Convert the input to a different language variant
212 * @param string $text input text
213 * @param string $tolang language variant
214 * @return string the converted text
217 function convert($text, $tolang) {
221 $t = $this->zh2cn($text);
224 $t = $this->zh2tw($text);
227 $t = $this->zh2sg($text);
230 $t = $this->zh2hk($text);
238 function convertToAllVariants($text) {
240 $ret['zh-cn'] = $this->zh2cn($text);
241 $ret['zh-tw'] = $this->zh2tw($text);
242 $ret['zh-sg'] = $this->zh2sg($text);
243 $ret['zh-hk'] = $this->zh2hk($text);
248 * Perform "fake" word segmentation, i.e. treating each character as a word
250 * @param string $text input text
251 * @return string segmented text
254 function segment($text) {
255 /* adapted from LanguageZh_cn::stripForSearch()
256 here we will first separate the single characters,
257 and let the caller convert it to hex
259 if( function_exists( 'mb_strtolower' ) ) {
261 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
263 mb_strtolower( $text ) );
265 global $wikiLowerChars;
267 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
268 "' ' . strtr( \"\$1\", \$wikiLowerChars )",
274 * Close the fake connection