3 * Client for querying zhdaemon
10 var $mHost, $mPort, $mFP, $mConnected;
17 function ZhClient($host, $port) {
20 $this->mConnected
= $this->connect();
24 * Check if connection to zhdaemon is successful
28 function isconnected() {
29 return $this->mConnected
;
33 * Establish connection
38 $this->mFP
= fsockopen($this->mHost
, $this->mPort
, $errno, $errstr, 30);
46 * Query the daemon and return the result
50 function query($request) {
51 if(!$this->mConnected
)
54 fwrite($this->mFP
, $request);
56 $result=fgets($this->mFP
, 1024);
58 list($status, $len) = explode(" ", $result);
59 if($status == 'ERROR') {
60 //$len is actually the error code...
61 print "zhdaemon error $len<br />\n";
66 while(!feof($this->mFP
) && $bytesread<$len) {
67 $str= fread($this->mFP
, $len-$bytesread);
68 $bytesread +
= strlen($str);
75 * Convert the input to a different language variant
77 * @param string $text input text
78 * @param string $tolang language variant
79 * @return string the converted text
82 function convert($text, $tolang) {
84 $q = "CONV $tolang $len\n$text";
85 $result = $this->query($q);
92 * Perform word segmentation
94 * @param string $text input text
95 * @return string segmented text
98 function segment($text) {
100 $q = "SEG $len\n$text";
101 $result = $this->query($q);
108 * Close the connection
120 function ZhClientFake() {
121 global $wgMemc, $wgDBname;
122 $this->zh2TW
= $wgMemc->get($key1 = "$wgDBname:zhConvert:tw");
123 $this->zh2CN
= $wgMemc->get($key2 = "$wgDBname:zhConvert:cn");
124 $this->zh2SG
= $wgMemc->get($key3 = "$wgDBname:zhConvert:sg");
125 $this->zh2HK
= $wgMemc->get($key4 = "$wgDBname:zhConvert:hk");
126 if(empty($this->zh2TW
) ||
empty($this->zh2CN
) ||
empty($this->zh2SG
) ||
empty($this->zh2HK
)) {
127 require_once("includes/ZhConversion.php");
128 $this->zh2TW
= $zh2TW;
129 $this->zh2CN
= $zh2CN;
130 $this->zh2HK
= $zh2HK;
131 $this->zh2SG
= $zh2SG;
132 $wgMemc->set($key1, $this->zh2TW
);
133 $wgMemc->set($key2, $this->zh2CN
);
134 $wgMemc->set($key3, $this->zh2SG
);
135 $wgMemc->set($key4, $this->zh2HK
);
139 function isconnected() {
148 function zh2tw($text) {
149 return strtr($text, $this->zh2TW
);
157 function zh2cn($text) {
158 return strtr($text, $this->zh2CN
);
166 function zh2sg($text) {
167 return strtr(strtr($text, $this->zh2CN
), $this->zh2SG
);
175 function zh2hk($text) {
176 return strtr(strtr($text, $this->zh2TW
), $this->zh2HK
);
180 * Convert the input to a different language variant
182 * @param string $text input text
183 * @param string $tolang language variant
184 * @return string the converted text
187 function convert($text, $tolang) {
191 $t = $this->zh2cn($text);
194 $t = $this->zh2tw($text);
197 $t = $this->zh2sg($text);
200 $t = $this->zh2hk($text);
209 * Perform "fake" word segmentation, i.e. treating each character as a word
211 * @param string $text input text
212 * @return string segmented text
215 function segment($text) {
216 /* copied from LanguageZh_cn.stripForSearch() */
217 if( function_exists( 'mb_strtolower' ) ) {
219 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
220 "' U8' . bin2hex( \"$1\" )",
221 mb_strtolower( $string ) );
223 global $wikiLowerChars;
225 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
226 "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
232 * Close the fake connection