X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2FZhClient.php;h=4299841b330a280114959e10cf3ab582a86d5b85;hb=d63121016d894e3fccf3308a26704472e69ec08f;hp=b9a93463ea80a48e5ff0cc2825dcf503a1d93b58;hpb=43cfe944d7b0430a4b46b4ff51879f7d06d9bc28;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/ZhClient.php b/includes/ZhClient.php index b9a93463ea..4299841b33 100644 --- a/includes/ZhClient.php +++ b/includes/ZhClient.php @@ -1,20 +1,40 @@ mHost = $host; $this->mPort = $port; $this->mConnected = $this->connect(); @@ -23,7 +43,7 @@ class ZhClient { /** * Check if connection to zhdaemon is successful * - * @access public + * @return bool */ function isconnected() { return $this->mConnected; @@ -33,246 +53,112 @@ class ZhClient { * Establish conncetion * * @access private + * + * @return bool */ function connect() { wfSuppressWarnings(); - $this->mFP = fsockopen($this->mHost, $this->mPort, $errno, $errstr, 30); + $errno = $errstr = ''; + $this->mFP = fsockopen( $this->mHost, $this->mPort, $errno, $errstr, 30 ); wfRestoreWarnings(); - if(!$this->mFP) { - return false; - } - return true; + return !$this->mFP; } /** * Query the daemon and return the result * * @access private + * + * @return string */ - function query($request) { - if(!$this->mConnected) + function query( $request ) { + if ( !$this->mConnected ) { return false; + } - fwrite($this->mFP, $request); + fwrite( $this->mFP, $request ); - $result=fgets($this->mFP, 1024); + $result = fgets( $this->mFP, 1024 ); - list($status, $len) = explode(" ", $result); - if($status == 'ERROR') { - //$len is actually the error code... + list( $status, $len ) = explode( ' ', $result ); + if( $status == 'ERROR' ) { + // $len is actually the error code... print "zhdaemon error $len
\n"; return false; } - $bytesread=0; - $data=''; - while(!feof($this->mFP) && $bytesread<$len) { - $str= fread($this->mFP, $len-$bytesread); - $bytesread += strlen($str); + $bytesread = 0; + $data = ''; + while( !feof( $this->mFP ) && $bytesread < $len ) { + $str = fread( $this->mFP, $len - $bytesread ); + $bytesread += strlen( $str ); $data .= $str; } - //data should be of length $len. otherwise something is wrong - if(strlen($data) != $len) - return false; - return $data; + // data should be of length $len. otherwise something is wrong + return strlen( $data ) == $len; } /** * Convert the input to a different language variant * - * @param string $text input text - * @param string $tolang language variant + * @param $text String: input text + * @param $tolang String: language variant * @return string the converted text - * @access public */ - function convert($text, $tolang) { - $len = strlen($text); + function convert( $text, $tolang ) { + $len = strlen( $text ); $q = "CONV $tolang $len\n$text"; - $result = $this->query($q); - if(!$result) + $result = $this->query( $q ); + if ( !$result ) { $result = $text; + } return $result; } /** - * Convert the input to all possible variants + * Convert the input to all possible variants * - * @param string $text input text + * @param $text String: input text * @return array langcode => converted_string - * @access public - */ - function convertToAllVariants($text) { - $len = strlen($text); + */ + function convertToAllVariants( $text ) { + $len = strlen( $text ); $q = "CONV ALL $len\n$text"; - $result = $this->query($q); - if(!$result) + $result = $this->query( $q ); + if ( !$result ) { return false; - list($infoline, $data) = explode('|', $result); - $info = explode(";", $infoline); + } + list( $infoline, $data ) = explode( '|', $result, 2 ); + $info = explode( ';', $infoline ); $ret = array(); - $i=0; - foreach($info as $code => $len) { - $ret[strtolower($code)] = substr($data, $i, $len); - $i+=$len+1; + $i = 0; + foreach( $info as $variant ) { + list( $code, $len ) = explode( ' ', $variant ); + $ret[strtolower( $code )] = substr( $data, $i, $len ); + $i += $len; } return $ret; - } + } + /** * Perform word segmentation * - * @param string $text input text + * @param $text String: input text * @return string segmented text - * @access public */ - function segment($text) { - $len = strlen($text); + function segment( $text ) { + $len = strlen( $text ); $q = "SEG $len\n$text"; - $result = $this->query($q); - if(!$result) {// fallback to character based segmentation - $result = ZhClientFake::segment($text); + $result = $this->query( $q ); + if ( !$result ) { // fallback to character based segmentation + $result = $this->segment( $text ); } return $result; } /** * Close the connection - * - * @access public */ function close() { - fclose($this->mFP); - } -} - - -class ZhClientFake { - - function ZhClientFake() { - global $wgMemc, $wgDBname; - $this->zh2TW = $wgMemc->get($key1 = "$wgDBname:zhConvert:tw"); - $this->zh2CN = $wgMemc->get($key2 = "$wgDBname:zhConvert:cn"); - $this->zh2SG = $wgMemc->get($key3 = "$wgDBname:zhConvert:sg"); - $this->zh2HK = $wgMemc->get($key4 = "$wgDBname:zhConvert:hk"); - if(empty($this->zh2TW) || empty($this->zh2CN) || empty($this->zh2SG) || empty($this->zh2HK)) { - require_once("includes/ZhConversion.php"); - global $zh2TW, $zh2CN, $zh2HK, $zh2SG; - $this->zh2TW = $zh2TW; - $this->zh2CN = $zh2CN; - $this->zh2HK = $zh2HK; - $this->zh2SG = $zh2SG; - $wgMemc->set($key1, $this->zh2TW); - $wgMemc->set($key2, $this->zh2CN); - $wgMemc->set($key3, $this->zh2SG); - $wgMemc->set($key4, $this->zh2HK); - } - } - - function isconnected() { - return true; - } - - /** - * Convert to zh-tw - * - * @access private - */ - function zh2tw($text) { - return strtr($text, $this->zh2TW); - } - - /** - * Convert to zh-cn - * - * @access private - */ - function zh2cn($text) { - return strtr($text, $this->zh2CN); - } - - /** - * Convert to zh-sg - * - * @access private - */ - function zh2sg($text) { - return strtr(strtr($text, $this->zh2CN), $this->zh2SG); - } - - /** - * Convert to zh-hk - * - * @access private - */ - function zh2hk($text) { - return strtr(strtr($text, $this->zh2TW), $this->zh2HK); + fclose( $this->mFP ); } - - /** - * Convert the input to a different language variant - * - * @param string $text input text - * @param string $tolang language variant - * @return string the converted text - * @access public - */ - function convert($text, $tolang) { - $t = ''; - switch($tolang) { - case 'zh-cn': - $t = $this->zh2cn($text); - break; - case 'zh-tw': - $t = $this->zh2tw($text); - break; - case 'zh-sg': - $t = $this->zh2sg($text); - break; - case 'zh-hk': - $t = $this->zh2hk($text); - break; - default: - $t = $text; - } - return $t; - } - - function convertToAllVariants($text) { - $ret = array(); - $ret['zh-cn'] = $this->zh2cn($text); - $ret['zh-tw'] = $this->zh2tw($text); - $ret['zh-sg'] = $this->zh2sg($text); - $ret['zh-hk'] = $this->zh2hk($text); - return $ret; - } - - /** - * Perform "fake" word segmentation, i.e. treating each character as a word - * - * @param string $text input text - * @return string segmented text - * @access public - */ - function segment($text) { - /* copied from LanguageZh_cn.stripForSearch() */ - if( function_exists( 'mb_strtolower' ) ) { - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' U8' . bin2hex( \"$1\" )", - mb_strtolower( $text ) ); - } else { - global $wikiLowerChars; - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )", - $text ); - } - } - - /** - * Close the fake connection - * - * @access public - */ - function close() { } } - -?> \ No newline at end of file