From 7b5b13b7950af9eec2221f550c3e559c9087e740 Mon Sep 17 00:00:00 2001 From: Zheng Zhu Date: Mon, 22 Nov 2004 21:10:18 +0000 Subject: [PATCH] added convertToAllVariants() to query zhdaemon for all variants. --- includes/ZhClient.php | 46 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/includes/ZhClient.php b/includes/ZhClient.php index 8a6d8e292a..b2869e9fbc 100644 --- a/includes/ZhClient.php +++ b/includes/ZhClient.php @@ -35,7 +35,9 @@ class ZhClient { * @access private */ function connect() { + wfSuppressWarnings(); $this->mFP = fsockopen($this->mHost, $this->mPort, $errno, $errstr, 30); + wfRestoreWarnings(); if(!$this->mFP) { return false; } @@ -68,6 +70,9 @@ class ZhClient { $bytesread += strlen($str); $data .= $str; } + //data should be of length $len. otherwise something is wrong + if(strlen($data) != $len) + return false; return $data; } @@ -88,6 +93,29 @@ class ZhClient { return $result; } + /** + * Convert the input to all possible variants + * + * @param string $text input text + * @return array langcode => converted_string + * @access public + */ + function convertToAllVariants($text) { + $len = strlen($text); + $q = "CONV ALL $len\n$text"; + $result = $this->query($q); + if(!$result) + return false; + list($infoline, $data) = explode('|', $result); + $info = explode(";", $infoline); + $ret = array(); + $i=0; + foreach($info as $code => $len) { + $ret[strtolower($code)] = substr($data, $i, $len); + $i+=$len+1; + } + return $ret; + } /** * Perform word segmentation * @@ -99,8 +127,9 @@ class ZhClient { $len = strlen($text); $q = "SEG $len\n$text"; $result = $this->query($q); - if(!$result) - $result = $text; + if(!$result) {// fallback to character based segmentation + $result = ZhClientFake::segment($text); + } return $result; } @@ -205,6 +234,15 @@ class ZhClientFake { return $t; } + function convertToAllVariants($text) { + $ret = array(); + $ret['zh-cn'] = $this->zh2cn($text); + $ret['zh-tw'] = $this->zh2tw($text); + $ret['zh-sg'] = $this->zh2sg($text); + $ret['zh-hk'] = $this->zh2hk($text); + return $ret; + } + /** * Perform "fake" word segmentation, i.e. treating each character as a word * @@ -218,13 +256,13 @@ class ZhClientFake { return preg_replace( "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", "' U8' . bin2hex( \"$1\" )", - mb_strtolower( $string ) ); + mb_strtolower( $text ) ); } else { global $wikiLowerChars; return preg_replace( "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )", - $string ); + $text ); } } -- 2.20.1