From c5320fc50d26e243472bc1b206b0e8948fa77263 Mon Sep 17 00:00:00 2001 From: Zheng Zhu Date: Tue, 9 Nov 2004 21:41:30 +0000 Subject: [PATCH] use the ZhClient interface to do the Chinese text processing --- includes/DefaultSettings.php | 5 +++ languages/LanguageZh.php | 73 ++++++++++++------------------------ 2 files changed, 29 insertions(+), 49 deletions(-) diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 4741378cd9..6d89f1f44a 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -341,6 +341,11 @@ $wgPartialMessageCache = false; # code, so that if it breaks, only zh will be affected $wgDisableLangConversion = true; +# Whether to use zhdaemon to perform Chinese text processing +$wgUseZhdaemon = false; +$wgZhdaemonHost="localhost"; +$wgZhdaemonPort=2004; + # Miscellaneous configuration settings # diff --git a/languages/LanguageZh.php b/languages/LanguageZh.php index d4e4f010e4..c80a4afa56 100644 --- a/languages/LanguageZh.php +++ b/languages/LanguageZh.php @@ -1,20 +1,9 @@ get($key1 = "$wgDBname:zhConvert:tw"); -$zh2CN = $wgMemc->get($key2 = "$wgDBname:zhConvert:cn"); -$zh2SG = $wgMemc->get($key3 = "$wgDBname:zhConvert:sg"); -$zh2HK = $wgMemc->get($key4 = "$wgDBname:zhConvert:hk"); -if(empty($zh2TW) || empty($zh2CN) || empty($zh2SG) || empty($zh2HK)) { - require_once("includes/ZhConversion.php"); - $wgMemc->set($key1, $zh2TW); - $wgMemc->set($key2, $zh2CN); - $wgMemc->set($key3, $zh2SG); - $wgMemc->set($key4, $zh2HK); -} /* class that handles both Traditional and Simplified Chinese right now it only distinguish zh_cn and zh_tw (actuall, zh_cn and @@ -23,9 +12,20 @@ if(empty($zh2TW) || empty($zh2CN) || empty($zh2SG) || empty($zh2HK)) { class LanguageZh extends LanguageZh_cn { var $mZhLanguageCode=false; - + var $mZhClient=false; function LanguageZh() { + global $wgUseZhdaemon, $wgZhdaemonHost, $wgZhdaemonPort; + global $wgDisableLangConversion; + $this->mZhLanguageCode = $this->getPreferredVariant(); + if($wgUseZhdaemon) { + $this->mZhClient=new ZhClient($wgZhdaemonHost, $wgZhdaemonPort); + if(!$this->mZhClient->isconnected()) + $this->mZhClient = false; + } + // fallback to fake client + if($this->mZhClient == false) + $this->mZhClient=new ZhClientFake(); } /* @@ -56,48 +56,13 @@ class LanguageZh extends LanguageZh_cn { } - /* the Simplified/Traditional conversion stuff */ - - function zh2tw($text) { - global $zh2TW; - return strtr($text, $zh2TW); - } - - function zh2cn($text) { - global $zh2CN; - return strtr($text, $zh2CN); - } - - function zh2sg($text) { - global $zh2SG, $zh2CN; - return strtr(strtr($text, $zh2CN), $zh2SG); - } - - function zh2hk($text) { - global $zh2HK, $zh2TW; - return strtr(strtr($text, $zh2TW), $zh2HK); - } function autoConvert($text, $toVariant=false) { if(!$toVariant) $toVariant = $this->getPreferredVariant(); $fname="zhconvert"; wfProfileIn( $fname ); - $t = $text; - switch($toVariant) { - case 'zh-cn': - $t = $this->zh2cn($text); - break; - case 'zh-tw': - $t = $this->zh2tw($text); - break; - case 'zh-sg': - $t = $this->zh2sg($text); - break; - case 'zh-hk': - $t = $this->zh2hk($text); - break; - } + $t = $this->mZhClient->convert($text, $toVariant); wfProfileOut( $fname ); return $t; } @@ -127,5 +92,15 @@ class LanguageZh extends LanguageZh_cn { } return false; } + + // word segmentation through ZhClient + function stripForSearch( $string ) { + $fname="zhsegment"; + wfProfileIn( $fname ); + $t = $this->mZhClient->segment($string); + wfProfileOut( $fname ); + return $t; + + } } ?> -- 2.20.1