Handle automatic reassignment of edits from duplicate accounts to the main
[lhc/web/wiklou.git] / languages / LanguageZh.php
index 6ed75a9..f7f3255 100644 (file)
 <?php
-require_once( "LanguageUtf8.php" );
+/**
+  * @package MediaWiki
+  * @subpackage Language
+  */
+require_once( "LanguageConverter.php" );
 require_once( "LanguageZh_cn.php");
 require_once( "LanguageZh_tw.php");
-require_once( "ZhConversion.php");
-
-/* class that handles both Traditional and Simplified Chinese
-   right now it only distinguish zh_cn and zh_tw (actuall, zh_cn and
-   non-zh_cn), will add support for zh_sg, zh_hk, etc, later.
-*/
-class LanguageZh extends LanguageUtf8 {
-    
-    var $mZhLang=false, $mZhLanguageCode=false;
-
-    function LanguageZh() {
-        $this->mZhLanguageCode = $this->getPreferredLanguage();
-        if($this->mZhLanguageCode == "cn") {
-            $this->mZhLang = new LanguageZh_cn();
-        }
-        else {
-            $this->mZhLang = new LanguageZh_tw();
-        }
+require_once( "LanguageZh_sg.php");
+require_once( "LanguageZh_hk.php");
+
+class ZhConverter extends LanguageConverter {
+       function loadDefaultTables() {
+               require( "includes/ZhConversion.php" );
+               $this->mTables = array();
+               $this->mTables['zh-cn'] = $zh2CN;
+               $this->mTables['zh-tw'] = $zh2TW;
+               $this->mTables['zh-sg'] = array_merge($zh2CN, $zh2SG);
+               $this->mTables['zh-hk'] = array_merge($zh2TW, $zh2HK);
+               $this->mTables['zh'] = array();
+       }
+
+       function postLoadTables() {
+               $this->mTables['zh-sg'] = array_merge($this->mTables['zh-cn'], $this->mTables['zh-sg']);
+               $this->mTables['zh-hk'] = array_merge($this->mTables['zh-tw'], $this->mTables['zh-hk']);
     }
 
-    /* 
-       get preferred language variants. eventually this will check the
-       user's preference setting as well, once the language option in
-       the setting pages is finalized.
+       /* There shouldn't be any Latin text in Chinese conversion, so there is
+          no need to mark anything.
     */
-    function getPreferredLanguage() {
-        if($this->mZhLanguageCode)
-            return $this->mZhLanguageCode;
-        
-        $this->mZhLanguageCode="cn";
-               $value = $_SERVER["HTTP_ACCEPT_LANGUAGE"];
-               $zh = explode("zh-", $value);
-               array_shift($zh);
-               $l = array_shift($zh);
-               if($l != NULL) {
-            $this->mZhLanguageCode = strtolower(substr($l,0,2));
-               }
-        
-        return $this->mZhLanguageCode;
-    }
-    
-    
-  /* the Simplified/Traditional conversion stuff */
-
-       function simp2trad($text) {
-               global $wgZhSimp2Trad;
-               return strtr($text, $wgZhSimp2Trad);
-       }
-
-       function trad2simp($text) {
-               global $wgZhTrad2Simp;
-               return strtr($text, $wgZhTrad2Simp);
-       }
-       
-       function convert($text) {
-               
-               // no conversion if redirecting
-               if(substr($text,0,9) == "#REDIRECT") {
-                       return $text;
-               }
-               // determine the preferred language from the request header
-               $tolang = $this->getPreferredLanguage();
-       
-               $ltext = explode("-{", $text);
-               $lfirst = array_shift($ltext);
-               
-               if($tolang == "cn") {
-                       $text = $this->trad2simp($lfirst);
-               }
-               else {
-                       $text = $this->simp2trad($lfirst);
-               }
-               
-               foreach ($ltext as $i => $txt) {
-                       $a = explode("}-", $txt);
-                       $b = explode("zh-", $a{0});
-                       if($b{1}==NULL) {
-                               $text = $text.$b{0};
-                       }
-                       else {
-                               foreach ($b as $j => $lang) {
-                                       if(substr($lang,0,2) == $tolang) {
-                                               $text = $text.substr($lang, 2);
-                                               break;
-                                       }
-                               }
-                       }
-                       if($tolang == "cn") {
-                               $text = $text.$this->trad2simp($a{1});
-                       }
-                       else {
-                               $text = $text.$this->simp2trad($a{1});
-                       }
-               }
+       function markNoConversion($text) {
                return $text;
        }
-       
-
-
-
-    /* these just calls the method of the corresponding class */
-    
-    function getDefaultUserOptions () {
-        return $this->mZhLang->getDefaultUserOptions();
-    }
-
-       function getBookstoreList () {
-               return $this->mZhLang->getBookstoreList() ;
-       }
 
-       function getNamespaces() {
-               return $this->mZhLang->getNamespaces();
+       function convertCategoryKey( $key ) {
+               return $this->autoConvert( $key, 'zh-cn' ); 
        }
+}
 
-       function getNsText( $index ) {
-        return $this->mZhLang->getNsText($index);
-       }
 
-       function getNsIndex( $text ) {
-        return $this->mZhLang->getNsIndex($text);
-       }
-
-       function getQuickbarSettings() {
-               return $this->mZhLang->getQuickbarSettings();
-       }
-
-       function getSkinNames() {
-               return $this->mZhLang->getSkinNames();
+/* Class that handles both Traditional and Simplified Chinese.
+   Right now it only distinguishes zh_cn, zh_tw, zh_sg and zh_hk.
+*/
+class LanguageZh extends LanguageZh_cn {
+       
+       function LanguageZh() {
+               global $wgHooks;
+               $this->mConverter = new ZhConverter($this, 'zh', 
+                                            array('zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'),
+                                                                                       array('zh'=>'zh-cn',
+                                                                                                 'zh-cn'=>'zh-sg',
+                                                                                                 'zh-sg'=>'zh-cn',
+                                                                                                 'zh-tw'=>'zh-hk',
+                                                                                                 'zh-hk'=>'zh-tw'));      
+               $wgHooks['ArticleSaveComplete'][] = $this->mConverter;
        }
 
-       function date( $ts, $adj = false )
-       {
-        return $this->mZhLang->date($ts,$adj);
+       
+       # this should give much better diff info
+       function segmentForDiff( $text ) {
+               return preg_replace(
+                       "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                       "' ' .\"$1\"", $text);
        }
 
-       function timeanddate( $ts, $adj = false )
-       {
-               return $this->mZhLang->timeanddate($ts, $adj);
+       function unsegmentForDiff( $text ) {
+               return preg_replace(
+                       "/ ([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                       "\"$1\"", $text);
        }
 
-       function getValidSpecialPages()
-       {
-               return $this->mZhLang->getValidSpecialPages();
-       }
+       // word segmentation
+       function stripForSearch( $string ) {
+               $fname="LanguageZh::stripForSearch";
+               wfProfileIn( $fname );
 
-       function getSysopSpecialPages()
-       {
-               return $this->mZhLang->getSysopSpecialPages();
-       }
+               // Eventually this should perform real word segmentation;
+               // for now, just treat each character as a word.
+               $t = preg_replace(
+                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                               "' ' .\"$1\"", $string);
 
-       function getDeveloperSpecialPages()
-       {
-               return $this->mZhLang->getDeveloperSpecialPages();
+        // Always convert to zh-cn before indexing. It should be
+               // better to use zh-cn for search, since conversion from
+               // Traditional to Simplified is less ambiguous than the
+               // other way around.
 
-       }
+               $t = $this->mConverter->autoConvert($t, 'zh-cn');
+               $t = LanguageUtf8::stripForSearch( $t );
+               wfProfileOut( $fname );
+               return $t;
 
-       function getMessage( $key )
-       {
-        return $this->mZhLang->getMessage($key);
        }
 
-       function stripForSearch( $string ) {
-        return $this->mZhLang->stripForSearch($string);
+       function convertForSearchResult( $termsArray ) {
+               $terms = implode( '|', $termsArray );
+               $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
+               $ret = array_unique( explode('|', $terms) );
+               return $ret;
        }
 
-    
 }
-
-
 ?>