Code refactoring for the language conversion system:
authorZheng Zhu <zhengzhu@users.mediawiki.org>
Fri, 15 Apr 2005 14:12:39 +0000 (14:12 +0000)
committerZheng Zhu <zhengzhu@users.mediawiki.org>
Fri, 15 Apr 2005 14:12:39 +0000 (14:12 +0000)
- Moved general conversion code from LanguageZh.php to LanguageConverter.php (new file);
- Add fake LanguageConverter object to the main language object. Languages that need real conversion functionality should implement their own converter object derived from LanguageConverter;
- As an example, LanguageZh.php now implements the conversion through the converter object.

languages/Language.php
languages/LanguageConverter.php [new file with mode: 0644]
languages/LanguageZh.php

index faa18a1..d382ca9 100644 (file)
@@ -1850,15 +1850,31 @@ ta[\'ca-nstab-category\'] = new Array(\'c\',\'View the category page\');
 'contributionsall' => 'all',
 );
 
+/* a fake language converter */
+class fakeConverter {
+       var $mLang;   
+       function fakeConverter($langobj) {$this->mLang = $langobj;}
+       function convert($t, $i) {return $t;}
+       function getVariants() { return array( strtolower( substr( get_class( $this->mLang ), 8 ) ) ); }
+       function getPreferredVariant() {return strtolower( substr( get_class( $this->mLang ), 8 ) );}
+       function findVariantLink(&$l, &$n) {}
+       function getExtraHashOptions() {return '';}
+       function getParsedTitle() {return '';}
+       function markNoConversion($text) {return $text;}
+}
+
 #--------------------------------------------------------------------------
 # Internationalisation code
 #--------------------------------------------------------------------------
 
 class Language {
+       var $mConverter;
        function Language() {
+
                # Copies any missing values in the specified arrays from En to the current language
                $fillin = array( 'wgSysopSpecialPages', 'wgValidSpecialPages', 'wgDeveloperSpecialPages' );
                $name = get_class( $this );
+
                if( strpos( $name, 'language' ) == 0){
                        $lang = ucfirst( substr( $name, 8 ) );
                        foreach( $fillin as $arrname ){
@@ -1872,6 +1888,7 @@ class Language {
                                }
                        }
                }
+               $this->mConverter = new fakeConverter($this);
        }
        
        /**
@@ -2471,7 +2488,7 @@ class Language {
 
        # convert text to different variants of a language.
        function convert( $text , $isTitle=false) {
-               return $text;
+               return $this->mConverter->convert($text, $isTitle);
        }
 
        /**
@@ -2481,26 +2498,12 @@ class Language {
         * @return array an array of language codes
         */
        function getVariants() {
-               $lang = strtolower( substr( get_class( $this ), 8 ) );
-               return array( $lang );
+               return $this->mConverter->getVariants();
        }
 
-       /**
-        * in case some variant is not defined in the markup, we need
-        * to have some fallback. for example, in zh, normally people
-        * will define zh-cn and zh-tw, but less so for zh-sg or zh-hk.
-        * when zh-sg is preferred but not defined, we will pick zh-cn
-        * in this case. right now this is only used by zh.
-        *      
-        * @param string $v the language code of the variant
-        * @return string the code of the fallback language or false if there is no fallback
-       */
-       function getVariantFallback( $v ) {
-               return false;
-       }
 
        function getPreferredVariant() {
-               return strtolower( substr( get_class( $this ), 8 ) );
+               return $this->mConverter->getPreferredVariant();
        }
 
        /**
@@ -2514,7 +2517,7 @@ class Language {
         * @return null the input parameters may be modified upon return
         */
        function findVariantLink( &$link, &$nt ) {
-               return;
+               $this->mConverter->findVariantLink($link, $nt);
        }
 
        /**
@@ -2525,9 +2528,31 @@ class Language {
         * @access public
         */
        function getExtraHashOptions() {
-               return '';
+               return $this->mConverter->getExtraHashOptions();
+       }
+
+       /**
+        * for languages that support multiple variants, the title of an
+        * article may be displayed differently in different variants. this
+        * function returns the apporiate title defined in the body of the article.
+        * 
+        * @return string
+        */
+       function getParsedTitle() {
+               return $this->mConverter->getParsedTitle();
        }
        
+       /**
+        * Enclose a string with the "no conversion" tag. This is used by
+        * various functions in the Parser
+        * 
+        * @param string $text text to be tagged for no conversion
+        * @return string the tagged text
+       */
+       function markNoConversion( $text ) {
+               return $this->mConverter->markNoConversion( $text );
+       }
+
        /**
         * A regular expression to match legal word-trailing characters
         * which should be merged onto a link of the form [[foo]]bar.
@@ -2545,16 +2570,6 @@ class Language {
                return $this;
        }
 
-       /**
-        * for languages that support multiple variants, the title of an
-        * article may be displayed differently in different variants. this
-        * function returns the apporiate title defined in the body of the article.
-        * 
-        * @return string
-        */
-       function getParsedTitle() {
-               return '';
-       }
 
 }
 
diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php
new file mode 100644 (file)
index 0000000..ff73951
--- /dev/null
@@ -0,0 +1,616 @@
+<?php
+/*
+       Copyright (C) 2005 by Zhengzhu Feng (zhengzhu@gmail.com)
+
+       This file is distributed under the terms of GPL
+
+*/
+
+/**
+  * @package MediaWiki
+  * @subpackage Language
+  */
+
+class LanguageConverter {
+       var $mPreferredVariant='';
+       var $mMainLanguageCode;
+       var $mVariants, $mVariantFallbacks;
+       var $mTablesLoaded = false;
+       var $mTables;
+       var $mTitleDisplay='';
+       var $mDoTitleConvert=true, $mDoContentConvert=true;
+       var $mCacheKey;
+       var $mLangObj;
+       var $mMarkup;
+       /**
+     * Constructor
+        *
+     * @param string $maincode the main language code of this language
+     * @param array $variants the supported variants of this language
+     * @param array $variantfallback the fallback language of each variant
+     * @param array $markup array defining the markup used for manual conversion
+     * @access public
+     */
+       function LanguageConverter($langobj, $maincode, 
+                                                               $variants=array(), 
+                                                               $variantfallbacks=array(), 
+                                                               $markup=array('begin'=>'-{',
+                                                                                         'codesep'=>':',
+                                                                                         'varsep'=>';',
+                                                                                         'end'=>'}-')) {
+               global $wgDBname;
+               $this->mLangObj = $langobj;
+               $this->mMainLanguageCode = $maincode;
+               $this->mVariants = $variants;           
+               $this->mVariantFallbacks = $variantfallbacks;
+               $this->mCacheKey = $wgDBname . ":conversiontables";
+               $this->mMarkup = $markup;
+       }
+
+       /**
+     * @access public
+     */
+       function getVariants() { 
+               return $this->mVariants;
+       }
+
+       /**
+        * in case some variant is not defined in the markup, we need
+        * to have some fallback. for example, in zh, normally people
+        * will define zh-cn and zh-tw, but less so for zh-sg or zh-hk.
+        * when zh-sg is preferred but not defined, we will pick zh-cn
+        * in this case. right now this is only used by zh.
+        *      
+        * @param string $v the language code of the variant
+        * @return string the code of the fallback language or false if there is no fallback
+     * @access private
+       */
+       function getVariantFallback($v) {
+               return $this->mVariantFallbacks[$v];
+       }
+
+       
+       /** 
+     * get preferred language variants.
+     * @return string the preferred language code
+     * @access public
+       */
+       function getPreferredVariant() {
+               global $wgUser, $wgRequest;
+               
+               if($this->mPreferredVariant)
+                       return $this->mPreferredVariant;
+
+               // see if the preference is set in the request
+               $req = $wgRequest->getText( 'variant' );
+               if( in_array( $req, $this->mVariants ) ) {
+                       $this->mPreferredVariant = $req;
+                       return $req;
+               }
+
+               // get language variant preference from logged in users 
+               if(is_object($wgUser) && $wgUser->isLoggedIn() )  {
+                       $this->mPreferredVariant = $wgUser->getOption('variant');
+               }
+
+               # FIXME rewrite code for parsing http header. The current code
+               # is written specific for detecting zh- variants
+               if( !$this->mPreferredVariant ) {
+                       // see if some zh- variant is set in the http header,
+                       $this->mPreferredVariant=$this->mMainLanguageCode;
+                       if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) {
+                               $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
+                               $zh = strstr($header, 'zh-');
+                               if($zh) {
+                                       $this->mPreferredVariant = substr($zh,0,5);
+                               }
+                       }
+               }
+
+               return $this->mPreferredVariant;
+       }
+
+       /**
+     * dictionary-based conversion
+     *
+     * @param string $text the text to be converted
+     * @param string $toVariant the target language code
+     * @return string the converted text
+     * @access private
+     */
+       function autoConvert($text, $toVariant=false) {
+               $fname="LanguageConverter::autoConvert";
+               wfProfileIn( $fname );
+
+               if(!$this->mTablesLoaded)
+                       $this->loadTables();
+
+               if(!$toVariant) 
+                       $toVariant = $this->getPreferredVariant();
+               if(!in_array($toVariant, $this->mVariants))
+                       return $text;
+
+               $ret = '';
+
+               $a = explode('<', $text);
+               $a0 = array_shift($a);
+               $ret .= strtr($a0, $this->mTables[$toVariant]);
+               foreach( $a as $aa ) {
+                       $b = explode('>', $aa, 2);
+                       $ret .= '<' . $b[0];
+                       if(sizeof($b) == 2)
+                               $ret .= '>' . strtr($b[1], $this->mTables[$toVariant]);
+               }
+
+#              /* put back the marker if any */
+#              if(!empty($reg)) {
+#                      $reg = '<'.$reg.'>';
+#                      $ret = preg_replace('/'.$reg.'/', '${1}', $ret);
+#              }
+#
+               wfProfileOut( $fname );
+               return $ret;
+       }
+    
+       /**
+     * convert text to all supported variants
+     *
+     * @param string $text the text to be converted
+     * @return array of string
+     * @access private
+     */
+       function autoConvertToAllVariants($text) {
+               $fname="LanguageConverter::autoConvertToAllVariants";
+               wfProfileIn( $fname );
+               if( !$this->mTablesLoaded )
+                       $this->loadTables();
+
+               $ret = array();
+               foreach($this->mVariants as $variant) {
+                       $ret[$variant] = strtr($text, $this->mTables[$variant]);
+               }
+               wfProfileOut( $fname );
+               return $ret;
+       }
+
+       /**
+        * convert text to different variants of a language. the automatic
+        * conversion is done in autoConvert(). here we parse the text 
+        * marked with -{}-, which specifies special conversions of the 
+        * text that can not be accomplished in autoConvert()
+        *
+        * syntax of the markup:
+        * -{code1:text1;code2:text2;...}-  or
+        * -{text}- in which case no conversion should take place for text
+     *
+     * @param string $text text to be converted
+     * @param bool $isTitle whether this conversion is for the article title 
+     * @return string converted text
+     * @access public
+     */
+       function convert( $text , $isTitle=false) {
+               global $wgDisableLangConversion;
+               if($wgDisableLangConversion)
+                       return $text; 
+
+               $mw =& MagicWord::get( MAG_NOTITLECONVERT );
+               if( $mw->matchAndRemove( $text ) )
+                       $this->mDoTitleConvert = false;
+
+               $mw =& MagicWord::get( MAG_NOCONTENTCONVERT );
+               if( $mw->matchAndRemove( $text ) ) {
+                       $this->mDoContentConvert = false;
+               }
+
+               // no conversion if redirecting
+               $mw =& MagicWord::get( MAG_REDIRECT );
+               if( $mw->matchStart( $text ))
+                       return $text;
+
+               if( $isTitle ) {
+                       if( !$this->mDoTitleConvert ) {
+                               $this->mTitleDisplay = $text;
+                               return $text;
+                       }
+                       if( !empty($this->mTitleDisplay))
+                               return $this->mTitleDisplay;
+
+                       global $wgRequest;
+                       $isredir = $wgRequest->getText( 'redirect', 'yes' );
+                       $action = $wgRequest->getText( 'action' );
+                       if ( $isredir == 'no' || $action == 'edit' ) {
+                               return $text;
+                       }
+                       else {
+                               $this->mTitleDisplay = $this->autoConvert($text);
+                               return $this->mTitleDisplay;
+                       }
+               }
+
+               if( !$this->mDoContentConvert )
+                       return $text;
+
+               $search = array('/('.UNIQ_PREFIX.'-[a-zA-Z0-9]+)/', //nowiki marker
+                                       '/(&[a-z#][a-z0-9]+;)/', //html entities
+                        );
+               $replace = $this->mMarkup['begin'].'${1}'.$this->mMarkup['end'];
+
+               $text = preg_replace($search, $replace, $text);
+
+               $plang = $this->getPreferredVariant();
+               $fallback = $this->mVariantFallbacks[$plang];
+               $tarray = explode($this->mMarkup['begin'], $text);
+               $tfirst = array_shift($tarray);
+               $text = $this->autoConvert($tfirst);
+               foreach($tarray as $txt) {
+                       $marked = explode($this->mMarkup['end'], $txt);
+               
+                       //strip &nbsp; since it interferes with the parsing, plus,
+                       //all spaces should be stripped in this tag anyway.
+                       $marked[0] = str_replace('&nbsp;', '', $marked[0]);
+
+                       /* see if this conversion has special meaning
+                          # for article title:
+                                -{T|zh-cn:foo;zh-tw:bar}-
+                          # convert all occurence of foo/bar in this article:
+                                -{A|zh-cn:foo;zh-tw:bar}-
+                       */
+                       $flag = '';
+                       $choice = false;
+                       $tt = explode("|", $marked[0], 2);
+                       if(sizeof($tt) == 2) {
+                               $flag = trim($tt[0]);
+                               $choice = explode(";", $tt[1]);
+                       }
+
+                       if(!$choice) {
+                               $choice = explode($this->mMarkup['varsep'], $marked[0]);
+                       }
+                       $disp = '';
+                       $carray = array();
+                       if(!array_key_exists(1, $choice)) {
+                               /* a single choice */
+                               $disp = $choice[0];
+
+                               /* fill the carray if the conversion is for the whole article*/
+                               if($flag == 'A') {
+                                       foreach($this->mVariants as $v) {
+                                               $carray[$v] = $disp;
+                                       }
+                               }
+                       } 
+                       else {
+                               foreach($choice as $c) {
+                                       $v = explode($this->mMarkup['codesep'], $c);
+                                       if(sizeof($v) != 2) // syntax error, skip
+                                               continue;
+                                       $carray[trim($v[0])] = trim($v[1]);
+                               }
+                               if(array_key_exists($plang, $carray))
+                                       $disp = $carray[$plang];
+                               else if(array_key_exists($fallback, $carray))
+                                       $disp = $carray[$fallback];
+                       }
+                       if(empty($disp)) { // syntax error
+                               $text .= $marked[0];
+                       }
+                       else {  
+                               if($flag == 'T') // for title only
+                                       $this->mTitleDisplay = $disp;
+                               else {
+                                       $text .= $disp;
+                                       if($flag == 'A') {
+                                               /* modify the conversion table for this session*/
+
+                                               /* fill in the missing variants, if any,
+                                                   with fallbacks */ 
+                                               foreach($this->mVariants as $v) {
+                                                       if(!array_key_exists($v, $carray)) {
+                                                               $vf = $this->getVariantFallback($v);
+                                                               if(array_key_exists($vf, $carray))
+                                                                       $carray[$v] = $carray[$vf];
+                                                       }
+                                               }
+                                               foreach($this->mVariants as $vfrom) {
+                                                       if(!array_key_exists($vfrom, $carray))
+                                                               continue;
+                                                       foreach($this->mVariants as $vto) {
+                                                               if($vfrom == $vto)
+                                                                       continue;
+                                                               if(!array_key_exists($vto, $carray))
+                                                                       continue;
+                                                               $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto];
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+                       if(array_key_exists(1, $marked))
+                               $text .= $this->autoConvert($marked[1]);
+               }
+               
+               return $text;
+       }
+
+
+       /**
+        * if a language supports multiple variants, it is
+        * possible that non-existing link in one variant
+        * actually exists in another variant. this function 
+        * tries to find it. See e.g. LanguageZh.php
+        *
+        * @param string $link the name of the link
+        * @param mixed $nt the title object of the link
+        * @return null the input parameters may be modified upon return
+     * @access public
+        */
+       function findVariantLink( &$link, &$nt ) {
+               static $count=0; //used to limit this operation
+               static $cache=array();
+               global $wgDisableLangConversion;
+               $pref = $this->getPreferredVariant();
+               if( $count > 50 )
+                       return;
+               $count++;
+               $variants = $this->autoConvertToAllVariants($link);
+               if($variants == false) //give up
+                       return;
+               foreach( $variants as $v ) {
+                       if(isset($cache[$v]))
+                               continue;
+                       $cache[$v] = 1;
+                       $varnt = Title::newFromText( $v );
+                       if( $varnt && $varnt->getArticleID() > 0 ) {
+                               $nt = $varnt;
+                               if( !$wgDisableLangConversion && $pref != 'zh' )
+                                       $link = $v;
+                               break;
+                       }
+               }
+       }
+
+    /**
+     * returns language specific hash options
+     * 
+     * @access public
+     */
+       function getExtraHashOptions() {
+               $variant = $this->getPreferredVariant();
+               return '!' . $variant ;
+       }
+
+    /**
+     * get title text as defined in the body of the article text
+     *
+     * @access public
+     */
+       function getParsedTitle() {
+               return $this->mTitleDisplay;
+       }
+
+       /**
+     * a write lock to the cache
+     *
+     * @access private
+     */
+       function lockCache() {
+               global $wgMemc;
+               $success = false;
+               for($i=0; $i<30; $i++) {
+                       if($success = $wgMemc->add($this->mCacheKey . "lock", 1, 10))
+                               break;
+                       sleep(1);
+               }
+               return $success;                
+       }
+
+       /**
+     * unlock cache
+     *
+     * @access private
+     */
+       function unlockCache() {
+               global $wgMemc;
+               $wgMemc->delete($this->mCacheKey . "lock");
+       }
+
+
+       /**
+     * Load default conversion tables
+     * This method must be implemented in derived class
+     * 
+     * @access private
+     */
+       function loadDefaultTables() {
+               $name = get_class($this);
+               die("Must implement loadDefaultTables() method in class $name");
+       }
+
+       /**
+     * load conversion tables either from the cache or the disk
+     * @access private
+     */
+       function loadTables($fromcache=true) {
+               global $wgMemc;
+               if( $this->mTablesLoaded )
+                       return;
+               $this->mTablesLoaded = true;
+               if($fromcache) {
+                       $this->mTables = $wgMemc->get( $this->mCacheKey );
+                       if( !empty( $this->mTables ) ) //all done
+                               return;
+               }
+               // not in cache, or we need a fresh reload. 
+               // we will first load the default tables
+               // then update them using things in MediaWiki:Zhconversiontable/*
+               global $wgMessageCache;
+               $this->loadDefaultTables();
+               foreach($this->mVariants as $var) {
+                       $cached = $this->parseCachedTable($var);
+                       $this->mTables[$var] = array_merge($this->mTables[$var], $cached);
+               }
+
+               $this->postLoadTables();
+               
+               if($this->lockCache()) {
+                       $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
+                       $this->unlockCache();
+               }
+       }
+       
+    /**
+     * Hook for post processig after conversion tables are loaded
+     *
+     */
+       function postLoadTables() {}
+
+       /* deprecated? */
+       function updateTablexxxx($code, $table) {
+               global $wgMemc;
+               if(!$this->mTablesLoaded)
+                       $this->loadTables();
+
+               $this->mTables[$code] = array_merge($this->mTables[$code], $table);
+               if($this->lockCache()) {
+                       $wgMemc->delete($this->mCacheKey);
+                       $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
+                       $this->unlockCache();
+               }
+       }
+
+    /**
+     * Reload the conversion tables
+     * 
+     * @access private
+     */
+       function reloadTables() {
+               if($this->mTables)
+                       unset($this->mTables);
+               $this->mTablesLoaded = false;
+               $this->loadTables(false);
+       }
+
+
+       /**
+     * parse the conversion table stored in the cache 
+     *
+     * the tables should be in blocks of the following form:
+
+     *         -{
+     *                 word => word ;
+     *                 word => word ;
+     *                 ...
+     *         }-
+     * 
+     * to make the tables more manageable, subpages are allowed
+     * and will be parsed recursively if $recursive=true
+     *
+     * @access private
+        */
+       function parseCachedTable($code, $subpage='', $recursive=true) {
+               global $wgMessageCache;
+               static $parsed = array();
+
+               if(!is_object($wgMessageCache))
+                       return array();
+
+               $key = 'Conversiontable/'.$code;
+               if($subpage)
+                       $key .= '/' . $subpage;
+
+               if(array_key_exists($key, $parsed))
+                       return array();
+       
+
+               $txt = $wgMessageCache->get( $key, true, true, true );
+
+               // get all subpage links of the form
+               // [[MediaWiki:conversiontable/zh-xx/...|...]]
+               $linkhead = $this->mLangObj->getNsText(NS_MEDIAWIKI) . ':Conversiontable';
+               $subs = explode('[[', $txt);
+               $sublinks = array();
+               foreach( $subs as $sub ) {
+                       $link = explode(']]', $sub, 2);
+                       if(count($link) != 2)
+                               continue;
+                       $b = explode('|', $link[0]);
+                       $b = explode('/', trim($b[0]), 3);
+                       if(count($b)==3)
+                               $sublink = $b[2];
+                       else    
+                               $sublink = '';
+
+                       if($b[0] == $linkhead && $b[1] == $code) {
+                               $sublinks[] = $sublink;
+                       }
+               }
+
+
+               // parse the mappings in this page
+               $blocks = explode('-{', $txt);
+               array_shift($blocks);
+               $ret = array(); 
+               foreach($blocks as $block) {
+                       $mappings = explode('}-', $block, 2);
+                       $stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
+                       $table = explode( ';', $stripped );
+                       foreach( $table as $t ) {
+                               $m = explode( '=>', $t );
+                               if( count( $m ) != 2)
+                                       continue;
+                               // trim any trailling comments starting with '//'
+                               $tt = explode('//', $m[1], 2);
+                               $ret[trim($m[0])] = trim($tt[0]);
+                       }
+               }
+               $parsed[$key] = true;
+
+
+               // recursively parse the subpages
+               if($recursive) {
+                       foreach($sublinks as $link) {
+                               $s = $this->parseCachedTable($code, $link, $recursive);
+                               $ret = array_merge($ret, $s);
+                       }
+               }
+               return $ret;
+       }
+
+       /**
+        * Enclose a string with the "no conversion" tag. This is used by
+        * various functions in the Parser
+        * 
+        * @param string $text text to be tagged for no conversion
+        * @return string the tagged text
+       */
+       function markNoConversion($text) {
+               $ret = $this->mMarkup['begin'] . $text . $this->mMarkup['end'];
+       }
+
+       /**
+     * hook to refresh the cache of conversion tables when 
+     * MediaWiki:conversiontable* is updated
+     * @access private
+       */
+       function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section) {
+               $titleobj = $article->getTitle();
+               if($titleobj->getNamespace() == NS_MEDIAWIKI) { 
+            /*
+                       global $wgContLang; // should be an LanguageZh.
+                       if(get_class($wgContLang) != 'languagezh')      
+                               return true;
+            */
+                       $title = $titleobj->getDBkey();
+                       $t = explode('/', $title, 3);
+                       $c = count($t);
+                       if( $c > 1 && $t[0] == 'Conversiontable' ) {
+                               if(in_array($t[1], $this->mVariants)) {
+                                       $this->reloadTables();
+                               }
+                       }
+               }
+               return true;
+       }
+}
+
+?>
\ No newline at end of file
index 721a32b..4bab40c 100644 (file)
   * @package MediaWiki
   * @subpackage Language
   */
+require_once( "LanguageConverter.php" );
 require_once( "LanguageZh_cn.php");
 require_once( "LanguageZh_tw.php");
 require_once( "LanguageZh_sg.php");
 require_once( "LanguageZh_hk.php");
 
-/*
-   hook to refresh the cache of conversion tables when 
-   MediaWiki:zhconversiontable* is updated
-*/
-function zhOnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section) {
-       $titleobj = $article->getTitle();
-       if($titleobj->getNamespace() == NS_MEDIAWIKI) { 
-               global $wgContLang; // should be an LanguageZh.
-               if(get_class($wgContLang) != 'languagezh')      
-                       return true;
-
-               $title = $titleobj->getDBkey();
-               $t = explode('/', $title, 3);
-               $c = count($t);
-               if( $c > 1 && $t[0] == 'Zhconversiontable' ) {
-                       if(in_array($t[1], array('zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'))) {
-                               $wgContLang->reloadTables();
-                       }
-               }
-       }
-       return true;
-}
-
-$wgHooks['ArticleSaveComplete'][] = 'zhOnArticleSaveComplete';
-
-/* class that handles both Traditional and Simplified Chinese
-   right now it only distinguish zh_cn and zh_tw (actuall, zh_cn and
-   non-zh_cn), will add support for zh_sg, zh_hk, etc, later.
-*/
-class LanguageZh extends LanguageZh_cn {
-       
-       var $mZhLanguageCode=false;
-       var $mTables=false; //the mapping tables
-       var $mTablesLoaded = false;
-       var $mCacheKey;
-       var $mDoTitleConvert = true, $mDoContentConvert = true;
-       var $mTitleDisplay='';
-       function LanguageZh() {
-               global $wgDBname;
-               $this->mCacheKey = $wgDBname . ":zhtables";
-       }
-
-       // a write lock
-       function lockCache() {
-               global $wgMemc;
-               $success = false;
-               for($i=0; $i<30; $i++) {
-                       if($success = $wgMemc->add($this->mCacheKey . "lock", 1, 10))
-                               break;
-                       sleep(1);
-               }
-               return $success;                
-       }
-
-       function unlockCache() {
-               global $wgMemc;
-               $wgMemc->delete($this->mCacheKey . "lock");
-       }
-
-       function updateTable($code, $table) {
-               global $wgMemc;
-               if(!$this->mTablesLoaded)
-                       $this->loadTables();
-
-               $this->mTables[$code] = array_merge($this->mTables[$code], $table);
-               if($this->lockCache()) {
-                       $wgMemc->delete($this->mCacheKey);
-                       $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
-                       $this->unlockCache();
-               }
-       }
-
-       function reloadTables() {
-               if($this->mTables)
-                       unset($this->mTables);
-               $this->mTablesLoaded = false;
-               $this->loadTables(false);
-       }
-
-       // load conversion tables either from the cache or the disk
-       function loadTables($fromcache=true) {
-               global $wgMemc;
-               if( $this->mTablesLoaded )
-                       return;
-               $this->mTablesLoaded = true;
-               if($fromcache) {
-                       $this->mTables = $wgMemc->get( $this->mCacheKey );
-                       if( !empty( $this->mTables ) ) //all done
-                               return;
-               }
-               // not in cache, or we need a fresh reload. 
-               // we will first load the tables from file
-               // then update them using things in MediaWiki:Zhconversiontable/*
-               global $wgMessageCache;
+class ZhConverter extends LanguageConverter {
+       function loadDefaultTables() {
                require( "includes/ZhConversion.php" );
                $this->mTables = array();
                $this->mTables['zh-cn'] = $zh2CN;
                $this->mTables['zh-tw'] = $zh2TW;
-               $this->mTables['zh-sg'] = $zh2SG;
-               $this->mTables['zh-hk'] = $zh2HK;
-
-               $cached = $this->parseCachedTable('zh-cn');
-               $this->mTables['zh-cn'] = array_merge($this->mTables['zh-cn'], $cached);
-
-               $cached = $this->parseCachedTable('zh-tw');
-               $this->mTables['zh-tw'] = array_merge($this->mTables['zh-tw'], $cached);
-
-               $cached = $this->parseCachedTable('zh-sg');
-               $this->mTables['zh-sg'] = array_merge($this->mTables['zh-cn'], $this->mTables['zh-sg'], $cached);
-
-               $cached = $this->parseCachedTable('zh-hk');
-               $this->mTables['zh-hk'] = array_merge($this->mTables['zh-tw'], $this->mTables['zh-hk'], $cached);
-               if($this->lockCache()) {
-                       $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
-                       $this->unlockCache();
-               }
+               $this->mTables['zh-sg'] = array_merge($zh2CN, $zh2SG);
+               $this->mTables['zh-hk'] = array_merge($zh2TW, $zh2HK);
+               $this->mTables['zh'] = array();
        }
-       
 
-       /*
-               parse the conversion table stored in the cache 
-
-               the tables should be in blocks of the following form:
-
-                       -{
-                               word => word ;
-                               word => word ;
-                               ...
-                       }-
-               
-               to make the tables more manageable, subpages are allowed
-               and will be parsed recursively if $recursive=true
-
-       */
-       function parseCachedTable($code, $subpage='', $recursive=true) {
-               global $wgMessageCache;
-               static $parsed = array();
-
-               if(!is_object($wgMessageCache))
-                       return array();
+       function postLoadTables() {
+               $this->mTables['zh-sg'] = array_merge($this->mTables['zh-cn'], $this->mTables['zh-sg']);
+               $this->mTables['zh-hk'] = array_merge($this->mTables['zh-tw'], $this->mTables['zh-hk']);
+    }
+}
 
-               $key = 'zhconversiontable/'.$code;
-               if($subpage)
-                       $key .= '/' . $subpage;
 
-               if(array_key_exists($key, $parsed))
-                       return array();
+/* class that handles both Traditional and Simplified Chinese
+   right now it only distinguish zh_cn, zh_tw, zh_sg and zh_hk.
+*/
+class LanguageZh extends LanguageZh_cn {
        
-
-               $txt = $wgMessageCache->get( $key, true, true, true );
-
-               // get all subpage links of the form
-               // [[MediaWiki:Zhconversiontable/zh-xx/...|...]]
-               $linkhead = $this->getNsText(NS_MEDIAWIKI) . ':Zhconversiontable';
-               $subs = explode('[[', $txt);
-               $sublinks = array();
-               foreach( $subs as $sub ) {
-                       $link = explode(']]', $sub, 2);
-                       if(count($link) != 2)
-                               continue;
-                       $b = explode('|', $link[0]);
-                       $b = explode('/', trim($b[0]), 3);
-                       if(count($b)==3)
-                               $sublink = $b[2];
-                       else    
-                               $sublink = '';
-
-                       if($b[0] == $linkhead && $b[1] == $code) {
-                               $sublinks[] = $sublink;
-                       }
-               }
-
-
-               // parse the mappings in this page
-               $blocks = explode('-{', $txt);
-               array_shift($blocks);
-               $ret = array(); 
-               foreach($blocks as $block) {
-                       $mappings = explode('}-', $block, 2);
-                       $stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
-                       $table = explode( ';', $stripped );
-                       foreach( $table as $t ) {
-                               $m = explode( '=>', $t );
-                               if( count( $m ) != 2)
-                                       continue;
-                               // trim any trailling comments starting with '//'
-                               $tt = explode('//', $m[1], 2);
-                               $ret[trim($m[0])] = trim($tt[0]);
-                       }
-               }
-               $parsed[$key] = true;
-
-
-               // recursively parse the subpages
-               if($recursive) {
-                       foreach($sublinks as $link) {
-                               $s = $this->parseCachedTable($code, $link, $recursive);
-                               $ret = array_merge($ret, $s);
-                       }
-               }
-               return $ret;
+       function LanguageZh() {
+               global $wgHooks;
+               $this->mConverter = new ZhConverter($this, 'zh', 
+                                            array('zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'),
+                                                                                       array('zh'=>'zh-cn',
+                                                                                                 'zh-cn'=>'zh-sg',
+                                                                                                 'zh-sg'=>'zh-cn',
+                                                                                                 'zh-tw'=>'zh-hk',
+                                                                                                 'zh-hk'=>'zh-tw'));      
+               $wgHooks['ArticleSaveComplete'][] = $this->mConverter;
        }
 
-       /* 
-               get preferred language variants.
-       */
-       function getPreferredVariant() {
-               global $wgUser, $wgRequest;
-               
-               if($this->mZhLanguageCode)
-                       return $this->mZhLanguageCode;
-
-               // see if the preference is set in the request
-               $zhreq = $wgRequest->getText( 'variant' );
-               if( in_array( $zhreq, $this->getVariants() ) ) {
-                       $this->mZhLanguageCode = $zhreq;
-                       return $zhreq;
-               }
-
-               // get language variant preference from logged in users 
-               if( $wgUser->isLoggedIn() ) {
-                       $this->mZhLanguageCode = $wgUser->getOption('variant');
-               }
-
-               if( !$this->mZhLanguageCode ) {
-                       // see if some zh- variant is set in the http header,
-                       $this->mZhLanguageCode="zh";
-                       if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) {
-                               $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
-                               $zh = strstr($header, 'zh-');
-                               if($zh) {
-                                       $this->mZhLanguageCode = substr($zh,0,5);
-                               }
-                       }
-               }
-               return $this->mZhLanguageCode;
-       }
        
        # this should give much better diff info
        function segmentForDiff( $text ) {
@@ -259,204 +58,6 @@ class LanguageZh extends LanguageZh_cn {
                        "\"$1\"", $text);
        }
 
-       function autoConvert($text, $toVariant=false) {
-               $fname="LanguageZh::autoConvert";
-               wfProfileIn( $fname );
-
-               if(!$this->mTablesLoaded)
-                       $this->loadTables();
-
-               if(!$toVariant) 
-                       $toVariant = $this->getPreferredVariant();
-               $ret = '';
-               switch( $toVariant ) {
-                       case 'zh-cn': $ret = strtr($text, $this->mTables['zh-cn']);break;
-                       case 'zh-tw': $ret = strtr($text, $this->mTables['zh-tw']);break;
-                       case 'zh-sg': $ret = strtr($text, $this->mTables['zh-sg']);break;
-                       case 'zh-hk': $ret = strtr($text, $this->mTables['zh-hk']);break;
-                       default: $ret = $text;
-               }
-               wfProfileOut( $fname );
-               return $ret;
-       }
-    
-       function autoConvertToAllVariants($text) {
-               $fname="LanguageZh::autoConvertToAllVariants";
-               wfProfileIn( $fname );
-               if( !$this->mTablesLoaded )
-                       $this->loadTables();
-
-               $ret = array();
-               $ret['zh-cn'] = strtr($text, $this->mTables['zh-cn']);
-               $ret['zh-tw'] = strtr($text, $this->mTables['zh-tw']);
-               $ret['zh-sg'] = strtr(strtr($text, $this->mTables['zh-cn']), $this->mTables['zh-sg']);
-               $ret['zh-hk'] = strtr(strtr($text, $this->mTables['zh-tw']), $this->mTables['zh-hk']);
-               wfProfileOut( $fname );
-               return $ret;
-       }
-    
-       # convert text to different variants of a language. the automatic
-       # conversion is done in autoConvert(). here we parse the text 
-       # marked with -{}-, which specifies special conversions of the 
-       # text that can not be accomplished in autoConvert()
-       #
-       # syntax of the markup:
-       # -{code1:text1;code2:text2;...}-  or
-       # -{text}- in which case no conversion should take place for text
-       function convert( $text , $isTitle=false) {
-               global $wgDisableLangConversion;
-               if($wgDisableLangConversion)
-                       return $text; 
-
-               $mw =& MagicWord::get( MAG_NOTITLECONVERT );
-               if( $mw->matchAndRemove( $text ) )
-                       $this->mDoTitleConvert = false;
-
-               $mw =& MagicWord::get( MAG_NOCONTENTCONVERT );
-               if( $mw->matchAndRemove( $text ) ) {
-                       $this->mDoContentConvert = false;
-               }
-
-               // no conversion if redirecting
-               $mw =& MagicWord::get( MAG_REDIRECT );
-               if( $mw->matchStart( $text ))
-                       return $text;
-
-               if( $isTitle ) {
-                       if( !$this->mDoTitleConvert ) {
-                               $this->mTitleDisplay = $text;
-                               return $text;
-                       }
-                       if( !empty($this->mTitleDisplay))
-                               return $this->mTitleDisplay;
-
-                       global $wgRequest;
-                       $isredir = $wgRequest->getText( 'redirect', 'yes' );
-                       $action = $wgRequest->getText( 'action' );
-                       if ( $isredir == 'no' || $action == 'edit' ) {
-                               return $text;
-                       }
-                       else {
-                               $this->mTitleDisplay = $this->autoConvert($text);
-                               return $this->mTitleDisplay;
-                       }
-               }
-
-               if( !$this->mDoContentConvert )
-                       return $text;
-
-               $plang = $this->getPreferredVariant();
-               $fallback = $this->getVariantFallback($plang);
-               $variants = $this->getVariants();
-
-               $tarray = explode("-{", $text);
-               $tfirst = array_shift($tarray);
-               $text = $this->autoConvert($tfirst);
-               foreach($tarray as $txt) {
-                       $marked = explode("}-", $txt);
-               
-                       //strip &nbsp; since it interferes with the parsing, plus,
-                       //all spaces should be stripped in this tag anyway.
-                       $marked[0] = str_replace('&nbsp;', '', $marked[0]);
-
-                       /* see if this conversion has special meaning
-                          # for article title:
-                                -{T|zh-cn:foo;zh-tw:bar}-
-                          # convert all occurence of foo/bar in this article:
-                                -{A|zh-cn:foo;zh-tw:bar}-
-                       */
-                       $flag = '';
-                       $choice = false;
-                       $tt = explode("|", $marked[0], 2);
-                       if(sizeof($tt) == 2) {
-                               $flag = trim($tt[0]);
-                               $choice = explode(";", $tt[1]);
-                       }
-
-                       if(!$choice) {
-                               $choice = explode(";", $marked[0]);
-                       }
-                       $disp = '';
-                       $carray = array();
-                       if(!array_key_exists(1, $choice)) {
-                               /* a single choice */
-                               $disp = $choice[0];
-
-                               /* fill the carray if the conversion is for the whole article*/
-                               if($flag == 'A') {
-                                       foreach($variants as $v)
-                                               $carray[$v] = $disp;
-                               }
-                       } 
-                       else {
-                               foreach($choice as $c) {
-                                       $v = explode(":", $c);
-                                       if(sizeof($v) != 2) // syntax error, skip
-                                               continue;
-                                       $carray[trim($v[0])] = trim($v[1]);
-                               }
-                               if(array_key_exists($plang, $carray))
-                                       $disp = $carray[$plang];
-                               else if(array_key_exists($fallback, $carray))
-                                       $disp = $carray[$fallback];
-                       }
-                       if(empty($disp)) { // syntax error
-                               $text .= $marked[0];
-                       }
-                       else {  
-                               if($flag == 'T') // for title only
-                                       $this->mTitleDisplay = $disp;
-                               else {
-                                       $text .= $disp;
-                                       if($flag == 'A') {
-                                               /* modify the conversion table for this session*/
-
-                                               /* fill in the missing variants, if any,
-                                                   with fallbacks */ 
-                                               foreach($variants as $v) {
-                                                       if(!array_key_exists($v, $carray)) {
-                                                               $vf = $this->getVariantFallback($v);
-                                                               if(array_key_exists($vf, $carray))
-                                                                       $carray[$v] = $carray[$vf];
-                                                       }
-                                               }
-                                               foreach($variants as $vfrom) {
-                                                       if(!array_key_exists($vfrom, $carray))
-                                                               continue;
-                                                       foreach($variants as $vto) {
-                                                               if($vfrom == $vto)
-                                                                       continue;
-                                                               if(!array_key_exists($vto, $carray))
-                                                                       continue;
-                                                               $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto];
-                                                       }
-                                               }
-                                       }
-                               }
-                       }
-                       if(array_key_exists(1, $marked))
-                               $text .= $this->autoConvert($marked[1]);
-               }
-               
-               return $text;
-       }
-
-
-       function getVariants() {
-               return array("zh", "zh-cn", "zh-tw", "zh-sg", "zh-hk");
-       }
-
-       function getVariantFallback($v) {
-               switch ($v) {
-               case 'zh': return 'zh-cn'; break;
-               case 'zh-cn': return 'zh-sg'; break;
-               case 'zh-sg': return 'zh-cn'; break;
-               case 'zh-tw': return 'zh-hk'; break;
-               case 'zh-hk': return 'zh-tw'; break;
-               }
-               return false;
-       }
-
        // word segmentation
        function stripForSearch( $string ) {
                $fname="LanguageZh::stripForSearch";
@@ -473,7 +74,7 @@ class LanguageZh extends LanguageZh_cn {
                //Traditional to Simplified is less ambiguous than the
                //other way around
 
-               $t = $this->autoConvert($t, 'zh-cn');
+               $t = $this->mConverter->autoConvert($t, 'zh-cn');
                $t = LanguageUtf8::stripForSearch( $t );
                wfProfileOut( $fname );
                return $t;
@@ -482,44 +83,10 @@ class LanguageZh extends LanguageZh_cn {
 
        function convertForSearchResult( $termsArray ) {
                $terms = implode( '|', $termsArray );
-               $terms = implode( '|', $this->autoConvertToAllVariants( $terms ) );
+               $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
                $ret = array_unique( explode('|', $terms) );
                return $ret;
        }
 
-       function findVariantLink( &$link, &$nt ) {
-               static $count=0; //used to limit this operation
-               static $cache=array();
-               global $wgDisableLangConversion;
-               $pref = $this->getPreferredVariant();
-               if( $count > 50 )
-                       return;
-               $count++;
-               $variants = $this->autoConvertToAllVariants($link);
-               if($variants == false) //give up
-                       return;
-               foreach( $variants as $v ) {
-                       if(isset($cache[$v]))
-                               continue;
-                       $cache[$v] = 1;
-                       $varnt = Title::newFromText( $v );
-                       if( $varnt && $varnt->getArticleID() > 0 ) {
-                               $nt = $varnt;
-                               if( !$wgDisableLangConversion && $pref != 'zh' )
-                                       $link = $v;
-                               break;
-                       }
-               }
-       }
-
-       function getExtraHashOptions() {
-               global $wgUser;
-               $variant = $this->getPreferredVariant();
-               return '!' . $variant ;
-       }
-
-       function getParsedTitle() {
-               return $this->mTitleDisplay;
-       }
 }
 ?>