* (bug 14604) Introduced the following features for the LanguageConverter: Multi...
[lhc/web/wiklou.git] / languages / classes / LanguageZh.php
1 <?php
2
3 require_once( dirname(__FILE__).'/../LanguageConverter.php' );
4 require_once( dirname(__FILE__).'/LanguageZh_hans.php' );
5
/**
 * Language converter for Chinese: registers the display names of the
 * Chinese variants and builds one replacement table per variant.
 *
 * @ingroup Language
 */
class ZhConverter extends LanguageConverter {

	/**
	 * Forward every argument unchanged to LanguageConverter::__construct()
	 * and then register the display names of the Chinese variants.
	 *
	 * All parameters are passed straight through to the parent constructor;
	 * their exact semantics are defined there.
	 *
	 * @param $langobj          language object handed to the parent
	 * @param $maincode         main language code handed to the parent
	 * @param $variants         array of variant codes
	 * @param $variantfallbacks array mapping a variant code to its fallbacks
	 * @param $markup           array handed to the parent
	 * @param $flags            array handed to the parent
	 * @param $manualLevel      array mapping a variant code to a level string
	 */
	function __construct($langobj, $maincode,
								$variants=array(),
								$variantfallbacks=array(),
								$markup=array(),
								$flags = array(),
								$manualLevel = array() ) {
		parent::__construct($langobj, $maincode,
									$variants,
									$variantfallbacks,
									$markup,
									$flags,
									$manualLevel);

		// Display name of each variant, keyed by variant code.
		$names = array(
			'zh'      => '原文',
			'zh-hans' => '简体',
			'zh-hant' => '繁體',
			'zh-cn'   => '大陆',
			'zh-tw'   => '台灣',
			'zh-hk'   => '香港',
			'zh-mo'   => '澳門',
			'zh-sg'   => '新加坡',
			'zh-my'   => '马来西亚',
		);

		// With string keys array_merge() lets the later array win, so these
		// names override any entry the parent already set for the same code.
		$this->mVariantNames = array_merge($this->mVariantNames,$names);
	}

	/**
	 * Build the default conversion table for every variant.
	 *
	 * The $zh2* arrays are not defined in this file; they are presumably
	 * brought into local scope by the require of ZhConversion.php below.
	 * Regional variants get the script-level table (Hans or Hant) with the
	 * regional table merged over it. zh-mo reuses the zh-hk table and zh-my
	 * reuses the zh-sg table. 'zh' itself gets an empty ReplacementArray.
	 */
	function loadDefaultTables() {
		require( dirname(__FILE__)."/../../includes/ZhConversion.php" );
		$this->mTables = array(
			'zh-hans' => new ReplacementArray( $zh2Hans ),
			'zh-hant' => new ReplacementArray( $zh2Hant ),
			'zh-cn'   => new ReplacementArray( array_merge($zh2Hans, $zh2CN) ),
			'zh-hk'   => new ReplacementArray( array_merge($zh2Hant, $zh2HK) ),
			'zh-mo'   => new ReplacementArray( array_merge($zh2Hant, $zh2HK) ),
			'zh-my'   => new ReplacementArray( array_merge($zh2Hans, $zh2SG) ),
			'zh-sg'   => new ReplacementArray( array_merge($zh2Hans, $zh2SG) ),
			'zh-tw'   => new ReplacementArray( array_merge($zh2Hant, $zh2TW) ),
			'zh'      => new ReplacementArray
		);
	}

	/**
	 * Merge the script-level table into each regional table: zh-hans into
	 * zh-cn, zh-my and zh-sg; zh-hant into zh-hk, zh-mo and zh-tw.
	 *
	 * NOTE(review): the name suggests this runs after the tables have been
	 * loaded (and possibly extended) by the parent class - confirm against
	 * LanguageConverter.
	 */
	function postLoadTables() {
		$this->mTables['zh-cn']->merge( $this->mTables['zh-hans'] );
		$this->mTables['zh-hk']->merge( $this->mTables['zh-hant'] );
		$this->mTables['zh-mo']->merge( $this->mTables['zh-hant'] );
		$this->mTables['zh-my']->merge( $this->mTables['zh-hans'] );
		$this->mTables['zh-sg']->merge( $this->mTables['zh-hans'] );
		$this->mTables['zh-tw']->merge( $this->mTables['zh-hant'] );
	}

	/**
	 * Return the text untouched.
	 *
	 * There shouldn't be any latin text in Chinese conversion, so there is
	 * no need to mark anything.
	 *
	 * @param $text    string text to (not) mark
	 * @param $noParse bool unused; only present for compatibility with
	 *                 LanguageConverter::markNoConversion
	 * @return string $text, unchanged
	 */
	function markNoConversion($text, $noParse = false) {
		return $text;
	}

	/**
	 * Description of the conversion rules, punctuated for Chinese text.
	 *
	 * Takes the description produced by the parent class and replaces the
	 * ASCII ':' and ';' with their fullwidth forms. The fullwidth characters
	 * are written as UTF-8 byte escapes so that they cannot be damaged by a
	 * re-encoding of this file.
	 *
	 * @param $bidtable  passed through to parent::getRulesDesc()
	 * @param $unidtable passed through to parent::getRulesDesc()
	 * @return string
	 */
	function getRulesDesc($bidtable,$unidtable){
		$text=parent::getRulesDesc($bidtable,$unidtable);
		$text=str_replace(':',"\xEF\xBC\x9A",$text); // U+FF1A FULLWIDTH COLON
		$text=str_replace(';',"\xEF\xBC\x9B",$text); // U+FF1B FULLWIDTH SEMICOLON
		return $text;
	}

	/**
	 * Convert a category key with autoConvert(), using 'zh' as the target.
	 *
	 * @param $key string
	 * @return string
	 */
	function convertCategoryKey( $key ) {
		return $this->autoConvert( $key, 'zh' );
	}
}
81
/**
 * Class that handles both Traditional and Simplified Chinese.
 * It distinguishes zh, zh-hans, zh-hant, zh-cn, zh-hk, zh-mo, zh-my,
 * zh-sg and zh-tw.
 *
 * @ingroup Language
 */
class LanguageZh extends LanguageZh_hans {

	/**
	 * Set up the variant list, the per-variant fallback chains and the
	 * manual-level settings, create the ZhConverter and register it for the
	 * ArticleSaveComplete hook.
	 */
	function __construct() {
		global $wgHooks;
		parent::__construct();

		$variants = array('zh','zh-hans','zh-hant','zh-cn','zh-hk','zh-mo','zh-my','zh-sg','zh-tw');

		// Fallbacks stay within the same script: simplified variants fall
		// back to zh-hans and its regional siblings, traditional variants
		// to zh-hant and its regional siblings.
		$variantfallbacks = array(
			'zh'      => array('zh-hans','zh-hant','zh-cn','zh-tw','zh-hk','zh-sg','zh-mo','zh-my'),
			'zh-hans' => array('zh-cn','zh-sg','zh-my'),
			'zh-hant' => array('zh-tw','zh-hk','zh-mo'),
			'zh-cn'   => array('zh-hans','zh-sg','zh-my'),
			'zh-sg'   => array('zh-hans','zh-cn','zh-my'),
			// zh-my is built from the zh-hans table in ZhConverter, so its
			// first fallback must be zh-hans, not zh-hant.
			'zh-my'   => array('zh-hans','zh-sg','zh-cn'),
			'zh-tw'   => array('zh-hant','zh-hk','zh-mo'),
			'zh-hk'   => array('zh-hant','zh-mo','zh-tw'),
			'zh-mo'   => array('zh-hant','zh-hk','zh-tw'),
		);

		// Passed as the converter's $manualLevel argument; the meaning of
		// the level strings is defined by LanguageConverter.
		$ml=array(
			'zh'      => 'disable',
			'zh-hans' => 'unidirectional',
			'zh-hant' => 'unidirectional',
		);

		$this->mConverter = new ZhConverter( $this, 'zh',
								$variants, $variantfallbacks,
								array(),array(),
								$ml);

		$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
	}

	/**
	 * Insert a space in front of every multibyte UTF-8 sequence (a lead
	 * byte 0xC0-0xFF followed by any continuation bytes 0x80-0xBF).
	 * This should give much better diff info.
	 *
	 * A plain backreference is used instead of the PCRE /e modifier, which
	 * was removed in PHP 7; the result is the same because the matched bytes
	 * never need escaping.
	 *
	 * @param $text string
	 * @return string
	 */
	function segmentForDiff( $text ) {
		return preg_replace(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			' $1', $text);
	}

	/**
	 * Remove the single space directly in front of every multibyte UTF-8
	 * sequence. Note that this also drops such a space when it was already
	 * present before segmentForDiff() ran.
	 *
	 * @param $text string
	 * @return string
	 */
	function unsegmentForDiff( $text ) {
		return preg_replace(
			"/ ([\\xc0-\\xff][\\x80-\\xbf]*)/",
			'$1', $text);
	}

	/**
	 * Prepare a string for the search index (word segmentation).
	 *
	 * @param $string string
	 * @return string
	 */
	function stripForSearch( $string ) {
		$fname="LanguageZh::stripForSearch";
		wfProfileIn( $fname );

		// eventually this should be a word segmentation
		// for now just treat each character as a word
		$t = preg_replace(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			' $1', $string);

		// always convert to zh-hans before indexing. it should be
		// better to use zh-hans for search, since conversion from
		// Traditional to Simplified is less ambiguous than the
		// other way around
		$t = $this->mConverter->autoConvert($t, 'zh-hans');
		$t = parent::stripForSearch( $t );
		wfProfileOut( $fname );
		return $t;
	}

	/**
	 * Expand a list of search terms to all variants.
	 *
	 * The terms are joined with '|', converted in one pass with
	 * autoConvertToAllVariants(), and the per-variant results are joined
	 * and split on '|' again; duplicates are removed with array_unique()
	 * (which preserves the original keys).
	 *
	 * @param $termsArray array of term strings
	 * @return array
	 */
	function convertForSearchResult( $termsArray ) {
		$terms = implode( '|', $termsArray );
		$terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
		$ret = array_unique( explode('|', $terms) );
		return $ret;
	}
}
163