function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
- $markup=array(),
$flags = array(),
$manualLevel = array() ) {
$this->mDescCodeSep = ':';
parent::__construct($langobj, $maincode,
$variants,
$variantfallbacks,
- $markup,
$flags,
$manualLevel);
$names = array(
parent::__construct();
$variants = array('zh','zh-hans','zh-hant','zh-cn','zh-hk','zh-mo','zh-my','zh-sg','zh-tw');
+
$variantfallbacks = array(
'zh' => array('zh-hans','zh-hant','zh-cn','zh-tw','zh-hk','zh-sg','zh-mo','zh-my'),
'zh-hans' => array('zh-cn','zh-sg','zh-my'),
$this->mConverter = new ZhConverter( $this, 'zh',
$variants, $variantfallbacks,
- array(),array(),
+ array(),
$ml);
$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
}
// word segmentation
- function stripForSearch( $string ) {
+ function stripForSearch( $string, $doStrip = true, $autoVariant = 'zh-hans' ) {
wfProfileIn( __METHOD__ );
- // eventually this should be a word segmentation
- // for now just treat each character as a word
- // @fixme only do this for Han characters...
- $t = preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
- " $1", $string);
-
- //always convert to zh-hans before indexing. it should be
- //better to use zh-hans for search, since conversion from
- //Traditional to Simplified is less ambiguous than the
- //other way around
-
- $t = $this->mConverter->autoConvert($t, 'zh-hans');
- $t = parent::stripForSearch( $t );
+ // Convert to $autoVariant (zh-hans by default) before indexing.
+ // zh-hans is the preferred search variant, since conversion from
+ // Traditional to Simplified is less ambiguous than the
+ // other way around.
+ $s = $this->mConverter->autoConvert( $string, $autoVariant );
+ // Hand the converted text to the parent implementation, forwarding $doStrip (original note: LanguageZh_hans::stripForSearch; parent class not visible here)
+ $s = parent::stripForSearch( $s, $doStrip );
wfProfileOut( __METHOD__ );
- return $t;
+ return $s;
}
function convertForSearchResult( $termsArray ) {
$terms = implode( '|', $termsArray );
+ $terms = self::convertDoubleWidth( $terms );
$terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
$ret = array_unique( explode('|', $terms) );
return $ret;