5 class LanguageYue
extends Language
{
6 function stripForSearch( $string ) {
7 wfProfileIn( __METHOD__
);
12 // Double-width (fullwidth) roman characters: ff00-ff5f ~= 0020-007f; the two patterns below fold U+FF00-U+FF59 to ASCII 0x20-0x79
13 $s = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $s );
14 $s = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $s );
16 if ( $wgSearchType != 'LuceneSearch' ) {
17 // eventually this should be a word segmentation;
18 // for now just treat each character as a word.
19 // Not for LuceneSearch, because LSearch will
20 // split the text to words itself.
21 // @todo Fixme: only do this for Han characters...
23 "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
25 $s = preg_replace( '/ +/', ' ', $s );
30 // Do general case folding and UTF-8 armoring
31 $s = parent
::stripForSearch( $s );
32 wfProfileOut( __METHOD__
);