47b20e57ef52ec55936f12be866d65c0616c6370
[lhc/web/wiklou.git] / languages / classes / LanguageYue.php
1 <?php
/**
 * Language class for Yue (Cantonese).
 *
 * Only overrides search normalisation; everything else is inherited
 * from Language.
 *
 * @ingroup Language
 */
class LanguageYue extends Language {
	/**
	 * Prepare a string for the search index / for matching search queries.
	 *
	 * Processing steps, in order:
	 *  - fullwidth ASCII variants (U+FF00..U+FF5E) are folded to the
	 *    corresponding ASCII characters (0x20..0x7E);
	 *  - unless $wgSearchType is 'LuceneSearch', every multi-byte UTF-8
	 *    character is surrounded by spaces so that each one is treated as
	 *    a separate word, and runs of spaces are collapsed;
	 *  - the result is trimmed and passed to parent::stripForSearch().
	 *
	 * @param $string String: UTF-8 text to normalise
	 * @return String: result of parent::stripForSearch() on the prepared text
	 */
	function stripForSearch( $string ) {
		wfProfileIn( __METHOD__ );
		global $wgSearchType;

		// Lookup table: UTF-8 byte sequence of a fullwidth form => ASCII char.
		// Built once and reused. It replaces two preg_replace() calls that
		// relied on the /e (eval) modifier, which is deprecated since PHP 5.5
		// and removed in PHP 7.0.
		static $fullWidthMap = null;
		if ( $fullWidthMap === null ) {
			$fullWidthMap = array();

			// U+FF00..U+FF3F (bytes EF BC 80..BF) => 0x20..0x5F
			for ( $byte = 0x80; $byte <= 0xbf; $byte++ ) {
				$fullWidthMap["\xef\xbc" . chr( $byte )] = chr( ( $byte & 0x3f ) + 0x20 );
			}

			// U+FF40..U+FF5E (bytes EF BD 80..9E) => 0x60..0x7E.
			// The previous upper bound of \x99 stopped at U+FF59 ("y") and
			// therefore left fullwidth "z" (U+FF5A) and "{|}~" unfolded.
			// U+FF5F is deliberately excluded: it would map to DEL (0x7F).
			for ( $byte = 0x80; $byte <= 0x9e; $byte++ ) {
				$fullWidthMap["\xef\xbd" . chr( $byte )] = chr( ( $byte & 0x3f ) + 0x60 );
			}
		}

		// All keys are three bytes long and start with the lead byte EF, so a
		// single strtr() pass yields what the two sequential regex passes did.
		$s = strtr( $string, $fullWidthMap );

		if ( $wgSearchType !== 'LuceneSearch' ) {
			// Eventually this should be a word segmentation;
			// for now just treat each character as a word.
			// Not for LuceneSearch, because LSearch will
			// split the text to words itself.
			// @todo Fixme: only do this for Han characters...
			$s = preg_replace( '/([\xc0-\xff][\x80-\xbf]*)/', ' $1 ', $s );
			$s = preg_replace( '/ +/', ' ', $s );
		}

		$s = trim( $s );

		// Do general case folding and UTF-8 armoring
		$s = parent::stripForSearch( $s );

		wfProfileOut( __METHOD__ );
		return $s;
	}
}