Merge "Added missing GPLv2 headers in some places."
[lhc/web/wiklou.git] / languages / classes / LanguageZh_hans.php
index 6ab6e9d..671a16e 100644 (file)
@@ -1,39 +1,46 @@
 <?php
 
 /**
+ * Simplified Chinese
+ *
  * @ingroup Language
  */
 class LanguageZh_hans extends Language {
+
+       /**
+        * Reports whether text in this language has word breaks.
+        *
+        * This Simplified Chinese implementation unconditionally answers
+        * false.
+        *
+        * @return bool Always false.
+        */
        function hasWordBreaks() {
                return false;
        }
-       
-       function stripForSearch( $string ) {
-               wfProfileIn( __METHOD__ );
-               global $wgSearchType;
-
-               $s = $string;
 
-               // Double-width roman characters: ff00-ff5f ~= 0020-007f
-               $s = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $s );
-               $s = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $s );
+       /**
+        * Placeholder word segmentation: every multi-byte sequence is
+        * treated as a word of its own. A real word segmenter should
+        * replace this eventually.
+        *
+        * The string and the pattern are handed to self::insertSpace(),
+        * which is expected to pad each match with spaces (TODO confirm
+        * against the parent class).
+        *
+        * @todo FIXME: Only do this for Han characters...
+        *
+        * @param $string string Text to segment
+        *
+        * @return string Whatever self::insertSpace() returns for the text
+        */
+       function segmentByWord( $string ) {
+               // One byte in 0xC0-0xFF followed by any run of bytes in 0x80-0xBF.
+               return self::insertSpace( $string, "/([\\xc0-\\xff][\\x80-\\xbf]*)/" );
+       }
 
-               if ( $wgSearchType != 'LuceneSearch' ) {
-                       // Eventually this should be a word segmentation;
-                       // for now just treat each character as a word.
-                       // Not for LuceneSearch, because LSearch will
-                       // split the text to words itself.
-                       // @todo Fixme: only do this for Han characters...
-                       $s = preg_replace(
-                                       "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
-                                       " $1 ", $s);
-                       $s = preg_replace( '/ +/', ' ', $s );
-               }
+       /**
+        * Normalizes a string for search: runs the parent class's
+        * normalizeForSearch(), trims surrounding whitespace, then passes
+        * the result through segmentByWord(). The whole call is wrapped in
+        * wfProfileIn()/wfProfileOut().
+        *
+        * @param $s string Text to normalize
+        * @return string The normalized, segmented text
+        */
+       function normalizeForSearch( $s ) {
+               wfProfileIn( __METHOD__ );
 
+               // Parent normalization runs first. NOTE(review): presumably this
+               // is where double-width roman characters are folded now that the
+               // inline conversion is gone -- confirm in the parent class.
+               $s = parent::normalizeForSearch( $s );
                $s = trim( $s );
+               // Segmentation happens after trim(); NOTE(review): check whether
+               // segmentByWord() can reintroduce leading/trailing spaces.
+               $s = $this->segmentByWord( $s );
 
-               // Do general case folding and UTF-8 armoring
-               $s = parent::stripForSearch( $s );
                wfProfileOut( __METHOD__ );
                return $s;
        }