<?php
+
/**
- * @addtogroup Language
- */
+ * Simplified Chinese
+ *
+ * @ingroup Language
+ */
class LanguageZh_hans extends Language {
- function stripForSearch( $string ) {
- # MySQL fulltext index doesn't grok utf-8, so we
- # need to fold cases and convert to hex
- # we also separate characters as "words"
- if( function_exists( 'mb_strtolower' ) ) {
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' U8' . bin2hex( \"$1\" )",
- mb_strtolower( $string ) );
- } else {
- list( , $wikiLowerChars ) = Language::getCaseMaps();
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
- $string );
- }
+
+ /**
+ * Word-break flag for this language class; this implementation
+ * unconditionally returns false.
+ * NOTE(review): presumably this tells callers that text in this
+ * language is not separated by word breaks; confirm against the
+ * parent Language class.
+ *
+ * @return bool Always false.
+ */
+ function hasWordBreaks() {
+ return false;
+ }
+
+ /**
+ * Eventually this should be a word segmentation;
+ * for now just treat each character as a word.
+ * @todo FIXME: Only do this for Han characters...
+ *
+ * Hands $string to self::insertSpace() together with a pattern that
+ * matches one byte in the range 0xC0-0xFF followed by any run of
+ * bytes in the range 0x80-0xBF. The pattern carries no "u" modifier,
+ * so it is applied byte-wise.
+ * NOTE(review): insertSpace() is not defined in this class; presumably
+ * it puts spaces around each match. Confirm in the parent class.
+ *
+ * @param string $string Text to segment.
+ *
+ * @return string Whatever self::insertSpace() returns for $string.
+ */
+ function segmentByWord( $string ) {
+ $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
+ $s = self::insertSpace( $string, $reg );
+ return $s;
+ }
+
+ /**
+ * Prepares a string for search in three steps: runs the parent
+ * class's normalizeForSearch(), trims the result, then passes it
+ * through segmentByWord(). The work is bracketed by
+ * wfProfileIn() and wfProfileOut() calls keyed on this method's name.
+ *
+ * @param string $s Text to normalize.
+ * @return string The normalized, trimmed and segmented text.
+ */
+ function normalizeForSearch( $s ) {
+ wfProfileIn( __METHOD__ );
+
+ // Parent pass first; the original note here reads "double-width roman characters" (handling not visible in this class, confirm in parent)
+ $s = parent::normalizeForSearch( $s );
+ $s = trim( $s );
+ $s = $this->segmentByWord( $s );
+
+ wfProfileOut( __METHOD__ );
+ return $s;
}
}