<?php
+/**
+ * Simplified Chinese specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
/**
+ * Simplified Chinese
+ *
* @ingroup Language
*/
class LanguageZh_hans extends Language {
+
+ /**
+ * Report whether words in this language are separated by breaks.
+ *
+ * This implementation unconditionally answers "no".
+ *
+ * @return bool Always false.
+ */
function hasWordBreaks() {
return false;
}
-
- function stripForSearch( $string ) {
- // Eventually this should be a word segmentation;
- // for now just treat each character as a word.
- //
- // Note we put a space on both sides to cover cases
- // where a number or Latin char follows a Han char.
- //
- // @todo Fixme: only do this for Han characters...
- $t = preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
- " $1 ", $string);
- $t = preg_replace( '/ +/', ' ', $t );
- $t = trim( $t );
- return parent::stripForSearch( $t );
+
+ /**
+ * Split a string into "words" for search purposes.
+ *
+ * Eventually this should be a real word segmentation;
+ * for now each multi-byte character is treated as a word.
+ *
+ * The pattern is applied byte-wise (no "u" modifier): it matches one
+ * byte in the range 0xC0-0xFF followed by any number of bytes in the
+ * range 0x80-0xBF, i.e. a UTF-8 lead byte plus its continuation bytes.
+ * It therefore matches every multi-byte character, not only Han ones.
+ *
+ * The string and the pattern are handed to self::insertSpace(), which
+ * is not defined in this file; it is expected to surround each match
+ * with spaces -- confirm against the parent class.
+ *
+ * @todo FIXME: Only do this for Han characters...
+ *
+ * @param string $string Text to segment.
+ *
+ * @return string Whatever self::insertSpace() returns for the input.
+ */
+ function segmentByWord( $string ) {
+ $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
+ $s = self::insertSpace( $string, $reg );
+ return $s;
+ }
+
+ /**
+ * Normalize a string for search.
+ *
+ * Runs the parent class normalization, trims the result, and then
+ * passes it through segmentByWord(). Note that the trim happens before
+ * segmentation, not after it. The whole call is bracketed by
+ * wfProfileIn()/wfProfileOut() under this method's name.
+ *
+ * @param string $s Text to normalize.
+ * @return string The normalized, segmented text.
+ */
+ function normalizeForSearch( $s ) {
+ wfProfileIn( __METHOD__ );
+
+ // Parent normalization first; presumably this is what handles double-width roman characters (confirm in Language::normalizeForSearch())
+ $s = parent::normalizeForSearch( $s );
+ $s = trim( $s );
+ $s = $this->segmentByWord( $s );
+
+ wfProfileOut( __METHOD__ );
+ return $s;
}
}