return "SearchMySQL";
}
- /**
- * When overridden in derived class, performs database-specific conversions
- * on text to be used for searching or updating search index.
- * Default implementation does nothing (simply returns $string).
- *
- * @param $string string: String to strip
- * @return string
- */
- public function stripForSearch( $string ) {
- return $string;
- }
-
/**
* Allow or deny "big selects" for this session only. This is done by setting
* the sql_big_selects session variable.
* @see Database
*/
class DatabaseMysql extends DatabaseBase {
- static $mMinSearchLength;
-
function getType() {
return 'mysql';
}
public function unlockTables( $method ) {
$this->query( "UNLOCK TABLES", $method );
}
-
- /**
- * Converts some characters for MySQL's indexing to grok it correctly,
- * and pads short words to overcome limitations.
- */
- function stripForSearch( $string ) {
- global $wgContLang;
-
- wfProfileIn( __METHOD__ );
-
- // MySQL fulltext index doesn't grok utf-8, so we
- // need to fold cases and convert to hex
- $out = preg_replace_callback(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
- array( $this, 'stripForSearchCallback' ),
- $wgContLang->lc( $string ) );
-
- // And to add insult to injury, the default indexing
- // ignores short words... Pad them so we can pass them
- // through without reconfiguring the server...
- $minLength = $this->minSearchLength();
- if( $minLength > 1 ) {
- $n = $minLength - 1;
- $out = preg_replace(
- "/\b(\w{1,$n})\b/",
- "$1u800",
- $out );
- }
-
- // Periods within things like hostnames and IP addresses
- // are also important -- we want a search for "example.com"
- // or "192.168.1.1" to work sanely.
- //
- // MySQL's search seems to ignore them, so you'd match on
- // "example.wikipedia.com" and "192.168.83.1" as well.
- $out = preg_replace(
- "/(\w)\.(\w|\*)/u",
- "$1u82e$2",
- $out );
-
- wfProfileOut( __METHOD__ );
-
- return $out;
- }
-
- /**
- * Armor a case-folded UTF-8 string to get through MySQL's
- * fulltext search without being mucked up by funny charset
- * settings or anything else of the sort.
- */
- protected function stripForSearchCallback( $matches ) {
- return 'u8' . bin2hex( $matches[1] );
- }
-
- /**
- * Check MySQL server's ft_min_word_len setting so we know
- * if we need to pad short words...
- *
- * @return int
- */
- protected function minSearchLength() {
- if( is_null( self::$mMinSearchLength ) ) {
- $sql = "show global variables like 'ft\\_min\\_word\\_len'";
-
- // Even though this query is pretty fast, let's not overload the master
- $dbr = wfGetDB( DB_SLAVE );
- $result = $dbr->query( $sql );
- $row = $result->fetchObject();
- $result->free();
-
- if( $row && $row->Variable_name == 'ft_min_word_len' ) {
- self::$mMinSearchLength = intval( $row->Value );
- } else {
- self::$mMinSearchLength = 0;
- }
- }
- return self::$mMinSearchLength;
- }
public function setBigSelects( $value = true ) {
if ( $value === 'default' ) {
return true;
}
+	/**
+	 * Extension point for engine-specific text normalization, applied to text
+	 * that is about to be searched for or written to the search index.
+	 * This base implementation is the identity: the input is handed back
+	 * untouched. Derived classes may override it.
+	 *
+	 * @param $string string: Text to normalize
+	 * @return string The same text, unmodified
+	 */
+	public function normalizeText( $string ) {
+		return $string;
+	}
+
/**
* Transform search term in cases when parts of the query came as different GET params (when supported)
* e.g. for prefix queries: search=test&prefix=Main_Page/Archive -> test prefix:Main Page/Archive
*/
class SearchMySQL extends SearchEngine {
var $strictMatching = true;
+ static $mMinSearchLength;
/** @todo document */
function __construct( $db ) {
if( count( $strippedVariants) > 1 )
$searchon .= '(';
foreach( $strippedVariants as $stripped ) {
+ $stripped = $this->normalizeText( $stripped );
if( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
// Hack for Chinese: we need to toss in quotes for
// multiple-character phrases since stripForSearch()
array( 'si_page' ),
array(
'si_page' => $id,
- 'si_title' => $title,
- 'si_text' => $text
+ 'si_title' => $this->normalizeText( $title ),
+ 'si_text' => $this->normalizeText( $text )
), __METHOD__ );
}
$dbw = wfGetDB( DB_MASTER );
$dbw->update( 'searchindex',
- array( 'si_title' => $title ),
+ array( 'si_title' => $this->normalizeText( $title ) ),
array( 'si_page' => $id ),
__METHOD__,
array( $dbw->lowPriorityOption() ) );
}
+
+	/**
+	 * Converts some characters for MySQL's indexing to grok it correctly,
+	 * and pads short words to overcome limitations.
+	 *
+	 * Steps, in order:
+	 *  - lower-case the text via $wgContLang and replace every multi-byte
+	 *    UTF-8 sequence with 'u8' followed by the hex form of its bytes;
+	 *  - append 'u800' to words shorter than the value reported by
+	 *    minSearchLength();
+	 *  - replace each period that sits between a word character and a word
+	 *    character (or '*') with 'u82e'.
+	 *
+	 * @param $string string: String to process
+	 * @return string
+	 */
+	function normalizeText( $string ) {
+		global $wgContLang;
+
+		wfProfileIn( __METHOD__ );
+
+		// MySQL fulltext index doesn't grok utf-8, so we
+		// need to fold cases and convert to hex
+		$out = preg_replace_callback(
+			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
+			array( $this, 'stripForSearchCallback' ),
+			$wgContLang->lc( $string ) );
+
+		// And to add insult to injury, the default indexing
+		// ignores short words... Pad them so we can pass them
+		// through without reconfiguring the server...
+		$minLength = $this->minSearchLength();
+		if( $minLength > 1 ) {
+			$n = $minLength - 1;
+			$out = preg_replace(
+				"/\b(\w{1,$n})\b/",
+				"$1u800",
+				$out );
+		}
+
+		// Periods within things like hostnames and IP addresses
+		// are also important -- we want a search for "example.com"
+		// or "192.168.1.1" to work sanely.
+		//
+		// MySQL's search seems to ignore them, so you'd match on
+		// "example.wikipedia.com" and "192.168.83.1" as well.
+		//
+		// The character after the period is only looked at (lookahead), not
+		// consumed, so it can also serve as the character *before* the next
+		// period. Otherwise "a.b.c" or an unpadded "192.168.1.1" would keep
+		// every second period.
+		$armored = preg_replace(
+			"/(\w)\.(?=\w|\*)/u",
+			"$1u82e",
+			$out );
+		// preg_replace() yields null on failure (for instance malformed UTF-8
+		// under the /u modifier); keep the text we have instead of returning null.
+		if( $armored !== null ) {
+			$out = $armored;
+		}
+
+		wfProfileOut( __METHOD__ );
+
+		return $out;
+	}
+
+	/**
+	 * preg_replace_callback() handler: turns the captured byte sequence into
+	 * plain ASCII so it can pass through MySQL's fulltext search regardless
+	 * of charset settings.
+	 *
+	 * @param $matches array: Match data; element 1 holds the captured bytes
+	 * @return string 'u8' followed by the hexadecimal form of the captured bytes
+	 */
+	protected function stripForSearchCallback( $matches ) {
+		$hex = bin2hex( $matches[1] );
+		return 'u8' . $hex;
+	}
+
+	/**
+	 * Reports the MySQL server's ft_min_word_len setting, which decides
+	 * whether short words have to be padded.
+	 *
+	 * The server is queried only while the static cache is still null; the
+	 * answer is stored in self::$mMinSearchLength and reused afterwards.
+	 * When the variable cannot be read, 0 is cached and returned.
+	 *
+	 * @return int
+	 */
+	protected function minSearchLength() {
+		// Already looked up: serve the cached value.
+		if( self::$mMinSearchLength !== null ) {
+			return self::$mMinSearchLength;
+		}
+
+		$sql = "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'";
+
+		$dbr = wfGetDB( DB_SLAVE );
+		$res = $dbr->query( $sql );
+		$row = $res->fetchObject();
+		$res->free();
+
+		// Only trust the row if it really is the variable we asked for.
+		$found = $row && $row->Variable_name == 'ft_min_word_len';
+		self::$mMinSearchLength = $found ? intval( $row->Value ) : 0;
+
+		return self::$mMinSearchLength;
+	}
}
/**
* @return String
*/
function stripForSearch( $string, $doStrip = true ) {
- if ( !$doStrip ) {
- return $string;
- }
-
- $dbr = wfGetDB( DB_SLAVE );
- return $dbr->stripForSearch( $string );
+ return $string; // $doStrip is no longer consulted: the text is returned as-is either way
}
/**