From: Brion Vibber Date: Mon, 23 May 2005 08:42:20 +0000 (+0000) Subject: Change the SearchEngine interface around: X-Git-Tag: 1.5.0alpha2~120 X-Git-Url: http://git.cyclocoop.org/%24image?a=commitdiff_plain;h=aa99b80d7f29e9a3d2ebfb623643f285cd74c506;p=lhc%2Fweb%2Fwiklou.git Change the SearchEngine interface around: * Reduce some duplicated code between MySQL 3 and 4 classes * Generalize some things to better support Lucene search plugin --- diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 78705e3579..a8ba248273 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -364,10 +364,11 @@ $wgDBuser = 'wikiuser'; */ $wgDBtype = "mysql"; /** Search type - * "MyISAM" for MySQL native full text search, "Tsearch2" for PostgreSQL - * based search engine + * Leave as null to select the default search engine for the + * selected database type (eg SearchMySQL4), or set to a class + * name to override to a custom search engine. */ -$wgSearchType = "MyISAM"; +$wgSearchType = null; /** Table name prefix */ $wgDBprefix = ''; /** Database schema diff --git a/includes/SearchEngine.php b/includes/SearchEngine.php index 623f12bf92..46e77ec308 100644 --- a/includes/SearchEngine.php +++ b/includes/SearchEngine.php @@ -17,26 +17,28 @@ class SearchEngine { /** * Perform a full text search query and return a result set. + * If title searches are not supported or disabled, return null. * * @param string $term - Raw search term - * @param array $namespaces - List of namespaces to search - * @return ResultWrapper + * @return SearchResultSet * @access public + * @abstract */ function searchText( $term ) { - return $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); + return null; } /** * Perform a title-only search query and return a result set. + * If title searches are not supported or disabled, return null. * * @param string $term - Raw search term - * @param array $namespaces - List of namespaces to search - * @return ResultWrapper + * @return SearchResultSet * @access public + * @abstract */ function searchTitle( $term ) { - return $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); + return null; } /** @@ -142,17 +144,6 @@ class SearchEngine { return $arr; } - /** - * Fetch an array of regular expression fragments for matching - * the search terms as parsed by this engine in a text extract. - * - * @return array - * @access public - */ - function termMatches() { - return $this->searchTerms; - } - /** * Return a 'cleaned up' search string * @@ -163,67 +154,6 @@ class SearchEngine { $lc = $this->legalSearchChars(); return trim( preg_replace( "/[^{$lc}]/", " ", $text ) ); } - - /** - * Return a partial WHERE clause to exclude redirects, if so set - * @return string - * @access private - */ - function queryRedirect() { - if( $this->showRedirects ) { - return 'AND cur_is_redirect=0'; - } else { - return ''; - } - } - - /** - * Return a partial WHERE clause to limit the search to the given namespaces - * @return string - * @access private - */ - function queryNamespaces() { - $namespaces = implode( ',', $this->namespaces ); - if ($namespaces == '') { - $namespaces = '0'; - } - return 'AND page_namespace IN (' . $namespaces . ')'; - } - - /** - * Return a LIMIT clause to limit results on the query. - * @return string - * @access private - */ - function queryLimit() { - return $this->db->limitResult( $this->limit, $this->offset ); - } - - /** - * Does not do anything for generic search engine - * subclasses may define this though - * @return string - * @access private - */ - function queryRanking($filteredTerm,$fulltext) { - return ""; - } - - /** - * Construct the full SQL query to do the search. - * The guts shoulds be constructed in queryMain() - * @param string $filteredTerm - * @param bool $fulltext - * @access private - */ - function getQuery( $filteredTerm, $fulltext ) { - return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . - $this->queryRedirect() . ' ' . - $this->queryNamespaces() . ' ' . - $this->queryRanking($filteredTerm, $fulltext) . ' ' . - $this->queryLimit(); - } - /** * Load up the appropriate search engine class for the currently * active database backend, and return a configured instance. @@ -233,7 +163,9 @@ class SearchEngine { */ function create() { global $wgDBtype, $wgDBmysql4, $wgSearchType; - if( $wgDBtype == 'mysql' ) { + if( $wgSearchType ) { + $class = $wgSearchType; + } elseif( $wgDBtype == 'mysql' ) { if( $wgDBmysql4 ) { $class = 'SearchMySQL4'; require_once( 'SearchMySQL4.php' ); @@ -252,6 +184,125 @@ class SearchEngine { return $search; } + /** + * Create or update the search index record for the given page. + * Title and text should be pre-processed. + * + * @param int $id + * @param string $title + * @param string $text + * @abstract + */ + function update( $id, $title, $text ) { + // no-op + } + + /** + * Update a search index record's title only. + * Title should be pre-processed. + * + * @param int $id + * @param string $title + * @abstract + */ + function updateTitle( $id, $title ) { + // no-op + } +} + +class SearchResultSet { + /** + * Fetch an array of regular expression fragments for matching + * the search terms as parsed by this engine in a text extract. + * + * @return array + * @access public + * @abstract + */ + function termMatches() { + return array(); + } + + function numRows() { + return 0; + } + + /** + * Return true if results are included in this result set. + * @return bool + * @abstract + */ + function hasResults() { + return false; + } + + /** + * Some search modes return a total hit count for the query + * in the entire article database. This may include pages + * in namespaces that would not be matched on the given + * settings. + * + * Return null if no total hits number is supported. + * + * @return int + * @access public + */ + function getTotalHits() { + return null; + } + + /** + * Some search modes return a suggested alternate term if there are + * no exact hits. Returns true if there is one on this set. + * + * @return bool + * @access public + */ + function hasSuggestion() { + return false; + } + + /** + * Some search modes return a suggested alternate term if there are + * no exact hits. Check hasSuggestion() first. + * + * @return string + * @access public + */ + function getSuggestion() { + return ''; + } + + /** + * Fetches next search result, or false. + * @return SearchResult + * @access public + * @abstract + */ + function next() { + return false; + } +} + +class SearchResult { + function SearchResult( $row ) { + $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title ); + } + + /** + * @return Title + * @access public + */ + function getTitle() { + return $this->mTitle; + } + + /** + * @return double or null if not supported + */ + function getScore() { + return null; + } } /** diff --git a/includes/SearchMySQL.php b/includes/SearchMySQL.php new file mode 100644 index 0000000000..84571a596b --- /dev/null +++ b/includes/SearchMySQL.php @@ -0,0 +1,207 @@ + +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * Search engine hook base class for MySQL. + * Specific bits for MySQL 3 and 4 variants are in child classes. + * @package MediaWiki + * @subpackage Search + */ + +/** */ +require_once( 'SearchEngine.php' ); + +class SearchMySQL extends SearchEngine { + /** + * Perform a full text search query and return a result set. + * + * @param string $term - Raw search term + * @return MySQLSearchResultSet + * @access public + */ + function searchText( $term ) { + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); + } + + /** + * Perform a title-only search query and return a result set. + * + * @param string $term - Raw search term + * @return MySQLSearchResultSet + * @access public + */ + function searchTitle( $term ) { + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); + } + + + /** + * Return a partial WHERE clause to exclude redirects, if so set + * @return string + * @access private + */ + function queryRedirect() { + if( $this->showRedirects ) { + return 'AND cur_is_redirect=0'; + } else { + return ''; + } + } + + /** + * Return a partial WHERE clause to limit the search to the given namespaces + * @return string + * @access private + */ + function queryNamespaces() { + $namespaces = implode( ',', $this->namespaces ); + if ($namespaces == '') { + $namespaces = '0'; + } + return 'AND page_namespace IN (' . $namespaces . ')'; + } + + /** + * Return a LIMIT clause to limit results on the query. + * @return string + * @access private + */ + function queryLimit() { + return $this->db->limitResult( $this->limit, $this->offset ); + } + + /** + * Does not do anything for generic search engine + * subclasses may define this though + * @return string + * @access private + */ + function queryRanking( $filteredTerm, $fulltext ) { + return ""; + } + + /** + * Construct the full SQL query to do the search. + * The guts shoulds be constructed in queryMain() + * @param string $filteredTerm + * @param bool $fulltext + * @access private + */ + function getQuery( $filteredTerm, $fulltext ) { + return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . + $this->queryRedirect() . ' ' . + $this->queryNamespaces() . ' ' . + $this->queryRanking( $filteredTerm, $fulltext ) . ' ' . + $this->queryLimit(); + } + + + /** + * Picks which field to index on, depending on what type of query. + * @param bool $fulltext + * @return string + */ + function getIndexField( $fulltext ) { + return $fulltext ? 'si_text' : 'si_title'; + } + + /** + * Get the base part of the search query. + * The actual match syntax will depend on the server + * version; MySQL 3 and MySQL 4 have different capabilities + * in their fulltext search indexes. + * + * @param string $filteredTerm + * @param bool $fulltext + * @return string + * @access private + */ + function queryMain( $filteredTerm, $fulltext ) { + $match = $this->parseQuery( $filteredTerm, $fulltext ); + $page = $this->db->tableName( 'page' ); + $searchindex = $this->db->tableName( 'searchindex' ); + return 'SELECT page_id, page_namespace, page_title ' . + "FROM $page,$searchindex " . + 'WHERE page_id=si_page AND ' . $match; + } + + /** + * Create or update the search index record for the given page. + * Title and text should be pre-processed. + * + * @param int $id + * @param string $title + * @param string $text + */ + function update( $id, $title, $text ) { + $dbw=& wfGetDB( DB_MASTER ); + $dbw->replace( 'searchindex', + array( 'si_page' ), + array( + 'si_page' => $id, + 'si_title' => $title, + 'si_text' => $text + ), 'SearchMySQL4::update' ); + } + + /** + * Update a search index record's title only. + * Title should be pre-processed. + * + * @param int $id + * @param string $title + */ + function updateTitle( $id, $title ) { + $dbw =& wfGetDB( DB_MASTER ); + + $dbw->update( array( 'searchindex' ), + array( 'si_title' => $title ), + array( 'si_page' => $id ), + 'SearchMySQL4::updateTitle', + $dbw->lowPriorityOption() ); + } +} + +class MySQLSearchResultSet extends SearchResultSet { + function MySQLSearchResultSet( $resultSet, $terms ) { + $this->mResultSet = $resultSet; + $this->mTerms = $terms; + } + + function termMatches() { + return $this->mTerms; + } + + function numRows() { + return $this->mResultSet->numRows(); + } + + function next() { + $row = $this->mResultSet->fetchObject(); + if( $row === false ) { + return false; + } else { + return new SearchResult( $row ); + } + } +} + +?> diff --git a/includes/SearchMySQL3.php b/includes/SearchMySQL3.php index a03a554eb7..3636045187 100644 --- a/includes/SearchMySQL3.php +++ b/includes/SearchMySQL3.php @@ -24,21 +24,17 @@ */ /** */ -require_once( 'SearchEngine.php' ); +require_once( 'SearchMySQL.php' ); /** * @package MediaWiki * @subpackage Search */ -class SearchMySQL3 extends SearchEngine { +class SearchMySQL3 extends SearchMySQL { function SearchMySQL3( &$db ) { $this->db =& $db; } - - function getIndexField( $fulltext ) { - return $fulltext ? 'si_text' : 'si_title'; - } - + function parseQuery( $filteredText, $fulltext ) { global $wgDBminWordLen, $wgContLang; @@ -80,38 +76,6 @@ class SearchMySQL3 extends SearchEngine { return '(' . $cond . ' )'; } - function queryMain( $filteredTerm, $fulltext ) { - $match = $this->parseQuery( $filteredTerm, $fulltext ); - $page = $this->db->tableName( 'page' ); - $revision = $this->db->tableName( 'revision' ); - $text = $this->db->tableName( 'text' ); - $searchindex = $this->db->tableName( 'searchindex' ); - return 'SELECT page_id, page_namespace, page_title, old_flags, old_text ' . - "FROM $page,$revision,$text,$searchindex " . - 'WHERE page_id=si_page AND page_latest=rev_id AND rev_text_id=old_id AND ' . $match; - } - - function update( $id, $title, $text ) { - $dbw=& wfGetDB(DB_MASTER); - $dbw->replace( 'searchindex', array(array('si_page')), - array( - 'si_page' => $id, - 'si_title' => $title, - 'si_text' => $text - ), 'SearchMySQL3::update' ); - } - - function updateTitle($id,$title) { - $dbw=& wfGetDB(DB_MASTER); - $lowpri=$dbw->lowPriorityOption(); - $searchindex = $dbw->tableName( 'searchindex' ); - - $sql = "UPDATE $lowpri $searchindex SET si_title='" . - $dbw->strencode( $title ) . - "' WHERE si_page={$id}"; - - $dbw->query( $sql, "SearchMySQL3::updateTitle" ); - } } ?> diff --git a/includes/SearchMySQL4.php b/includes/SearchMySQL4.php index 2e19096a54..76025c4c6a 100644 --- a/includes/SearchMySQL4.php +++ b/includes/SearchMySQL4.php @@ -23,14 +23,13 @@ * @subpackage Search */ -/** */ -require_once( 'SearchEngine.php' ); +require_once( 'SearchMySQL.php' ); /** * @package MediaWiki * @subpackage Search */ -class SearchMySQL4 extends SearchEngine { +class SearchMySQL4 extends SearchMySQL { var $strictMatching = true; /** @todo document */ @@ -38,11 +37,6 @@ class SearchMySQL4 extends SearchEngine { $this->db =& $db; } - /** @todo document */ - function getIndexField( $fulltext ) { - return $fulltext ? 'si_text' : 'si_title'; - } - /** @todo document */ function parseQuery( $filteredText, $fulltext ) { global $wgContLang; @@ -77,41 +71,5 @@ class SearchMySQL4 extends SearchEngine { $field = $this->getIndexField( $fulltext ); return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) "; } - - /** @todo document */ - function queryMain( $filteredTerm, $fulltext ) { - $match = $this->parseQuery( $filteredTerm, $fulltext ); - $page = $this->db->tableName( 'page' ); - $revision = $this->db->tableName( 'revision' ); - $text = $this->db->tableName( 'text' ); - $searchindex = $this->db->tableName( 'searchindex' ); - return 'SELECT page_id, page_namespace, page_title, old_flags, old_text ' . - "FROM $page,$revision,$text,$searchindex " . - 'WHERE page_id=si_page AND page_latest=rev_id AND rev_text_id=old_id AND ' . $match; - } - - /** @todo document */ - function update( $id, $title, $text ) { - $dbw=& wfGetDB(DB_MASTER); - $dbw->replace( 'searchindex', array(array('si_page')), - array( - 'si_page' => $id, - 'si_title' => $title, - 'si_text' => $text - ), 'SearchMySQL4::update' ); - } - - /** @todo document */ - function updateTitle($id,$title) { - $dbw =& wfGetDB(DB_MASTER); - $lowpri = $dbw->lowPriorityOption(); - $searchindex = $dbw->tableName( 'searchindex' ); - - $sql = "UPDATE $lowpri $searchindex SET si_title='" . - $dbw->strencode( $title ) . - "' WHERE si_page={$id}"; - - $dbw->query( $sql, "SearchMySQL4::updateTitle" ); - } } ?> diff --git a/includes/SpecialSearch.php b/includes/SpecialSearch.php index d29f2e30a8..8be8d60ca5 100644 --- a/includes/SpecialSearch.php +++ b/includes/SpecialSearch.php @@ -168,7 +168,8 @@ class SpecialSearch { $titleMatches = $search->searchTitle( $term ); $textMatches = $search->searchText( $term ); - $num = $titleMatches->numRows() + $textMatches->numRows(); + $num = ( $titleMatches ? $titleMatches->numRows() : 0 ) + + ( $textMatches ? $textMatches->numRows() : 0); if ( $num >= $this->limit ) { $top = wfShowingResults( $this->offset, $this->limit ); } else { @@ -185,23 +186,23 @@ class SpecialSearch { $wgOut->addHTML( "
{$prevnext}\n" ); } - global $wgContLang; - $tm = $wgContLang->convertForSearchResult( $search->termMatches() ); - $terms = implode( '|', $tm ); - - if( $titleMatches->numRows() ) { - $wgOut->addWikiText( '==' . wfMsg( 'titlematches' ) . "==\n" ); - $wgOut->addHTML( $this->showMatches( $titleMatches, $terms ) ); - } else { - $wgOut->addWikiText( '==' . wfMsg( 'notitlematches' ) . "==\n" ); + if( $titleMatches ) { + if( $titleMatches->numRows() ) { + $wgOut->addWikiText( '==' . wfMsg( 'titlematches' ) . "==\n" ); + $wgOut->addHTML( $this->showMatches( $titleMatches ) ); + } else { + $wgOut->addWikiText( '==' . wfMsg( 'notitlematches' ) . "==\n" ); + } } - if( $textMatches->numRows() ) { - $wgOut->addWikiText( '==' . wfMsg( 'textmatches' ) . "==\n" ); - $wgOut->addHTML( $this->showMatches( $textMatches, $terms ) ); - } elseif( $num == 0 ) { - # Don't show the 'no text matches' if we received title matches - $wgOut->addWikiText( '==' . wfMsg( 'notextmatches' ) . "==\n" ); + if( $textMatches ) { + if( $textMatches->numRows() ) { + $wgOut->addWikiText( '==' . wfMsg( 'textmatches' ) . "==\n" ); + $wgOut->addHTML( $this->showMatches( $textMatches ) ); + } elseif( $num == 0 ) { + # Don't show the 'no text matches' if we received title matches + $wgOut->addWikiText( '==' . wfMsg( 'notextmatches' ) . "==\n" ); + } } if ( $num == 0 ) { @@ -280,19 +281,23 @@ class SpecialSearch { } /** - * @param ResultWrapper $matches + * @param SearchResultSet $matches * @param string $terms partial regexp for highlighting terms */ - function showMatches( &$matches, $terms ) { + function showMatches( &$matches ) { $fname = 'SpecialSearch::showMatches'; wfProfileIn( $fname ); + global $wgContLang; + $tm = $wgContLang->convertForSearchResult( $matches->termMatches() ); + $terms = implode( '|', $tm ); + global $wgOut; $off = $this->offset + 1; $out = "
    \n"; - while( $row = $matches->fetchObject() ) { - $out .= $this->showHit( $row, $terms ); + while( $result = $matches->next() ) { + $out .= $this->showHit( $result, $terms ); } $out .= "
\n"; @@ -305,15 +310,15 @@ class SpecialSearch { /** * Format a single hit result - * @param object $row + * @param SearchResult $result * @param string $terms partial regexp for highlighting terms */ - function showHit( $row, $terms ) { + function showHit( $result, $terms ) { $fname = 'SpecialSearch::showHit'; wfProfileIn( $fname ); global $wgUser, $wgContLang; - $t = Title::makeTitle( $row->page_namespace, $row->page_title ); + $t = $result->getTitle(); if( is_null( $t ) ) { wfProfileOut( $fname ); return "\n"; @@ -325,8 +330,9 @@ class SpecialSearch { $contextchars = $wgUser->getOption( 'contextchars' ); if ( '' == $contextchars ) { $contextchars = 50; } - $link = $sk->makeKnownLinkObj( $t, '' ); - $text = Revision::getRevisionText( $row ); + $link = $sk->makeKnownLinkObj( $t ); + $revision = Revision::newFromTitle( $t ); + $text = $revision->getText(); $size = wfMsg( 'nbytes', strlen( $text ) ); $lines = explode( "\n", $text );