From 0b2f7f7ea4c0b72398741e0b0fe6f3db2b408136 Mon Sep 17 00:00:00 2001 From: River Tarnell Date: Sun, 11 Mar 2007 04:41:02 +0000 Subject: [PATCH] full-search search for oracle using Oracle Text --- includes/AutoLoader.php | 1 + includes/SearchEngine.php | 2 + includes/SearchOracle.php | 240 +++++++++++++++++++++++++++++++++++++ maintenance/ora/tables.sql | 10 ++ 4 files changed, 253 insertions(+) create mode 100644 includes/SearchOracle.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 16950cf565..d7f684f443 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -148,6 +148,7 @@ function __autoload($className) { 'SearchPostgres' => 'includes/SearchPostgres.php', 'SearchUpdate' => 'includes/SearchUpdate.php', 'SearchUpdateMyISAM' => 'includes/SearchUpdate.php', + 'SearchOracle' => 'includes/SearchOracle.php', 'SiteConfiguration' => 'includes/SiteConfiguration.php', 'SiteStats' => 'includes/SiteStats.php', 'SiteStatsUpdate' => 'includes/SiteStats.php', diff --git a/includes/SearchEngine.php b/includes/SearchEngine.php index c742e78a29..dc10279f4d 100644 --- a/includes/SearchEngine.php +++ b/includes/SearchEngine.php @@ -200,6 +200,8 @@ class SearchEngine { $class = 'SearchMySQL4'; } else if ( $wgDBtype == 'postgres' ) { $class = 'SearchPostgres'; + } else if ( $wgDBtype == 'oracle' ) { + $class = 'SearchOracle'; } else { $class = 'SearchEngineDummy'; } diff --git a/includes/SearchOracle.php b/includes/SearchOracle.php new file mode 100644 index 0000000000..de3ed554a1 --- /dev/null +++ b/includes/SearchOracle.php @@ -0,0 +1,240 @@ + +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+# http://www.gnu.org/copyleft/gpl.html
+
+/**
+ * Search backend for Oracle databases. Searches are run against the
+ * searchindex table through a CONTAINS() predicate.
+ * @addtogroup Search
+ */
+
+class SearchOracle extends SearchEngine {
+    /**
+     * When true, parseQuery() puts a '+' in front of every term that
+     * carries no explicit operator prefix.
+     */
+    var $strictMatching = false;
+
+    /**
+     * Remember the database connection that all search queries go through.
+     *
+     * @param $db database connection object (stored as $this->db)
+     */
+    function __construct($db) {
+        $this->db = $db;
+    }
+
+    /**
+     * Run a search over page text (the si_text column) and wrap the
+     * outcome in a result set.
+     *
+     * @param string $term - Raw search term
+     * @return OracleSearchResultSet
+     * @access public
+     */
+    function searchText( $term ) {
+        # Build the SQL first: getQuery() also fills $this->searchTerms,
+        # which is handed to the result set below.
+        $sql = $this->getQuery( $this->filter( $term ), true );
+        $rawResult = $this->db->query( $sql );
+        $wrapped = $this->db->resultObject( $rawResult );
+        return new OracleSearchResultSet( $wrapped, $this->searchTerms );
+    }
+
+    /**
+     * Perform a title-only search query and return a result set.
+     *
+     * @param string $term - Raw search term
+     * @return OracleSearchResultSet
+     * @access public
+     */
+    function searchTitle($term) {
+        $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false)));
+        # Wrap the rows in the Oracle result set, exactly as searchText()
+        # does; MySQLSearchResultSet is not defined by this file.
+        return new OracleSearchResultSet($resultSet, $this->searchTerms);
+    }
+
+
+    /**
+     * Return a partial WHERE clause to exclude redirects, if so set.
+     * Yields an empty string when $this->showRedirects is true.
+     * @return string
+     * @private
+     */
+    function queryRedirect() {
+        if ($this->showRedirects) {
+            return '';
+        } else {
+            return 'AND page_is_redirect=0';
+        }
+    }
+
+    /**
+     * Return a partial WHERE clause to limit the search to the given
+     * namespaces. An empty namespace list falls back to namespace 0.
+     * @return string
+     * @private
+     */
+    function queryNamespaces() {
+        # The list is pasted into the SQL text verbatim, so force every
+        # entry to an integer before joining.
+        $namespaces = implode(',', array_map('intval', $this->namespaces));
+        if ($namespaces == '') {
+            $namespaces = '0';
+        }
+        return 'AND page_namespace IN (' . $namespaces . ')';
+    }
+
+    /**
+     * Apply the configured limit and offset to a complete query by
+     * passing it through the database layer's limitResult().
+     * @param string $sql query to restrict
+     * @return string the query as returned by limitResult()
+     * @private
+     */
+    function queryLimit($sql) {
+        return $this->db->limitResult($sql, $this->limit, $this->offset);
+    }
+
+    /**
+     * Ranking clause for the query. This implementation adds nothing and
+     * always returns an empty string; subclasses may override it.
+     * @param string $filteredTerm
+     * @param bool $fulltext
+     * @return string
+     * @private
+     */
+    function queryRanking($filteredTerm, $fulltext) {
+        return '';
+    }
+
+    /**
+     * Construct the full SQL query to do the search.
+     * The guts should be constructed in queryMain(); this method appends
+     * the redirect, namespace and ranking clauses and applies the limit.
+     * @param string $filteredTerm
+     * @param bool $fulltext
+     * @return string
+     * @private
+     */
+    function getQuery( $filteredTerm, $fulltext ) {
+        return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' .
+            $this->queryRedirect() . ' ' .
+            $this->queryNamespaces() . ' ' .
+            $this->queryRanking( $filteredTerm, $fulltext ) . ' ');
+    }
+
+
+    /**
+     * Picks which field to index on, depending on what type of query.
+     * @param bool $fulltext true for a text search, false for title-only
+     * @return string 'si_text' or 'si_title'
+     */
+    function getIndexField($fulltext) {
+        return $fulltext ? 'si_text' : 'si_title';
+    }
+
+    /**
+     * Get the base part of the search query.
+     *
+     * Selects page_id, page_namespace and page_title from the page and
+     * searchindex tables joined on page_id=si_page, and ends with the
+     * match condition produced by parseQuery(), so further "AND ..."
+     * clauses can be appended by the caller.
+     *
+     * @param string $filteredTerm
+     * @param bool $fulltext
+     * @return string
+     * @private
+     */
+    function queryMain( $filteredTerm, $fulltext ) {
+        $match = $this->parseQuery($filteredTerm, $fulltext);
+        $page = $this->db->tableName('page');
+        $searchindex = $this->db->tableName('searchindex');
+        return 'SELECT page_id, page_namespace, page_title ' .
+            "FROM $page,$searchindex " .
+            'WHERE page_id=si_page AND ' . $match;
+    }
+
+    /**
+     * Turn the filtered search string into a CONTAINS() condition and
+     * record one highlight regex fragment per term in $this->searchTerms.
+     *
+     * Each term is either a run of legal search characters with an
+     * optional trailing '*', or a double-quoted phrase, optionally
+     * preceded by one of the prefixes - + < > ~.
+     *
+     * NOTE(review): the prefixes and the trailing '*' are passed to
+     * CONTAINS() unchanged and terms are joined with spaces. This looks
+     * like MySQL boolean-mode syntax - confirm that Oracle Text
+     * interprets these operators as intended.
+     *
+     * @param string $filteredText search string, already run through filter()
+     * @param bool $fulltext true to match on si_text, false for si_title
+     * @return string SQL condition of the form " CONTAINS(field, '...', 1) > 0 "
+     */
+    function parseQuery( $filteredText, $fulltext ) {
+        global $wgContLang;
+        $lc = SearchEngine::legalSearchChars();
+        $searchon = '';
+        $this->searchTerms = array();
+
+        # FIXME: This doesn't handle parenthetical expressions.
+        # Groups: 1 = prefix operator, 2 = whole term (word or quoted
+        # phrase), 3 = bare word, 4 = trailing '*'.
+        $m = array();
+        if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+              $filteredText, $m, PREG_SET_ORDER ) ) {
+            foreach( $m as $terms ) {
+                if( $searchon !== '' ) $searchon .= ' ';
+                if( $this->strictMatching && ($terms[1] == '') ) {
+                    $terms[1] = '+';
+                }
+                $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
+                # Explicit check instead of empty(): empty('0') is true, which
+                # sent the word "0" down the quoted-phrase branch and lost
+                # its wildcard suffix in the highlight regex.
+                if( isset( $terms[3] ) && $terms[3] !== '' ) {
+                    $regexp = preg_quote( $terms[3], '/' );
+                    if( !empty( $terms[4] ) ) $regexp .= "[0-9A-Za-z_]+";
+                } else {
+                    # Quoted phrase: highlight it without the quotes
+                    $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
+                }
+                $this->searchTerms[] = $regexp;
+            }
+            wfDebug( "Would search with '$searchon'\n" );
+            wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" );
+        } else {
+            wfDebug( "Can't understand search query '{$filteredText}'\n" );
+        }
+
+        # NOTE(review): when nothing could be parsed $searchon is still ''
+        # here, so the condition below carries an empty query string -
+        # verify how the database handles that.
+        $searchon = $this->db->strencode($searchon);
+        $field = $this->getIndexField( $fulltext );
+        return " CONTAINS($field, '$searchon', 1) > 0 ";
+    }
+
+    /**
+     * Create or update the search index record for the given page.
+     * Title and text should be pre-processed.
+     *
+     * Writes the row through replace() on the master connection, keyed
+     * on si_page.
+     *
+     * @param int $id
+     * @param string $title
+     * @param string $text
+     */
+    function update($id, $title, $text) {
+        $row = array(
+            'si_page' => $id,
+            'si_title' => $title,
+            'si_text' => $text
+        );
+        $uniqueKeys = array('si_page');
+
+        $master = wfGetDB(DB_MASTER);
+        $master->replace('searchindex', $uniqueKeys, $row, 'SearchOracle::update' );
+    }
+
+    /**
+     * Change only the stored title of an existing search index record.
+     * Title should be pre-processed.
+     *
+     * @param int $id
+     * @param string $title
+     */
+    function updateTitle( $id, $title ) {
+        $newValues = array('si_title' => $title);
+        $conditions = array('si_page' => $id);
+
+        $master = wfGetDB(DB_MASTER);
+        $master->update('searchindex',
+            $newValues,
+            $conditions,
+            'SearchOracle::updateTitle',
+            array());
+    }
+}
+
+/**
+ * Result set handed back by SearchOracle: pairs a database result
+ * object with the list of term patterns that produced it.
+ */
+class OracleSearchResultSet extends SearchResultSet {
+    /**
+     * @param $resultSet database result object to iterate over
+     * @param array $terms term patterns collected while parsing the query
+     */
+    function __construct($resultSet, $terms) {
+        $this->mResultSet = $resultSet;
+        $this->mTerms = $terms;
+    }
+
+    /**
+     * @return array the term patterns given to the constructor
+     */
+    function termMatches() {
+        return $this->mTerms;
+    }
+
+    /**
+     * @return the row count reported by the underlying result object
+     */
+    function numRows() {
+        return $this->mResultSet->numRows();
+    }
+
+    /**
+     * Fetch the next row.
+     *
+     * @return mixed a SearchResult wrapping the row, or false once
+     *               fetchObject() returns false
+     */
+    function next() {
+        $row = $this->mResultSet->fetchObject();
+        return ( $row === false ) ? false : new SearchResult($row);
+    }
+}
+
+?>
diff --git a/maintenance/ora/tables.sql b/maintenance/ora/tables.sql
index 4cf034c313..f8ffc55dc1 100644
--- a/maintenance/ora/tables.sql
+++ b/maintenance/ora/tables.sql
@@ -425,3 +425,13 @@ CREATE INDEX job_cmd_namespace_title ON job (job_cmd, job_namespace, job_title);
 -- pf_server CLOB NULL
 --);
 --CREATE UNIQUE INDEX pf_name_server ON profiling (pf_name, pf_server);
+
+CREATE TABLE searchindex (
+    si_page INTEGER UNIQUE NOT NULL,
+    si_title VARCHAR(255) DEFAULT '' NOT NULL,
+    si_text CLOB NOT NULL
+);
+
+
+CREATE INDEX si_title_idx ON searchindex(si_title) INDEXTYPE IS ctxsys.context;
+CREATE INDEX si_text_idx ON searchindex(si_text) INDEXTYPE IS ctxsys.context;
-- 
2.20.1