From: Chad Horohoe Date: Tue, 17 Jun 2008 20:58:32 +0000 (+0000) Subject: More ~/includes cleanup. Moving all the Search*.php files to ~/includes/search. X-Git-Tag: 1.31.0-rc.0~46968 X-Git-Url: http://git.cyclocoop.org/%28?a=commitdiff_plain;h=61b77c1b0bb9114e5f5b51dcba4f1e8de7e40d64;p=lhc%2Fweb%2Fwiklou.git More ~/includes cleanup. Moving all the Search*.php files to ~/includes/search. --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 27ebd521b6..1754a59a0a 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -126,10 +126,8 @@ class AutoLoader { 'MimeMagic' => 'includes/MimeMagic.php', 'MWException' => 'includes/Exception.php', 'MWNamespace' => 'includes/Namespace.php', - 'MySQLSearchResultSet' => 'includes/SearchMySQL.php', 'Namespace' => 'includes/NamespaceCompat.php', // Compat 'OldChangesList' => 'includes/ChangesList.php', - 'OracleSearchResultSet' => 'includes/SearchOracle.php', 'OutputPage' => 'includes/OutputPage.php', 'PageHistory' => 'includes/PageHistory.php', 'PageHistoryPager' => 'includes/PageHistory.php', @@ -137,8 +135,6 @@ class AutoLoader { 'Pager' => 'includes/Pager.php', 'PasswordError' => 'includes/User.php', 'PatrolLog' => 'includes/PatrolLog.php', - 'PostgresSearchResult' => 'includes/SearchPostgres.php', - 'PostgresSearchResultSet' => 'includes/SearchPostgres.php', 'PrefixSearch' => 'includes/PrefixSearch.php', 'Profiler' => 'includes/Profiler.php', 'ProfilerSimple' => 'includes/ProfilerSimple.php', @@ -158,18 +154,6 @@ class AutoLoader { 'Revision' => 'includes/Revision.php', 'RSSFeed' => 'includes/Feed.php', 'Sanitizer' => 'includes/Sanitizer.php', - 'SearchEngineDummy' => 'includes/SearchEngine.php', - 'SearchEngine' => 'includes/SearchEngine.php', - 'SearchHighlighter' => 'includes/SearchEngine.php', - 'SearchMySQL4' => 'includes/SearchMySQL4.php', - 'SearchMySQL' => 'includes/SearchMySQL.php', - 'SearchOracle' => 'includes/SearchOracle.php', - 'SearchPostgres' => 'includes/SearchPostgres.php', - 
'SearchResult' => 'includes/SearchEngine.php', - 'SearchResultSet' => 'includes/SearchEngine.php', - 'SearchResultTooMany' => 'includes/SearchEngine.php', - 'SearchUpdate' => 'includes/SearchUpdate.php', - 'SearchUpdateMyISAM' => 'includes/SearchUpdate.php', 'SiteConfiguration' => 'includes/SiteConfiguration.php', 'SiteStats' => 'includes/SiteStats.php', 'SiteStatsUpdate' => 'includes/SiteStats.php', @@ -376,6 +360,24 @@ class AutoLoader { 'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php', 'StripState' => 'includes/parser/Parser.php', + # includes/search + 'OracleSearchResultSet' => 'includes/search/Oracle.php', + 'PostgresSearchResult' => 'includes/search/Postgres.php', + 'PostgresSearchResultSet' => 'includes/search/Postgres.php', + 'MySQLSearchResultSet' => 'includes/search/MySQL.php', + 'SearchEngineDummy' => 'includes/search/Engine.php', + 'SearchEngine' => 'includes/search/Engine.php', + 'SearchHighlighter' => 'includes/search/Engine.php', + 'SearchMySQL4' => 'includes/search/MySQL4.php', + 'SearchMySQL' => 'includes/search/MySQL.php', + 'SearchOracle' => 'includes/search/Oracle.php', + 'SearchPostgres' => 'includes/search/Postgres.php', + 'SearchResult' => 'includes/search/Engine.php', + 'SearchResultSet' => 'includes/search/Engine.php', + 'SearchResultTooMany' => 'includes/search/Engine.php', + 'SearchUpdate' => 'includes/search/Update.php', + 'SearchUpdateMyISAM' => 'includes/search/Update.php', + # includes/specials 'AncientPagesPage' => 'includes/specials/Ancientpages.php', 'BrokenRedirectsPage' => 'includes/specials/BrokenRedirects.php', diff --git a/includes/SearchEngine.php b/includes/SearchEngine.php deleted file mode 100644 index 04fa906a00..0000000000 --- a/includes/SearchEngine.php +++ /dev/null @@ -1,1154 +0,0 @@ -hasVariants()){ - $allSearchTerms = array_merge($allSearchTerms,$wgContLang->convertLinkToAllVariants($searchterm)); - } - - foreach($allSearchTerms as $term){ - - # Exact match? No need to look further. 
- $title = Title::newFromText( $term ); - if (is_null($title)) - return NULL; - - if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal() - || $title->exists() ) { - return $title; - } - - # Now try all lower case (i.e. first letter capitalized) - # - $title = Title::newFromText( $wgContLang->lc( $term ) ); - if ( $title && $title->exists() ) { - return $title; - } - - # Now try capitalized string - # - $title = Title::newFromText( $wgContLang->ucwords( $term ) ); - if ( $title && $title->exists() ) { - return $title; - } - - # Now try all upper case - # - $title = Title::newFromText( $wgContLang->uc( $term ) ); - if ( $title && $title->exists() ) { - return $title; - } - - # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc - $title = Title::newFromText( $wgContLang->ucwordbreaks($term) ); - if ( $title && $title->exists() ) { - return $title; - } - - global $wgCapitalLinks, $wgContLang; - if( !$wgCapitalLinks ) { - // Catch differs-by-first-letter-case-only - $title = Title::newFromText( $wgContLang->ucfirst( $term ) ); - if ( $title && $title->exists() ) { - return $title; - } - $title = Title::newFromText( $wgContLang->lcfirst( $term ) ); - if ( $title && $title->exists() ) { - return $title; - } - } - - // Give hooks a chance at better match variants - $title = null; - if( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) { - return $title; - } - } - - $title = Title::newFromText( $searchterm ); - - # Entering an IP address goes to the contributions page - if ( ( $title->getNamespace() == NS_USER && User::isIP($title->getText() ) ) - || User::isIP( trim( $searchterm ) ) ) { - return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() ); - } - - - # Entering a user goes to the user page whether it's there or not - if ( $title->getNamespace() == NS_USER ) { - return $title; - } - - # Go to images that exist even if there's no local page. 
- # There may have been a funny upload, or it may be on a shared - # file repository such as Wikimedia Commons. - if( $title->getNamespace() == NS_IMAGE ) { - $image = wfFindFile( $title ); - if( $image ) { - return $title; - } - } - - # MediaWiki namespace? Page may be "implied" if not customized. - # Just return it, with caps forced as the message system likes it. - if( $title->getNamespace() == NS_MEDIAWIKI ) { - return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) ); - } - - # Quoted term? Try without the quotes... - $matches = array(); - if( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) { - return SearchEngine::getNearMatch( $matches[1] ); - } - - return NULL; - } - - public static function legalSearchChars() { - return "A-Za-z_'0-9\\x80-\\xFF\\-"; - } - - /** - * Set the maximum number of results to return - * and how many to skip before returning the first. - * - * @param int $limit - * @param int $offset - * @access public - */ - function setLimitOffset( $limit, $offset = 0 ) { - $this->limit = intval( $limit ); - $this->offset = intval( $offset ); - } - - /** - * Set which namespaces the search should include. - * Give an array of namespace index numbers. 
- * - * @param array $namespaces - * @access public - */ - function setNamespaces( $namespaces ) { - $this->namespaces = $namespaces; - } - - /** - * Parse some common prefixes: all (search everything) - * or namespace names - * - * @param string $query - */ - function replacePrefixes( $query ){ - global $wgContLang; - - if( strpos($query,':') === false ) - return $query; // nothing to do - - $parsed = $query; - $allkeyword = wfMsgForContent('searchall').":"; - if( strncmp($query, $allkeyword, strlen($allkeyword)) == 0 ){ - $this->namespaces = null; - $parsed = substr($query,strlen($allkeyword)); - } else if( strpos($query,':') !== false ) { - $prefix = substr($query,0,strpos($query,':')); - $index = $wgContLang->getNsIndex($prefix); - if($index !== false){ - $this->namespaces = array($index); - $parsed = substr($query,strlen($prefix)+1); - } - } - if(trim($parsed) == '') - return $query; // prefix was the whole query - - return $parsed; - } - - /** - * Make a list of searchable namespaces and their canonical names. - * @return array - */ - public static function searchableNamespaces() { - global $wgContLang; - $arr = array(); - foreach( $wgContLang->getNamespaces() as $ns => $name ) { - if( $ns >= NS_MAIN ) { - $arr[$ns] = $name; - } - } - return $arr; - } - - /** - * Extract default namespaces to search from the given user's - * settings, returning a list of index numbers. - * - * @param User $user - * @return array - * @static - */ - public static function userNamespaces( &$user ) { - $arr = array(); - foreach( SearchEngine::searchableNamespaces() as $ns => $name ) { - if( $user->getOption( 'searchNs' . 
$ns ) ) { - $arr[] = $ns; - } - } - return $arr; - } - - /** - * Find snippet highlight settings for a given user - * - * @param User $user - * @return array contextlines, contextchars - * @static - */ - public static function userHighlightPrefs( &$user ){ - //$contextlines = $user->getOption( 'contextlines', 5 ); - //$contextchars = $user->getOption( 'contextchars', 50 ); - $contextlines = 2; // Hardcode this. Old defaults sucked. :) - $contextchars = 75; // same as above.... :P - return array($contextlines, $contextchars); - } - - /** - * An array of namespaces indexes to be searched by default - * - * @return array - * @static - */ - public static function defaultNamespaces(){ - global $wgNamespacesToBeSearchedDefault; - - return array_keys($wgNamespacesToBeSearchedDefault, true); - } - - /** - * Return a 'cleaned up' search string - * - * @return string - * @access public - */ - function filter( $text ) { - $lc = $this->legalSearchChars(); - return trim( preg_replace( "/[^{$lc}]/", " ", $text ) ); - } - /** - * Load up the appropriate search engine class for the currently - * active database backend, and return a configured instance. - * - * @return SearchEngine - */ - public static function create() { - global $wgDBtype, $wgSearchType; - if( $wgSearchType ) { - $class = $wgSearchType; - } elseif( $wgDBtype == 'mysql' ) { - $class = 'SearchMySQL'; - } else if ( $wgDBtype == 'postgres' ) { - $class = 'SearchPostgres'; - } else if ( $wgDBtype == 'oracle' ) { - $class = 'SearchOracle'; - } else { - $class = 'SearchEngineDummy'; - } - $search = new $class( wfGetDB( DB_SLAVE ) ); - $search->setLimitOffset(0,0); - return $search; - } - - /** - * Create or update the search index record for the given page. - * Title and text should be pre-processed. - * - * @param int $id - * @param string $title - * @param string $text - * @abstract - */ - function update( $id, $title, $text ) { - // no-op - } - - /** - * Update a search index record's title only. 
- * Title should be pre-processed. - * - * @param int $id - * @param string $title - * @abstract - */ - function updateTitle( $id, $title ) { - // no-op - } - - /** - * Get OpenSearch suggestion template - * - * @return string - * @static - */ - public static function getOpenSearchTemplate() { - global $wgOpenSearchTemplate, $wgServer, $wgScriptPath; - if($wgOpenSearchTemplate) - return $wgOpenSearchTemplate; - else{ - $ns = implode(',',SearchEngine::defaultNamespaces()); - if(!$ns) $ns = "0"; - return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace='.$ns; - } - } - - /** - * Get internal MediaWiki Suggest template - * - * @return string - * @static - */ - public static function getMWSuggestTemplate() { - global $wgMWSuggestTemplate, $wgServer, $wgScriptPath; - if($wgMWSuggestTemplate) - return $wgMWSuggestTemplate; - else - return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}'; - } -} - -/** - * @ingroup Search - */ -class SearchResultSet { - /** - * Fetch an array of regular expression fragments for matching - * the search terms as parsed by this engine in a text extract. - * - * @return array - * @access public - * @abstract - */ - function termMatches() { - return array(); - } - - function numRows() { - return 0; - } - - /** - * Return true if results are included in this result set. - * @return bool - * @abstract - */ - function hasResults() { - return false; - } - - /** - * Some search modes return a total hit count for the query - * in the entire article database. This may include pages - * in namespaces that would not be matched on the given - * settings. - * - * Return null if no total hits number is supported. - * - * @return int - * @access public - */ - function getTotalHits() { - return null; - } - - /** - * Some search modes return a suggested alternate term if there are - * no exact hits. Returns true if there is one on this set. 
- * - * @return bool - * @access public - */ - function hasSuggestion() { - return false; - } - - /** - * @return string suggested query, null if none - */ - function getSuggestionQuery(){ - return null; - } - - /** - * @return string highlighted suggested query, '' if none - */ - function getSuggestionSnippet(){ - return ''; - } - - /** - * Return information about how and from where the results were fetched, - * should be useful for diagnostics and debugging - * - * @return string - */ - function getInfo() { - return null; - } - - /** - * Return a result set of hits on other (multiple) wikis associated with this one - * - * @return SearchResultSet - */ - function getInterwikiResults() { - return null; - } - - /** - * Check if there are results on other wikis - * - * @return boolean - */ - function hasInterwikiResults() { - return $this->getInterwikiResults() != null; - } - - - /** - * Fetches next search result, or false. - * @return SearchResult - * @access public - * @abstract - */ - function next() { - return false; - } - - /** - * Frees the result set, if applicable. - * @ access public - */ - function free() { - // ... 
- } -} - - -/** - * @ingroup Search - */ -class SearchResultTooMany { - ## Some search engines may bail out if too many matches are found -} - - -/** - * @ingroup Search - */ -class SearchResult { - var $mRevision = null; - - function SearchResult( $row ) { - $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title ); - if( !is_null($this->mTitle) ) - $this->mRevision = Revision::newFromTitle( $this->mTitle ); - } - - /** - * Check if this is result points to an invalid title - * - * @return boolean - * @access public - */ - function isBrokenTitle(){ - if( is_null($this->mTitle) ) - return true; - return false; - } - - /** - * Check if target page is missing, happens when index is out of date - * - * @return boolean - * @access public - */ - function isMissingRevision(){ - if( !$this->mRevision ) - return true; - return false; - } - - /** - * @return Title - * @access public - */ - function getTitle() { - return $this->mTitle; - } - - /** - * @return double or null if not supported - */ - function getScore() { - return null; - } - - /** - * Lazy initialization of article text from DB - */ - protected function initText(){ - if( !isset($this->mText) ){ - $this->mText = $this->mRevision->getText(); - } - } - - /** - * @param array $terms terms to highlight - * @return string highlighted text snippet, null (and not '') if not supported - */ - function getTextSnippet($terms){ - global $wgUser, $wgAdvancedSearchHighlighting; - $this->initText(); - list($contextlines,$contextchars) = SearchEngine::userHighlightPrefs($wgUser); - $h = new SearchHighlighter(); - if( $wgAdvancedSearchHighlighting ) - return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars ); - else - return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars ); - } - - /** - * @param array $terms terms to highlight - * @return string highlighted title, '' if not supported - */ - function getTitleSnippet($terms){ - return ''; - } - - /** - * @param array 
$terms terms to highlight - * @return string highlighted redirect name (redirect to this page), '' if none or not supported - */ - function getRedirectSnippet($terms){ - return ''; - } - - /** - * @return Title object for the redirect to this page, null if none or not supported - */ - function getRedirectTitle(){ - return null; - } - - /** - * @return string highlighted relevant section name, null if none or not supported - */ - function getSectionSnippet(){ - return ''; - } - - /** - * @return Title object (pagename+fragment) for the section, null if none or not supported - */ - function getSectionTitle(){ - return null; - } - - /** - * @return string timestamp - */ - function getTimestamp(){ - return $this->mRevision->getTimestamp(); - } - - /** - * @return int number of words - */ - function getWordCount(){ - $this->initText(); - return str_word_count( $this->mText ); - } - - /** - * @return int size in bytes - */ - function getByteSize(){ - $this->initText(); - return strlen( $this->mText ); - } - - /** - * @return boolean if hit has related articles - */ - function hasRelated(){ - return false; - } - - /** - * @return interwiki prefix of the title (return iw even if title is broken) - */ - function getInterwikiPrefix(){ - return ''; - } -} - -/** - * Highlight bits of wikitext - * - * @ingroup Search - */ -class SearchHighlighter { - var $mCleanWikitext = true; - - function SearchHighlighter($cleanupWikitext = true){ - $this->mCleanWikitext = $cleanupWikitext; - } - - /** - * Default implementation of wikitext highlighting - * - * @param string $text - * @param array $terms Terms to highlight (unescaped) - * @param int $contextlines - * @param int $contextchars - * @return string - */ - public function highlightText( $text, $terms, $contextlines, $contextchars ) { - global $wgLang, $wgContLang; - global $wgSearchHighlightBoundaries; - $fname = __METHOD__; - - if($text == '') - return ''; - - // spli text into text + templates/links/tables - $spat = 
"/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)"; - // first capture group is for detecting nested templates/links/tables/references - $endPatterns = array( - 1 => '/(\{\{)|(\}\})/', // template - 2 => '/(\[\[)|(\]\])/', // image - 3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table - - // FIXME: this should prolly be a hook or something - if(function_exists('wfCite')){ - $spat .= '|()'; // references via cite extension - $endPatterns[4] = '/()|(<\/ref>)/'; - } - $spat .= '/'; - $textExt = array(); // text extracts - $otherExt = array(); // other extracts - wfProfileIn( "$fname-split" ); - $start = 0; - $textLen = strlen($text); - $count = 0; // sequence number to maintain ordering - while( $start < $textLen ){ - // find start of template/image/table - if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){ - $epat = ''; - foreach($matches as $key => $val){ - if($key > 0 && $val[1] != -1){ - if($key == 2){ - // see if this is an image link - $ns = substr($val[0],2,-1); - if( $wgContLang->getNsIndex($ns) != NS_IMAGE ) - break; - - } - $epat = $endPatterns[$key]; - $this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) ); - $start = $val[1]; - break; - } - } - if( $epat ){ - // find end (and detect any nested elements) - $level = 0; - $offset = $start + 1; - $found = false; - while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){ - if( array_key_exists(2,$endMatches) ){ - // found end - if($level == 0){ - $len = strlen($endMatches[2][0]); - $off = $endMatches[2][1]; - $this->splitAndAdd( $otherExt, $count, - substr( $text, $start, $off + $len - $start ) ); - $start = $off + $len; - $found = true; - break; - } else{ - // end of nested element - $level -= 1; - } - } else{ - // nested - $level += 1; - } - $offset = $endMatches[0][1] + strlen($endMatches[0][0]); - } - if( ! 
$found ){ - // couldn't find appropriate closing tag, skip - $this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) ); - $start += strlen($matches[0][0]); - } - continue; - } - } - // else: add as text extract - $this->splitAndAdd( $textExt, $count, substr($text,$start) ); - break; - } - - $all = $textExt + $otherExt; // these have disjunct key sets - - wfProfileOut( "$fname-split" ); - - // prepare regexps - foreach( $terms as $index => $term ) { - $terms[$index] = preg_quote( $term, '/' ); - // manually do upper/lowercase stuff for utf-8 since PHP won't do it - if(preg_match('/[\x80-\xff]/', $term) ){ - $terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]); - } - - - } - $anyterm = implode( '|', $terms ); - $phrase = implode("$wgSearchHighlightBoundaries+", $terms ); - - // FIXME: a hack to scale contextchars, a correct solution - // would be to have contextchars actually be char and not byte - // length, and do proper utf-8 substrings and lengths everywhere, - // but PHP is making that very hard and unclean to implement :( - $scale = strlen($anyterm) / mb_strlen($anyterm); - $contextchars = intval( $contextchars * $scale ); - - $patPre = "(^|$wgSearchHighlightBoundaries)"; - $patPost = "($wgSearchHighlightBoundaries|$)"; - - $pat1 = "/(".$phrase.")/ui"; - $pat2 = "/$patPre(".$anyterm.")$patPost/ui"; - - wfProfileIn( "$fname-extract" ); - - $left = $contextlines; - - $snippets = array(); - $offsets = array(); - - // show beginning only if it contains all words - $first = 0; - $firstText = ''; - foreach($textExt as $index => $line){ - if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){ - $firstText = $this->extract( $line, 0, $contextchars * $contextlines ); - $first = $index; - break; - } - } - if( $firstText ){ - $succ = true; - // check if first text contains all terms - foreach($terms as $term){ - if( ! 
preg_match("/$patPre".$term."$patPost/ui", $firstText) ){ - $succ = false; - break; - } - } - if( $succ ){ - $snippets[$first] = $firstText; - $offsets[$first] = 0; - } - } - if( ! $snippets ) { - // match whole query on text - $this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets); - // match whole query on templates/tables/images - $this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets); - // match any words on text - $this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets); - // match any words on templates/tables/images - $this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets); - - ksort($snippets); - } - - // add extra chars to each snippet to make snippets constant size - $extended = array(); - if( count( $snippets ) == 0){ - // couldn't find the target words, just show beginning of article - $targetchars = $contextchars * $contextlines; - $snippets[$first] = ''; - $offsets[$first] = 0; - } else{ - // if begin of the article contains the whole phrase, show only that !! 
- if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first]) - && $offsets[$first] < $contextchars * 2 ){ - $snippets = array ($first => $snippets[$first]); - } - - // calc by how much to extend existing snippets - $targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) ); - } - - foreach($snippets as $index => $line){ - $extended[$index] = $line; - $len = strlen($line); - if( $len < $targetchars - 20 ){ - // complete this line - if($len < strlen( $all[$index] )){ - $extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]); - $len = strlen( $extended[$index] ); - } - - // add more lines - $add = $index + 1; - while( $len < $targetchars - 20 - && array_key_exists($add,$all) - && !array_key_exists($add,$snippets) ){ - $offsets[$add] = 0; - $tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] ); - $extended[$add] = $tt; - $len += strlen( $tt ); - $add++; - } - } - } - - //$snippets = array_map('htmlspecialchars', $extended); - $snippets = $extended; - $last = -1; - $extract = ''; - foreach($snippets as $index => $line){ - if($last == -1) - $extract .= $line; // first line - elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last])) - $extract .= " ".$line; // continous lines - else - $extract .= ' ... ' . $line; - - $last = $index; - } - if( $extract ) - $extract .= ' ... '; - - $processed = array(); - foreach($terms as $term){ - if( ! 
isset($processed[$term]) ){ - $pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word - $extract = preg_replace( $pat3, - "\\1\\2\\3", $extract ); - $processed[$term] = true; - } - } - - wfProfileOut( "$fname-extract" ); - - return $extract; - } - - /** - * Split text into lines and add it to extracts array - * - * @param array $extracts index -> $line - * @param int $count - * @param string $text - */ - function splitAndAdd(&$extracts, &$count, $text){ - $split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text ); - foreach($split as $line){ - $tt = trim($line); - if( $tt ) - $extracts[$count++] = $tt; - } - } - - /** - * Do manual case conversion for non-ascii chars - * - * @param unknown_type $matches - */ - function caseCallback($matches){ - global $wgContLang; - if( strlen($matches[0]) > 1 ){ - return '['.$wgContLang->lc($matches[0]).$wgContLang->uc($matches[0]).']'; - } else - return $matches[0]; - } - - /** - * Extract part of the text from start to end, but by - * not chopping up words - * @param string $text - * @param int $start - * @param int $end - * @param int $posStart (out) actual start position - * @param int $posEnd (out) actual end position - * @return string - */ - function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){ - global $wgContLang; - - if( $start != 0) - $start = $this->position( $text, $start, 1 ); - if( $end >= strlen($text) ) - $end = strlen($text); - else - $end = $this->position( $text, $end ); - - if(!is_null($posStart)) - $posStart = $start; - if(!is_null($posEnd)) - $posEnd = $end; - - if($end > $start) - return substr($text, $start, $end-$start); - else - return ''; - } - - /** - * Find a nonletter near a point (index) in the text - * - * @param string $text - * @param int $point - * @param int $offset to found index - * @return int nearest nonletter index, or beginning of utf8 char if none - */ - function position($text, $point, $offset=0 ){ - $tolerance = 10; - $s = max( 0, $point 
- $tolerance ); - $l = min( strlen($text), $point + $tolerance ) - $s; - $m = array(); - if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){ - return $m[0][1] + $s + $offset; - } else{ - // check if point is on a valid first UTF8 char - $char = ord( $text[$point] ); - while( $char >= 0x80 && $char < 0xc0 ) { - // skip trailing bytes - $point++; - if($point >= strlen($text)) - return strlen($text); - $char = ord( $text[$point] ); - } - return $point; - - } - } - - /** - * Search extracts for a pattern, and return snippets - * - * @param string $pattern regexp for matching lines - * @param array $extracts extracts to search - * @param int $linesleft number of extracts to make - * @param int $contextchars length of snippet - * @param array $out map for highlighted snippets - * @param array $offsets map of starting points of snippets - * @protected - */ - function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){ - if($linesleft == 0) - return; // nothing to do - foreach($extracts as $index => $line){ - if( array_key_exists($index,$out) ) - continue; // this line already highlighted - - $m = array(); - if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) - continue; - - $offset = $m[0][1]; - $len = strlen($m[0][0]); - if($offset + $len < $contextchars) - $begin = 0; - elseif( $len > $contextchars) - $begin = $offset; - else - $begin = $offset + intval( ($len - $contextchars) / 2 ); - - $end = $begin + $contextchars; - - $posBegin = $begin; - // basic snippet from this line - $out[$index] = $this->extract($line,$begin,$end,$posBegin); - $offsets[$index] = $posBegin; - $linesleft--; - if($linesleft == 0) - return; - } - } - - /** - * Basic wikitext removal - * @protected - */ - function removeWiki($text) { - $fname = __METHOD__; - wfProfileIn( $fname ); - - //$text = preg_replace("/'{2,5}/", "", $text); - //$text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text); - 
//$text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text); - //$text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text); - //$text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text); - //$text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text); - $text = preg_replace("/\\{\\{([^|]+?)\\}\\}/", "", $text); - $text = preg_replace("/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text); - $text = preg_replace("/\\[\\[([^|]+?)\\]\\]/", "\\1", $text); - $text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text); - //$text = preg_replace("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", "\\2", $text); - $text = preg_replace("/<\/?[^>]+>/", "", $text); - $text = preg_replace("/'''''/", "", $text); - $text = preg_replace("/('''|<\/?[iIuUbB]>)/", "", $text); - $text = preg_replace("/''/", "", $text); - - wfProfileOut( $fname ); - return $text; - } - - /** - * callback to replace [[target|caption]] kind of links, if - * the target is category or image, leave it - * - * @param array $matches - */ - function linkReplace($matches){ - $colon = strpos( $matches[1], ':' ); - if( $colon === false ) - return $matches[2]; // replace with caption - global $wgContLang; - $ns = substr( $matches[1], 0, $colon ); - $index = $wgContLang->getNsIndex($ns); - if( $index !== false && ($index == NS_IMAGE || $index == NS_CATEGORY) ) - return $matches[0]; // return the whole thing - else - return $matches[2]; - - } - - /** - * Simple & fast snippet extraction, but gives completely unrelevant - * snippets - * - * @param string $text - * @param array $terms - * @param int $contextlines - * @param int $contextchars - * @return string - */ - public function highlightSimple( $text, $terms, $contextlines, $contextchars ) { - global $wgLang, $wgContLang; - $fname = __METHOD__; - - $lines = explode( "\n", $text ); - - $terms = implode( '|', $terms ); - $terms = str_replace( '/', "\\/", $terms); - $max = intval( $contextchars ) + 1; - $pat1 = "/(.*)($terms)(.{0,$max})/i"; 
- - $lineno = 0; - - $extract = ""; - wfProfileIn( "$fname-extract" ); - foreach ( $lines as $line ) { - if ( 0 == $contextlines ) { - break; - } - ++$lineno; - $m = array(); - if ( ! preg_match( $pat1, $line, $m ) ) { - continue; - } - --$contextlines; - $pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' ); - - if ( count( $m ) < 3 ) { - $post = ''; - } else { - $post = $wgContLang->truncate( $m[3], $contextchars, ' ... ' ); - } - - $found = $m[2]; - - $line = htmlspecialchars( $pre . $found . $post ); - $pat2 = '/(' . $terms . ")/i"; - $line = preg_replace( $pat2, - "\\1", $line ); - - $extract .= "${line}\n"; - } - wfProfileOut( "$fname-extract" ); - - return $extract; - } - -} - -/** - * @ingroup Search - */ -class SearchEngineDummy { - function search( $term ) { - return null; - } - function setLimitOffset($l, $o) {} - function legalSearchChars() {} - function update() {} - function setnamespaces() {} - function searchtitle() {} - function searchtext() {} -} diff --git a/includes/SearchMySQL.php b/includes/SearchMySQL.php deleted file mode 100644 index f9b71c8ecd..0000000000 --- a/includes/SearchMySQL.php +++ /dev/null @@ -1,262 +0,0 @@ - -# http://www.mediawiki.org/ -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-# http://www.gnu.org/copyleft/gpl.html - -/** - * @file - * @ingroup Search - */ - -/** - * Search engine hook for MySQL 4+ - * @ingroup Search - */ -class SearchMySQL extends SearchEngine { - var $strictMatching = true; - - /** @todo document */ - function __construct( $db ) { - $this->db = $db; - } - - /** @todo document */ - function parseQuery( $filteredText, $fulltext ) { - global $wgContLang; - $lc = SearchEngine::legalSearchChars(); // Minus format chars - $searchon = ''; - $this->searchTerms = array(); - - # FIXME: This doesn't handle parenthetical expressions. - $m = array(); - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', - $filteredText, $m, PREG_SET_ORDER ) ) { - foreach( $m as $terms ) { - if( $searchon !== '' ) $searchon .= ' '; - if( $this->strictMatching && ($terms[1] == '') ) { - $terms[1] = '+'; - } - $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); - if( !empty( $terms[3] ) ) { - // Match individual terms in result highlighting... - $regexp = preg_quote( $terms[3], '/' ); - if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; - } else { - // Match the quoted term in result highlighting... - $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); - } - $this->searchTerms[] = $regexp; - } - wfDebug( "Would search with '$searchon'\n" ); - wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" ); - } else { - wfDebug( "Can't understand search query '{$filteredText}'\n" ); - } - - $searchon = $this->db->strencode( $searchon ); - $field = $this->getIndexField( $fulltext ); - return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) "; - } - - public static function legalSearchChars() { - return "\"*" . parent::legalSearchChars(); - } - - /** - * Perform a full text search query and return a result set. 
- * - * @param string $term - Raw search term - * @return MySQLSearchResultSet - * @access public - */ - function searchText( $term ) { - $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); - return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); - } - - /** - * Perform a title-only search query and return a result set. - * - * @param string $term - Raw search term - * @return MySQLSearchResultSet - * @access public - */ - function searchTitle( $term ) { - $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); - return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); - } - - - /** - * Return a partial WHERE clause to exclude redirects, if so set - * @return string - * @private - */ - function queryRedirect() { - if( $this->showRedirects ) { - return ''; - } else { - return 'AND page_is_redirect=0'; - } - } - - /** - * Return a partial WHERE clause to limit the search to the given namespaces - * @return string - * @private - */ - function queryNamespaces() { - if( is_null($this->namespaces) ) - return ''; # search all - $namespaces = implode( ',', $this->namespaces ); - if ($namespaces == '') { - $namespaces = '0'; - } - return 'AND page_namespace IN (' . $namespaces . ')'; - } - - /** - * Return a LIMIT clause to limit results on the query. - * @return string - * @private - */ - function queryLimit() { - return $this->db->limitResult( '', $this->limit, $this->offset ); - } - - /** - * Does not do anything for generic search engine - * subclasses may define this though - * @return string - * @private - */ - function queryRanking( $filteredTerm, $fulltext ) { - return ''; - } - - /** - * Construct the full SQL query to do the search. 
- * The guts shoulds be constructed in queryMain() - * @param string $filteredTerm - * @param bool $fulltext - * @private - */ - function getQuery( $filteredTerm, $fulltext ) { - return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . - $this->queryRedirect() . ' ' . - $this->queryNamespaces() . ' ' . - $this->queryRanking( $filteredTerm, $fulltext ) . ' ' . - $this->queryLimit(); - } - - - /** - * Picks which field to index on, depending on what type of query. - * @param bool $fulltext - * @return string - */ - function getIndexField( $fulltext ) { - return $fulltext ? 'si_text' : 'si_title'; - } - - /** - * Get the base part of the search query. - * The actual match syntax will depend on the server - * version; MySQL 3 and MySQL 4 have different capabilities - * in their fulltext search indexes. - * - * @param string $filteredTerm - * @param bool $fulltext - * @return string - * @private - */ - function queryMain( $filteredTerm, $fulltext ) { - $match = $this->parseQuery( $filteredTerm, $fulltext ); - $page = $this->db->tableName( 'page' ); - $searchindex = $this->db->tableName( 'searchindex' ); - return 'SELECT page_id, page_namespace, page_title ' . - "FROM $page,$searchindex " . - 'WHERE page_id=si_page AND ' . $match; - } - - /** - * Create or update the search index record for the given page. - * Title and text should be pre-processed. - * - * @param int $id - * @param string $title - * @param string $text - */ - function update( $id, $title, $text ) { - $dbw = wfGetDB( DB_MASTER ); - $dbw->replace( 'searchindex', - array( 'si_page' ), - array( - 'si_page' => $id, - 'si_title' => $title, - 'si_text' => $text - ), __METHOD__ ); - } - - /** - * Update a search index record's title only. - * Title should be pre-processed. 
- * - * @param int $id - * @param string $title - */ - function updateTitle( $id, $title ) { - $dbw = wfGetDB( DB_MASTER ); - - $dbw->update( 'searchindex', - array( 'si_title' => $title ), - array( 'si_page' => $id ), - __METHOD__, - array( $dbw->lowPriorityOption() ) ); - } -} - -/** - * @ingroup Search - */ -class MySQLSearchResultSet extends SearchResultSet { - function MySQLSearchResultSet( $resultSet, $terms ) { - $this->mResultSet = $resultSet; - $this->mTerms = $terms; - } - - function termMatches() { - return $this->mTerms; - } - - function numRows() { - return $this->mResultSet->numRows(); - } - - function next() { - $row = $this->mResultSet->fetchObject(); - if( $row === false ) { - return false; - } else { - return new SearchResult( $row ); - } - } - - function free() { - $this->mResultSet->free(); - } -} diff --git a/includes/SearchMySQL4.php b/includes/SearchMySQL4.php deleted file mode 100644 index 3e2bb2d1dd..0000000000 --- a/includes/SearchMySQL4.php +++ /dev/null @@ -1,34 +0,0 @@ - -# http://www.mediawiki.org/ -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# http://www.gnu.org/copyleft/gpl.html - -/** - * @file - * @ingroup Search - */ - -/** - * Search engine hook for MySQL 4+ - * This class retained for backwards compatibility... 
- * The meat's been moved to SearchMySQL, since the 3.x variety is gone. - * @ingroup Search - * @deprecated - */ -class SearchMySQL4 extends SearchMySQL { - /* whee */ -} diff --git a/includes/SearchOracle.php b/includes/SearchOracle.php deleted file mode 100644 index bf9368d150..0000000000 --- a/includes/SearchOracle.php +++ /dev/null @@ -1,240 +0,0 @@ - -# http://www.mediawiki.org/ -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# http://www.gnu.org/copyleft/gpl.html - -/** - * @file - * @ingroup Search - */ - -/** - * Search engine hook base class for Oracle (ConText). - * @ingroup Search - */ -class SearchOracle extends SearchEngine { - function __construct($db) { - $this->db = $db; - } - - /** - * Perform a full text search query and return a result set. - * - * @param string $term - Raw search term - * @return OracleSearchResultSet - * @access public - */ - function searchText( $term ) { - $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), true))); - return new OracleSearchResultSet($resultSet, $this->searchTerms); - } - - /** - * Perform a title-only search query and return a result set. 
- * - * @param string $term - Raw search term - * @return ORacleSearchResultSet - * @access public - */ - function searchTitle($term) { - $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false))); - return new MySQLSearchResultSet($resultSet, $this->searchTerms); - } - - - /** - * Return a partial WHERE clause to exclude redirects, if so set - * @return string - * @private - */ - function queryRedirect() { - if ($this->showRedirects) { - return ''; - } else { - return 'AND page_is_redirect=0'; - } - } - - /** - * Return a partial WHERE clause to limit the search to the given namespaces - * @return string - * @private - */ - function queryNamespaces() { - if( is_null($this->namespaces) ) - return ''; - $namespaces = implode(',', $this->namespaces); - if ($namespaces == '') { - $namespaces = '0'; - } - return 'AND page_namespace IN (' . $namespaces . ')'; - } - - /** - * Return a LIMIT clause to limit results on the query. - * @return string - * @private - */ - function queryLimit($sql) { - return $this->db->limitResult($sql, $this->limit, $this->offset); - } - - /** - * Does not do anything for generic search engine - * subclasses may define this though - * @return string - * @private - */ - function queryRanking($filteredTerm, $fulltext) { - return ' ORDER BY score(1)'; - } - - /** - * Construct the full SQL query to do the search. - * The guts shoulds be constructed in queryMain() - * @param string $filteredTerm - * @param bool $fulltext - * @private - */ - function getQuery( $filteredTerm, $fulltext ) { - return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' . - $this->queryRedirect() . ' ' . - $this->queryNamespaces() . ' ' . - $this->queryRanking( $filteredTerm, $fulltext ) . ' '); - } - - - /** - * Picks which field to index on, depending on what type of query. - * @param bool $fulltext - * @return string - */ - function getIndexField($fulltext) { - return $fulltext ? 
'si_text' : 'si_title'; - } - - /** - * Get the base part of the search query. - * - * @param string $filteredTerm - * @param bool $fulltext - * @return string - * @private - */ - function queryMain( $filteredTerm, $fulltext ) { - $match = $this->parseQuery($filteredTerm, $fulltext); - $page = $this->db->tableName('page'); - $searchindex = $this->db->tableName('searchindex'); - return 'SELECT page_id, page_namespace, page_title ' . - "FROM $page,$searchindex " . - 'WHERE page_id=si_page AND ' . $match; - } - - /** @todo document */ - function parseQuery($filteredText, $fulltext) { - global $wgContLang; - $lc = SearchEngine::legalSearchChars(); - $this->searchTerms = array(); - - # FIXME: This doesn't handle parenthetical expressions. - $m = array(); - $q = array(); - - if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', - $filteredText, $m, PREG_SET_ORDER)) { - foreach($m as $terms) { - $q[] = $terms[1] . $wgContLang->stripForSearch($terms[2]); - - if (!empty($terms[3])) { - $regexp = preg_quote( $terms[3], '/' ); - if ($terms[4]) - $regexp .= "[0-9A-Za-z_]+"; - } else { - $regexp = preg_quote(str_replace('"', '', $terms[2]), '/'); - } - $this->searchTerms[] = $regexp; - } - } - - $searchon = $this->db->strencode(join(',', $q)); - $field = $this->getIndexField($fulltext); - return " CONTAINS($field, '$searchon', 1) > 0 "; - } - - /** - * Create or update the search index record for the given page. - * Title and text should be pre-processed. - * - * @param int $id - * @param string $title - * @param string $text - */ - function update($id, $title, $text) { - $dbw = wfGetDB(DB_MASTER); - $dbw->replace('searchindex', - array('si_page'), - array( - 'si_page' => $id, - 'si_title' => $title, - 'si_text' => $text - ), 'SearchOracle::update' ); - $dbw->query("CALL ctx_ddl.sync_index('si_text_idx')"); - $dbw->query("CALL ctx_ddl.sync_index('si_title_idx')"); - } - - /** - * Update a search index record's title only. - * Title should be pre-processed. 
- * - * @param int $id - * @param string $title - */ - function updateTitle($id, $title) { - $dbw = wfGetDB(DB_MASTER); - - $dbw->update('searchindex', - array('si_title' => $title), - array('si_page' => $id), - 'SearchOracle::updateTitle', - array()); - } -} - -/** - * @ingroup Search - */ -class OracleSearchResultSet extends SearchResultSet { - function __construct($resultSet, $terms) { - $this->mResultSet = $resultSet; - $this->mTerms = $terms; - } - - function termMatches() { - return $this->mTerms; - } - - function numRows() { - return $this->mResultSet->numRows(); - } - - function next() { - $row = $this->mResultSet->fetchObject(); - if ($row === false) - return false; - return new SearchResult($row); - } -} diff --git a/includes/SearchPostgres.php b/includes/SearchPostgres.php deleted file mode 100644 index 02638bb5c8..0000000000 --- a/includes/SearchPostgres.php +++ /dev/null @@ -1,255 +0,0 @@ - -# http://www.mediawiki.org/ -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-# http://www.gnu.org/copyleft/gpl.html - -/** - * @file - * @ingroup Search - */ - -/** - * Search engine hook base class for Postgres - * @ingroup Search - */ -class SearchPostgres extends SearchEngine { - - function SearchPostgres( $db ) { - $this->db = $db; - } - - /** - * Perform a full text search query via tsearch2 and return a result set. - * Currently searches a page's current title (page.page_title) and - * latest revision article text (pagecontent.old_text) - * - * @param string $term - Raw search term - * @return PostgresSearchResultSet - * @access public - */ - function searchTitle( $term ) { - $q = $this->searchQuery( $term , 'titlevector', 'page_title' ); - $olderror = error_reporting(E_ERROR); - $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) ); - error_reporting($olderror); - if (!$resultSet) { - // Needed for "Query requires full scan, GIN doesn't support it" - return new SearchResultTooMany(); - } - return new PostgresSearchResultSet( $resultSet, $this->searchTerms ); - } - function searchText( $term ) { - $q = $this->searchQuery( $term, 'textvector', 'old_text' ); - $olderror = error_reporting(E_ERROR); - $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) ); - error_reporting($olderror); - if (!$resultSet) { - return new SearchResultTooMany(); - } - return new PostgresSearchResultSet( $resultSet, $this->searchTerms ); - } - - - /* - * Transform the user's search string into a better form for tsearch2 - */ - function parseQuery( $term ) { - - wfDebug( "parseQuery received: $term" ); - - ## No backslashes allowed - $term = preg_replace('/\\\/', '', $term); - - ## Collapse parens into nearby words: - $term = preg_replace('/\s*\(\s*/', ' (', $term); - $term = preg_replace('/\s*\)\s*/', ') ', $term); - - ## Treat colons as word separators: - $term = preg_replace('/:/', ' ', $term); - - $searchstring = ''; - $m = array(); - if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, 
PREG_SET_ORDER ) ) { - foreach( $m as $terms ) { - if (strlen($terms[1])) { - $searchstring .= ' & !'; - } - if (strtolower($terms[2]) === 'and') { - $searchstring .= ' & '; - } - else if (strtolower($terms[2]) === 'or' or $terms[2] === '|') { - $searchstring .= ' | '; - } - else if (strtolower($terms[2]) === 'not') { - $searchstring .= ' & !'; - } - else { - $searchstring .= " & $terms[2]"; - } - } - } - - ## Strip out leading junk - $searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring); - - ## Remove any doubled-up operators - $searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring); - - ## Remove any non-spaced operators (e.g. "Zounds!") - $searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring); - - ## Remove any trailing whitespace or operators - $searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring); - - ## Remove unnecessary quotes around everything - $searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring); - - ## Quote the whole thing - $searchstring = $this->db->addQuotes($searchstring); - - wfDebug( "parseQuery returned: $searchstring" ); - - return $searchstring; - - } - - /** - * Construct the full SQL query to do the search. - * @param string $filteredTerm - * @param string $fulltext - * @private - */ - function searchQuery( $term, $fulltext, $colname ) { - global $wgDBversion; - - if ( !isset( $wgDBversion ) ) { - $this->db->getServerVersion(); - $wgDBversion = $this->db->numeric_version; - } - $prefix = $wgDBversion < 8.3 ? "'default'," : ''; - - $searchstring = $this->parseQuery( $term ); - - ## We need a separate query here so gin does not complain about empty searches - $SQL = "SELECT to_tsquery($prefix $searchstring)"; - $res = $this->db->doQuery($SQL); - if (!$res) { - ## TODO: Better output (example to catch: one 'two) - die ("Sorry, that was not a valid search string. Please go back and try again"); - } - $top = pg_fetch_result($res,0,0); - - if ($top === "") { ## e.g. 
if only stopwords are used XXX return something better - $query = "SELECT page_id, page_namespace, page_title, 0 AS score ". - "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " . - "AND r.rev_text_id = c.old_id AND 1=0"; - } - else { - $m = array(); - if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) { - foreach( $m as $terms ) { - $this->searchTerms[$terms[1]] = $terms[1]; - } - } - - $rankscore = $wgDBversion > 8.2 ? 5 : 1; - $rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank'; - $query = "SELECT page_id, page_namespace, page_title, ". - "$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ". - "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " . - "AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)"; - } - - ## Redirects - if (! $this->showRedirects) - $query .= ' AND page_is_redirect = 0'; - - ## Namespaces - defaults to 0 - if( !is_null($this->namespaces) ){ // null -> search all - if ( count($this->namespaces) < 1) - $query .= ' AND page_namespace = 0'; - else { - $namespaces = implode( ',', $this->namespaces ); - $query .= " AND page_namespace IN ($namespaces)"; - } - } - - $query .= " ORDER BY score DESC, page_id DESC"; - - $query .= $this->db->limitResult( '', $this->limit, $this->offset ); - - wfDebug( "searchQuery returned: $query" ); - - return $query; - } - - ## Most of the work of these two functions are done automatically via triggers - - function update( $pageid, $title, $text ) { - ## We don't want to index older revisions - $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ". - "(SELECT rev_text_id FROM revision WHERE rev_page = $pageid ". 
- "ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)"; - $this->db->doQuery($SQL); - return true; - } - - function updateTitle( $id, $title ) { - return true; - } - -} ## end of the SearchPostgres class - -/** - * @ingroup Search - */ -class PostgresSearchResult extends SearchResult { - function PostgresSearchResult( $row ) { - $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title ); - $this->score = $row->score; - } - function getScore() { - return $this->score; - } -} - -/** - * @ingroup Search - */ -class PostgresSearchResultSet extends SearchResultSet { - function PostgresSearchResultSet( $resultSet, $terms ) { - $this->mResultSet = $resultSet; - $this->mTerms = $terms; - } - - function termMatches() { - return $this->mTerms; - } - - function numRows() { - return $this->mResultSet->numRows(); - } - - function next() { - $row = $this->mResultSet->fetchObject(); - if( $row === false ) { - return false; - } else { - return new PostgresSearchResult( $row ); - } - } -} diff --git a/includes/SearchTsearch2.php b/includes/SearchTsearch2.php deleted file mode 100644 index e69f6acdd2..0000000000 --- a/includes/SearchTsearch2.php +++ /dev/null @@ -1,120 +0,0 @@ -, Domas Mituzas -# http://www.mediawiki.org/ -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-# http://www.gnu.org/copyleft/gpl.html - -/** - * Search engine hook for PostgreSQL / Tsearch2 - * @file - * @ingroup Search - */ - -/** - * @todo document - * @ingroup Search - */ -class SearchTsearch2 extends SearchEngine { - var $strictMatching = false; - - function __construct( $db ) { - $this->db = $db; - $this->mRanking = true; - } - - function getIndexField( $fulltext ) { - return $fulltext ? 'si_text' : 'si_title'; - } - - function parseQuery( $filteredText, $fulltext ) { - global $wgContLang; - $lc = SearchEngine::legalSearchChars(); - $searchon = ''; - $this->searchTerms = array(); - - # FIXME: This doesn't handle parenthetical expressions. - $m = array(); - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', - $filteredText, $m, PREG_SET_ORDER ) ) { - foreach( $m as $terms ) { - if( $searchon !== '' ) $searchon .= ' '; - if( $this->strictMatching && ($terms[1] == '') ) { - $terms[1] = '+'; - } - $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); - if( !empty( $terms[3] ) ) { - $regexp = preg_quote( $terms[3], '/' ); - if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; - } else { - $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); - } - $this->searchTerms[] = $regexp; - } - wfDebug( "Would search with '$searchon'\n" ); - wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . 
"\b/\n" ); - } else { - wfDebug( "Can't understand search query '{$this->filteredText}'\n" ); - } - - $searchon = preg_replace( '/(\s+)/', '&', $searchon ); - $searchon = $this->db->strencode( $searchon ); - return $searchon; - } - - function queryRanking( $filteredTerm, $fulltext ) { - $field = $this->getIndexField( $fulltext ); - $searchon = $this->parseQuery( $filteredTerm, $fulltext ); - if ($this->mRanking) - return " ORDER BY rank($field,to_tsquery('$searchon')) DESC"; - else - return ""; - } - - - function queryMain( $filteredTerm, $fulltext ) { - $match = $this->parseQuery( $filteredTerm, $fulltext ); - $field = $this->getIndexField( $fulltext ); - $cur = $this->db->tableName( 'cur' ); - $searchindex = $this->db->tableName( 'searchindex' ); - return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' . - "FROM $cur,$searchindex " . - 'WHERE cur_id=si_page AND ' . - " $field @@ to_tsquery ('$match') " ; - } - - function update( $id, $title, $text ) { - $dbw = wfGetDB( DB_MASTER ); - $searchindex = $dbw->tableName( 'searchindex' ); - $sql = "DELETE FROM $searchindex WHERE si_page={$id}"; - $dbw->query( $sql, __METHOD__ ); - $sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ". - " VALUES ( $id, to_tsvector('". - $dbw->strencode($title). - "'),to_tsvector('". - $dbw->strencode( $text)."')) "; - $dbw->query($sql, __METHOD__ ); - } - - function updateTitle($id,$title) { - $dbw = wfGetDB(DB_MASTER); - $searchindex = $dbw->tableName( 'searchindex' ); - $sql = "UPDATE $searchindex SET si_title=to_tsvector('" . - $dbw->strencode( $title ) . 
- "') WHERE si_page={$id}"; - - $dbw->query( $sql, __METHOD__ ); - } -} diff --git a/includes/SearchUpdate.php b/includes/SearchUpdate.php deleted file mode 100644 index 087a8ba5dc..0000000000 --- a/includes/SearchUpdate.php +++ /dev/null @@ -1,113 +0,0 @@ -mId = $id; - $this->mText = $text; - - $this->mNamespace = $nt->getNamespace(); - $this->mTitle = $nt->getText(); # Discard namespace - - $this->mTitleWords = $this->mTextWords = array(); - } else { - wfDebug( "SearchUpdate object created with invalid title '$title'\n" ); - } - } - - function doUpdate() { - global $wgContLang, $wgDisableSearchUpdate; - - if( $wgDisableSearchUpdate || !$this->mId ) { - return false; - } - $fname = 'SearchUpdate::doUpdate'; - wfProfileIn( $fname ); - - $search = SearchEngine::create(); - $lc = SearchEngine::legalSearchChars() . '&#;'; - - if( $this->mText === false ) { - $search->updateTitle($this->mId, - Title::indexTitle( $this->mNamespace, $this->mTitle )); - wfProfileOut( $fname ); - return; - } - - # Language-specific strip/conversion - $text = $wgContLang->stripForSearch( $this->mText ); - - wfProfileIn( $fname.'-regexps' ); - $text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/", - ' ', strtolower( " " . $text /*$this->mText*/ . 
" " ) ); # Strip HTML markup - $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD", - "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings - - # Strip external URLs - $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF"; - $protos = "http|https|ftp|mailto|news|gopher"; - $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/"; - $text = preg_replace( $pat, "\\1 \\3", $text ); - - $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/"; - $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/"; - $text = preg_replace( $p1, "\\1 ", $text ); - $text = preg_replace( $p2, "\\1 \\3 ", $text ); - - # Internal image links - $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i"; - $text = preg_replace( $pat2, " \\1 \\3", $text ); - - $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/", - "\\1\\2 \\2\\3", $text ); # Handle [[game]]s - - # Strip all remaining non-search characters - $text = preg_replace( "/[^{$lc}]+/", " ", $text ); - - # Handle 's, s' - # - # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text ); - # $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text ); - # - # These tail-anchored regexps are insanely slow. The worst case comes - # when Japanese or Chinese text (ie, no word spacing) is written on - # a wiki configured for Western UTF-8 mode. The Unicode characters are - # expanded to hex codes and the "words" are very long paragraph-length - # monstrosities. On a large page the above regexps may take over 20 - # seconds *each* on a 1GHz-level processor. - # - # Following are reversed versions which are consistently fast - # (about 3 milliseconds on 1GHz-level processor). 
- # - $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) ); - $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) ); - - # Strip wiki '' and ''' - $text = preg_replace( "/''[']*/", " ", $text ); - wfProfileOut( "$fname-regexps" ); - - wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) ); - - # Perform the actual update - $search->update($this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ), - $text); - - wfProfileOut( $fname ); - } -} - -/** - * Placeholder class - * @ingroup Search - */ -class SearchUpdateMyISAM extends SearchUpdate { - # Inherits everything -} diff --git a/includes/search/Engine.php b/includes/search/Engine.php new file mode 100644 index 0000000000..04fa906a00 --- /dev/null +++ b/includes/search/Engine.php @@ -0,0 +1,1154 @@ +hasVariants()){ + $allSearchTerms = array_merge($allSearchTerms,$wgContLang->convertLinkToAllVariants($searchterm)); + } + + foreach($allSearchTerms as $term){ + + # Exact match? No need to look further. + $title = Title::newFromText( $term ); + if (is_null($title)) + return NULL; + + if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal() + || $title->exists() ) { + return $title; + } + + # Now try all lower case (i.e. 
first letter capitalized) + # + $title = Title::newFromText( $wgContLang->lc( $term ) ); + if ( $title && $title->exists() ) { + return $title; + } + + # Now try capitalized string + # + $title = Title::newFromText( $wgContLang->ucwords( $term ) ); + if ( $title && $title->exists() ) { + return $title; + } + + # Now try all upper case + # + $title = Title::newFromText( $wgContLang->uc( $term ) ); + if ( $title && $title->exists() ) { + return $title; + } + + # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc + $title = Title::newFromText( $wgContLang->ucwordbreaks($term) ); + if ( $title && $title->exists() ) { + return $title; + } + + global $wgCapitalLinks, $wgContLang; + if( !$wgCapitalLinks ) { + // Catch differs-by-first-letter-case-only + $title = Title::newFromText( $wgContLang->ucfirst( $term ) ); + if ( $title && $title->exists() ) { + return $title; + } + $title = Title::newFromText( $wgContLang->lcfirst( $term ) ); + if ( $title && $title->exists() ) { + return $title; + } + } + + // Give hooks a chance at better match variants + $title = null; + if( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) { + return $title; + } + } + + $title = Title::newFromText( $searchterm ); + + # Entering an IP address goes to the contributions page + if ( ( $title->getNamespace() == NS_USER && User::isIP($title->getText() ) ) + || User::isIP( trim( $searchterm ) ) ) { + return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() ); + } + + + # Entering a user goes to the user page whether it's there or not + if ( $title->getNamespace() == NS_USER ) { + return $title; + } + + # Go to images that exist even if there's no local page. + # There may have been a funny upload, or it may be on a shared + # file repository such as Wikimedia Commons. + if( $title->getNamespace() == NS_IMAGE ) { + $image = wfFindFile( $title ); + if( $image ) { + return $title; + } + } + + # MediaWiki namespace? Page may be "implied" if not customized. 
+ # Just return it, with caps forced as the message system likes it. + if( $title->getNamespace() == NS_MEDIAWIKI ) { + return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) ); + } + + # Quoted term? Try without the quotes... + $matches = array(); + if( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) { + return SearchEngine::getNearMatch( $matches[1] ); + } + + return NULL; + } + + public static function legalSearchChars() { + return "A-Za-z_'0-9\\x80-\\xFF\\-"; + } + + /** + * Set the maximum number of results to return + * and how many to skip before returning the first. + * + * @param int $limit + * @param int $offset + * @access public + */ + function setLimitOffset( $limit, $offset = 0 ) { + $this->limit = intval( $limit ); + $this->offset = intval( $offset ); + } + + /** + * Set which namespaces the search should include. + * Give an array of namespace index numbers. + * + * @param array $namespaces + * @access public + */ + function setNamespaces( $namespaces ) { + $this->namespaces = $namespaces; + } + + /** + * Parse some common prefixes: all (search everything) + * or namespace names + * + * @param string $query + */ + function replacePrefixes( $query ){ + global $wgContLang; + + if( strpos($query,':') === false ) + return $query; // nothing to do + + $parsed = $query; + $allkeyword = wfMsgForContent('searchall').":"; + if( strncmp($query, $allkeyword, strlen($allkeyword)) == 0 ){ + $this->namespaces = null; + $parsed = substr($query,strlen($allkeyword)); + } else if( strpos($query,':') !== false ) { + $prefix = substr($query,0,strpos($query,':')); + $index = $wgContLang->getNsIndex($prefix); + if($index !== false){ + $this->namespaces = array($index); + $parsed = substr($query,strlen($prefix)+1); + } + } + if(trim($parsed) == '') + return $query; // prefix was the whole query + + return $parsed; + } + + /** + * Make a list of searchable namespaces and their canonical names. 
+ * @return array + */ + public static function searchableNamespaces() { + global $wgContLang; + $arr = array(); + foreach( $wgContLang->getNamespaces() as $ns => $name ) { + if( $ns >= NS_MAIN ) { + $arr[$ns] = $name; + } + } + return $arr; + } + + /** + * Extract default namespaces to search from the given user's + * settings, returning a list of index numbers. + * + * @param User $user + * @return array + * @static + */ + public static function userNamespaces( &$user ) { + $arr = array(); + foreach( SearchEngine::searchableNamespaces() as $ns => $name ) { + if( $user->getOption( 'searchNs' . $ns ) ) { + $arr[] = $ns; + } + } + return $arr; + } + + /** + * Find snippet highlight settings for a given user + * + * @param User $user + * @return array contextlines, contextchars + * @static + */ + public static function userHighlightPrefs( &$user ){ + //$contextlines = $user->getOption( 'contextlines', 5 ); + //$contextchars = $user->getOption( 'contextchars', 50 ); + $contextlines = 2; // Hardcode this. Old defaults sucked. :) + $contextchars = 75; // same as above.... :P + return array($contextlines, $contextchars); + } + + /** + * An array of namespaces indexes to be searched by default + * + * @return array + * @static + */ + public static function defaultNamespaces(){ + global $wgNamespacesToBeSearchedDefault; + + return array_keys($wgNamespacesToBeSearchedDefault, true); + } + + /** + * Return a 'cleaned up' search string + * + * @return string + * @access public + */ + function filter( $text ) { + $lc = $this->legalSearchChars(); + return trim( preg_replace( "/[^{$lc}]/", " ", $text ) ); + } + /** + * Load up the appropriate search engine class for the currently + * active database backend, and return a configured instance. 
+ * + * @return SearchEngine + */ + public static function create() { + global $wgDBtype, $wgSearchType; + if( $wgSearchType ) { + $class = $wgSearchType; + } elseif( $wgDBtype == 'mysql' ) { + $class = 'SearchMySQL'; + } else if ( $wgDBtype == 'postgres' ) { + $class = 'SearchPostgres'; + } else if ( $wgDBtype == 'oracle' ) { + $class = 'SearchOracle'; + } else { + $class = 'SearchEngineDummy'; + } + $search = new $class( wfGetDB( DB_SLAVE ) ); + $search->setLimitOffset(0,0); + return $search; + } + + /** + * Create or update the search index record for the given page. + * Title and text should be pre-processed. + * + * @param int $id + * @param string $title + * @param string $text + * @abstract + */ + function update( $id, $title, $text ) { + // no-op + } + + /** + * Update a search index record's title only. + * Title should be pre-processed. + * + * @param int $id + * @param string $title + * @abstract + */ + function updateTitle( $id, $title ) { + // no-op + } + + /** + * Get OpenSearch suggestion template + * + * @return string + * @static + */ + public static function getOpenSearchTemplate() { + global $wgOpenSearchTemplate, $wgServer, $wgScriptPath; + if($wgOpenSearchTemplate) + return $wgOpenSearchTemplate; + else{ + $ns = implode(',',SearchEngine::defaultNamespaces()); + if(!$ns) $ns = "0"; + return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace='.$ns; + } + } + + /** + * Get internal MediaWiki Suggest template + * + * @return string + * @static + */ + public static function getMWSuggestTemplate() { + global $wgMWSuggestTemplate, $wgServer, $wgScriptPath; + if($wgMWSuggestTemplate) + return $wgMWSuggestTemplate; + else + return $wgServer . $wgScriptPath . 
'/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}'; + } +} + +/** + * @ingroup Search + */ +class SearchResultSet { + /** + * Fetch an array of regular expression fragments for matching + * the search terms as parsed by this engine in a text extract. + * + * @return array + * @access public + * @abstract + */ + function termMatches() { + return array(); + } + + function numRows() { + return 0; + } + + /** + * Return true if results are included in this result set. + * @return bool + * @abstract + */ + function hasResults() { + return false; + } + + /** + * Some search modes return a total hit count for the query + * in the entire article database. This may include pages + * in namespaces that would not be matched on the given + * settings. + * + * Return null if no total hits number is supported. + * + * @return int + * @access public + */ + function getTotalHits() { + return null; + } + + /** + * Some search modes return a suggested alternate term if there are + * no exact hits. Returns true if there is one on this set. + * + * @return bool + * @access public + */ + function hasSuggestion() { + return false; + } + + /** + * @return string suggested query, null if none + */ + function getSuggestionQuery(){ + return null; + } + + /** + * @return string highlighted suggested query, '' if none + */ + function getSuggestionSnippet(){ + return ''; + } + + /** + * Return information about how and from where the results were fetched, + * should be useful for diagnostics and debugging + * + * @return string + */ + function getInfo() { + return null; + } + + /** + * Return a result set of hits on other (multiple) wikis associated with this one + * + * @return SearchResultSet + */ + function getInterwikiResults() { + return null; + } + + /** + * Check if there are results on other wikis + * + * @return boolean + */ + function hasInterwikiResults() { + return $this->getInterwikiResults() != null; + } + + + /** + * Fetches next search result, or false. 
+ * @return SearchResult + * @access public + * @abstract + */ + function next() { + return false; + } + + /** + * Frees the result set, if applicable. + * @ access public + */ + function free() { + // ... + } +} + + +/** + * @ingroup Search + */ +class SearchResultTooMany { + ## Some search engines may bail out if too many matches are found +} + + +/** + * @ingroup Search + */ +class SearchResult { + var $mRevision = null; + + function SearchResult( $row ) { + $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title ); + if( !is_null($this->mTitle) ) + $this->mRevision = Revision::newFromTitle( $this->mTitle ); + } + + /** + * Check if this is result points to an invalid title + * + * @return boolean + * @access public + */ + function isBrokenTitle(){ + if( is_null($this->mTitle) ) + return true; + return false; + } + + /** + * Check if target page is missing, happens when index is out of date + * + * @return boolean + * @access public + */ + function isMissingRevision(){ + if( !$this->mRevision ) + return true; + return false; + } + + /** + * @return Title + * @access public + */ + function getTitle() { + return $this->mTitle; + } + + /** + * @return double or null if not supported + */ + function getScore() { + return null; + } + + /** + * Lazy initialization of article text from DB + */ + protected function initText(){ + if( !isset($this->mText) ){ + $this->mText = $this->mRevision->getText(); + } + } + + /** + * @param array $terms terms to highlight + * @return string highlighted text snippet, null (and not '') if not supported + */ + function getTextSnippet($terms){ + global $wgUser, $wgAdvancedSearchHighlighting; + $this->initText(); + list($contextlines,$contextchars) = SearchEngine::userHighlightPrefs($wgUser); + $h = new SearchHighlighter(); + if( $wgAdvancedSearchHighlighting ) + return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars ); + else + return $h->highlightSimple( $this->mText, $terms, $contextlines, 
$contextchars ); + } + + /** + * @param array $terms terms to highlight + * @return string highlighted title, '' if not supported + */ + function getTitleSnippet($terms){ + return ''; + } + + /** + * @param array $terms terms to highlight + * @return string highlighted redirect name (redirect to this page), '' if none or not supported + */ + function getRedirectSnippet($terms){ + return ''; + } + + /** + * @return Title object for the redirect to this page, null if none or not supported + */ + function getRedirectTitle(){ + return null; + } + + /** + * @return string highlighted relevant section name, null if none or not supported + */ + function getSectionSnippet(){ + return ''; + } + + /** + * @return Title object (pagename+fragment) for the section, null if none or not supported + */ + function getSectionTitle(){ + return null; + } + + /** + * @return string timestamp + */ + function getTimestamp(){ + return $this->mRevision->getTimestamp(); + } + + /** + * @return int number of words + */ + function getWordCount(){ + $this->initText(); + return str_word_count( $this->mText ); + } + + /** + * @return int size in bytes + */ + function getByteSize(){ + $this->initText(); + return strlen( $this->mText ); + } + + /** + * @return boolean if hit has related articles + */ + function hasRelated(){ + return false; + } + + /** + * @return interwiki prefix of the title (return iw even if title is broken) + */ + function getInterwikiPrefix(){ + return ''; + } +} + +/** + * Highlight bits of wikitext + * + * @ingroup Search + */ +class SearchHighlighter { + var $mCleanWikitext = true; + + function SearchHighlighter($cleanupWikitext = true){ + $this->mCleanWikitext = $cleanupWikitext; + } + + /** + * Default implementation of wikitext highlighting + * + * @param string $text + * @param array $terms Terms to highlight (unescaped) + * @param int $contextlines + * @param int $contextchars + * @return string + */ + public function highlightText( $text, $terms, $contextlines, 
$contextchars ) { + global $wgLang, $wgContLang; + global $wgSearchHighlightBoundaries; + $fname = __METHOD__; + + if($text == '') + return ''; + + // spli text into text + templates/links/tables + $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)"; + // first capture group is for detecting nested templates/links/tables/references + $endPatterns = array( + 1 => '/(\{\{)|(\}\})/', // template + 2 => '/(\[\[)|(\]\])/', // image + 3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table + + // FIXME: this should prolly be a hook or something + if(function_exists('wfCite')){ + $spat .= '|()'; // references via cite extension + $endPatterns[4] = '/()|(<\/ref>)/'; + } + $spat .= '/'; + $textExt = array(); // text extracts + $otherExt = array(); // other extracts + wfProfileIn( "$fname-split" ); + $start = 0; + $textLen = strlen($text); + $count = 0; // sequence number to maintain ordering + while( $start < $textLen ){ + // find start of template/image/table + if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){ + $epat = ''; + foreach($matches as $key => $val){ + if($key > 0 && $val[1] != -1){ + if($key == 2){ + // see if this is an image link + $ns = substr($val[0],2,-1); + if( $wgContLang->getNsIndex($ns) != NS_IMAGE ) + break; + + } + $epat = $endPatterns[$key]; + $this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) ); + $start = $val[1]; + break; + } + } + if( $epat ){ + // find end (and detect any nested elements) + $level = 0; + $offset = $start + 1; + $found = false; + while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){ + if( array_key_exists(2,$endMatches) ){ + // found end + if($level == 0){ + $len = strlen($endMatches[2][0]); + $off = $endMatches[2][1]; + $this->splitAndAdd( $otherExt, $count, + substr( $text, $start, $off + $len - $start ) ); + $start = $off + $len; + $found = true; + break; + } else{ + // end of nested element + $level -= 1; + } + } else{ + // nested + $level += 1; + } + $offset = 
$endMatches[0][1] + strlen($endMatches[0][0]); + } + if( ! $found ){ + // couldn't find appropriate closing tag, skip + $this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) ); + $start += strlen($matches[0][0]); + } + continue; + } + } + // else: add as text extract + $this->splitAndAdd( $textExt, $count, substr($text,$start) ); + break; + } + + $all = $textExt + $otherExt; // these have disjunct key sets + + wfProfileOut( "$fname-split" ); + + // prepare regexps + foreach( $terms as $index => $term ) { + $terms[$index] = preg_quote( $term, '/' ); + // manually do upper/lowercase stuff for utf-8 since PHP won't do it + if(preg_match('/[\x80-\xff]/', $term) ){ + $terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]); + } + + + } + $anyterm = implode( '|', $terms ); + $phrase = implode("$wgSearchHighlightBoundaries+", $terms ); + + // FIXME: a hack to scale contextchars, a correct solution + // would be to have contextchars actually be char and not byte + // length, and do proper utf-8 substrings and lengths everywhere, + // but PHP is making that very hard and unclean to implement :( + // Only scale when there is at least one character to measure: an empty + // term list makes mb_strlen() return 0 and the division would fail. + $anytermChars = mb_strlen($anyterm); + if( $anytermChars > 0 ) { + $scale = strlen($anyterm) / $anytermChars; + $contextchars = intval( $contextchars * $scale ); + } + + $patPre = "(^|$wgSearchHighlightBoundaries)"; + $patPost = "($wgSearchHighlightBoundaries|$)"; + + $pat1 = "/(".$phrase.")/ui"; + $pat2 = "/$patPre(".$anyterm.")$patPost/ui"; + + wfProfileIn( "$fname-extract" ); + + $left = $contextlines; + + $snippets = array(); + $offsets = array(); + + // show beginning only if it contains all words + $first = 0; + $firstText = ''; + foreach($textExt as $index => $line){ + if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){ + $firstText = $this->extract( $line, 0, $contextchars * $contextlines ); + $first = $index; + break; + } + } + if( $firstText ){ + $succ = true; + // check if first text contains all terms + foreach($terms as $term){ + if( ! 
preg_match("/$patPre".$term."$patPost/ui", $firstText) ){ + $succ = false; + break; + } + } + if( $succ ){ + $snippets[$first] = $firstText; + $offsets[$first] = 0; + } + } + if( ! $snippets ) { + // match whole query on text + $this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets); + // match whole query on templates/tables/images + $this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets); + // match any words on text + $this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets); + // match any words on templates/tables/images + $this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets); + + ksort($snippets); + } + + // add extra chars to each snippet to make snippets constant size + $extended = array(); + if( count( $snippets ) == 0){ + // couldn't find the target words, just show beginning of article + $targetchars = $contextchars * $contextlines; + $snippets[$first] = ''; + $offsets[$first] = 0; + } else{ + // if begin of the article contains the whole phrase, show only that !! 
+ if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first]) + && $offsets[$first] < $contextchars * 2 ){ + $snippets = array ($first => $snippets[$first]); + } + + // calc by how much to extend existing snippets + $targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) ); + } + + foreach($snippets as $index => $line){ + $extended[$index] = $line; + $len = strlen($line); + if( $len < $targetchars - 20 ){ + // complete this line + if($len < strlen( $all[$index] )){ + $extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]); + $len = strlen( $extended[$index] ); + } + + // add more lines + $add = $index + 1; + while( $len < $targetchars - 20 + && array_key_exists($add,$all) + && !array_key_exists($add,$snippets) ){ + $offsets[$add] = 0; + $tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] ); + $extended[$add] = $tt; + $len += strlen( $tt ); + $add++; + } + } + } + + //$snippets = array_map('htmlspecialchars', $extended); + $snippets = $extended; + $last = -1; + $extract = ''; + foreach($snippets as $index => $line){ + if($last == -1) + $extract .= $line; // first line + elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last])) + $extract .= " ".$line; // continous lines + else + $extract .= ' ... ' . $line; + + $last = $index; + } + if( $extract ) + $extract .= ' ... '; + + $processed = array(); + foreach($terms as $term){ + if( ! 
isset($processed[$term]) ){ + $pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word + $extract = preg_replace( $pat3, + "\\1\\2\\3", $extract ); + $processed[$term] = true; + } + } + + wfProfileOut( "$fname-extract" ); + + return $extract; + } + + /** + * Split text into lines and add every non-empty (after trimming) line + * to the extracts array, advancing the shared sequence counter. + * Wikitext is stripped first when $this->mCleanWikitext is set. + * + * @param array $extracts index -> $line + * @param int $count next free index, incremented for each line added + * @param string $text + */ + function splitAndAdd(&$extracts, &$count, $text){ + $split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text ); + foreach($split as $line){ + $tt = trim($line); + // compare against '' explicitly: a line that is just "0" is falsy but not empty + if( $tt !== '' ) + $extracts[$count++] = $tt; + } + } + + /** + * Do manual case conversion for non-ascii chars + * + * @param unknown_type $matches + */ + function caseCallback($matches){ + global $wgContLang; + if( strlen($matches[0]) > 1 ){ + return '['.$wgContLang->lc($matches[0]).$wgContLang->uc($matches[0]).']'; + } else + return $matches[0]; + } + + /** + * Extract part of the text from start to end, but by + * not chopping up words + * @param string $text + * @param int $start + * @param int $end + * @param int $posStart (out) actual start position + * @param int $posEnd (out) actual end position + * @return string + */ + function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){ + global $wgContLang; + + if( $start != 0) + $start = $this->position( $text, $start, 1 ); + if( $end >= strlen($text) ) + $end = strlen($text); + else + $end = $this->position( $text, $end ); + + if(!is_null($posStart)) + $posStart = $start; + if(!is_null($posEnd)) + $posEnd = $end; + + if($end > $start) + return substr($text, $start, $end-$start); + else + return ''; + } + + /** + * Find a nonletter near a point (index) in the text + * + * @param string $text + * @param int $point + * @param int $offset to found index + * @return int nearest nonletter index, or beginning of utf8 char if none + */ + function position($text, $point, $offset=0 ){ + $tolerance = 10; + $s = max( 0, $point 
- $tolerance ); + $l = min( strlen($text), $point + $tolerance ) - $s; + $m = array(); + if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){ + return $m[0][1] + $s + $offset; + } else{ + // check if point is on a valid first UTF8 char + $char = ord( $text[$point] ); + while( $char >= 0x80 && $char < 0xc0 ) { + // skip trailing bytes + $point++; + if($point >= strlen($text)) + return strlen($text); + $char = ord( $text[$point] ); + } + return $point; + + } + } + + /** + * Search extracts for a pattern, and return snippets + * + * @param string $pattern regexp for matching lines + * @param array $extracts extracts to search + * @param int $linesleft number of extracts to make + * @param int $contextchars length of snippet + * @param array $out map for highlighted snippets + * @param array $offsets map of starting points of snippets + * @protected + */ + function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){ + if($linesleft == 0) + return; // nothing to do + foreach($extracts as $index => $line){ + if( array_key_exists($index,$out) ) + continue; // this line already highlighted + + $m = array(); + if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) + continue; + + $offset = $m[0][1]; + $len = strlen($m[0][0]); + if($offset + $len < $contextchars) + $begin = 0; + elseif( $len > $contextchars) + $begin = $offset; + else + $begin = $offset + intval( ($len - $contextchars) / 2 ); + + $end = $begin + $contextchars; + + $posBegin = $begin; + // basic snippet from this line + $out[$index] = $this->extract($line,$begin,$end,$posBegin); + $offsets[$index] = $posBegin; + $linesleft--; + if($linesleft == 0) + return; + } + } + + /** + * Basic wikitext removal + * @protected + */ + function removeWiki($text) { + $fname = __METHOD__; + wfProfileIn( $fname ); + + //$text = preg_replace("/'{2,5}/", "", $text); + //$text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text); + 
//$text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text); + //$text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text); + //$text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text); + //$text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text); + $text = preg_replace("/\\{\\{([^|]+?)\\}\\}/", "", $text); + $text = preg_replace("/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text); + $text = preg_replace("/\\[\\[([^|]+?)\\]\\]/", "\\1", $text); + $text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text); + //$text = preg_replace("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", "\\2", $text); + $text = preg_replace("/<\/?[^>]+>/", "", $text); + $text = preg_replace("/'''''/", "", $text); + $text = preg_replace("/('''|<\/?[iIuUbB]>)/", "", $text); + $text = preg_replace("/''/", "", $text); + + wfProfileOut( $fname ); + return $text; + } + + /** + * callback to replace [[target|caption]] kind of links, if + * the target is category or image, leave it + * + * @param array $matches + */ + function linkReplace($matches){ + $colon = strpos( $matches[1], ':' ); + if( $colon === false ) + return $matches[2]; // replace with caption + global $wgContLang; + $ns = substr( $matches[1], 0, $colon ); + $index = $wgContLang->getNsIndex($ns); + if( $index !== false && ($index == NS_IMAGE || $index == NS_CATEGORY) ) + return $matches[0]; // return the whole thing + else + return $matches[2]; + + } + + /** + * Simple & fast snippet extraction, but gives completely unrelevant + * snippets + * + * @param string $text + * @param array $terms + * @param int $contextlines + * @param int $contextchars + * @return string + */ + public function highlightSimple( $text, $terms, $contextlines, $contextchars ) { + global $wgLang, $wgContLang; + $fname = __METHOD__; + + $lines = explode( "\n", $text ); + + $terms = implode( '|', $terms ); + $terms = str_replace( '/', "\\/", $terms); + $max = intval( $contextchars ) + 1; + $pat1 = "/(.*)($terms)(.{0,$max})/i"; 
+ + $lineno = 0; + + $extract = ""; + wfProfileIn( "$fname-extract" ); + foreach ( $lines as $line ) { + if ( 0 == $contextlines ) { + break; + } + ++$lineno; + $m = array(); + if ( ! preg_match( $pat1, $line, $m ) ) { + continue; + } + --$contextlines; + $pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' ); + + if ( count( $m ) < 3 ) { + $post = ''; + } else { + $post = $wgContLang->truncate( $m[3], $contextchars, ' ... ' ); + } + + $found = $m[2]; + + $line = htmlspecialchars( $pre . $found . $post ); + $pat2 = '/(' . $terms . ")/i"; + $line = preg_replace( $pat2, + "\\1", $line ); + + $extract .= "${line}\n"; + } + wfProfileOut( "$fname-extract" ); + + return $extract; + } + +} + +/** + * @ingroup Search + */ +class SearchEngineDummy { + function search( $term ) { + return null; + } + function setLimitOffset($l, $o) {} + function legalSearchChars() {} + function update() {} + function setnamespaces() {} + function searchtitle() {} + function searchtext() {} +} diff --git a/includes/search/MySQL.php b/includes/search/MySQL.php new file mode 100644 index 0000000000..f9b71c8ecd --- /dev/null +++ b/includes/search/MySQL.php @@ -0,0 +1,262 @@ + +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+# http://www.gnu.org/copyleft/gpl.html + +/** + * @file + * @ingroup Search + */ + +/** + * Search engine hook for MySQL 4+ + * @ingroup Search + */ +class SearchMySQL extends SearchEngine { + var $strictMatching = true; + + /** @todo document */ + function __construct( $db ) { + $this->db = $db; + } + + /** @todo document */ + function parseQuery( $filteredText, $fulltext ) { + global $wgContLang; + $lc = SearchEngine::legalSearchChars(); // Minus format chars + $searchon = ''; + $this->searchTerms = array(); + + # FIXME: This doesn't handle parenthetical expressions. + $m = array(); + if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', + $filteredText, $m, PREG_SET_ORDER ) ) { + foreach( $m as $terms ) { + if( $searchon !== '' ) $searchon .= ' '; + if( $this->strictMatching && ($terms[1] == '') ) { + $terms[1] = '+'; + } + $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); + if( !empty( $terms[3] ) ) { + // Match individual terms in result highlighting... + $regexp = preg_quote( $terms[3], '/' ); + if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; + } else { + // Match the quoted term in result highlighting... + $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); + } + $this->searchTerms[] = $regexp; + } + wfDebug( "Would search with '$searchon'\n" ); + wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" ); + } else { + wfDebug( "Can't understand search query '{$filteredText}'\n" ); + } + + $searchon = $this->db->strencode( $searchon ); + $field = $this->getIndexField( $fulltext ); + return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) "; + } + + public static function legalSearchChars() { + return "\"*" . parent::legalSearchChars(); + } + + /** + * Perform a full text search query and return a result set. 
+ * + * @param string $term - Raw search term + * @return MySQLSearchResultSet + * @access public + */ + function searchText( $term ) { + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); + } + + /** + * Perform a title-only search query and return a result set. + * + * @param string $term - Raw search term + * @return MySQLSearchResultSet + * @access public + */ + function searchTitle( $term ) { + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); + } + + + /** + * Return a partial WHERE clause to exclude redirects, if so set + * @return string + * @private + */ + function queryRedirect() { + if( $this->showRedirects ) { + return ''; + } else { + return 'AND page_is_redirect=0'; + } + } + + /** + * Return a partial WHERE clause to limit the search to the given namespaces. + * Returns '' (search all) when no namespace list is set, and falls back + * to namespace 0 when the list is empty. Every namespace index is cast + * to an integer before it is interpolated into the SQL, so a non-numeric + * entry cannot alter the query. + * @return string + * @private + */ + function queryNamespaces() { + if( is_null($this->namespaces) ) + return ''; # search all + $namespaces = implode( ',', array_map( 'intval', $this->namespaces ) ); + if ($namespaces == '') { + $namespaces = '0'; + } + return 'AND page_namespace IN (' . $namespaces . ')'; + } + + /** + * Return a LIMIT clause to limit results on the query. + * @return string + * @private + */ + function queryLimit() { + return $this->db->limitResult( '', $this->limit, $this->offset ); + } + + /** + * Does not do anything for generic search engine + * subclasses may define this though + * @return string + * @private + */ + function queryRanking( $filteredTerm, $fulltext ) { + return ''; + } + + /** + * Construct the full SQL query to do the search. 
+ * The guts shoulds be constructed in queryMain() + * @param string $filteredTerm + * @param bool $fulltext + * @private + */ + function getQuery( $filteredTerm, $fulltext ) { + return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . + $this->queryRedirect() . ' ' . + $this->queryNamespaces() . ' ' . + $this->queryRanking( $filteredTerm, $fulltext ) . ' ' . + $this->queryLimit(); + } + + + /** + * Picks which field to index on, depending on what type of query. + * @param bool $fulltext + * @return string + */ + function getIndexField( $fulltext ) { + return $fulltext ? 'si_text' : 'si_title'; + } + + /** + * Get the base part of the search query. + * The actual match syntax will depend on the server + * version; MySQL 3 and MySQL 4 have different capabilities + * in their fulltext search indexes. + * + * @param string $filteredTerm + * @param bool $fulltext + * @return string + * @private + */ + function queryMain( $filteredTerm, $fulltext ) { + $match = $this->parseQuery( $filteredTerm, $fulltext ); + $page = $this->db->tableName( 'page' ); + $searchindex = $this->db->tableName( 'searchindex' ); + return 'SELECT page_id, page_namespace, page_title ' . + "FROM $page,$searchindex " . + 'WHERE page_id=si_page AND ' . $match; + } + + /** + * Create or update the search index record for the given page. + * Title and text should be pre-processed. + * + * @param int $id + * @param string $title + * @param string $text + */ + function update( $id, $title, $text ) { + $dbw = wfGetDB( DB_MASTER ); + $dbw->replace( 'searchindex', + array( 'si_page' ), + array( + 'si_page' => $id, + 'si_title' => $title, + 'si_text' => $text + ), __METHOD__ ); + } + + /** + * Update a search index record's title only. + * Title should be pre-processed. 
+ * + * @param int $id + * @param string $title + */ + function updateTitle( $id, $title ) { + $dbw = wfGetDB( DB_MASTER ); + + $dbw->update( 'searchindex', + array( 'si_title' => $title ), + array( 'si_page' => $id ), + __METHOD__, + array( $dbw->lowPriorityOption() ) ); + } +} + +/** + * @ingroup Search + */ +class MySQLSearchResultSet extends SearchResultSet { + function MySQLSearchResultSet( $resultSet, $terms ) { + $this->mResultSet = $resultSet; + $this->mTerms = $terms; + } + + function termMatches() { + return $this->mTerms; + } + + function numRows() { + return $this->mResultSet->numRows(); + } + + function next() { + $row = $this->mResultSet->fetchObject(); + if( $row === false ) { + return false; + } else { + return new SearchResult( $row ); + } + } + + function free() { + $this->mResultSet->free(); + } +} diff --git a/includes/search/MySQL4.php b/includes/search/MySQL4.php new file mode 100644 index 0000000000..3e2bb2d1dd --- /dev/null +++ b/includes/search/MySQL4.php @@ -0,0 +1,34 @@ + +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * @file + * @ingroup Search + */ + +/** + * Search engine hook for MySQL 4+ + * This class retained for backwards compatibility... 
+ * The meat's been moved to SearchMySQL, since the 3.x variety is gone. + * @ingroup Search + * @deprecated + */ +class SearchMySQL4 extends SearchMySQL { + /* whee */ +} diff --git a/includes/search/Oracle.php b/includes/search/Oracle.php new file mode 100644 index 0000000000..bf9368d150 --- /dev/null +++ b/includes/search/Oracle.php @@ -0,0 +1,240 @@ + +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * @file + * @ingroup Search + */ + +/** + * Search engine hook base class for Oracle (ConText). + * @ingroup Search + */ +class SearchOracle extends SearchEngine { + function __construct($db) { + $this->db = $db; + } + + /** + * Perform a full text search query and return a result set. + * + * @param string $term - Raw search term + * @return OracleSearchResultSet + * @access public + */ + function searchText( $term ) { + $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), true))); + return new OracleSearchResultSet($resultSet, $this->searchTerms); + } + + /** + * Perform a title-only search query and return a result set. 
+ * + * @param string $term - Raw search term + * @return ORacleSearchResultSet + * @access public + */ + function searchTitle($term) { + $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false))); + return new MySQLSearchResultSet($resultSet, $this->searchTerms); + } + + + /** + * Return a partial WHERE clause to exclude redirects, if so set + * @return string + * @private + */ + function queryRedirect() { + if ($this->showRedirects) { + return ''; + } else { + return 'AND page_is_redirect=0'; + } + } + + /** + * Return a partial WHERE clause to limit the search to the given namespaces + * @return string + * @private + */ + function queryNamespaces() { + if( is_null($this->namespaces) ) + return ''; + $namespaces = implode(',', $this->namespaces); + if ($namespaces == '') { + $namespaces = '0'; + } + return 'AND page_namespace IN (' . $namespaces . ')'; + } + + /** + * Return a LIMIT clause to limit results on the query. + * @return string + * @private + */ + function queryLimit($sql) { + return $this->db->limitResult($sql, $this->limit, $this->offset); + } + + /** + * Does not do anything for generic search engine + * subclasses may define this though + * @return string + * @private + */ + function queryRanking($filteredTerm, $fulltext) { + return ' ORDER BY score(1)'; + } + + /** + * Construct the full SQL query to do the search. + * The guts shoulds be constructed in queryMain() + * @param string $filteredTerm + * @param bool $fulltext + * @private + */ + function getQuery( $filteredTerm, $fulltext ) { + return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' . + $this->queryRedirect() . ' ' . + $this->queryNamespaces() . ' ' . + $this->queryRanking( $filteredTerm, $fulltext ) . ' '); + } + + + /** + * Picks which field to index on, depending on what type of query. + * @param bool $fulltext + * @return string + */ + function getIndexField($fulltext) { + return $fulltext ? 
'si_text' : 'si_title'; + } + + /** + * Get the base part of the search query. + * + * @param string $filteredTerm + * @param bool $fulltext + * @return string + * @private + */ + function queryMain( $filteredTerm, $fulltext ) { + $match = $this->parseQuery($filteredTerm, $fulltext); + $page = $this->db->tableName('page'); + $searchindex = $this->db->tableName('searchindex'); + return 'SELECT page_id, page_namespace, page_title ' . + "FROM $page,$searchindex " . + 'WHERE page_id=si_page AND ' . $match; + } + + /** @todo document */ + function parseQuery($filteredText, $fulltext) { + global $wgContLang; + $lc = SearchEngine::legalSearchChars(); + $this->searchTerms = array(); + + # FIXME: This doesn't handle parenthetical expressions. + $m = array(); + $q = array(); + + if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', + $filteredText, $m, PREG_SET_ORDER)) { + foreach($m as $terms) { + $q[] = $terms[1] . $wgContLang->stripForSearch($terms[2]); + + if (!empty($terms[3])) { + $regexp = preg_quote( $terms[3], '/' ); + if ($terms[4]) + $regexp .= "[0-9A-Za-z_]+"; + } else { + $regexp = preg_quote(str_replace('"', '', $terms[2]), '/'); + } + $this->searchTerms[] = $regexp; + } + } + + $searchon = $this->db->strencode(join(',', $q)); + $field = $this->getIndexField($fulltext); + return " CONTAINS($field, '$searchon', 1) > 0 "; + } + + /** + * Create or update the search index record for the given page. + * Title and text should be pre-processed. + * + * @param int $id + * @param string $title + * @param string $text + */ + function update($id, $title, $text) { + $dbw = wfGetDB(DB_MASTER); + $dbw->replace('searchindex', + array('si_page'), + array( + 'si_page' => $id, + 'si_title' => $title, + 'si_text' => $text + ), 'SearchOracle::update' ); + $dbw->query("CALL ctx_ddl.sync_index('si_text_idx')"); + $dbw->query("CALL ctx_ddl.sync_index('si_title_idx')"); + } + + /** + * Update a search index record's title only. + * Title should be pre-processed. 
+ * + * @param int $id + * @param string $title + */ + function updateTitle($id, $title) { + $dbw = wfGetDB(DB_MASTER); + + $dbw->update('searchindex', + array('si_title' => $title), + array('si_page' => $id), + 'SearchOracle::updateTitle', + array()); + } +} + +/** + * @ingroup Search + */ +class OracleSearchResultSet extends SearchResultSet { + function __construct($resultSet, $terms) { + $this->mResultSet = $resultSet; + $this->mTerms = $terms; + } + + function termMatches() { + return $this->mTerms; + } + + function numRows() { + return $this->mResultSet->numRows(); + } + + function next() { + $row = $this->mResultSet->fetchObject(); + if ($row === false) + return false; + return new SearchResult($row); + } +} diff --git a/includes/search/Postgres.php b/includes/search/Postgres.php new file mode 100644 index 0000000000..02638bb5c8 --- /dev/null +++ b/includes/search/Postgres.php @@ -0,0 +1,255 @@ + +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
# http://www.gnu.org/copyleft/gpl.html

/**
 * @file
 * @ingroup Search
 */

/**
 * Search engine hook base class for Postgres
 * @ingroup Search
 */
class SearchPostgres extends SearchEngine {

	/**
	 * Declared as __construct(), like the other search backends in this
	 * patch; a method named after the class is not a constructor on
	 * current PHP.
	 *
	 * @param $db Database object used to escape and run the search queries
	 */
	function __construct( $db ) {
		$this->db = $db;
	}

	/**
	 * Perform a title search query via tsearch2 and return a result set.
	 * Matches against the titlevector column.
	 *
	 * @param string $term - Raw search term
	 * @return PostgresSearchResultSet, or SearchResultTooMany when the query fails
	 * @access public
	 */
	function searchTitle( $term ) {
		$q = $this->searchQuery( $term , 'titlevector', 'page_title' );
		# Only fatal errors are reported while the query runs; a failed
		# query is handled through the empty result below
		$olderror = error_reporting(E_ERROR);
		$resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
		error_reporting($olderror);
		if (!$resultSet) {
			// Needed for "Query requires full scan, GIN doesn't support it"
			return new SearchResultTooMany();
		}
		return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Perform a full text search query via tsearch2 and return a result set.
	 * Matches against the textvector column.
	 *
	 * @param string $term - Raw search term
	 * @return PostgresSearchResultSet, or SearchResultTooMany when the query fails
	 * @access public
	 */
	function searchText( $term ) {
		$q = $this->searchQuery( $term, 'textvector', 'old_text' );
		$olderror = error_reporting(E_ERROR);
		$resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
		error_reporting($olderror);
		if (!$resultSet) {
			return new SearchResultTooMany();
		}
		return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
	}


	/**
	 * Transform the user's search string into a better form for tsearch2
	 *
	 * @param string $term Raw search term
	 * @return string The tsquery expression, already quoted with addQuotes()
	 */
	function parseQuery( $term ) {

		wfDebug( "parseQuery received: $term" );

		## No backslashes allowed
		$term = preg_replace('/\\\/', '', $term);

		## Collapse parens into nearby words:
		$term = preg_replace('/\s*\(\s*/', ' (', $term);
		$term = preg_replace('/\s*\)\s*/', ') ', $term);

		## Treat colons as word separators:
		$term = preg_replace('/:/', ' ', $term);

		## Translate each word: and/or/not and a leading - or ! become
		## tsquery operators, everything else is ANDed in
		$searchstring = '';
		$m = array();
		if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
			foreach( $m as $terms ) {
				if (strlen($terms[1])) {
					$searchstring .= ' & !';
				}
				if (strtolower($terms[2]) === 'and') {
					$searchstring .= ' & ';
				}
				else if (strtolower($terms[2]) === 'or' or $terms[2] === '|') {
					$searchstring .= ' | ';
				}
				else if (strtolower($terms[2]) === 'not') {
					$searchstring .= ' & !';
				}
				else {
					$searchstring .= " & $terms[2]";
				}
			}
		}

		## Strip out leading junk
		$searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring);

		## Remove any doubled-up operators
		$searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring);

		## Remove any non-spaced operators (e.g. "Zounds!")
		$searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring);

		## Remove any trailing whitespace or operators
		$searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring);

		## Remove unnecessary quotes around everything
		$searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring);

		## Quote the whole thing
		$searchstring = $this->db->addQuotes($searchstring);

		wfDebug( "parseQuery returned: $searchstring" );

		return $searchstring;

	}

	/**
	 * Construct the full SQL query to do the search.
	 * Also records the lexemes of the parsed query in $this->searchTerms.
	 *
	 * @param string $term Raw search term
	 * @param string $fulltext Name of the tsvector column to match against
	 * @param string $colname Not used by this method
	 * @return string
	 * @private
	 */
	function searchQuery( $term, $fulltext, $colname ) {
		global $wgDBversion;

		if ( !isset( $wgDBversion ) ) {
			$this->db->getServerVersion();
			$wgDBversion = $this->db->numeric_version;
		}
		# Servers older than 8.3 get an explicit 'default' configuration argument
		$prefix = $wgDBversion < 8.3 ? "'default'," : '';

		$searchstring = $this->parseQuery( $term );

		## We need a separate query here so gin does not complain about empty searches
		$SQL = "SELECT to_tsquery($prefix $searchstring)";
		$res = $this->db->doQuery($SQL);
		if (!$res) {
			## TODO: Better output (example to catch: one 'two)
			die ("Sorry, that was not a valid search string. Please go back and try again");
		}
		$top = pg_fetch_result($res,0,0);

		if ($top === "") { ## e.g. if only stopwords are used XXX return something better
			# "AND 1=0" makes this query return no rows
			$query = "SELECT page_id, page_namespace, page_title, 0 AS score ".
				"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
				"AND r.rev_text_id = c.old_id AND 1=0";
		}
		else {
			# Collect every single-quoted lexeme of the parsed query
			$m = array();
			if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
				foreach( $m as $terms ) {
					$this->searchTerms[$terms[1]] = $terms[1];
				}
			}

			$rankscore = $wgDBversion > 8.2 ? 5 : 1;
			$rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank';
			$query = "SELECT page_id, page_namespace, page_title, ".
				"$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ".
				"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
				"AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)";
		}

		## Redirects
		if (! $this->showRedirects)
			$query .= ' AND page_is_redirect = 0';

		## Namespaces - defaults to 0
		if( !is_null($this->namespaces) ){ // null -> search all
			if ( count($this->namespaces) < 1)
				$query .= ' AND page_namespace = 0';
			else {
				# The list goes straight into the SQL text, so force every entry to an integer
				$namespaces = implode( ',', array_map( 'intval', $this->namespaces ) );
				$query .= " AND page_namespace IN ($namespaces)";
			}
		}

		$query .= " ORDER BY score DESC, page_id DESC";

		$query .= $this->db->limitResult( '', $this->limit, $this->offset );

		wfDebug( "searchQuery returned: $query" );

		return $query;
	}

	## Most of the work of these two functions are done automatically via triggers

	/**
	 * Clear the text vector of the second-newest text row of a page, so
	 * that older revisions are not indexed.
	 *
	 * @param int $pageid
	 * @param string $title Not used by this method
	 * @param string $text Not used by this method
	 * @return bool Always true
	 */
	function update( $pageid, $title, $text ) {
		# The ID goes straight into the SQL text, so force it to an integer
		$pageid = intval( $pageid );
		## We don't want to index older revisions
		$SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ".
			"(SELECT rev_text_id FROM revision WHERE rev_page = $pageid ".
			"ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)";
		$this->db->doQuery($SQL);
		return true;
	}

	/**
	 * Does nothing in this class.
	 * @return bool Always true
	 */
	function updateTitle( $id, $title ) {
		return true;
	}

} ## end of the SearchPostgres class

/**
 * Search result carrying the score computed by SearchPostgres::searchQuery().
 * @ingroup Search
 */
class PostgresSearchResult extends SearchResult {
	/**
	 * @param $row Result row with page_namespace, page_title and score fields
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$this->score = $row->score;
	}

	/** @return The score field of the row given to the constructor */
	function getScore() {
		return $this->score;
	}
}

/**
 * Result set wrapper handing out PostgresSearchResult objects.
 * @ingroup Search
 */
class PostgresSearchResultSet extends SearchResultSet {
	/**
	 * @param $resultSet Result wrapper to read rows from
	 * @param array $terms Search terms collected by SearchPostgres::searchQuery()
	 */
	function __construct( $resultSet, $terms ) {
		$this->mResultSet = $resultSet;
		$this->mTerms = $terms;
	}

	/** @return array The terms given to the constructor */
	function termMatches() {
		return $this->mTerms;
	}

	/** @return int Row count reported by the wrapped result */
	function numRows() {
		return $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next row.
	 * @return PostgresSearchResult, or false once the rows are exhausted
	 */
	function next() {
		$row = $this->mResultSet->fetchObject();
		if( $row === false ) {
			return false;
		} else {
			return new PostgresSearchResult( $row );
		}
	}
}

# Patch metadata for the next file in this commit:
# diff --git a/includes/search/Tsearch2.php b/includes/search/Tsearch2.php
# new file mode 100644
# index 0000000000..e69f6acdd2
# --- /dev/null
# +++ b/includes/search/Tsearch2.php
# @@ -0,0 +1,120 @@
# , Domas Mituzas
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Search engine hook for PostgreSQL / Tsearch2
 * @file
 * @ingroup Search
 */

/**
 * Search backend that matches to_tsquery() expressions against the
 * si_title / si_text columns of the searchindex table.
 * @ingroup Search
 */
class SearchTsearch2 extends SearchEngine {
	var $strictMatching = false;

	/**
	 * @param $db Database object used to escape the search queries
	 */
	function __construct( $db ) {
		$this->db = $db;
		$this->mRanking = true;
	}

	/**
	 * Picks which field to index on, depending on what type of query.
	 * @param bool $fulltext
	 * @return string
	 */
	function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Turn a filtered search string into the argument for to_tsquery().
	 *
	 * As a side effect $this->searchTerms is reset and then filled with one
	 * regular-expression fragment per recognised term.
	 *
	 * @param string $filteredText Search string
	 * @param bool $fulltext Not used by this method
	 * @return string Terms joined with "&", escaped with strencode()
	 */
	function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;
		$lc = SearchEngine::legalSearchChars();
		$searchon = '';
		$this->searchTerms = array();

		# FIXME: This doesn't handle parenthetical expressions.
		# Groups: 1 = optional operator, 2 = whole term, 3 = bare word,
		# 4 = trailing "*" wildcard. Quoted phrases leave 3 and 4 unset.
		$m = array();
		if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			  $filteredText, $m, PREG_SET_ORDER ) ) {
			foreach( $m as $terms ) {
				if( $searchon !== '' ) $searchon .= ' ';
				if( $this->strictMatching && ($terms[1] == '') ) {
					$terms[1] = '+';
				}
				$searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
				if( !empty( $terms[3] ) ) {
					$regexp = preg_quote( $terms[3], '/' );
					if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
				} else {
					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
				}
				$this->searchTerms[] = $regexp;
			}
			wfDebug( "Would search with '$searchon'\n" );
			wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" );
		} else {
			# Log the parameter; this class never sets a filteredText property
			wfDebug( "Can't understand search query '{$filteredText}'\n" );
		}

		# Every run of whitespace between terms becomes the "&" operator
		$searchon = preg_replace( '/(\s+)/', '&', $searchon );
		$searchon = $this->db->strencode( $searchon );
		return $searchon;
	}

	/**
	 * Return the ORDER BY clause, or an empty string when $this->mRanking
	 * is off.
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	function queryRanking( $filteredTerm, $fulltext ) {
		$field = $this->getIndexField( $fulltext );
		$searchon = $this->parseQuery( $filteredTerm, $fulltext );
		if ($this->mRanking)
			return " ORDER BY rank($field,to_tsquery('$searchon')) DESC";
		else
			return "";
	}


	/**
	 * Get the base part of the search query: a join of the cur and
	 * searchindex tables filtered by the tsquery match.
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$field = $this->getIndexField( $fulltext );
		$cur = $this->db->tableName( 'cur' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' .
			"FROM $cur,$searchindex " .
			'WHERE cur_id=si_page AND ' .
			" $field @@ to_tsquery ('$match') " ;
	}

	/**
	 * Replace the search index record for the given page: the existing row
	 * is deleted and a new one inserted.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	function update( $id, $title, $text ) {
		$dbw = wfGetDB( DB_MASTER );
		# The ID goes straight into the SQL text, so force it to an integer
		$id = intval( $id );
		$searchindex = $dbw->tableName( 'searchindex' );
		$sql = "DELETE FROM $searchindex WHERE si_page={$id}";
		$dbw->query( $sql, __METHOD__ );
		$sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ".
			" VALUES ( $id, to_tsvector('".
			$dbw->strencode($title).
			"'),to_tsvector('".
			$dbw->strencode( $text)."')) ";
		$dbw->query($sql, __METHOD__ );
	}

	/**
	 * Update a search index record's title only.
	 *
	 * @param int $id
	 * @param string $title
	 */
	function updateTitle($id,$title) {
		$dbw = wfGetDB(DB_MASTER);
		# The ID goes straight into the SQL text, so force it to an integer
		$id = intval( $id );
		$searchindex = $dbw->tableName( 'searchindex' );
		$sql = "UPDATE $searchindex SET si_title=to_tsvector('" .
			$dbw->strencode( $title ) .
			"') WHERE si_page={$id}";

		$dbw->query( $sql, __METHOD__ );
	}
}

# Patch metadata for the next file in this commit:
# diff --git a/includes/search/Update.php b/includes/search/Update.php
# new file mode 100644
# index 0000000000..087a8ba5dc
# --- /dev/null
# +++ b/includes/search/Update.php
# @@ -0,0 +1,113 @@
#
# NOTE(review): the beginning of Update.php (everything up to and including
# the "$this->" in front of the first statement below) is missing from this
# copy of the patch. The surviving tail of the constructor is reproduced
# unchanged; restore the head from the repository before using this file.
mId = $id;
			$this->mText = $text;

			$this->mNamespace = $nt->getNamespace();
			$this->mTitle = $nt->getText(); # Discard namespace

			$this->mTitleWords = $this->mTextWords = array();
		} else {
			wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
		}
	}

	/**
	 * Push this page's title, and its text unless mText is false, to the
	 * search engine returned by SearchEngine::create().
	 *
	 * The text is lower-cased and stripped of HTML tags, external URLs and
	 * every character that is not a legal search character before the
	 * 'SearchUpdate' hook runs and the index record is written.
	 *
	 * @return false when $wgDisableSearchUpdate is set or no page ID is
	 *  known; nothing otherwise
	 */
	function doUpdate() {
		global $wgContLang, $wgDisableSearchUpdate;

		if( $wgDisableSearchUpdate || !$this->mId ) {
			return false;
		}
		$fname = 'SearchUpdate::doUpdate';
		wfProfileIn( $fname );

		$search = SearchEngine::create();
		$lc = SearchEngine::legalSearchChars() . '&#;';

		# No text given: only the title has to be refreshed
		if( $this->mText === false ) {
			$search->updateTitle($this->mId,
				Title::indexTitle( $this->mNamespace, $this->mTitle ));
			wfProfileOut( $fname );
			return;
		}

		# Language-specific strip/conversion
		$text = $wgContLang->stripForSearch( $this->mText );

		wfProfileIn( $fname.'-regexps' );
		$text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/",
			' ', strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup
		$text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
			"\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings

		# Strip external URLs
		$uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
		$protos = "http|https|ftp|mailto|news|gopher";
		$pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
		$text = preg_replace( $pat, "\\1 \\3", $text );

		$p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/";
		$p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
		$text = preg_replace( $p1, "\\1 ", $text );
		$text = preg_replace( $p2, "\\1 \\3 ", $text );

		# Internal image links
		$pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
		$text = preg_replace( $pat2, " \\1 \\3", $text );

		$text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
			"\\1\\2 \\2\\3", $text ); # Handle [[game]]s

		# Strip all remaining non-search characters
		$text = preg_replace( "/[^{$lc}]+/", " ", $text );

		# Handle 's, s'
		#
		#   $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
		#   $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
		#
		# These tail-anchored regexps are insanely slow. The worst case comes
		# when Japanese or Chinese text (ie, no word spacing) is written on
		# a wiki configured for Western UTF-8 mode. The Unicode characters are
		# expanded to hex codes and the "words" are very long paragraph-length
		# monstrosities. On a large page the above regexps may take over 20
		# seconds *each* on a 1GHz-level processor.
		#
		# Following are reversed versions which are consistently fast
		# (about 3 milliseconds on 1GHz-level processor).
		#
		$text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
		$text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );

		# Strip wiki '' and '''
		$text = preg_replace( "/''[']*/", " ", $text );
		wfProfileOut( "$fname-regexps" );

		# $text is passed by reference, so a hook may rewrite it
		wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) );

		# Perform the actual update
		$search->update($this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ),
				$text);

		wfProfileOut( $fname );
	}
}

/**
 * Placeholder class
 * @ingroup Search
 */
class SearchUpdateMyISAM extends SearchUpdate {
	# Inherits everything
}