From: Brion Vibber Date: Sun, 15 Aug 2004 08:23:19 +0000 (+0000) Subject: More code cleanup on SearchEngine. X-Git-Tag: 1.5.0alpha1~2335 X-Git-Url: http://git.cyclocoop.org/%24self?a=commitdiff_plain;h=68d14537ea3dcb1aee11d93ada9035d58d75cf9f;p=lhc%2Fweb%2Fwiklou.git More code cleanup on SearchEngine. Renamed member vars to fooBar style, use $this->rawText more consistently. Separated matching parts of goResult() out. More redundant bits purged. Switching some messages to wikitext. Death to tags! --- diff --git a/includes/SearchEngine.php b/includes/SearchEngine.php index 188e39b678..94162cff32 100644 --- a/includes/SearchEngine.php +++ b/includes/SearchEngine.php @@ -5,32 +5,34 @@ define( "MW_SEARCH_OK", true ); define( "MW_SEARCH_BAD_QUERY", false ); class SearchEngine { - /* private */ var $mRawtext, $mUsertext, $mSearchterms; - /* private */ var $mTitlecond, $mTextcond; + /* private */ var $rawText, $filteredText, $searchTerms; + /* private */ var $titleCond, $textCond; var $doSearchRedirects = true; - var $addtoquery = array(); + var $addToQuery = array(); var $namespacesToSearch = array(); var $alternateTitle; - var $all_titles = false; + var $allTitles = false; + + function SearchEngine( $text ) { + $this->rawText = trim( $text ); - function SearchEngine( $text ) - { # We display the query, so let's strip it for safety # global $wgDBmysql4; $lc = SearchEngine::legalSearchChars() . "()"; - if( $wgDBmysql4 ) $lc .= "\"~<>*+-"; - $this->mRawtext = $text; - $this->mUsertext = trim( preg_replace( "/[^{$lc}]/", " ", $text ) ); - $this->mSearchterms = array(); - $this->mStrictMatching = true; # Google-style, add '+' on all terms + if( $wgDBmysql4 ) { + $lc .= "\"~<>*+-"; + } + $this->filteredText = trim( preg_replace( "/[^{$lc}]/", " ", $text ) ); + $this->searchTerms = array(); + $this->strictMatching = true; # Google-style, add '+' on all terms $this->db =& wfGetDB( DB_SLAVE ); } - function queryNamespaces() - { + # Return a partial WHERE clause to limit the search to the given namespaces + function queryNamespaces() { $namespaces = implode( ",", $this->namespacesToSearch ); if ($namespaces == "") { $namespaces = "0"; @@ -38,8 +40,8 @@ class SearchEngine { return "AND cur_namespace IN (" . $namespaces . ")"; } - function searchRedirects() - { + # Return a partial WHERE clause to include or exclude redirects from results + function searchRedirects() { if ( $this->doSearchRedirects ) { return ""; } else { @@ -62,12 +64,11 @@ class SearchEngine { # Display the "power search" footer. Does not actually perform the search, # that is done by showResults() - function powersearch() - { + function powersearch() { global $wgUser, $wgOut, $wgLang, $wgTitle, $wgRequest; $sk =& $wgUser->getSkin(); - $search = $this->mRawtext; + $search = $this->rawText; $searchx = $wgRequest->getVal( 'searchx' ); $listredirs = $wgRequest->getVal( 'redirs' ); @@ -75,7 +76,7 @@ class SearchEngine { $tempText = ""; # Temporary text, for substitution into $ret if( isset( $_REQUEST["searchx"] ) ) { - $this->addtoquery["searchx"] = "1"; + $this->addToQuery["searchx"] = "1"; } # Do namespace checkboxes @@ -99,7 +100,7 @@ class SearchEngine { $checked = ""; if ( $checkboxValue == 1 ) { $checked = " checked='checked'"; - $this->addtoquery["ns{$i}"] = 1; + $this->addToQuery["ns{$i}"] = 1; array_push( $this->namespacesToSearch, $i ); } $name = str_replace( "_", " ", $namespaces[$i] ); @@ -119,7 +120,7 @@ class SearchEngine { $checked = ""; if ( $listredirs == 1 ) { - $this->addtoquery["redirs"] = 1; + $this->addToQuery["redirs"] = 1; $checked = " checked='checked'"; } $tempText = "\n"; @@ -152,107 +153,100 @@ class SearchEngine { function setupPage() { global $wgOut; $wgOut->setPageTitle( wfMsg( "searchresults" ) ); - $q = wfMsg( "searchquery", htmlspecialchars( $this->mRawtext ) ); - $wgOut->setSubtitle( $q ); + $wgOut->setSubtitle( wfMsg( "searchquery", htmlspecialchars( $this->rawText ) ) ); $wgOut->setArticleRelated( false ); $wgOut->setRobotpolicy( "noindex,nofollow" ); } # Perform the search and construct the results page - function showResults() - { + function showResults() { global $wgUser, $wgTitle, $wgOut, $wgLang; global $wgDisableTextSearch, $wgInputEncoding; $fname = "SearchEngine::showResults"; - $search = $this->mRawtext; + $search = $this->rawText; $powersearch = $this->powersearch(); /* Need side-effects here? */ $this->setupPage(); $sk = $wgUser->getSkin(); - $header = wfMsg( "searchresulttext", $sk->makeKnownLink( - wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) ); - $wgOut->addHTML( $header ); - - $this->parseQuery(); - if ( "" == $this->mTitlecond || "" == $this->mTextcond ) { - $wgOut->addHTML( "

" . wfMsg( "badquery" ) . "

\n" . - "

" . wfMsg( "badquerytext" ) . "

\n" ); + $wgOut->addWikiText( wfMsg( "searchresulttext" ) ); + + if ( !$this->parseQuery() ) { + $wgOut->addWikiText( + "==" . wfMsg( "badquery" ) . "==\n" . + wfMsg( "badquerytext" ) ); return; } list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" ); - - $searchnamespaces = $this->queryNamespaces(); - $redircond = $this->searchRedirects(); if ( $wgDisableTextSearch ) { $wgOut->addHTML( wfMsg( "searchdisabled" ) ); - $wgOut->addHTML( wfMsg( "googlesearch", htmlspecialchars( $search ), htmlspecialchars( $wgInputEncoding ) ) ); + $wgOut->addHTML( wfMsg( "googlesearch", + htmlspecialchars( $this->rawText ), + htmlspecialchars( $wgInputEncoding ) ) ); + return; + } + + $titleMatches = $this->getMatches( $this->titleCond, $limit, $offset ); + $textMatches = $this->getMatches( $this->textCond, $limit, $offset ); + + $sk = $wgUser->getSkin(); + + $num = count( $titleMatches ) + count( $textMatches ); + if ( $num >= $limit ) { + $top = wfShowingResults( $offset, $limit ); } else { - if( $this->parseQuery() == MW_SEARCH_BAD_QUERY ) { - $wgOut->addHTML( "

" . wfMsg( "badquery" ) . "

\n" . - "

" . wfMsg( "badquerytext" ) . "

\n" ); - return; - } + $top = wfShowingResultsNum( $offset, $limit, $num ); + } + $wgOut->addHTML( "

{$top}

\n" ); - list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" ); - $titleMatches = $this->getMatches( $this->mTitlecond, $limit, $offset ); - $textMatches = $this->getMatches( $this->mTextcond, $limit, $offset ); + # For powersearch + $a2l = ""; + $akk = array_keys( $this->addToQuery ); + foreach ( $akk AS $ak ) { + $a2l .= "&{$ak}={$this->addToQuery[$ak]}" ; + } - $sk = $wgUser->getSkin(); - - $num = count( $titleMatches ) + count( $textMatches ); - if ( $num >= $limit ) { - $top = wfShowingResults( $offset, $limit ); - } else { - $top = wfShowingResultsNum( $offset, $limit, $num ); - } - $wgOut->addHTML( "

{$top}

\n" ); - - # For powersearch - - $a2l = ""; - $akk = array_keys( $this->addtoquery ); - foreach ( $akk AS $ak ) { - $a2l .= "&{$ak}={$this->addtoquery[$ak]}" ; - } - - $sl = wfViewPrevNext( $offset, $limit, "", - "search=" . wfUrlencode( $this->mUsertext ) . $a2l ); - $wgOut->addHTML( "
{$sl}\n" ); - - $foundsome = $this->showMatches( $titleMatches, $offset, "notitlematches", "titlematches" ) - || $this->showMatches( $textMatches, $offset, "notextmatches", "textmatches" ); - - if ( ! $foundsome ) { - $wgOut->addHTML( "

" . wfMsg( "nonefound" ) . "

\n" ); - } - $wgOut->addHTML( "

{$sl}

\n" ); - $wgOut->addHTML( $powersearch ); + $prevnext = wfViewPrevNext( $offset, $limit, "", + "search=" . wfUrlencode( $this->filteredText ) . $a2l ); + $wgOut->addHTML( "
{$prevnext}\n" ); + + $foundsome = $this->showMatches( $titleMatches, $offset, "notitlematches", "titlematches" ) + || $this->showMatches( $textMatches, $offset, "notextmatches", "textmatches" ); + + if ( !$foundsome ) { + $wgOut->addWikiText( wfMsg( "nonefound" ) ); } + $wgOut->addHTML( "

{$prevnext}

\n" ); + $wgOut->addHTML( $powersearch ); } - function legalSearchChars() - { + function legalSearchChars() { $lc = "A-Za-z_'0-9\\x80-\\xFF\\-"; return $lc; } - function parseQuery() - { - global $wgDBminWordLen, $wgLang, $wgDBmysql4; - + function parseQuery() { + global $wgDBmysql4; if( $wgDBmysql4 ) { # Use cleaner boolean search if available - return $this->parseQuery4( $this->db ); + return $this->parseQuery4(); + } else { + # Fall back to ugly hack with multiple search clauses + return $this->parseQuery3(); } + } + + function parseQuery3() { + global $wgDBminWordLen, $wgLang; + # on non mysql4 database: get list of words we don't want to search for require_once( "FulltextStoplist.php" ); $lc = SearchEngine::legalSearchChars() . "()"; - $q = preg_replace( "/([()])/", " \\1 ", $this->mUsertext ); + $q = preg_replace( "/([()])/", " \\1 ", $this->filteredText ); $q = preg_replace( "/\\s+/", " ", $q ); $w = explode( " ", trim( $q ) ); @@ -272,35 +266,34 @@ class SearchEngine { $cond .= " (MATCH (##field##) AGAINST ('" . $this->db->strencode( $word ). "'))"; $last = $word; - array_push( $this->mSearchterms, "\\b" . $word . "\\b" ); + array_push( $this->searchTerms, "\\b" . $word . "\\b" ); } } - if ( 0 == count( $this->mSearchterms ) ) { + if ( 0 == count( $this->searchTerms ) ) { return MW_SEARCH_BAD_QUERY; } - $this->mTitlecond = "(" . str_replace( "##field##", + $this->titleCond = "(" . str_replace( "##field##", "si_title", $cond ) . " )"; - $this->mTextcond = "(" . str_replace( "##field##", + $this->textCond = "(" . str_replace( "##field##", "si_text", $cond ) . " AND (cur_is_redirect=0) )"; return MW_SEARCH_OK; } - function parseQuery4() - { + function parseQuery4() { global $wgLang; $lc = SearchEngine::legalSearchChars(); $searchon = ""; - $this->mSearchterms = array(); + $this->searchTerms = array(); # FIXME: This doesn't handle parenthetical expressions. if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', - $this->mUsertext, $m, PREG_SET_ORDER ) ) { + $this->filteredText, $m, PREG_SET_ORDER ) ) { foreach( $m as $terms ) { if( $searchon !== "" ) $searchon .= " "; - if( $this->mStrictMatching && ($terms[1] == "") ) { + if( $this->strictMatching && ($terms[1] == "") ) { $terms[1] = "+"; } $searchon .= $terms[1] . $wgLang->stripForSearch( $terms[2] ); @@ -310,21 +303,21 @@ class SearchEngine { } else { $regexp = preg_quote( str_replace( '"', '', $terms[2] ) ); } - $this->mSearchterms[] = $regexp; + $this->searchTerms[] = $regexp; } wfDebug( "Would search with '$searchon'\n" ); - wfDebug( "Match with /\b" . implode( '\b|\b', $this->mSearchterms ) . "\b/\n" ); + wfDebug( "Match with /\b" . implode( '\b|\b', $this->searchTerms ) . "\b/\n" ); } else { - wfDebug( "Can't understand search query '$this->mUsertext'\n" ); + wfDebug( "Can't understand search query '{$this->filteredText}'\n" ); } $searchon = $this->db->strencode( $searchon ); - $this->mTitlecond = " MATCH(si_title) AGAINST('$searchon' IN BOOLEAN MODE)"; - $this->mTextcond = " (MATCH(si_text) AGAINST('$searchon' IN BOOLEAN MODE) AND cur_is_redirect=0)"; + $this->titleCond = " MATCH(si_title) AGAINST('$searchon' IN BOOLEAN MODE)"; + $this->textCond = " (MATCH(si_text) AGAINST('$searchon' IN BOOLEAN MODE) AND cur_is_redirect=0)"; return MW_SEARCH_OK; } - function &getMatches( $cond, $limit, $offset ) { + function &getMatches( $cond, $limit, $offset = 0 ) { $searchindex = $this->db->tableName( 'searchindex' ); $cur = $this->db->tableName( 'cur' ); $searchnamespaces = $this->queryNamespaces(); @@ -334,7 +327,7 @@ class SearchEngine { "cur_text FROM $cur,$searchindex " . "WHERE cur_id=si_page AND {$cond} " . "{$searchnamespaces} {$redircond} " . - "LIMIT {$offset}, {$limit}"; + $this->db->limitResult( $limit, $offset ); $res = $this->db->query( $sql, "SearchEngine::getMatches" ); $matches = array(); @@ -365,11 +358,14 @@ class SearchEngine { } } - function showHit( $row ) - { + function showHit( $row ) { global $wgUser, $wgOut, $wgLang; $t = Title::makeName( $row->cur_namespace, $row->cur_title ); + if( is_null( $t ) ) { + $wgOut->addHTML( "\n" ); + return; + } $sk = $wgUser->getSkin(); $contextlines = $wgUser->getOption( "contextlines" ); @@ -382,7 +378,7 @@ class SearchEngine { $wgOut->addHTML( "
  • {$link} ({$size})" ); $lines = explode( "\n", $row->cur_text ); - $pat1 = "/(.*)(" . implode( "|", $this->mSearchterms ) . ")(.*)/i"; + $pat1 = "/(.*)(" . implode( "|", $this->searchTerms ) . ")(.*)/i"; $lineno = 0; foreach ( $lines as $line ) { @@ -406,72 +402,76 @@ class SearchEngine { $found = $m[2]; $line = htmlspecialchars( $pre . $found . $post ); - $pat2 = "/(" . implode( "|", $this->mSearchterms ) . ")/i"; + $pat2 = "/(" . implode( "|", $this->searchTerms ) . ")/i"; $line = preg_replace( $pat2, - "\\1", $line ); + "\\1", $line ); $wgOut->addHTML( "
    {$lineno}: {$line}\n" ); } $wgOut->addHTML( "
  • \n" ); } - function goResult() - { - global $wgOut, $wgGoToEdit; - global $wgDisableTextSearch; - $fname = "SearchEngine::goResult"; - - $search = trim( $this->mRawtext ); - - # Try to go to page as entered. - # - $t = Title::newFromText( $search ); - - # If the string cannot be used to create a title - if( false == $t ){ - $this->showResults(); - return; - } - + function getNearMatch() { # Exact match? No need to look further. - if ( $t->getNamespace() == NS_SPECIAL || 0 != $t->getArticleID() ) { - $wgOut->redirect( $t->getFullURL() ); - return; + $title = Title::newFromText( $this->rawText ); + if ( $title->getNamespace() == NS_SPECIAL || 0 != $title->getArticleID() ) { + return $title; } # Now try all lower case (i.e. first letter capitalized) # - $t = Title::newFromText( strtolower( $search ) ); - if ( 0 != $t->getArticleID() ) { - $wgOut->redirect( $t->getFullURL() ); - return; + $title = Title::newFromText( strtolower( $this->rawText ) ); + if ( 0 != $title->getArticleID() ) { + return $title; } # Now try capitalized string # - $t = Title::newFromText( ucwords( strtolower( $search ) ) ); - if ( 0 != $t->getArticleID() ) { - $wgOut->redirect( $t->getFullURL() ); - return; + $title = Title::newFromText( ucwords( strtolower( $this->rawText ) ) ); + if ( 0 != $title->getArticleID() ) { + return $title; } # Now try all upper case # - $t = Title::newFromText( strtoupper( $search ) ); - if ( 0 != $t->getArticleID() ) { - $wgOut->redirect( $t->getFullURL() ); - return; + $title = Title::newFromText( strtoupper( $this->rawText ) ); + if ( 0 != $title->getArticleID() ) { + return $title; } # Entering an IP address goes to the contributions page - if ( preg_match( '/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $search ) ) { - $title = Title::makeTitle( NS_SPECIAL, "Contributions" ); - $wgOut->redirect( $title->getFullUrl( "target=$search" ) ); + if ( preg_match( '/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $this->rawText ) ) { + $title = Title::makeTitle( NS_SPECIAL, "Contributions/" . $this->rawText ); + return $title; + } + + return NULL; + } + + function goResult() { + global $wgOut, $wgGoToEdit; + global $wgDisableTextSearch; + $fname = "SearchEngine::goResult"; + + # Try to go to page as entered. + # + $t = Title::newFromText( $this->rawText ); + + # If the string cannot be used to create a title + if( is_null( $t ) ){ + $this->showResults(); return; } + # If there's an exact or very near match, jump right there. + $t = $this->getNearMatch(); + if( !is_null( $t ) ) { + $wgOut->redirect( $t->getFullURL() ); + return; + } + # No match, generate an edit URL - $t = Title::newFromText( $this->mRawtext ); + $t = Title::newFromText( $this->rawText ); # If the feature is enabled, go straight to the edit page if ( $wgGoToEdit ) { @@ -491,7 +491,7 @@ class SearchEngine { global $wgDisableFuzzySearch; if(! $wgDisableFuzzySearch ){ foreach( array(NS_MAIN, NS_WP, NS_USER, NS_IMAGE, NS_MEDIAWIKI) as $namespace){ - $anyhit |= SearchEngine::doFuzzyTitleSearch( $search, $namespace ); + $anyhit |= SearchEngine::doFuzzyTitleSearch( $this->rawText, $namespace ); } } @@ -561,9 +561,9 @@ class SearchEngine { $fname = 'SearchEngin::getTitlesByLength'; // to avoid multiple costly SELECTs in case of no memcached - if( $this->all_titles ){ - if( isset( $this->all_titles[$aLength][$aNamespace] ) ){ - return $this->all_titles[$aLength][$aNamespace]; + if( $this->allTitles ){ + if( isset( $this->allTitles[$aLength][$aNamespace] ) ){ + return $this->allTitles[$aLength][$aNamespace]; } else { return array(); } @@ -596,7 +596,7 @@ class SearchEngine { $wgMemc->set( $mkey, $title_arr, 3600 * 24 ); } } - $this->all_titles = $titles; + $this->allTitles = $titles; if( isset( $titles[$aLength][$aNamespace] ) ) return $titles[$aLength][$aNamespace]; else diff --git a/languages/Language.php b/languages/Language.php index 1db5cfe314..7243d31fc2 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -687,9 +687,9 @@ Legend: (cur) = difference with current version, # Search results # 'searchresults' => 'Search results', -'searchhelppage' => "{{ns:4}}:Searching", -'searchingwikipedia' => "Searching {{SITENAME}}", -'searchresulttext' => "For more information about searching {{SITENAME}}, see $1.", +#'searchhelppage' => "{{ns:4}}:Searching", +#'searchingwikipedia' => "Searching {{SITENAME}}", +'searchresulttext' => "For more information about searching {{SITENAME}}, see [[Project:Searching|Searching {{SITENAME}}]].", 'searchquery' => "For query \"$1\"", 'badquery' => 'Badly formed search query', 'badquerytext' => 'We could not process your query. @@ -710,7 +710,7 @@ and the text of $3 pages.", 'viewprevnext' => "View ($1) ($2) ($3).", 'showingresults' => "Showing below $1 results starting with #$2.", 'showingresultsnum' => "Showing below $3 results starting with #$2.", -'nonefound' => "Note: unsuccessful searches are +'nonefound' => "'''Note''': unsuccessful searches are often caused by searching for common words like \"have\" and \"from\", which are not indexed, or by specifying more than one search term (only pages containing all of the search terms will appear in the result).", diff --git a/stylesheets/common.css b/stylesheets/common.css index ed340016f5..4e8bc08f18 100644 --- a/stylesheets/common.css +++ b/stylesheets/common.css @@ -200,3 +200,7 @@ div.townBox dl dd { font-size:150%; margin:5px; } +.searchmatch { + color: red; + font-weight: bold; +} diff --git a/stylesheets/monobook/main.css b/stylesheets/monobook/main.css index bab6218bfa..b4e68c22a5 100644 --- a/stylesheets/monobook/main.css +++ b/stylesheets/monobook/main.css @@ -914,6 +914,9 @@ div.patrollink { font-size: 75%; text-align: right; } -span.newpage, span.minor { +span.newpage, span.minor, span.searchmatch { font-weight: bold; } +span.searchmatch { + color: red; +}