-<?
+<?php
# See search.doc
class SearchEngine {
var $addtoquery = array();
var $namespacesToSearch = array();
var $alternateTitle;
+ var $all_titles = false;
function SearchEngine( $text )
{
if( $wgDBmysql4 ) $lc .= "\"~<>*+-";
$this->mUsertext = trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
$this->mSearchterms = array();
+ $this->mStrictMatching = true; # Google-style, add '+' on all terms
}
function queryNamespaces()
$namespaces = "0";
}
return "AND cur_namespace IN (" . $namespaces . ")";
- #return "1";
}
function searchRedirects()
{
- if ( $this->doSearchRedirects ) return "";
- return "AND cur_is_redirect=0 ";
+ # Extra WHERE-clause fragment for the search SQL: empty when redirects
+ # are to be searched, otherwise a condition keeping only non-redirects.
+ return $this->doSearchRedirects ? "" : "AND cur_is_redirect=0 ";
}
/* private */ function initNamespaceCheckbox( $i )
{
global $wgUser, $wgNamespacesToBeSearchedDefault;
-
if ($wgUser->getID()) {
// User is logged in so we retrieve his default namespaces
return $wgUser->getOption( "searchNs".$i );
- }
- else {
+ } else {
// User is not logged in so we give him the global default namespaces
return $wgNamespacesToBeSearchedDefault[ $i ];
}
$ret = wfMsg("powersearchtext"); # Text to be returned
$tempText = ""; # Temporary text, for substitution into $ret
+ if( isset( $_REQUEST["searchx"] ) ) {
+ $this->addtoquery["searchx"] = "1";
+ }
+
# Do namespace checkboxes
$namespaces = $wgLang->getNamespaces();
foreach ( $namespaces as $i => $namespace ) {
wfMsg("powersearch") . "\">\n";
$ret = str_replace( "$9", $tempText, $ret );
+ $titleObj = NULL; # this does tricky stuff
+
$ret = "<br><br>\n<form id=\"powersearch\" method=\"get\" " .
- "action=\"" . wfLocalUrl( "" ) . "\">\n{$ret}\n</form>\n";
+ "action=\"" . $titleObj->getUrl() . "\">\n{$ret}\n</form>\n";
if ( isset ( $searchx ) ) {
if ( ! $listredirs ) {
global $wgInputEncoding;
$fname = "SearchEngine::showResults";
- $search = $_REQUEST['search'];
+ $search = $_REQUEST['search'];
$powersearch = $this->powersearch(); /* Need side-effects here? */
$wgOut->setPageTitle( wfMsg( "searchresults" ) );
$q = wfMsg( "searchquery", htmlspecialchars( $this->mUsertext ) );
$wgOut->setSubtitle( $q );
- $wgOut->setArticleFlag( false );
+ $wgOut->setArticleRelated( false );
$wgOut->setRobotpolicy( "noindex,nofollow" );
$sk = $wgUser->getSkin();
- $text = wfMsg( "searchresulttext", $sk->makeKnownLink(
+ $header = wfMsg( "searchresulttext", $sk->makeKnownLink(
wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) );
- $wgOut->addHTML( $text );
+ $wgOut->addHTML( $header );
$this->parseQuery();
if ( "" == $this->mTitlecond || "" == $this->mTextcond ) {
$searchnamespaces = $this->queryNamespaces();
$redircond = $this->searchRedirects();
- $sql = "SELECT cur_id,cur_namespace,cur_title," .
- "cur_text FROM cur,searchindex " .
- "WHERE cur_id=si_page AND {$this->mTitlecond} " .
- "{$searchnamespaces} {$redircond}" .
- "LIMIT {$offset}, {$limit}";
- $res1 = wfQuery( $sql, DB_READ, $fname );
- $num = wfNumRows($res1);
-
if ( $wgDisableTextSearch ) {
- $wgOut->addHTML( wfMsg( "searchdisabled", $search, $wgInputEncoding ) );
+ $wgOut->addHTML( wfMsg( "searchdisabled" ) );
+ $wgOut->addHTML( wfMsg( "googlesearch", htmlspecialchars( $search ), $GLOBALS['wgInputEncoding'] ) );
} else {
+ $sql = "SELECT cur_id,cur_namespace,cur_title," .
+ "cur_text FROM cur,searchindex " .
+ "WHERE cur_id=si_page AND {$this->mTitlecond} " .
+ "{$searchnamespaces} {$redircond}" .
+ "LIMIT {$offset}, {$limit}";
+ $res1 = wfQuery( $sql, DB_READ, $fname );
+ $num = wfNumRows($res1);
+
$sk = $wgUser->getSkin();
- $text = wfMsg( "searchresulttext", $sk->makeKnownLink(
- wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) );
- $wgOut->addHTML( $text );
-
+ $text = "";
+
$this->parseQuery();
if ( "" == $this->mTitlecond || "" == $this->mTextcond ) {
$wgOut->addHTML( "<h2>" . wfMsg( "badquery" ) . "</h2>\n" .
wfFreeResult( $res1 );
$wgOut->addHTML( "</ol>\n" );
}
- }
-
- if ( $wgDisableTextSearch ) {
- $wgOut->addHTML( wfMsg( "searchdisabled", $search, $wgInputEncoding ) );
- } else {
+
if ( 0 == wfNumRows( $res2 ) ) {
$wgOut->addHTML( "<h2>" . wfMsg( "notextmatches" ) .
"</h2>\n" );
function parseQuery4()
{
- # FIXME: not ready yet! Do not use.
-
+ # Parse $this->mUsertext into a boolean-mode fulltext expression.
+ #
+ # Sets three members:
+ #  - mSearchterms: one regexp fragment per term (for matching hits later)
+ #  - mTitlecond:   MATCH ... AGAINST condition on si_title
+ #  - mTextcond:    MATCH ... AGAINST condition on si_text, non-redirects only
+ #
+ # Each term is an optional modifier (one of - + < > ~) followed by either
+ # a bare word with an optional trailing '*', or a double-quoted phrase.
+ # When mStrictMatching is set, terms without a modifier get '+'.
global $wgLang;
$lc = SearchEngine::legalSearchChars();
- #$q = preg_replace( "/([+-]?)([$lc]+)/e",
- # "\"$1\" . \$wgLang->stripForSearch(\"$2\")",
- # $this->mUsertext );
+ $searchon = "";
+ $this->mSearchterms = array();
+
+ # FIXME: This doesn't handle parenthetical expressions.
+ # Capture groups: 1 = modifier, 2 = whole term, 3 = bare word, 4 = '*'
+ if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+ $this->mUsertext, $m, PREG_SET_ORDER ) ) {
+ foreach( $m as $terms ) {
+ if( $searchon !== "" ) $searchon .= " ";
+ if( $this->mStrictMatching && ($terms[1] == "") ) {
+ $terms[1] = "+";
+ }
+ $searchon .= $terms[1] . $wgLang->stripForSearch( $terms[2] );
+ # Groups 3 and 4 are absent from $terms when the quoted-phrase
+ # alternative matched, so test them with empty() rather than
+ # reading the index directly (avoids an undefined-index notice).
+ if( !empty( $terms[3] ) ) {
+ $regexp = preg_quote( $terms[3] );
+ if( !empty( $terms[4] ) ) $regexp .= "[0-9A-Za-z_]+";
+ } else {
+ # Quoted phrase: match its text without the quote marks
+ $regexp = preg_quote( str_replace( '"', '', $terms[2] ) );
+ }
+ $this->mSearchterms[] = $regexp;
+ }
+ wfDebug( "Would search with '$searchon'\n" );
+ wfDebug( "Match with /\b" . implode( '\b|\b', $this->mSearchterms ) . "\b/\n" );
+ } else {
+ wfDebug( "Can't understand search query '$this->mUsertext'\n" );
+ }
- $q = $this->mUsertext;
- $qq = wfStrencode( $wgLang->stripForSearch( $q ) );
- $this->mSearchterms = preg_split( '/\s+/', $q );
- $this->mTitlecond = " MATCH(si_title) AGAINST('$qq' IN BOOLEAN MODE)";
- $this->mTextcond = " (MATCH(si_text) AGAINST('$qq' IN BOOLEAN MODE) AND cur_is_redirect=0)";
+ # NOTE(review): wfStrencode() is presumably the SQL string escaper -- confirm.
+ $searchon = wfStrencode( $searchon );
+ $this->mTitlecond = " MATCH(si_title) AGAINST('$searchon' IN BOOLEAN MODE)";
+ $this->mTextcond = " (MATCH(si_text) AGAINST('$searchon' IN BOOLEAN MODE) AND cur_is_redirect=0)";
}
function showHit( $row )
if ( "" == $contextchars ) { $contextchars = 50; }
$link = $sk->makeKnownLink( $t, "" );
- $size = str_replace( "$1", strlen( $row->cur_text ), WfMsg( "nbytes" ) );
+ $size = wfMsg( "nbytes", strlen( $row->cur_text ) );
$wgOut->addHTML( "<li>{$link} ({$size})" );
$lines = explode( "\n", $row->cur_text );
function goResult()
{
+ # Handle the "Go" button: jump straight to a page matching the request's
+ # 'search' text if one exists, otherwise offer alternatives.
+ #
+ # The text is tried as entered, then lower-cased, then with each word
+ # capitalised, then upper-cased; the first variant whose title has a
+ # non-zero article ID gets a redirect. If none exists, the "nogomatch"
+ # message with an edit link is printed and fuzzy title matches are listed
+ # per namespace (unless $wgDisableFuzzySearch is set). When nothing fuzzy
+ # is found either, the normal search results are shown.
- global $wgOut;
+ global $wgOut, $wgDisableTextSearch;
$fname = "SearchEngine::goResult";
$search = $_REQUEST['search'];
- # First try to go to page as entered
+ # First try to go to page as entered.
#
$t = Title::newFromText( $search );
+ # If the string cannot be used to create a title
+ if( false == $t ){
+ $this->showResults();
+ return;
+ }
+
if ( 0 != $t->getArticleID() ) {
- $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+ $wgOut->redirect( $t->getURL() );
return;
}
#
$t = Title::newFromText( strtolower( $search ) );
if ( 0 != $t->getArticleID() ) {
- $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+ $wgOut->redirect( $t->getURL() );
return;
}
#
$t = Title::newFromText( ucwords( strtolower( $search ) ) );
if ( 0 != $t->getArticleID() ) {
- $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+ $wgOut->redirect( $t->getURL() );
return;
}
#
$t = Title::newFromText( strtoupper( $search ) );
if ( 0 != $t->getArticleID() ) {
- $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+ $wgOut->redirect( $t->getURL() );
return;
}
- # Try a near match
- #
- if( !$wgDisableTextSearch ) {
- $this->parseQuery();
- $sql = "SELECT cur_id,cur_title,cur_namespace,si_page FROM cur,searchindex " .
- "WHERE cur_id=si_page AND {$this->mTitlecond} ORDER BY cur_namespace LIMIT 1";
-
- if ( "" != $this->mTitlecond ) {
- $res = wfQuery( $sql, DB_READ, $fname );
- }
- if ( isset( $res ) && 0 != wfNumRows( $res ) ) {
- $s = wfFetchObject( $res );
-
- $t = Title::makeTitle( $s->cur_namespace, $s->cur_title );
- $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
- return;
+ # No match, generate an edit URL
+ $t = Title::newFromText( $this->mUsertext );
+ if( false == $t ){
+ # The sanitised text may fail to form a title even though the raw
+ # request text did (checked above), so fall back to the raw text
+ # rather than calling a method on a non-object.
+ $t = Title::newFromText( $search );
+ }
+ $wgOut->addHTML( wfMsg("nogomatch", $t->getURL( "action=edit", true ) ) . "\n<p>" );
+
+ # Try a fuzzy title search
+ $anyhit = false;
+ global $wgDisableFuzzySearch;
+ if(! $wgDisableFuzzySearch ){
+ foreach( array(NS_MAIN, NS_WP, NS_USER, NS_IMAGE, NS_MEDIAWIKI) as $namespace){
+ $anyhit |= SearchEngine::doFuzzyTitleSearch( $search, $namespace );
}
}
- $wgOut->addHTML( wfMsg("nogomatch",
- htmlspecialchars( wfLocalUrl( ucfirst($this->mUsertext), "action=edit") ) )
- . "\n<p>" );
- $this->showResults();
+
+ if( ! $anyhit ){
+ return $this->showResults();
+ }
+ }
+
+ /* static */ function doFuzzyTitleSearch( $search, $namespace ){
+     # Print up to ten titles from $namespace that nearly match $search, as
+     # a wikitext bullet list with a closeness percentage and star rating.
+     # Namespaces other than NS_MAIN get a heading with the namespace name.
+     # Returns true if anything was written to $wgOut, false otherwise.
+     global $wgLang, $wgOut;
+
+     # Compare in title form: first letter upper-cased, spaces as underscores
+     $needle = str_replace( " ", "_", ucfirst( $search ) );
+     $candidates = array_slice( SearchEngine::fuzzyTitles( $needle, $namespace ), 0, 10 );
+     $searchLen = strlen( $search );
+
+     $listing = "";
+     foreach( $candidates as $match ){
+         # Each $match is array( edit distance, title )
+         $label = str_replace( "_", " ", $match[1] );
+         $target = $wgLang->getNsText( $namespace ) . ":$label|$label";
+         if( $namespace == NS_MAIN ){
+             # Main namespace links need neither prefix nor piped label
+             $target = "$label";
+         }
+         $closeness = ( $searchLen - $match[0] ) / $searchLen;
+         $percent = intval( $closeness * 100 ) . "%";
+         $stars = str_repeat( "*", ceil( 5 * $closeness ) );
+         $listing .= "* [[$target]] $percent ($stars)\n";
+     }
+
+     if( !$listing ){
+         return false;
+     }
+     if( $namespace != NS_MAIN ){
+         $listing = "=== " . $wgLang->getNsText( $namespace ) . " ===\n" . $listing;
+     }
+     $wgOut->addWikiText( $listing );
+     return true;
+ }
+
+ /* static */ function fuzzyTitles( $sstr, $namespace = NS_MAIN ){
+     # Find titles in $namespace within a small edit distance of $sstr.
+     #
+     # Only titles whose length is within $span (10%) of strlen($sstr) are
+     # compared, and a title is kept when its Levenshtein distance is below
+     # $tolerance (35%) of that length, rounded up.
+     #
+     # Returns a list of array( distance, title ), closest first.
+     $span = 0.10; // weed on title length before doing levenshtein.
+     $tolerance = 0.35; // allowed fraction of erroneous characters
+     $slen = strlen( $sstr );
+     $tolerance_count = ceil( $tolerance * $slen );
+     $spanabs = ceil( $slen * (1 + $span) ) - $slen;
+     $result = array();
+     for( $i = 0; $i <= $spanabs; $i++ ){
+         $titles = SearchEngine::getTitlesByLength( $slen + $i, $namespace );
+         # $i == 0 would fetch the same length twice; zero or negative
+         # lengths are not worth a lookup.
+         if( $i != 0 && $slen - $i > 0 ){
+             $titles = array_merge( $titles, SearchEngine::getTitlesByLength( $slen - $i, $namespace ) );
+         }
+         foreach( $titles as $t ){
+             $d = levenshtein( $sstr, $t );
+             # Before PHP 8.0 levenshtein() returns -1 for over-long
+             # arguments; that must not be taken for a perfect match.
+             if( $d >= 0 && $d < $tolerance_count ){
+                 $result[] = array( $d, $t );
+             }
+         }
+     }
+     usort( $result, "SearchEngine_pcmp" );
+     return $result;
+ }
+
+ /* static */ function getTitlesByLength($aLength, $aNamespace = 0){
+     # Return the titles in namespace $aNamespace that are exactly $aLength
+     # bytes long (strlen of cur_title), or an empty array if there are none.
+     #
+     # Lookup order:
+     #  1. the table built earlier in this request, if any;
+     #  2. memcached, as long as the shared "createtime" stamp is under an
+     #     hour old;
+     #  3. a full scan of cur, whose result is written back to memcached
+     #     (one key per length/namespace pair) and kept for this request.
+     global $wgMemc, $wgDBname;
+
+     // to avoid multiple costly SELECTs in case of no memcached
+     // Kept in a function-static rather than on $this because this
+     // method is invoked statically (SearchEngine::getTitlesByLength).
+     static $allTitles = false;
+     if( $allTitles !== false ){
+         if( isset( $allTitles[$aLength][$aNamespace] ) ){
+             return $allTitles[$aLength][$aNamespace];
+         } else {
+             return array();
+         }
+     }
+
+     $mkey = "$wgDBname:titlesbylength:$aLength:$aNamespace";
+     $mkeyts = "$wgDBname:titlesbylength:createtime";
+     $ts = $wgMemc->get( $mkeyts );
+     $result = $wgMemc->get( $mkey );
+
+     if( time() - $ts < 3600 ){
+         // note: in case of insufficient memcached space, we return
+         // an empty list instead of starting to hit the DB.
+         return is_array( $result ) ? $result : array();
+     }
+
+     // Stamp the rebuild time before the slow query runs.
+     $wgMemc->set( $mkeyts, time() );
+
+     $res = wfQuery("SELECT cur_title, cur_namespace FROM cur", DB_READ);
+     $titles = array(); // length, ns, [titles]
+     while( $obj = wfFetchObject( $res ) ){
+         $title = $obj->cur_title;
+         $ns = $obj->cur_namespace;
+         $len = strlen( $title );
+         $titles[$len][$ns][] = $title;
+     }
+     foreach($titles as $length => $length_arr){
+         foreach($length_arr as $ns => $title_arr){
+             $mkey = "$wgDBname:titlesbylength:$length:$ns";
+             // NOTE(review): third argument is presumably the expiry in seconds (24h) -- confirm
+             $wgMemc->set( $mkey, $title_arr, 3600 * 24 );
+         }
+     }
+     $allTitles = $titles;
+     if( isset( $titles[$aLength][$aNamespace] ) )
+         return $titles[$aLength][$aNamespace];
+     else
+         return array();
}
}
+/* private static */ function SearchEngine_pcmp($a, $b){
+    # usort() comparator for array( distance, title ) pairs: orders by the
+    # first element, smallest first. Returns the difference of the two
+    # first elements (negative, zero or positive).
+    $delta = $a[0] - $b[0];
+    return $delta;
+}
+
?>