-<?
+<?php
# See search.doc
class SearchEngine {
var $addtoquery = array();
var $namespacesToSearch = array();
var $alternateTitle;
+ var $all_titles = false;
# Constructor: keeps a sanitised copy of the user's query text.
#
# Every character of $text that is not in legalSearchChars(), "(" or ")"
# is replaced by a space and the result is trimmed into $this->mUsertext.
# When $wgDBmysql4 is set, the characters " ~ < > * + - are allowed as well.
# Also resets $this->mSearchterms and turns on $this->mStrictMatching.
function SearchEngine( $text )
{
# We display the query, so let's strip it for safety
#
+ global $wgDBmysql4;
$lc = SearchEngine::legalSearchChars() . "()";
+ if( $wgDBmysql4 ) $lc .= "\"~<>*+-";
$this->mUsertext = trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
$this->mSearchterms = array();
+ $this->mStrictMatching = true; # Google-style, add '+' on all terms
}
# Build the SQL fragment restricting a search to the selected namespaces.
#
# The added version joins $this->namespacesToSearch with commas, falls back
# to namespace "0" when that list is empty, and returns the fragment with a
# leading "AND" (the removed version returned it without one).
function queryNamespaces()
{
- return "cur_namespace IN (" . implode( ",", $this->namespacesToSearch ) . ")";
- #return "1";
+ $namespaces = implode( ",", $this->namespacesToSearch );
+ if ($namespaces == "") {
+ $namespaces = "0";
+ }
+ return "AND cur_namespace IN (" . $namespaces . ")";
}
# Build the SQL fragment that filters redirect pages.
#
# Returns "" when $this->doSearchRedirects is truthy, otherwise the fragment
# "AND cur_is_redirect=0 " (with its trailing space).
function searchRedirects()
{
- if ( $this->doSearchRedirects ) return "";
- return "AND cur_is_redirect=0 ";
+ if ( $this->doSearchRedirects ) {
+ return "";
+ } else {
+ return "AND cur_is_redirect=0 ";
+ }
}
+	# Return the initial state of the search checkbox for namespace $i.
+	#
+	# Logged-in users (non-zero $wgUser->getID()) get their saved
+	# "searchNs$i" option; anonymous users get the matching entry of
+	# $wgNamespacesToBeSearchedDefault.
+	#
+	# @param int $i  namespace index
+	# @return mixed  the stored option or default; null when no default is
+	#                configured for that namespace
+	/* private */ function initNamespaceCheckbox( $i )
+	{
+		global $wgUser, $wgNamespacesToBeSearchedDefault;
+
+		if ( $wgUser->getID() ) {
+			// User is logged in so we retrieve his default namespaces
+			return $wgUser->getOption( "searchNs" . $i );
+		}
+
+		// User is not logged in so we give him the global default namespaces.
+		// The defaults need not list every namespace, so check before reading
+		// to avoid an undefined-index notice.
+		if ( isset( $wgNamespacesToBeSearchedDefault[$i] ) ) {
+			return $wgNamespacesToBeSearchedDefault[$i];
+		}
+		return null;
+	}
+
+ # Display the "power search" footer. Does not actually perform the search,
+ # that is done by showResults()
function powersearch()
{
global $wgUser, $wgOut, $wgLang, $wgTitle;
- $nscb = array();
$search = $_REQUEST['search'];
$searchx = $_REQUEST['searchx'];
$listredirs = $_REQUEST['redirs'];
- $nscb[0] = $_REQUEST['ns0'];
- $nscb[1] = $_REQUEST['ns1'];
- $nscb[2] = $_REQUEST['ns2'];
- $nscb[3] = $_REQUEST['ns3'];
- $nscb[4] = $_REQUEST['ns4'];
- $nscb[5] = $_REQUEST['ns5'];
- $nscb[6] = $_REQUEST['ns6'];
- $nscb[7] = $_REQUEST['ns7'];
-
- if ( ! isset ( $searchx ) ) { /* First time here */
- $nscb[0] = $listredirs = 1; /* All others should be unset */
+
+ $ret = wfMsg("powersearchtext"); # Text to be returned
+ $tempText = ""; # Temporary text, for substitution into $ret
+
+ if( isset( $_REQUEST["searchx"] ) ) {
+ $this->addtoquery["searchx"] = "1";
}
- $this->checkboxes["searchx"] = 1;
- $ret = wfMsg("powersearchtext");
+
+ # Do namespace checkboxes
+ $namespaces = $wgLang->getNamespaces();
+ foreach ( $namespaces as $i => $namespace ) {
+ # Skip virtual namespaces
+ if ( $i < 0 ) {
+ continue;
+ }
- # Determine namespace checkboxes
+ $formVar = "ns$i";
- $ns = $wgLang->getNamespaces();
- array_shift( $ns ); /* Skip "Special" */
+ # Initialise checkboxValues, either from defaults or from
+ # a previous invocation
+ if ( !isset( $searchx ) ) {
+ $checkboxValue = $this->initNamespaceCheckbox( $i );
+ } else {
+ $checkboxValue = $_REQUEST[$formVar];
+ }
- $r1 = "";
- for ( $i = 0; $i < count( $ns ); ++$i ) {
$checked = "";
- if ( $nscb[$i] == 1 ) {
+ if ( $checkboxValue == 1 ) {
$checked = " checked";
$this->addtoquery["ns{$i}"] = 1;
array_push( $this->namespacesToSearch, $i );
}
- $name = str_replace( "_", " ", $ns[$i] );
- if ( "" == $name ) { $name = "(Main)"; }
+ $name = str_replace( "_", " ", $namespaces[$i] );
+ if ( "" == $name ) {
+ $name = wfMsg( "blanknamespace" );
+ }
- if ( 0 != $i ) { $r1 .= " "; }
- $r1 .= "<input type=checkbox value=\"1\" name=\"" .
+ if ( $tempText !== "" ) {
+ $tempText .= " ";
+ }
+ $tempText .= "<input type=checkbox value=\"1\" name=\"" .
"ns{$i}\"{$checked}>{$name}\n";
}
- $ret = str_replace ( "$1", $r1, $ret );
+ $ret = str_replace ( "$1", $tempText, $ret );
# List redirects checkbox
$this->addtoquery["redirs"] = 1;
$checked = " checked";
}
- $r2 = "<input type=checkbox value=1 name=\"redirs\"{$checked}>\n";
- $ret = str_replace( "$2", $r2, $ret );
+ $tempText = "<input type=checkbox value=1 name=\"redirs\"{$checked}>\n";
+ $ret = str_replace( "$2", $tempText, $ret );
# Search field
- $r3 = "<input type=text name=\"search\" value=\"" .
+ $tempText = "<input type=text name=\"search\" value=\"" .
htmlspecialchars( $search ) ."\" width=80>\n";
- $ret = str_replace( "$3", $r3, $ret );
+ $ret = str_replace( "$3", $tempText, $ret );
# Searchx button
- $r9 = "<input type=submit name=\"searchx\" value=\"" .
+ $tempText = "<input type=submit name=\"searchx\" value=\"" .
wfMsg("powersearch") . "\">\n";
- $ret = str_replace( "$9", $r9, $ret );
+ $ret = str_replace( "$9", $tempText, $ret );
+ $titleObj = NULL; # this does tricky stuff
+
$ret = "<br><br>\n<form id=\"powersearch\" method=\"get\" " .
- "action=\"" . wfLocalUrl( "" ) . "\">\n{$ret}\n</form>\n";
+ "action=\"" . $titleObj->getUrl() . "\">\n{$ret}\n</form>\n";
if ( isset ( $searchx ) ) {
- if ( ! $listredirs ) { $this->doSearchRedirects = false; }
+ if ( ! $listredirs ) {
+ $this->doSearchRedirects = false;
+ }
}
return $ret;
}
+ # Perform the search and construct the results page
function showResults()
{
global $wgUser, $wgTitle, $wgOut, $wgLang, $wgDisableTextSearch;
+ global $wgInputEncoding;
$fname = "SearchEngine::showResults";
- $offset = $_REQUEST['offset'];
- $limit = $_REQUEST['limit'];
- $search = $_REQUEST['search'];
+ $search = $_REQUEST['search'];
$powersearch = $this->powersearch(); /* Need side-effects here? */
$wgOut->setPageTitle( wfMsg( "searchresults" ) );
- $q = str_replace( "$1", $this->mUsertext,
- wfMsg( "searchquery" ) );
+ $q = wfMsg( "searchquery", htmlspecialchars( $this->mUsertext ) );
$wgOut->setSubtitle( $q );
- $wgOut->setArticleFlag( false );
+ $wgOut->setArticleRelated( false );
$wgOut->setRobotpolicy( "noindex,nofollow" );
$sk = $wgUser->getSkin();
- $text = str_replace( "$1", $sk->makeKnownLink(
- wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ),
- wfMsg( "searchresulttext" ) );
- $wgOut->addHTML( $text );
+ $header = wfMsg( "searchresulttext", $sk->makeKnownLink(
+ wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) );
+ $wgOut->addHTML( $header );
$this->parseQuery();
if ( "" == $this->mTitlecond || "" == $this->mTextcond ) {
"<p>" . wfMsg( "badquerytext" ) );
return;
}
- if ( ! isset( $limit ) ) {
- $limit = $wgUser->getOption( "searchlimit" );
- if ( ! $limit ) { $limit = 20; }
- }
- if ( ! $offset ) { $offset = 0; }
+ list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" );
$searchnamespaces = $this->queryNamespaces();
$redircond = $this->searchRedirects();
- $sql = "SELECT cur_id,cur_namespace,cur_title," .
- "cur_text FROM cur,searchindex " .
- "WHERE cur_id=si_page AND {$this->mTitlecond} " .
- "AND {$searchnamespaces} {$redircond}" .
- "LIMIT {$offset}, {$limit}";
- $res1 = wfQuery( $sql, $fname );
-
if ( $wgDisableTextSearch ) {
- $res2 = 0;
+ $wgOut->addHTML( wfMsg( "searchdisabled" ) );
+ $wgOut->addHTML( wfMsg( "googlesearch", htmlspecialchars( $search ), $GLOBALS['wgInputEncoding'] ) );
} else {
$sql = "SELECT cur_id,cur_namespace,cur_title," .
"cur_text FROM cur,searchindex " .
- "WHERE cur_id=si_page AND {$this->mTextcond} " .
- "AND {$searchnamespaces} {$redircond} " .
+ "WHERE cur_id=si_page AND {$this->mTitlecond} " .
+ "{$searchnamespaces} {$redircond}" .
"LIMIT {$offset}, {$limit}";
- $res2 = wfQuery( $sql, $fname );
- }
+ $res1 = wfQuery( $sql, DB_READ, $fname );
+ $num = wfNumRows($res1);
- $top = wfShowingResults( $offset, $limit );
- $wgOut->addHTML( "<p>{$top}\n" );
+ $sk = $wgUser->getSkin();
+ $text = "";
- # For powersearch
-
- $a2l = "" ;
- $akk = array_keys( $this->addtoquery ) ;
- foreach ( $akk AS $ak ) {
- $a2l .= "&{$ak}={$this->addtoquery[$ak]}" ;
- }
-
- $sl = wfViewPrevNext( $offset, $limit, "",
- "search=" . wfUrlencode( $this->mUsertext ) . $a2l );
- $wgOut->addHTML( "<br>{$sl}\n" );
-
- $foundsome = false;
-
- if ( 0 == wfNumRows( $res1 ) ) {
- $wgOut->addHTML( "<h2>" . wfMsg( "notitlematches" ) .
- "</h2>\n" );
- } else {
- $foundsome = true;
- $off = $offset + 1;
- $wgOut->addHTML( "<h2>" . wfMsg( "titlematches" ) .
- "</h2>\n<ol start='{$off}'>" );
+ $this->parseQuery();
+ if ( "" == $this->mTitlecond || "" == $this->mTextcond ) {
+ $wgOut->addHTML( "<h2>" . wfMsg( "badquery" ) . "</h2>\n" .
+ "<p>" . wfMsg( "badquerytext" ) );
+ return;
+ }
+ list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" );
+
+ $searchnamespaces = $this->queryNamespaces();
+ $redircond = $this->searchRedirects();
+
+ $sql = "SELECT cur_id,cur_namespace,cur_title," .
+ "cur_text FROM cur,searchindex " .
+ "WHERE cur_id=si_page AND {$this->mTitlecond} " .
+ "{$searchnamespaces} {$redircond}" .
+ "LIMIT {$offset}, {$limit}";
+ $res1 = wfQuery( $sql, DB_READ, $fname );
+ $num = wfNumRows($res1);
+
+ $sql = "SELECT cur_id,cur_namespace,cur_title," .
+ "cur_text FROM cur,searchindex " .
+ "WHERE cur_id=si_page AND {$this->mTextcond} " .
+ "{$searchnamespaces} {$redircond} " .
+ "LIMIT {$offset}, {$limit}";
+ $res2 = wfQuery( $sql, DB_READ, $fname );
+ $num = $num + wfNumRows($res2);
- while ( $row = wfFetchObject( $res1 ) ) {
- $this->showHit( $row );
+ if ( $num == $limit ) {
+ $top = wfShowingResults( $offset, $limit);
+ } else {
+ $top = wfShowingResultsNum( $offset, $limit, $num );
+ }
+ $wgOut->addHTML( "<p>{$top}\n" );
+
+ # For powersearch
+
+ $a2l = "" ;
+ $akk = array_keys( $this->addtoquery ) ;
+ foreach ( $akk AS $ak ) {
+ $a2l .= "&{$ak}={$this->addtoquery[$ak]}" ;
+ }
+
+ $sl = wfViewPrevNext( $offset, $limit, "",
+ "search=" . wfUrlencode( $this->mUsertext ) . $a2l );
+ $wgOut->addHTML( "<br>{$sl}\n" );
+
+ $foundsome = false;
+
+ if ( 0 == wfNumRows( $res1 ) ) {
+ $wgOut->addHTML( "<h2>" . wfMsg( "notitlematches" ) .
+ "</h2>\n" );
+ } else {
+ $foundsome = true;
+ $off = $offset + 1;
+ $wgOut->addHTML( "<h2>" . wfMsg( "titlematches" ) .
+ "</h2>\n<ol start='{$off}'>" );
+
+ while ( $row = wfFetchObject( $res1 ) ) {
+ $this->showHit( $row );
+ }
+ wfFreeResult( $res1 );
+ $wgOut->addHTML( "</ol>\n" );
}
- wfFreeResult( $res1 );
- $wgOut->addHTML( "</ol>\n" );
- }
- if ( $wgDisableTextSearch ) {
- $wgOut->addHTML( str_replace( "$1",
- htmlspecialchars( $search ), wfMsg( "searchdisabled" ) ) );
- } else {
if ( 0 == wfNumRows( $res2 ) ) {
$wgOut->addHTML( "<h2>" . wfMsg( "notextmatches" ) .
"</h2>\n" );
wfFreeResult( $res2 );
$wgOut->addHTML( "</ol>\n" );
}
+ if ( ! $foundsome ) {
+ $wgOut->addHTML( "<p>" . wfMsg( "nonefound" ) . "\n" );
+ }
+ $wgOut->addHTML( "<p>{$sl}\n" );
+ $wgOut->addHTML( $powersearch );
}
- if ( ! $foundsome ) {
- $wgOut->addHTML( "<p>" . wfMsg( "nonefound" ) . "\n" );
- }
- $wgOut->addHTML( "<p>{$sl}\n" );
- $wgOut->addHTML( $powersearch );
}
function legalSearchChars()
function parseQuery()
{
- global $wgDBminWordLen, $wgLang;
+ global $wgDBminWordLen, $wgLang, $wgDBmysql4;
+
+ if( $wgDBmysql4 ) {
+ # Use cleaner boolean search if available
+ return $this->parseQuery4();
+ }
$lc = SearchEngine::legalSearchChars() . "()";
$q = preg_replace( "/([()])/", " \\1 ", $this->mUsertext );
}
if ( 0 == count( $this->mSearchterms ) ) { return; }
- # To disable boolean:
- # $cond = "MATCH (##field##) AGAINST('" . wfStrencode( $q ) . "')";
-
$this->mTitlecond = "(" . str_replace( "##field##",
"si_title", $cond ) . " )";
$this->mTextcond = "(" . str_replace( "##field##",
"si_text", $cond ) . " AND (cur_is_redirect=0) )";
}
+
+	# Build the SQL conditions for a MySQL 4 boolean-mode full-text search.
+	#
+	# Splits $this->mUsertext into terms: bare words (optionally followed by
+	# "*") or double-quoted phrases, each optionally preceded by one of the
+	# operators - + < > ~. When $this->mStrictMatching is set, a term without
+	# an operator gets "+" prepended.
+	#
+	# Sets:
+	#   $this->mSearchterms  one regexp fragment per term
+	#   $this->mTitlecond    MATCH(si_title) condition
+	#   $this->mTextcond     MATCH(si_text) condition, excluding redirects
+	# When no term can be extracted, both conditions are set to "" so that
+	# showResults() reports a bad query, as it does for parseQuery().
+	function parseQuery4()
+	{
+		global $wgLang;
+		$lc = SearchEngine::legalSearchChars();
+		$searchon = "";
+		$this->mSearchterms = array();
+
+		# FIXME: This doesn't handle parenthetical expressions.
+		if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+			  $this->mUsertext, $m, PREG_SET_ORDER ) ) {
+			foreach( $m as $terms ) {
+				if( $searchon !== "" ) $searchon .= " ";
+				if( $this->mStrictMatching && ($terms[1] == "") ) {
+					$terms[1] = "+";
+				}
+				$searchon .= $terms[1] . $wgLang->stripForSearch( $terms[2] );
+
+				# Groups 3 (word) and 4 (wildcard) exist only for bare words.
+				# For a quoted phrase PREG_SET_ORDER drops the unmatched
+				# trailing groups, so test with isset() rather than reading
+				# them blindly. Comparing against "" also keeps the word "0"
+				# from being treated as absent.
+				if( isset( $terms[3] ) && $terms[3] !== "" ) {
+					$regexp = preg_quote( $terms[3], '/' );
+					if( isset( $terms[4] ) && $terms[4] !== "" ) {
+						$regexp .= "[0-9A-Za-z_]+";
+					}
+				} else {
+					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
+				}
+				$this->mSearchterms[] = $regexp;
+			}
+			wfDebug( "Would search with '$searchon'\n" );
+			wfDebug( "Match with /\b" . implode( '\b|\b', $this->mSearchterms ) . "\b/\n" );
+		} else {
+			wfDebug( "Can't understand search query '$this->mUsertext'\n" );
+		}
+
+		if( 0 == count( $this->mSearchterms ) ) {
+			# Nothing searchable: leave the conditions empty instead of
+			# emitting AGAINST('' IN BOOLEAN MODE).
+			$this->mTitlecond = "";
+			$this->mTextcond = "";
+			return;
+		}
+
+		$searchon = wfStrencode( $searchon );
+		$this->mTitlecond = " MATCH(si_title) AGAINST('$searchon' IN BOOLEAN MODE)";
+		$this->mTextcond = " (MATCH(si_text) AGAINST('$searchon' IN BOOLEAN MODE) AND cur_is_redirect=0)";
+	}
function showHit( $row )
{
if ( "" == $contextchars ) { $contextchars = 50; }
$link = $sk->makeKnownLink( $t, "" );
- $size = str_replace( "$1", strlen( $row->cur_text ), WfMsg( "nbytes" ) );
+ $size = wfMsg( "nbytes", strlen( $row->cur_text ) );
$wgOut->addHTML( "<li>{$link} ({$size})" );
$lines = explode( "\n", $row->cur_text );
# Handle the "Go" button: jump straight to a page whose title matches the
# 'search' request parameter, otherwise fall back to listing results.
#
# The added version tries the text as entered, then lower-cased, then
# ucwords() of the lower-cased text, then upper-cased, and redirects to the
# first variant whose article ID is non-zero. If none exists it outputs the
# "nogomatch" message with an edit URL built from $this->mUsertext, then
# (unless $wgDisableFuzzySearch is set) runs doFuzzyTitleSearch() over a
# fixed list of namespaces. showResults() is called when no fuzzy match was
# shown, or when the text cannot be turned into a title at all.
#
# NOTE(review): only the first Title::newFromText() result is checked for
# failure. The lower/ucwords/upper variants and the title built from
# $this->mUsertext are dereferenced unguarded -- confirm they cannot be false.
function goResult()
{
- global $wgOut, $wgArticle, $wgTitle;
+ global $wgOut, $wgDisableTextSearch;
$fname = "SearchEngine::goResult";
$search = $_REQUEST['search'];
- # First try to go to page as entered
+ # First try to go to page as entered.
#
- $wgArticle = new Article();
- $wgTitle = Title::newFromText( $search );
+ $t = Title::newFromText( $search );
+
+ # If the string cannot be used to create a title
+ if( false == $t ){
+ $this->showResults();
+ return;
+ }
- if ( 0 != $wgArticle->getID() ) {
- $wgArticle->view();
+ if ( 0 != $t->getArticleID() ) {
+ $wgOut->redirect( $t->getURL() );
return;
}
# Now try all lower case (i.e. first letter capitalized)
#
- $wgTitle = Title::newFromText( strtolower( $search ) );
- if ( 0 != $wgArticle->getID() ) {
- $wgArticle->view();
+ $t = Title::newFromText( strtolower( $search ) );
+ if ( 0 != $t->getArticleID() ) {
+ $wgOut->redirect( $t->getURL() );
return;
}
-
+
# Now try capitalized string
#
- $wgTitle=Title::newFromText( ucwords( strtolower( $search ) ) );
- if ( 0 != $wgArticle->getID() ) {
- $wgArticle->view();
+ $t = Title::newFromText( ucwords( strtolower( $search ) ) );
+ if ( 0 != $t->getArticleID() ) {
+ $wgOut->redirect( $t->getURL() );
return;
}
- # Try a near match
+ # Now try all upper case
#
- $this->parseQuery();
- $sql = "SELECT cur_id,cur_title,cur_namespace,si_page FROM cur,searchindex " .
- "WHERE cur_id=si_page AND {$this->mTitlecond} LIMIT 1";
-
- if ( "" != $this->mTitlecond ) {
- $res = wfQuery( $sql, $fname );
- }
- if ( isset( $res ) && 0 != wfNumRows( $res ) ) {
- $s = wfFetchObject( $res );
-
- $wgTitle = Title::newFromDBkey( $s->cur_title );
- $wgTitle->setNamespace( $s->cur_namespace );
- $wgArticle->view();
+ $t = Title::newFromText( strtoupper( $search ) );
+ if ( 0 != $t->getArticleID() ) {
+ $wgOut->redirect( $t->getURL() );
return;
}
- $wgOut->addHTML( wfMsg("nogomatch") . "\n<p>" );
- $this->showResults();
+
+ # No match, generate an edit URL
+ $t = Title::newFromText( $this->mUsertext );
+ $wgOut->addHTML( wfMsg("nogomatch", $t->getURL( "action=edit", true ) ) . "\n<p>" );
+
+ # Try a fuzzy title search
+ $anyhit = false;
+ global $wgDisableFuzzySearch;
+ if(! $wgDisableFuzzySearch ){
+ foreach( array(NS_MAIN, NS_WP, NS_USER, NS_IMAGE, NS_MEDIAWIKI) as $namespace){
+ $anyhit |= SearchEngine::doFuzzyTitleSearch( $search, $namespace );
+ }
+ }
+
+ if( ! $anyhit ){
+ return $this->showResults();
+ }
+ }
+
+	# Output up to ten fuzzy title matches for $search within one namespace.
+	#
+	# The query is normalised (first letter upper-cased, spaces turned into
+	# underscores) and handed to fuzzyTitles(). Each match becomes a wikitext
+	# list item with a closeness percentage and a row of stars; closeness is
+	# (query length - edit distance) / query length. Outside NS_MAIN the
+	# links are namespace-prefixed and the list gets a namespace heading.
+	#
+	# @param string $search     text as typed by the user
+	# @param int    $namespace  namespace index to search
+	# @return bool  true if at least one match was added to $wgOut
+	/* static */ function doFuzzyTitleSearch( $search, $namespace ){
+		global $wgLang, $wgOut;
+
+		$slen = strlen( $search );
+		if( $slen == 0 ){
+			# Closeness is a ratio over the query length; an empty query
+			# has nothing to match and would divide by zero below.
+			return false;
+		}
+
+		$sstr = str_replace( " ", "_", ucfirst( $search ) );
+		$fuzzymatches = array_slice( SearchEngine::fuzzyTitles( $sstr, $namespace ), 0, 10 );
+
+		$wikitext = "";
+		foreach( $fuzzymatches as $res ){
+			# $res is array( distance, title ) as built by fuzzyTitles()
+			$t = str_replace( "_", " ", $res[1] );
+			if( $namespace == NS_MAIN ){
+				$tfull = "$t";
+			} else {
+				$tfull = $wgLang->getNsText( $namespace ) . ":$t|$t";
+			}
+			$distance = $res[0];
+			$closeness = ( $slen - $distance ) / $slen;
+			$percent = intval( $closeness * 100 ) . "%";
+			$stars = str_repeat( "*", (int)ceil( 5 * $closeness ) );
+			$wikitext .= "* [[$tfull]] $percent ($stars)\n";
+		}
+
+		if( $wikitext === "" ){
+			return false;
+		}
+		if( $namespace != NS_MAIN ){
+			$wikitext = "=== " . $wgLang->getNsText( $namespace ) . " ===\n" . $wikitext;
+		}
+		$wgOut->addWikiText( $wikitext );
+		return true;
+	}
+
+	# Find titles in $namespace that are within a small edit distance of $sstr.
+	#
+	# Only titles whose length is within 10% of the query length are compared
+	# (lengths are fetched via getTitlesByLength()); a title is kept when its
+	# Levenshtein distance is below 35% of the query length, rounded up.
+	#
+	# @param string $sstr       normalised search string
+	# @param int    $namespace  namespace index, NS_MAIN by default
+	# @return array list of array( distance, title ), sorted by ascending
+	#               distance using SearchEngine_pcmp
+	/* static */ function fuzzyTitles( $sstr, $namespace = NS_MAIN ){
+		$span = 0.10; // weed on title length before doing levenshtein.
+		$tolerance = 0.35; // allowed percentage of erroneous characters
+		$slen = strlen( $sstr );
+		$tolerance_count = ceil( $tolerance * $slen );
+		$spanabs = ceil( $slen * (1 + $span) ) - $slen;
+
+		$result = array();
+		for( $i = 0; $i <= $spanabs; $i++ ){
+			$titles = SearchEngine::getTitlesByLength( $slen + $i, $namespace );
+			# Also look at shorter titles, but never ask for a non-positive length.
+			if( $i != 0 && $slen - $i > 0 ){
+				$titles = array_merge( $titles,
+					SearchEngine::getTitlesByLength( $slen - $i, $namespace ) );
+			}
+			foreach( $titles as $t ){
+				$d = levenshtein( $sstr, $t );
+				# levenshtein() reports over-long arguments as -1 on older
+				# PHP versions; that is an error, not a perfect match.
+				if( $d >= 0 && $d < $tolerance_count ){
+					$result[] = array( $d, $t );
+				}
+			}
+		}
+		usort( $result, "SearchEngine_pcmp" );
+		return $result;
+	}
+
+	# Return all titles of byte length $aLength in namespace $aNamespace.
+	#
+	# Lookup order:
+	#  1. the in-process copy of every title, if this request already loaded it;
+	#  2. memcached, when the shared "createtime" stamp is under an hour old
+	#     (a miss then yields an empty list rather than a database hit);
+	#  3. otherwise a full scan of cur, which refreshes the memcached buckets
+	#     (kept for 24 hours) and the in-process copy.
+	#
+	# @param int $aLength     title length in bytes
+	# @param int $aNamespace  namespace index
+	# @return array list of title strings, possibly empty
+	/* static */ function getTitlesByLength($aLength, $aNamespace = 0){
+		global $wgMemc, $wgDBname;
+
+		# In-process cache, bucketed as [length][namespace][]. It is kept in
+		# a function-static because this method is called statically
+		# (SearchEngine::getTitlesByLength), where $this is not reliably
+		# available.
+		static $allTitles = false;
+
+		// to avoid multiple costly SELECTs in case of no memcached
+		if( is_array( $allTitles ) ){
+			if( isset( $allTitles[$aLength][$aNamespace] ) ){
+				return $allTitles[$aLength][$aNamespace];
+			}
+			return array();
+		}
+
+		$mkey = "$wgDBname:titlesbylength:$aLength:$aNamespace";
+		$mkeyts = "$wgDBname:titlesbylength:createtime";
+		$ts = $wgMemc->get( $mkeyts );
+		$result = $wgMemc->get( $mkey );
+
+		if( time() - $ts < 3600 ){
+			// note: in case of insufficient memcached space, we return
+			// an empty list instead of starting to hit the DB.
+			return is_array( $result ) ? $result : array();
+		}
+
+		$wgMemc->set( $mkeyts, time() );
+
+		$res = wfQuery("SELECT cur_title, cur_namespace FROM cur", DB_READ);
+		$titles = array(); // length, ns, [titles]
+		while( $obj = wfFetchObject( $res ) ){
+			$titles[strlen( $obj->cur_title )][$obj->cur_namespace][] = $obj->cur_title;
+		}
+		wfFreeResult( $res );
+
+		foreach($titles as $length => $length_arr){
+			foreach($length_arr as $ns => $title_arr){
+				$wgMemc->set( "$wgDBname:titlesbylength:$length:$ns", $title_arr, 3600 * 24 );
+			}
+		}
+
+		$allTitles = $titles;
+		if( isset( $this ) ){
+			# Keep the instance property populated when called on an object.
+			$this->all_titles = $titles;
+		}
+
+		if( isset( $titles[$aLength][$aNamespace] ) ){
+			return $titles[$aLength][$aNamespace];
+		}
+		return array();
}
}
+# usort() comparator for array( distance, title ) pairs: orders by element 0,
+# returning a negative, zero or positive difference.
+/* private static */ function SearchEngine_pcmp($a, $b)
+{
+	$delta = $a[0] - $b[0];
+	return $delta;
+}
+
+?>