X-Git-Url: https://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2FSearchEngine.php;h=94162cff3248baf021ee408128e66e779c99581f;hb=eebe7e3ef58635e18d08bc6ee9b2c53d57370112;hp=ffffb9bb853f8861e1b7aee5b1eb84c22f50f9b9;hpb=c5bbab5c6efab4d632203a9a14c9e3b39b53b0df;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/SearchEngine.php b/includes/SearchEngine.php index ffffb9bb85..94162cff32 100644 --- a/includes/SearchEngine.php +++ b/includes/SearchEngine.php @@ -1,72 +1,82 @@ -rawText = trim( $text ); - function SearchEngine( $text ) - { # We display the query, so let's strip it for safety # global $wgDBmysql4; $lc = SearchEngine::legalSearchChars() . "()"; - if( $wgDBmysql4 ) $lc .= "\"~<>*+-"; - $this->mUsertext = trim( preg_replace( "/[^{$lc}]/", " ", $text ) ); - $this->mSearchterms = array(); + if( $wgDBmysql4 ) { + $lc .= "\"~<>*+-"; + } + $this->filteredText = trim( preg_replace( "/[^{$lc}]/", " ", $text ) ); + $this->searchTerms = array(); + $this->strictMatching = true; # Google-style, add '+' on all terms + + $this->db =& wfGetDB( DB_SLAVE ); } - function queryNamespaces() - { + # Return a partial WHERE clause to limit the search to the given namespaces + function queryNamespaces() { $namespaces = implode( ",", $this->namespacesToSearch ); if ($namespaces == "") { $namespaces = "0"; } return "AND cur_namespace IN (" . $namespaces . ")"; - #return "1"; } - function searchRedirects() - { - if ( $this->doSearchRedirects ) return ""; - return "AND cur_is_redirect=0 "; + # Return a partial WHERE clause to include or exclude redirects from results + function searchRedirects() { + if ( $this->doSearchRedirects ) { + return ""; + } else { + return "AND cur_is_redirect=0 "; + } } /* private */ function initNamespaceCheckbox( $i ) { global $wgUser, $wgNamespacesToBeSearchedDefault; - if ($wgUser->getID()) { // User is logged in so we retrieve his default namespaces return $wgUser->getOption( "searchNs".$i ); - } - else { + } else { // User is not logged in so we give him the global default namespaces - return $wgNamespacesToBeSearchedDefault[ $i ]; + return !empty($wgNamespacesToBeSearchedDefault[ $i ]); } } # Display the "power search" footer. Does not actually perform the search, # that is done by showResults() - function powersearch() - { - global $wgUser, $wgOut, $wgLang, $wgTitle; - - $search = $_REQUEST['search']; - $searchx = $_REQUEST['searchx']; - $listredirs = $_REQUEST['redirs']; + function powersearch() { + global $wgUser, $wgOut, $wgLang, $wgTitle, $wgRequest; + $sk =& $wgUser->getSkin(); + + $search = $this->rawText; + $searchx = $wgRequest->getVal( 'searchx' ); + $listredirs = $wgRequest->getVal( 'redirs' ); $ret = wfMsg("powersearchtext"); # Text to be returned $tempText = ""; # Temporary text, for substitution into $ret if( isset( $_REQUEST["searchx"] ) ) { - $this->addtoquery["searchx"] = "1"; + $this->addToQuery["searchx"] = "1"; } # Do namespace checkboxes @@ -84,13 +94,13 @@ class SearchEngine { if ( !isset( $searchx ) ) { $checkboxValue = $this->initNamespaceCheckbox( $i ); } else { - $checkboxValue = $_REQUEST[$formVar]; + $checkboxValue = $wgRequest->getVal( $formVar ); } $checked = ""; if ( $checkboxValue == 1 ) { - $checked = " checked"; - $this->addtoquery["ns{$i}"] = 1; + $checked = " checked='checked'"; + $this->addToQuery["ns{$i}"] = 1; array_push( $this->namespacesToSearch, $i ); } $name = str_replace( "_", " ", $namespaces[$i] ); @@ -101,8 +111,8 @@ class SearchEngine { if ( $tempText !== "" ) { $tempText .= " "; } - $tempText .= "{$name}\n"; + $tempText .= "{$name}\n"; } $ret = str_replace ( "$1", $tempText, $ret ); @@ -110,26 +120,27 @@ class SearchEngine { $checked = ""; if ( $listredirs == 1 ) { - $this->addtoquery["redirs"] = 1; - $checked = " checked"; + $this->addToQuery["redirs"] = 1; + $checked = " checked='checked'"; } - $tempText = "\n"; + $tempText = "\n"; $ret = str_replace( "$2", $tempText, $ret ); # Search field - $tempText = "\n"; + $tempText = "\n"; $ret = str_replace( "$3", $tempText, $ret ); # Searchx button - $tempText = "\n"; + $tempText = "\n"; $ret = str_replace( "$9", $tempText, $ret ); - $ret = "

\n
\n{$ret}\n
\n"; + $action = $sk->escapeSearchLink(); + $ret = "

\n
\n{$ret}\n
\n"; if ( isset ( $searchx ) ) { if ( ! $listredirs ) { @@ -139,160 +150,105 @@ class SearchEngine { return $ret; } + function setupPage() { + global $wgOut; + $wgOut->setPageTitle( wfMsg( "searchresults" ) ); + $wgOut->setSubtitle( wfMsg( "searchquery", htmlspecialchars( $this->rawText ) ) ); + $wgOut->setArticleRelated( false ); + $wgOut->setRobotpolicy( "noindex,nofollow" ); + } + # Perform the search and construct the results page - function showResults() - { - global $wgUser, $wgTitle, $wgOut, $wgLang, $wgDisableTextSearch; - global $wgInputEncoding; + function showResults() { + global $wgUser, $wgTitle, $wgOut, $wgLang; + global $wgDisableTextSearch, $wgInputEncoding; $fname = "SearchEngine::showResults"; - $search = $_REQUEST['search']; + $search = $this->rawText; $powersearch = $this->powersearch(); /* Need side-effects here? */ - $wgOut->setPageTitle( wfMsg( "searchresults" ) ); - $q = wfMsg( "searchquery", htmlspecialchars( $this->mUsertext ) ); - $wgOut->setSubtitle( $q ); - $wgOut->setArticleFlag( false ); - $wgOut->setRobotpolicy( "noindex,nofollow" ); + $this->setupPage(); $sk = $wgUser->getSkin(); - $text = wfMsg( "searchresulttext", $sk->makeKnownLink( - wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) ); - $wgOut->addHTML( $text ); - - $this->parseQuery(); - if ( "" == $this->mTitlecond || "" == $this->mTextcond ) { - $wgOut->addHTML( "

" . wfMsg( "badquery" ) . "

\n" . - "

" . wfMsg( "badquerytext" ) ); + $wgOut->addWikiText( wfMsg( "searchresulttext" ) ); + + if ( !$this->parseQuery() ) { + $wgOut->addWikiText( + "==" . wfMsg( "badquery" ) . "==\n" . + wfMsg( "badquerytext" ) ); return; } list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" ); + + if ( $wgDisableTextSearch ) { + $wgOut->addHTML( wfMsg( "searchdisabled" ) ); + $wgOut->addHTML( wfMsg( "googlesearch", + htmlspecialchars( $this->rawText ), + htmlspecialchars( $wgInputEncoding ) ) ); + return; + } - $searchnamespaces = $this->queryNamespaces(); - $redircond = $this->searchRedirects(); + $titleMatches = $this->getMatches( $this->titleCond, $limit, $offset ); + $textMatches = $this->getMatches( $this->textCond, $limit, $offset ); - if ( $wgDisableTextSearch ) { - $wgOut->addHTML( wfMsg( "searchdisabled", htmlspecialchars( $search ), $wgInputEncoding ) ); + $sk = $wgUser->getSkin(); + + $num = count( $titleMatches ) + count( $textMatches ); + if ( $num >= $limit ) { + $top = wfShowingResults( $offset, $limit ); } else { - $sql = "SELECT cur_id,cur_namespace,cur_title," . - "cur_text FROM cur,searchindex " . - "WHERE cur_id=si_page AND {$this->mTitlecond} " . - "{$searchnamespaces} {$redircond}" . - "LIMIT {$offset}, {$limit}"; - $res1 = wfQuery( $sql, DB_READ, $fname ); - $num = wfNumRows($res1); - - $sk = $wgUser->getSkin(); - $text = wfMsg( "searchresulttext", $sk->makeKnownLink( - wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) ); - $wgOut->addHTML( $text ); - - $this->parseQuery(); - if ( "" == $this->mTitlecond || "" == $this->mTextcond ) { - $wgOut->addHTML( "

" . wfMsg( "badquery" ) . "

\n" . - "

" . wfMsg( "badquerytext" ) ); - return; - } - list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" ); - - $searchnamespaces = $this->queryNamespaces(); - $redircond = $this->searchRedirects(); - - $sql = "SELECT cur_id,cur_namespace,cur_title," . - "cur_text FROM cur,searchindex " . - "WHERE cur_id=si_page AND {$this->mTitlecond} " . - "{$searchnamespaces} {$redircond}" . - "LIMIT {$offset}, {$limit}"; - $res1 = wfQuery( $sql, DB_READ, $fname ); - $num = wfNumRows($res1); - - $sql = "SELECT cur_id,cur_namespace,cur_title," . - "cur_text FROM cur,searchindex " . - "WHERE cur_id=si_page AND {$this->mTextcond} " . - "{$searchnamespaces} {$redircond} " . - "LIMIT {$offset}, {$limit}"; - $res2 = wfQuery( $sql, DB_READ, $fname ); - $num = $num + wfNumRows($res2); - - if ( $num == $limit ) { - $top = wfShowingResults( $offset, $limit); - } else { - $top = wfShowingResultsNum( $offset, $limit, $num ); - } - $wgOut->addHTML( "

{$top}\n" ); - - # For powersearch - - $a2l = "" ; - $akk = array_keys( $this->addtoquery ) ; - foreach ( $akk AS $ak ) { - $a2l .= "&{$ak}={$this->addtoquery[$ak]}" ; - } - - $sl = wfViewPrevNext( $offset, $limit, "", - "search=" . wfUrlencode( $this->mUsertext ) . $a2l ); - $wgOut->addHTML( "
{$sl}\n" ); - - $foundsome = false; - - if ( 0 == wfNumRows( $res1 ) ) { - $wgOut->addHTML( "

" . wfMsg( "notitlematches" ) . - "

\n" ); - } else { - $foundsome = true; - $off = $offset + 1; - $wgOut->addHTML( "

" . wfMsg( "titlematches" ) . - "

\n
    " ); - - while ( $row = wfFetchObject( $res1 ) ) { - $this->showHit( $row ); - } - wfFreeResult( $res1 ); - $wgOut->addHTML( "
\n" ); - } + $top = wfShowingResultsNum( $offset, $limit, $num ); + } + $wgOut->addHTML( "

{$top}

\n" ); - if ( 0 == wfNumRows( $res2 ) ) { - $wgOut->addHTML( "

" . wfMsg( "notextmatches" ) . - "

\n" ); - } else { - $foundsome = true; - $off = $offset + 1; - $wgOut->addHTML( "

" . wfMsg( "textmatches" ) . "

\n" . - "
    " ); - while ( $row = wfFetchObject( $res2 ) ) { - $this->showHit( $row ); - } - wfFreeResult( $res2 ); - $wgOut->addHTML( "
\n" ); - } - if ( ! $foundsome ) { - $wgOut->addHTML( "

" . wfMsg( "nonefound" ) . "\n" ); - } - $wgOut->addHTML( "

{$sl}\n" ); - $wgOut->addHTML( $powersearch ); + # For powersearch + $a2l = ""; + $akk = array_keys( $this->addToQuery ); + foreach ( $akk AS $ak ) { + $a2l .= "&{$ak}={$this->addToQuery[$ak]}" ; } + + $prevnext = wfViewPrevNext( $offset, $limit, "", + "search=" . wfUrlencode( $this->filteredText ) . $a2l ); + $wgOut->addHTML( "
{$prevnext}\n" ); + + $foundsome = $this->showMatches( $titleMatches, $offset, "notitlematches", "titlematches" ) + || $this->showMatches( $textMatches, $offset, "notextmatches", "textmatches" ); + + if ( !$foundsome ) { + $wgOut->addWikiText( wfMsg( "nonefound" ) ); + } + $wgOut->addHTML( "

{$prevnext}

\n" ); + $wgOut->addHTML( $powersearch ); } - function legalSearchChars() - { + function legalSearchChars() { $lc = "A-Za-z_'0-9\\x80-\\xFF\\-"; return $lc; } - function parseQuery() - { - global $wgDBminWordLen, $wgLang, $wgDBmysql4; - + function parseQuery() { + global $wgDBmysql4; if( $wgDBmysql4 ) { # Use cleaner boolean search if available return $this->parseQuery4(); + } else { + # Fall back to ugly hack with multiple search clauses + return $this->parseQuery3(); } + } + + function parseQuery3() { + global $wgDBminWordLen, $wgLang; + + # on non mysql4 database: get list of words we don't want to search for + require_once( "FulltextStoplist.php" ); $lc = SearchEngine::legalSearchChars() . "()"; - $q = preg_replace( "/([()])/", " \\1 ", $this->mUsertext ); + $q = preg_replace( "/([()])/", " \\1 ", $this->filteredText ); $q = preg_replace( "/\\s+/", " ", $q ); - $w = explode( " ", strtolower( trim( $q ) ) ); + $w = explode( " ", trim( $q ) ); $last = $cond = ""; foreach ( $w as $word ) { @@ -308,42 +264,108 @@ class SearchEngine { } else { if ( "" != $last ) { $cond .= " AND"; } $cond .= " (MATCH (##field##) AGAINST ('" . - wfStrencode( $word ). "'))"; + $this->db->strencode( $word ). "'))"; $last = $word; - array_push( $this->mSearchterms, "\\b" . $word . "\\b" ); + array_push( $this->searchTerms, "\\b" . $word . "\\b" ); } } - if ( 0 == count( $this->mSearchterms ) ) { return; } + if ( 0 == count( $this->searchTerms ) ) { + return MW_SEARCH_BAD_QUERY; + } - $this->mTitlecond = "(" . str_replace( "##field##", + $this->titleCond = "(" . str_replace( "##field##", "si_title", $cond ) . " )"; - $this->mTextcond = "(" . str_replace( "##field##", + $this->textCond = "(" . str_replace( "##field##", "si_text", $cond ) . " AND (cur_is_redirect=0) )"; + + return MW_SEARCH_OK; } - function parseQuery4() - { - # FIXME: not ready yet! Do not use. - + function parseQuery4() { global $wgLang; $lc = SearchEngine::legalSearchChars(); - #$q = preg_replace( "/([+-]?)([$lc]+)/e", - # "\"$1\" . \$wgLang->stripForSearch(\"$2\")", - # $this->mUsertext ); + $searchon = ""; + $this->searchTerms = array(); + + # FIXME: This doesn't handle parenthetical expressions. + if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', + $this->filteredText, $m, PREG_SET_ORDER ) ) { + foreach( $m as $terms ) { + if( $searchon !== "" ) $searchon .= " "; + if( $this->strictMatching && ($terms[1] == "") ) { + $terms[1] = "+"; + } + $searchon .= $terms[1] . $wgLang->stripForSearch( $terms[2] ); + if( $terms[3] ) { + $regexp = preg_quote( $terms[3] ); + if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; + } else { + $regexp = preg_quote( str_replace( '"', '', $terms[2] ) ); + } + $this->searchTerms[] = $regexp; + } + wfDebug( "Would search with '$searchon'\n" ); + wfDebug( "Match with /\b" . implode( '\b|\b', $this->searchTerms ) . "\b/\n" ); + } else { + wfDebug( "Can't understand search query '{$this->filteredText}'\n" ); + } - $q = $this->mUsertext; - $qq = wfStrencode( $wgLang->stripForSearch( $q ) ); - $this->mSearchterms = preg_split( '/\s+/', $q ); - $this->mTitlecond = " MATCH(si_title) AGAINST('$qq' IN BOOLEAN MODE)"; - $this->mTextcond = " (MATCH(si_text) AGAINST('$qq' IN BOOLEAN MODE) AND cur_is_redirect=0)"; + $searchon = $this->db->strencode( $searchon ); + $this->titleCond = " MATCH(si_title) AGAINST('$searchon' IN BOOLEAN MODE)"; + $this->textCond = " (MATCH(si_text) AGAINST('$searchon' IN BOOLEAN MODE) AND cur_is_redirect=0)"; + return MW_SEARCH_OK; } - function showHit( $row ) - { - global $wgUser, $wgOut; + function &getMatches( $cond, $limit, $offset = 0 ) { + $searchindex = $this->db->tableName( 'searchindex' ); + $cur = $this->db->tableName( 'cur' ); + $searchnamespaces = $this->queryNamespaces(); + $redircond = $this->searchRedirects(); + + $sql = "SELECT cur_id,cur_namespace,cur_title," . + "cur_text FROM $cur,$searchindex " . + "WHERE cur_id=si_page AND {$cond} " . + "{$searchnamespaces} {$redircond} " . + $this->db->limitResult( $limit, $offset ); + + $res = $this->db->query( $sql, "SearchEngine::getMatches" ); + $matches = array(); + while ( $row = $this->db->fetchObject( $res ) ) { + $matches[] = $row; + } + $this->db->freeResult( $res ); + + return $matches; + } + + function showMatches( &$matches, $offset, $msgEmpty, $msgFound ) { + global $wgOut; + if ( 0 == count( $matches ) ) { + $wgOut->addHTML( "

" . wfMsg( $msgEmpty ) . + "

\n" ); + return false; + } else { + $off = $offset + 1; + $wgOut->addHTML( "

" . wfMsg( $msgFound ) . + "

\n
    " ); + + foreach( $matches as $row ) { + $this->showHit( $row ); + } + $wgOut->addHTML( "
\n" ); + return true; + } + } + + function showHit( $row ) { + global $wgUser, $wgOut, $wgLang; $t = Title::makeName( $row->cur_namespace, $row->cur_title ); + if( is_null( $t ) ) { + $wgOut->addHTML( "\n" ); + return; + } $sk = $wgUser->getSkin(); $contextlines = $wgUser->getOption( "contextlines" ); @@ -356,115 +378,232 @@ class SearchEngine { $wgOut->addHTML( "
  • {$link} ({$size})" ); $lines = explode( "\n", $row->cur_text ); - $pat1 = "/(.*)(" . implode( "|", $this->mSearchterms ) . ")(.*)/i"; + $pat1 = "/(.*)(" . implode( "|", $this->searchTerms ) . ")(.*)/i"; $lineno = 0; foreach ( $lines as $line ) { - if ( 0 == $contextlines ) { break; } + if ( 0 == $contextlines ) { + break; + } --$contextlines; ++$lineno; - if ( ! preg_match( $pat1, $line, $m ) ) { continue; } - - $pre = $m[1]; - if ( 0 == $contextchars ) { $pre = "..."; } - else { - if ( strlen( $pre ) > $contextchars ) { - $pre = "..." . substr( $pre, -$contextchars ); - } + if ( ! preg_match( $pat1, $line, $m ) ) { + continue; } - $pre = wfEscapeHTML( $pre ); - if ( count( $m ) < 3 ) { $post = ""; } - else { $post = $m[3]; } + $pre = $wgLang->truncate( $m[1], -$contextchars, "..." ); - if ( 0 == $contextchars ) { $post = "..."; } - else { - if ( strlen( $post ) > $contextchars ) { - $post = substr( $post, 0, $contextchars ) . "..."; - } + if ( count( $m ) < 3 ) { + $post = ""; + } else { + $post = $wgLang->truncate( $m[3], $contextchars, "..." ); } - $post = wfEscapeHTML( $post ); - $found = wfEscapeHTML( $m[2] ); - $line = "{$pre}{$found}{$post}"; - $pat2 = "/(" . implode( "|", $this->mSearchterms ) . ")/i"; + $found = $m[2]; + + $line = htmlspecialchars( $pre . $found . $post ); + $pat2 = "/(" . implode( "|", $this->searchTerms ) . ")/i"; $line = preg_replace( $pat2, - "\\1", $line ); + "\\1", $line ); - $wgOut->addHTML( "
    {$lineno}: {$line}\n" ); + $wgOut->addHTML( "
    {$lineno}: {$line}\n" ); } $wgOut->addHTML( "
  • \n" ); } - function goResult() - { - global $wgOut, $wgDisableTextSearch; - $fname = "SearchEngine::goResult"; + function getNearMatch() { + # Exact match? No need to look further. + $title = Title::newFromText( $this->rawText ); + if ( $title->getNamespace() == NS_SPECIAL || 0 != $title->getArticleID() ) { + return $title; + } + + # Now try all lower case (i.e. first letter capitalized) + # + $title = Title::newFromText( strtolower( $this->rawText ) ); + if ( 0 != $title->getArticleID() ) { + return $title; + } + + # Now try capitalized string + # + $title = Title::newFromText( ucwords( strtolower( $this->rawText ) ) ); + if ( 0 != $title->getArticleID() ) { + return $title; + } + + # Now try all upper case + # + $title = Title::newFromText( strtoupper( $this->rawText ) ); + if ( 0 != $title->getArticleID() ) { + return $title; + } + + # Entering an IP address goes to the contributions page + if ( preg_match( '/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $this->rawText ) ) { + $title = Title::makeTitle( NS_SPECIAL, "Contributions/" . $this->rawText ); + return $title; + } - $search = $_REQUEST['search']; + return NULL; + } - # First try to go to page as entered. + function goResult() { + global $wgOut, $wgGoToEdit; + global $wgDisableTextSearch; + $fname = "SearchEngine::goResult"; + + # Try to go to page as entered. # - $t = Title::newFromText( $search ); + $t = Title::newFromText( $this->rawText ); # If the string cannot be used to create a title - if( false == $t ){ + if( is_null( $t ) ){ $this->showResults(); return; } - if ( 0 != $t->getArticleID() ) { - $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) ); + # If there's an exact or very near match, jump right there. + $t = $this->getNearMatch(); + if( !is_null( $t ) ) { + $wgOut->redirect( $t->getFullURL() ); return; } - - # Now try all lower case (i.e. first letter capitalized) - # - $t = Title::newFromText( strtolower( $search ) ); - if ( 0 != $t->getArticleID() ) { - $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) ); + + # No match, generate an edit URL + $t = Title::newFromText( $this->rawText ); + + # If the feature is enabled, go straight to the edit page + if ( $wgGoToEdit ) { + $wgOut->redirect( $t->getFullURL( "action=edit" ) ); return; } + + if( $t ) { + $editurl = $t->escapeLocalURL( "action=edit" ); + } else { + $editurl = ""; # ?? + } + $wgOut->addHTML( "

    " . wfMsg("nogomatch", $editurl ) . "

    \n" ); + + # Try a fuzzy title search + $anyhit = false; + global $wgDisableFuzzySearch; + if(! $wgDisableFuzzySearch ){ + foreach( array(NS_MAIN, NS_WP, NS_USER, NS_IMAGE, NS_MEDIAWIKI) as $namespace){ + $anyhit |= SearchEngine::doFuzzyTitleSearch( $this->rawText, $namespace ); + } + } + + if( ! $anyhit ){ + return $this->showResults(); + } + } - # Now try capitalized string - # - $t = Title::newFromText( ucwords( strtolower( $search ) ) ); - if ( 0 != $t->getArticleID() ) { - $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) ); - return; + /* static */ function doFuzzyTitleSearch( $search, $namespace ){ + global $wgLang, $wgOut; + + $this->setupPage(); + + $sstr = ucfirst($search); + $sstr = str_replace(" ", "_", $sstr); + $fuzzymatches = SearchEngine::fuzzyTitles( $sstr, $namespace ); + $fuzzymatches = array_slice($fuzzymatches, 0, 10); + $slen = strlen( $search ); + $wikitext = ""; + foreach($fuzzymatches as $res){ + $t = str_replace("_", " ", $res[1]); + $tfull = $wgLang->getNsText( $namespace ) . ":$t|$t"; + if( $namespace == NS_MAIN ) + $tfull = "$t"; + $distance = $res[0]; + $closeness = (strlen( $search ) - $distance) / strlen( $search ); + $percent = intval( $closeness * 100 ) . "%"; + $stars = str_repeat("*", ceil(5 * $closeness) ); + $wikitext .= "* [[$tfull]] $percent ($stars)\n"; } + if( $wikitext ){ + if( $namespace != NS_MAIN ) + $wikitext = "=== " . $wgLang->getNsText( $namespace ) . " ===\n" . $wikitext; + $wgOut->addWikiText( $wikitext ); + return true; + } + return false; + } - # Now try all upper case - # - $t = Title::newFromText( strtoupper( $search ) ); - if ( 0 != $t->getArticleID() ) { - $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) ); - return; + /* static */ function fuzzyTitles( $sstr, $namespace = NS_MAIN ){ + $span = 0.10; // weed on title length before doing levenshtein. + $tolerance = 0.35; // allowed percentage of erronous characters + $slen = strlen($sstr); + $tolerance_count = ceil($tolerance * $slen); + $spanabs = ceil($slen * (1 + $span)) - $slen; + # print "Word: $sstr, len = $slen, range = [$min, $max], tolerance_count = $tolerance_count
    \n"; + $result = array(); + $cnt = 0; + for( $i=0; $i <= $spanabs; $i++ ){ + $titles = SearchEngine::getTitlesByLength( $slen + $i, $namespace ); + if( $i != 0) { + $titles = array_merge($titles, SearchEngine::getTitlesByLength( $slen - $i, $namespace ) ); + } + foreach($titles as $t){ + $d = levenshtein($sstr, $t); + if($d < $tolerance_count) + $result[] = array($d, $t); + $cnt++; + } } + usort($result, "SearchEngine_pcmp"); + return $result; + } - # Try a near match - # - if( !$wgDisableTextSearch ) { - $this->parseQuery(); - $sql = "SELECT cur_id,cur_title,cur_namespace,si_page FROM cur,searchindex " . - "WHERE cur_id=si_page AND {$this->mTitlecond} ORDER BY cur_namespace LIMIT 1"; - - if ( "" != $this->mTitlecond ) { - $res = wfQuery( $sql, DB_READ, $fname ); - } - if ( isset( $res ) && 0 != wfNumRows( $res ) ) { - $s = wfFetchObject( $res ); - - $t = Title::makeTitle( $s->cur_namespace, $s->cur_title ); - $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) ); - return; + /* static */ function getTitlesByLength($aLength, $aNamespace = 0){ + global $wgMemc, $wgDBname; + $fname = 'SearchEngin::getTitlesByLength'; + + // to avoid multiple costly SELECTs in case of no memcached + if( $this->allTitles ){ + if( isset( $this->allTitles[$aLength][$aNamespace] ) ){ + return $this->allTitles[$aLength][$aNamespace]; + } else { + return array(); } } - $wgOut->addHTML( wfMsg("nogomatch", - htmlspecialchars( wfLocalUrl( ucfirst($this->mUsertext), "action=edit") ) ) - . "\n

    " ); - $this->showResults(); + + $mkey = "$wgDBname:titlesbylength:$aLength:$aNamespace"; + $mkeyts = "$wgDBname:titlesbylength:createtime"; + $ts = $wgMemc->get( $mkeyts ); + $result = $wgMemc->get( $mkey ); + + if( time() - $ts < 3600 ){ + // note: in case of insufficient memcached space, we return + // an empty list instead of starting to hit the DB. + return is_array( $result ) ? $result : array(); + } + + $wgMemc->set( $mkeyts, time() ); + + $res = $this->db->select( 'cur', array( 'cur_title', 'cur_namespace' ), false, $fname ); + $titles = array(); // length, ns, [titles] + while( $obj = $this->db->fetchObject( $res ) ){ + $title = $obj->cur_title; + $ns = $obj->cur_namespace; + $len = strlen( $title ); + $titles[$len][$ns][] = $title; + } + foreach($titles as $length => $length_arr){ + foreach($length_arr as $ns => $title_arr){ + $mkey = "$wgDBname:titlesbylength:$length:$ns"; + $wgMemc->set( $mkey, $title_arr, 3600 * 24 ); + } + } + $this->allTitles = $titles; + if( isset( $titles[$aLength][$aNamespace] ) ) + return $titles[$aLength][$aNamespace]; + else + return array(); } } +/* private static */ function SearchEngine_pcmp($a, $b){ return $a[0] - $b[0]; } + ?>