DB error log
[lhc/web/wiklou.git] / includes / SearchEngine.php
index 35a4be1..53d9e69 100644 (file)
@@ -1,4 +1,4 @@
-<?
+<?php
 # See search.doc
 
 class SearchEngine {
@@ -9,6 +9,7 @@ class SearchEngine {
        var $addtoquery = array();
        var $namespacesToSearch = array();
        var $alternateTitle;
+       var $all_titles = false;
 
        function SearchEngine( $text )
        {
@@ -49,7 +50,7 @@ class SearchEngine {
                        return $wgUser->getOption( "searchNs".$i );
                } else {
                        // User is not logged in so we give him the global default namespaces
-                       return $wgNamespacesToBeSearchedDefault[ $i ];
+                       return !empty($wgNamespacesToBeSearchedDefault[ $i ]);
                }
        }
 
@@ -57,11 +58,12 @@ class SearchEngine {
        # that is done by showResults()
        function powersearch()
        {
-               global $wgUser, $wgOut, $wgLang, $wgTitle;
-
-               $search                 = $_REQUEST['search'];
-               $searchx                = $_REQUEST['searchx'];
-               $listredirs             = $_REQUEST['redirs'];
+               global $wgUser, $wgOut, $wgLang, $wgTitle, $wgRequest;
+               $sk =& $wgUser->getSkin();
+               
+               $search                 = $wgRequest->getText( 'search' );
+               $searchx                = $wgRequest->getVal( 'searchx' );
+               $listredirs             = $wgRequest->getVal( 'redirs' );
                
                $ret = wfMsg("powersearchtext"); # Text to be returned
                $tempText = ""; # Temporary text, for substitution into $ret    
@@ -85,12 +87,12 @@ class SearchEngine {
                        if ( !isset( $searchx ) ) {
                                $checkboxValue = $this->initNamespaceCheckbox( $i );
                        } else {
-                               $checkboxValue = $_REQUEST[$formVar];
+                               $checkboxValue = $wgRequest->getVal( $formVar );
                        }
 
                        $checked = "";
                        if ( $checkboxValue == 1 ) {
-                               $checked = " checked";
+                               $checked = " checked='checked'";
                                $this->addtoquery["ns{$i}"] = 1;
                                array_push( $this->namespacesToSearch, $i );
                        }
@@ -102,8 +104,8 @@ class SearchEngine {
                        if ( $tempText !== "" ) { 
                                $tempText .= " "; 
                        }
-                       $tempText .= "<input type=checkbox value=\"1\" name=\"" .
-                         "ns{$i}\"{$checked}>{$name}\n";
+                       $tempText .= "<input type='checkbox' value=\"1\" name=\"" .
+                         "ns{$i}\"{$checked} />{$name}\n";
                }
                $ret = str_replace ( "$1", $tempText, $ret );
 
@@ -112,25 +114,26 @@ class SearchEngine {
                $checked = "";
                if ( $listredirs == 1 ) {
                        $this->addtoquery["redirs"] = 1;
-                       $checked = " checked";
+                       $checked = " checked='checked'";
                }
-               $tempText = "<input type=checkbox value=1 name=\"redirs\"{$checked}>\n";
+               $tempText = "<input type='checkbox' value='1' name=\"redirs\"{$checked} />\n";
                $ret = str_replace( "$2", $tempText, $ret );
 
                # Search field
 
-               $tempText = "<input type=text name=\"search\" value=\"" .
-                       htmlspecialchars( $search ) ."\" width=80>\n";
+               $tempText = "<input type='text' name=\"search\" value=\"" .
+                       htmlspecialchars( $search ) ."\" width='80' />\n";
         $ret = str_replace( "$3", $tempText, $ret );
 
                # Searchx button
 
-               $tempText = "<input type=submit name=\"searchx\" value=\"" .
-                 wfMsg("powersearch") . "\">\n";
+               $tempText = "<input type='submit' name=\"searchx\" value=\"" .
+                 wfMsg("powersearch") . "\" />\n";
                $ret = str_replace( "$9", $tempText, $ret );
 
-               $ret = "<br><br>\n<form id=\"powersearch\" method=\"get\" " .
-                 "action=\"" . wfLocalUrl( "" ) . "\">\n{$ret}\n</form>\n";
+               $action = $sk->escapeSearchLink();
+               $ret = "<br /><br />\n<form id=\"powersearch\" method=\"get\" " .
+                 "action=\"$action\">\n{$ret}\n</form>\n";
 
                if ( isset ( $searchx ) ) {
                        if ( ! $listredirs ) { 
@@ -140,23 +143,28 @@ class SearchEngine {
                return $ret;
        }
 
+       # Prepare the shared output page for a search view: page title,
+       # subtitle echoing the (HTML-escaped) user query, and page flags.
+       function setupPage() {
+               global $wgOut;
+
+               $pageTitle = wfMsg( "searchresults" );
+               $wgOut->setPageTitle( $pageTitle );
+
+               # The user's text is escaped before being substituted into the message
+               $escapedQuery = htmlspecialchars( $this->mUsertext );
+               $wgOut->setSubtitle( wfMsg( "searchquery", $escapedQuery ) );
+
+               $wgOut->setArticleRelated( false );
+               $wgOut->setRobotpolicy( "noindex,nofollow" );
+       }
+
        # Perform the search and construct the results page
        function showResults()
        {
-               global $wgUser, $wgTitle, $wgOut, $wgLang, $wgDisableTextSearch;
-               global $wgInputEncoding;
+               global $wgUser, $wgTitle, $wgOut, $wgLang, $wgRequest;
+               global $wgDisableTextSearch, $wgInputEncoding;
                $fname = "SearchEngine::showResults";
 
-               $search = $_REQUEST['search'];
+               $search = $wgRequest->getText( 'search' );
 
                $powersearch = $this->powersearch(); /* Need side-effects here? */
 
-               $wgOut->setPageTitle( wfMsg( "searchresults" ) );
-               $q = wfMsg( "searchquery", htmlspecialchars( $this->mUsertext ) );
-               $wgOut->setSubtitle( $q );
-               $wgOut->setArticleFlag( false );
-               $wgOut->setRobotpolicy( "noindex,nofollow" );
-
+               $this->setupPage();
+               
                $sk = $wgUser->getSkin();
                $header = wfMsg( "searchresulttext", $sk->makeKnownLink(
                  wfMsg( "searchhelppage" ), wfMsg( "searchingwikipedia" ) ) );
@@ -165,7 +173,7 @@ class SearchEngine {
                $this->parseQuery();
                if ( "" == $this->mTitlecond || "" == $this->mTextcond ) {
                        $wgOut->addHTML( "<h2>" . wfMsg( "badquery" ) . "</h2>\n" .
-                         "<p>" . wfMsg( "badquerytext" ) );
+                         "<p>" . wfMsg( "badquerytext" ) . "</p>\n" );
                        return;
                }
                list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" );
@@ -174,7 +182,8 @@ class SearchEngine {
                $redircond = $this->searchRedirects();
 
                if ( $wgDisableTextSearch ) {
-                       $wgOut->addHTML( wfMsg( "searchdisabled", htmlspecialchars( $search ), $wgInputEncoding ) );
+                       $wgOut->addHTML( wfMsg( "searchdisabled" ) );
+                       $wgOut->addHTML( wfMsg( "googlesearch", htmlspecialchars( $search ), $GLOBALS['wgInputEncoding'] ) );
                } else {
                        $sql = "SELECT cur_id,cur_namespace,cur_title," .
                          "cur_text FROM cur,searchindex " .
@@ -190,7 +199,7 @@ class SearchEngine {
                        $this->parseQuery();
                        if ( "" == $this->mTitlecond || "" == $this->mTextcond ) {
                                $wgOut->addHTML( "<h2>" . wfMsg( "badquery" ) . "</h2>\n" .
-                                 "<p>" . wfMsg( "badquerytext" ) );
+                                 "<p>" . wfMsg( "badquerytext" ) . "</p>\n" );
                                return;
                        }
                        list( $limit, $offset ) = wfCheckLimits( 20, "searchlimit" );
@@ -219,7 +228,7 @@ class SearchEngine {
                        } else {
                          $top = wfShowingResultsNum( $offset, $limit, $num );
                        }
-                       $wgOut->addHTML( "<p>{$top}\n" );
+                       $wgOut->addHTML( "<p>{$top}</p>\n" );
        
                        # For powersearch
        
@@ -231,7 +240,7 @@ class SearchEngine {
        
                        $sl = wfViewPrevNext( $offset, $limit, "",
                          "search=" . wfUrlencode( $this->mUsertext ) . $a2l );
-                       $wgOut->addHTML( "<br>{$sl}\n" );
+                       $wgOut->addHTML( "<br />{$sl}\n" );
        
                        $foundsome = false;
        
@@ -266,9 +275,9 @@ class SearchEngine {
                                $wgOut->addHTML( "</ol>\n" );
                        }
                        if ( ! $foundsome ) {
-                               $wgOut->addHTML( "<p>" . wfMsg( "nonefound" ) . "\n" );
+                               $wgOut->addHTML( "<p>" . wfMsg( "nonefound" ) . "</p>\n" );
                        }
-                       $wgOut->addHTML( "<p>{$sl}\n" );
+                       $wgOut->addHTML( "<p>{$sl}</p>\n" );
                        $wgOut->addHTML( $powersearch );
                }
        }
@@ -287,6 +296,8 @@ class SearchEngine {
                        # Use cleaner boolean search if available
                        return $this->parseQuery4();
                }
+               # on non mysql4 database: get list of words we don't want to search for
+               require_once( "FulltextStoplist.php" );
 
                $lc = SearchEngine::legalSearchChars() . "()";
                $q = preg_replace( "/([()])/", " \\1 ", $this->mUsertext );
@@ -408,19 +419,20 @@ class SearchEngine {
                        $line = preg_replace( $pat2,
                          "<font color='red'>\\1</font>", $line );
 
-                       $wgOut->addHTML( "<br><small>{$lineno}: {$line}</small>\n" );
+                       $wgOut->addHTML( "<br /><small>{$lineno}: {$line}</small>\n" );
                }
                $wgOut->addHTML( "</li>\n" );
        }
 
        function goResult()
        {
-               global $wgOut, $wgDisableTextSearch;
+               global $wgOut, $wgRequest, $wgGoToEdit;
+               global $wgDisableTextSearch;
                $fname = "SearchEngine::goResult";
                
-               $search         = $_REQUEST['search'];
+               $search = trim( $wgRequest->getText( "search" ) );
 
-               # First try to go to page as entered.
+               # Try to go to page as entered.
                #
                $t = Title::newFromText( $search );
 
@@ -430,8 +442,9 @@ class SearchEngine {
                        return;
                }
 
-               if ( 0 != $t->getArticleID() ) {
-                       $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+               # Exact match? No need to look further.
+               if ( $t->getNamespace() == NS_SPECIAL || 0 != $t->getArticleID() ) {
+                       $wgOut->redirect( $t->getFullURL() );
                        return;
                }
 
@@ -439,7 +452,7 @@ class SearchEngine {
                #
                $t = Title::newFromText( strtolower( $search ) );
                if ( 0 != $t->getArticleID() ) {
-                       $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+                       $wgOut->redirect( $t->getFullURL() );
                        return;
                }
 
@@ -447,7 +460,7 @@ class SearchEngine {
                #
                $t = Title::newFromText( ucwords( strtolower( $search ) ) );
                if ( 0 != $t->getArticleID() ) {
-                       $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+                       $wgOut->redirect( $t->getFullURL() );
                        return;
                }
 
@@ -455,33 +468,150 @@ class SearchEngine {
                #
                $t = Title::newFromText( strtoupper( $search ) );
                if ( 0 != $t->getArticleID() ) {
-                       $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
+                       $wgOut->redirect( $t->getFullURL() );
                        return;
                }
 
-               # Try a near match
-               #
-               if( !$wgDisableTextSearch ) {
-                       $this->parseQuery();                                                                            
-                       $sql = "SELECT cur_id,cur_title,cur_namespace,si_page FROM cur,searchindex " .
-                         "WHERE cur_id=si_page AND {$this->mTitlecond} ORDER BY cur_namespace LIMIT 1";
-       
-                       if ( "" != $this->mTitlecond ) {
-                               $res = wfQuery( $sql, DB_READ, $fname );
-                       }                               
-                       if ( isset( $res ) && 0 != wfNumRows( $res ) ) {
-                               $s = wfFetchObject( $res );
-       
-                               $t = Title::makeTitle( $s->cur_namespace, $s->cur_title );
-                               $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) );
-                               return;
+               # Entering an IP address goes to the contributions page
+               if ( preg_match( '/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $search ) ) {
+                       $title = Title::makeTitle( NS_SPECIAL, "Contributions" );
+                       $wgOut->redirect( $title->getFullUrl( "target=$search" ) );
+                       return;
+               }
+
+               # No match, generate an edit URL
+               $t = Title::newFromText( $this->mUsertext );
+               
+               # If the feature is enabled, go straight to the edit page
+               if ( $wgGoToEdit ) {
+                       $wgOut->redirect( $t->getFullURL( "action=edit" ) );
+                       return;
+               }
+               
+               if( $t ) {
+                       $editurl = $t->escapeLocalURL( "action=edit" );
+               } else {
+                       $editurl = ""; # ?? 
+               }
+               $wgOut->addHTML( "<p>" . wfMsg("nogomatch", $editurl ) . "</p>\n" );
+
+               # Try a fuzzy title search
+               $anyhit = false;
+               global $wgDisableFuzzySearch;
+               if(! $wgDisableFuzzySearch ){
+                       foreach( array(NS_MAIN, NS_WP, NS_USER, NS_IMAGE, NS_MEDIAWIKI) as $namespace){
+                               $anyhit |= SearchEngine::doFuzzyTitleSearch( $search, $namespace );
+                       }
+               }
+               
+               if( ! $anyhit ){
+                       return $this->showResults();
+               }
+       }
+
+       # Write up to ten fuzzy (edit-distance) title matches for $search in one
+       # namespace to the output page as a wikitext bullet list.
+       #
+       # Marked static, but it calls $this->setupPage(), so it relies on being
+       # invoked from inside a SearchEngine instance method (goResult() calls it
+       # as SearchEngine::doFuzzyTitleSearch()).
+       #
+       # $search     search text as entered by the user
+       # $namespace  namespace number whose titles are compared
+       # Returns true if at least one match was added to $wgOut, false otherwise.
+       /* static */ function doFuzzyTitleSearch( $search, $namespace ){
+               global $wgLang, $wgOut;
+               
+               $this->setupPage();
+               
+               # Titles are compared in their stored form: first letter
+               # capitalised and underscores instead of spaces.
+               $sstr = ucfirst($search);
+               $sstr = str_replace(" ", "_", $sstr);
+               $fuzzymatches = SearchEngine::fuzzyTitles( $sstr, $namespace );
+               $fuzzymatches = array_slice($fuzzymatches, 0, 10);
+               $slen = strlen( $search );
+               $wikitext = "";
+               foreach($fuzzymatches as $res){
+                       # Each match is array( distance, title )
+                       $t = str_replace("_", " ", $res[1]);
+                       if( $namespace == NS_MAIN ) {
+                               $tfull = "$t";
+                       } else {
+                               # The leading colon forces an ordinary link; without it
+                               # a [[Image:...]] target would embed the file instead
+                               # of linking to its page.
+                               $tfull = ":" . $wgLang->getNsText( $namespace ) . ":$t|$t";
+                       }
+                       $distance = $res[0];
+                       # Share of the search string's characters that did not need editing
+                       $closeness = ($slen - $distance) / $slen;
+                       $percent = intval( $closeness * 100 ) . "%";
+                       $stars = str_repeat("*", ceil(5 * $closeness) );
+                       $wikitext .= "* [[$tfull]] $percent ($stars)\n";        
+               }
+               if( $wikitext ){
+                       # Non-main namespaces get their own sub-heading
+                       if( $namespace != NS_MAIN )
+                               $wikitext = "=== " . $wgLang->getNsText( $namespace ) . " ===\n" . $wikitext;
+                       $wgOut->addWikiText( $wikitext );
+                       return true;
+               }
+               return false;
+       }
+
+       # Collect page titles in $namespace that lie within a small Levenshtein
+       # distance of $sstr.
+       #
+       # Only titles whose byte length is within $span (10%) of the length of
+       # $sstr are compared, and a title is kept when its distance is below
+       # $tolerance (35%) of that length, rounded up.
+       #
+       # $sstr       search string, expected in stored title form
+       # $namespace  namespace number to take candidate titles from
+       # Returns a list of array( distance, title ) pairs, sorted by ascending
+       # distance via SearchEngine_pcmp().
+       /* static */ function fuzzyTitles( $sstr, $namespace = NS_MAIN ){
+               $span = 0.10; // weed on title length before doing levenshtein.
+               $tolerance = 0.35; // allowed percentage of erroneous characters
+               $slen = strlen($sstr);
+               $tolerance_count = ceil($tolerance * $slen);
+               $spanabs = ceil($slen * (1 + $span)) - $slen;
+               $result = array();
+               for( $i=0; $i <= $spanabs; $i++ ){
+                       # Candidates that are $i bytes longer than the search string ...
+                       $titles = SearchEngine::getTitlesByLength( $slen + $i, $namespace );
+                       if( $i != 0) {
+                               # ... and, except for the exact length, $i bytes shorter
+                               $titles = array_merge($titles, SearchEngine::getTitlesByLength( $slen - $i, $namespace ) );
+                       }
+                       foreach($titles as $t){
+                               $d = levenshtein($sstr, $t);
+                               # Older PHP versions return -1 when an argument is too
+                               # long; that is an error value, not a perfect match.
+                               if( $d >= 0 && $d < $tolerance_count )
+                                       $result[] = array($d, $t);
                         }
                 }
-               $wgOut->addHTML( wfMsg("nogomatch", 
-                 htmlspecialchars( wfLocalUrl( ucfirst($this->mUsertext), "action=edit") ) )
-                 . "\n<p>" );
-               $this->showResults();
+               usort($result, "SearchEngine_pcmp");
+               return $result;
+       }
+
+       # Return the titles in namespace $aNamespace whose byte length is $aLength.
+       #
+       # Lookup order: the per-object $this->all_titles table if it has been
+       # filled, then memcached (when the shared timestamp key is less than an
+       # hour old), and finally a scan of the whole cur table, which refills
+       # both memcached and $this->all_titles.
+       # Marked static, yet it reads and writes $this->all_titles.
+       /* static */ function getTitlesByLength($aLength, $aNamespace = 0){
+               global $wgMemc, $wgDBname;
+
+               // Already loaded during this request: answer from the in-object
+               // table so the costly SELECT is not repeated without memcached.
+               if( $this->all_titles ){
+                       return isset( $this->all_titles[$aLength][$aNamespace] )
+                               ? $this->all_titles[$aLength][$aNamespace]
+                               : array();
+               }
+
+               $keyPrefix = "$wgDBname:titlesbylength";
+               $stampKey = "$keyPrefix:createtime";
+               $builtAt = $wgMemc->get( $stampKey );
+               $cached = $wgMemc->get( "$keyPrefix:$aLength:$aNamespace" );
+
+               if( time() - $builtAt < 3600 ){
+                       // note: if memcached lacks the entry (e.g. insufficient
+                       // space) an empty list is returned rather than hitting the DB.
+                       if( is_array( $cached ) ){
+                               return $cached;
+                       }
+                       return array();
+               }
+
+               // The timestamp is written before the table scan starts.
+               $wgMemc->set( $stampKey, time() );
+
+               $buckets = array(); // length, ns, [titles]
+               $dbResult = wfQuery("SELECT cur_title, cur_namespace FROM cur", DB_READ);
+               while( $row = wfFetchObject( $dbResult ) ){
+                       $buckets[strlen( $row->cur_title )][$row->cur_namespace][] = $row->cur_title;
+               }
+               // One memcached entry per (length, namespace) pair, kept for a day.
+               foreach( $buckets as $len => $byNamespace ){
+                       foreach( $byNamespace as $nsId => $names ){
+                               $wgMemc->set( "$keyPrefix:$len:$nsId", $names, 3600 * 24 );
+                       }
+               }
+               $this->all_titles = $buckets;
+
+               return isset( $buckets[$aLength][$aNamespace] )
+                       ? $buckets[$aLength][$aNamespace]
+                       : array();
        }
 }
 
+# usort() comparator for array( distance, title ) pairs: orders by the first
+# element, smallest first. Returns <0, 0 or >0 like any usort() callback.
+/* private static */ function SearchEngine_pcmp($a, $b){
+       $delta = $a[0] - $b[0];
+       return $delta;
+}
+
 ?>