# Status values for search-query parsing. parseQuery3() returns
# MW_SEARCH_BAD_QUERY when no usable search term is left; showResults()
# treats a false-y parse result as a bad query.
define( "MW_SEARCH_OK", true );
define( "MW_SEARCH_BAD_QUERY", false );
# The query as typed (trimmed), the query after illegal characters were
# replaced by spaces, and the per-term regex fragments that showHit() uses
# to find and highlight matches.
/* private */ var $rawText, $filteredText, $searchTerms;

# SQL conditions against the title index and the text index; assigned by
# parseQuery3() / parseQuery4() and consumed by showResults().
/* private */ var $titleCond, $textCond;

# Google-style matching flag; the constructor switches it on and
# parseQuery4() consults it. Declared here so the property is not created
# dynamically on first assignment.
/* private */ var $strictMatching;

# Database handle; the constructor fills it from wfGetDB( DB_SLAVE ).
# Declared here for the same reason as $strictMatching.
/* private */ var $db;

# Consulted by searchRedirects(); powersearch() sets it to false when the
# form was submitted without the "redirs" box ticked.
var $doSearchRedirects = true;

# Extra key => value pairs that showResults() appends to the prev/next links.
var $addToQuery = array();

# Namespace indexes collected by powersearch(); queryNamespaces() turns them
# into an SQL IN (...) list.
var $namespacesToSearch = array();

# In-process cache used by getTitlesByLength(): false until the first full
# scan, afterwards an array indexed [length][namespace] => list of titles.
var $allTitles = false;
/**
 * Constructor: captures the query and prepares the per-search state.
 *
 * - rawText: the query, trimmed.
 * - filteredText: the query with every character that is not matched by the
 *   character class in $lc replaced by a space, then trimmed. $lc starts
 *   out as legalSearchChars() plus the two parentheses.
 * - searchTerms: reset to an empty array (the parseQuery methods fill it).
 * - strictMatching: switched on.
 * - db: handle obtained from wfGetDB( DB_SLAVE ).
 *
 * NOTE(review): only the statements visible here were checked. If $lc is
 * widened between its initialisation and the preg_replace(), the set of
 * characters that survive in filteredText is larger than described above.
 *
 * @param string $text Query text as submitted
 */
26 function SearchEngine( $text ) {
27 $this->rawText
= trim( $text );
29 # We display the query, so let's strip it for safety
32 $lc = SearchEngine
::legalSearchChars() . '()';
36 $this->filteredText
= trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
37 $this->searchTerms
= array();
38 $this->strictMatching
= true; # Google-style, add '+' on all terms
40 $this->db
=& wfGetDB( DB_SLAVE
);
44 * Return a partial WHERE clause to limit the search to the given namespaces
/**
 * Build the namespace restriction for the search SQL.
 *
 * Joins $this->namespacesToSearch with commas and returns
 * "AND cur_namespace IN (<list>)".
 *
 * NOTE(review): an empty list is special-cased by the == '' test, but the
 * statement inside that branch is not visible in this view. Confirm what
 * the empty case substitutes before relying on it.
 *
 * @return string Partial WHERE clause beginning with "AND"
 */
46 function queryNamespaces() {
47 $namespaces = implode( ',', $this->namespacesToSearch
);
48 if ($namespaces == '') {
51 return "AND cur_namespace IN (" . $namespaces . ')';
55 * Return a partial WHERE clause to include or exclude redirects from results
/**
 * Build the redirect restriction for the search SQL.
 *
 * Branches on $this->doSearchRedirects. 'AND cur_is_redirect=0 ' (note the
 * trailing space) is the clause that filters redirect pages out.
 *
 * NOTE(review): the value returned on the other path is not visible here.
 * Presumably the filter is applied when doSearchRedirects is false and
 * nothing is added otherwise; confirm.
 *
 * @return string Partial WHERE clause
 */
57 function searchRedirects() {
58 if ( $this->doSearchRedirects
) {
61 return 'AND cur_is_redirect=0 ';
/**
 * Decide the initial state of the power-search checkbox for one namespace.
 *
 * Logged-in users (non-zero $wgUser->getID()) get their stored
 * 'searchNs<N>' option; everybody else gets the site-wide default taken
 * from $wgNamespacesToBeSearchedDefault.
 *
 * @param int $i Namespace index
 * @return mixed The stored user option for logged-in users, otherwise a
 *               boolean telling whether the namespace is searched by default
 */
function initNamespaceCheckbox( $i ) {
	global $wgUser, $wgNamespacesToBeSearchedDefault;

	if ( $wgUser->getID() ) {
		// User is logged in so we retrieve his default namespaces
		return $wgUser->getOption( 'searchNs' . $i );
	}

	// User is not logged in so we give him the global default namespaces.
	// empty() also covers a namespace that has no entry in the defaults.
	return !empty( $wgNamespacesToBeSearchedDefault[ $i ] );
}
80 * Display the "power search" footer. Does not actually perform the search,
81 * that is done by showResults()
/**
 * Build the "power search" form and record the submitted options.
 *
 * Starts from the 'powersearchtext' message and substitutes its
 * placeholders:
 *   $1  one checkbox per namespace (input name "ns<N>")
 *   $2  the "redirs" (list redirects) checkbox
 *   $3  the text input, pre-filled with the HTML-escaped raw query
 *   $9  the submit button, named "searchx"
 * The result is wrapped in a GET form whose action comes from the skin's
 * escapeSearchLink().
 *
 * Side effects visible in this block:
 *  - addToQuery receives 'searchx', 'ns<N>' for every ticked namespace and
 *    'redirs' when that box is ticked;
 *  - every ticked namespace index is pushed onto namespacesToSearch;
 *  - doSearchRedirects is set to false when the form was submitted
 *    ($searchx is set) without 'redirs'.
 *
 * NOTE(review): several statements of this method are not visible in this
 * view (among them the assignment of $formVar, the reset of $checked, the
 * separator added when $tempText is non-empty, and the return). The caller
 * showResults() outputs the result with addHTML(), so the method presumably
 * returns the form markup in $ret; confirm.
 */
83 function powersearch() {
84 global $wgUser, $wgOut, $wgLang, $wgTitle, $wgRequest;
85 $sk =& $wgUser->getSkin();
87 $search = $this->rawText
;
88 $searchx = $wgRequest->getVal( 'searchx' );
89 $listredirs = $wgRequest->getVal( 'redirs' );
91 $ret = wfMsg('powersearchtext'); # Text to be returned
92 $tempText = ''; # Temporary text, for substitution into $ret
# Remember that the power-search form was used so paging links keep it.
94 if( isset( $_REQUEST['searchx'] ) ) {
95 $this->addToQuery
['searchx'] = '1';
98 # Do namespace checkboxes
99 $namespaces = $wgLang->getNamespaces();
100 foreach ( $namespaces as $i => $namespace ) {
101 # Skip virtual namespaces
108 # Initialise checkboxValues, either from defaults or from
109 # a previous invocation
110 if ( !isset( $searchx ) ) {
111 $checkboxValue = $this->initNamespaceCheckbox( $i );
113 $checkboxValue = $wgRequest->getVal( $formVar );
# A ticked box is both echoed back in the form and added to the search scope.
117 if ( $checkboxValue == 1 ) {
118 $checked = ' checked="checked"';
119 $this->addToQuery
['ns'.$i] = 1;
120 array_push( $this->namespacesToSearch
, $i );
122 $name = str_replace( '_', ' ', $namespaces[$i] );
124 $name = wfMsg( 'blanknamespace' );
127 if ( $tempText !== '' ) {
130 $tempText .= "<input type='checkbox' value=\"1\" name=\"" .
131 "ns{$i}\"{$checked} />{$name}\n";
133 $ret = str_replace ( '$1', $tempText, $ret );
135 # List redirects checkbox
138 if ( $listredirs == 1 ) {
139 $this->addToQuery
['redirs'] = 1;
140 $checked = ' checked="checked"';
142 $tempText = "<input type='checkbox' value='1' name=\"redirs\"{$checked} />\n";
143 $ret = str_replace( '$2', $tempText, $ret );
147 $tempText = "<input type='text' name=\"search\" value=\"" .
148 htmlspecialchars( $search ) ."\" width=\"80\" />\n";
149 $ret = str_replace( "$3", $tempText, $ret );
153 $tempText = '<input type="submit" name="searchx" value="' .
154 wfMsg('powersearch') . "\" />\n";
155 $ret = str_replace( '$9', $tempText, $ret );
157 $action = $sk->escapeSearchLink();
158 $ret = "<br /><br />\n<form id=\"powersearch\" method=\"get\" " .
159 "action=\"$action\">\n{$ret}\n</form>\n";
# Only an explicit power-search submission can switch redirect results off.
161 if ( isset ( $searchx ) ) {
162 if ( ! $listredirs ) {
163 $this->doSearchRedirects
= false;
/**
 * Prepare the output page for a search-results view.
 *
 * Sets the page title ('searchresults' message) and the subtitle
 * ('searchquery' message with the HTML-escaped raw query), marks the page
 * as not article-related and asks robots not to index or follow it.
 */
function setupPage() {
	// $wgOut is the global output page object; without this declaration the
	// calls below would be made on an undefined local variable.
	global $wgOut;

	$wgOut->setPageTitle( wfMsg( 'searchresults' ) );
	$wgOut->setSubtitle( wfMsg( 'searchquery', htmlspecialchars( $this->rawText ) ) );
	$wgOut->setArticleRelated( false );
	$wgOut->setRobotpolicy( 'noindex,nofollow' );
}
178 * Perform the search and construct the results page
/**
 * Run the search and write the results page to $wgOut.
 *
 * Visible sequence:
 *  1. powersearch() is called first, because it fills addToQuery,
 *     namespacesToSearch and doSearchRedirects; its result is output last.
 *  2. The 'searchresulttext' intro is added.
 *  3. parseQuery() is run; a false-y result leads to the 'badquery' /
 *     'badquerytext' messages.
 *  4. Paging values come from wfCheckLimits( 20, 'searchlimit' ).
 *  5. If $wgDisableTextSearch is set, 'searchdisabled' and the
 *     'googlesearch' form are shown instead of local results.
 *  6. Otherwise title and text matches are fetched with getMatches(), a
 *     "showing results" line and prev/next links are added, both lists are
 *     rendered with showMatches(), and 'nonefound' is available for the
 *     no-hit case.
 *  7. The prev/next links are repeated and the power-search form is added.
 *
 * NOTE(review): early returns and the else branches between these steps are
 * not visible in this view, so the exact control flow should be confirmed.
 */
180 function showResults() {
181 global $wgUser, $wgTitle, $wgOut, $wgLang;
182 global $wgDisableTextSearch, $wgInputEncoding;
183 $fname = 'SearchEngine::showResults';
185 $search = $this->rawText
;
187 $powersearch = $this->powersearch(); /* Need side-effects here? */
191 $sk = $wgUser->getSkin();
192 $wgOut->addWikiText( wfMsg( 'searchresulttext' ) );
194 if ( !$this->parseQuery() ) {
196 '==' . wfMsg( 'badquery' ) . "==\n" .
197 wfMsg( 'badquerytext' ) );
200 list( $limit, $offset ) = wfCheckLimits( 20, 'searchlimit' );
202 if ( $wgDisableTextSearch ) {
203 $wgOut->addHTML( wfMsg( 'searchdisabled' ) );
204 $wgOut->addHTML( wfMsg( 'googlesearch',
205 htmlspecialchars( $this->rawText
),
206 htmlspecialchars( $wgInputEncoding ) ) );
# Both queries share the same limit/offset, so a page can hold up to
# 2 * $limit hits in total.
210 $titleMatches = $this->getMatches( $this->titleCond
, $limit, $offset );
211 $textMatches = $this->getMatches( $this->textCond
, $limit, $offset );
213 $sk = $wgUser->getSkin();
215 $num = count( $titleMatches ) +
count( $textMatches );
216 if ( $num >= $limit ) {
217 $top = wfShowingResults( $offset, $limit );
219 $top = wfShowingResultsNum( $offset, $limit, $num );
221 $wgOut->addHTML( "<p>{$top}</p>\n" );
# Carry the power-search options over to the prev/next links.
# NOTE(review): $a2l is appended to below; its initialisation is not visible
# in this view - confirm it starts as an empty string.
225 $akk = array_keys( $this->addToQuery
);
226 foreach ( $akk AS $ak ) {
227 $a2l .= "&{$ak}={$this->addToQuery[$ak]}" ;
230 $prevnext = wfViewPrevNext( $offset, $limit, '',
231 'search=' . wfUrlencode( $this->filteredText
) . $a2l );
232 $wgOut->addHTML( "<br />{$prevnext}\n" );
# NOTE(review): '||' short-circuits. If showMatches() returns true for a
# non-empty list, the second call is skipped whenever title matches exist and
# the text-match list is never rendered. Confirm whether both lists are meant
# to be shown; if so the two calls must be evaluated unconditionally.
234 $foundsome = $this->showMatches( $titleMatches, $offset, 'notitlematches', 'titlematches' )
235 ||
$this->showMatches( $textMatches, $offset, 'notextmatches', 'textmatches' );
238 $wgOut->addWikiText( wfMsg( 'nonefound' ) );
240 $wgOut->addHTML( "<p>{$prevnext}</p>\n" );
241 $wgOut->addHTML( $powersearch );
/**
 * Characters that may appear in a search term, written as the inside of a
 * PCRE character class (no surrounding brackets).
 *
 * Covers ASCII letters, underscore, apostrophe, digits, the bytes
 * 0x80-0xFF and the hyphen. Callers append further characters (for example
 * '()') and embed the result in "[...]" or "[^...]".
 *
 * @return string Character-class body, e.g. usable as "/[^{$lc}]/"
 */
function legalSearchChars() {
	// The backslashes are doubled so the regex engine, not PHP, interprets
	// \x80-\xFF and the escaped hyphen.
	$lc = "A-Za-z_'0-9\\x80-\\xFF\\-";
	return $lc;
}
/**
 * Parse the filtered query into SQL conditions by delegating to one of the
 * two parsers: parseQuery4() (boolean full-text search) or parseQuery3()
 * (fallback that chains several MATCH clauses).
 *
 * NOTE(review): the condition that selects between the two is not visible
 * in this view; per the inline comments it depends on whether the cleaner
 * boolean search is available.
 *
 * @return mixed Whatever the chosen parser returns; showResults() treats a
 *               false-y value as a bad query
 */
249 function parseQuery() {
252 # Use cleaner boolean search if available
253 return $this->parseQuery4();
255 # Fall back to ugly hack with multiple search clauses
256 return $this->parseQuery3();
/**
 * Fallback query parser: one MATCH ... AGAINST clause per search word.
 *
 * Parentheses in filteredText are padded with spaces, whitespace runs are
 * collapsed, and the text is split on single spaces. Each word goes through
 * $wgLang->stripForSearch() and is then handled as follows:
 *  - 'and', 'or', 'not', '(' and ')' are appended to the condition in upper
 *    case;
 *  - words shorter than $wgDBminWordLen and words found in
 *    FulltextStoplist get their own branches (bodies not visible here);
 *  - any other word adds " (MATCH (##field##) AGAINST ('<word>'))", with
 *    the word passed through the database's strencode(), preceded by
 *    ' AND' when $last is non-empty, and is pushed onto searchTerms as
 *    \b<word>\b for later highlighting.
 *
 * With no search terms collected the method returns MW_SEARCH_BAD_QUERY.
 * Otherwise titleCond and textCond are built by replacing ##field## with
 * si_title and si_text; textCond additionally excludes redirects.
 *
 * NOTE(review): the initialisation of $cond and $last, the maintenance of
 * $last and the success return are not visible in this view. $lc is
 * computed but not used by any visible statement.
 */
260 function parseQuery3() {
261 global $wgDBminWordLen, $wgLang;
263 # on non mysql4 database: get list of words we don't want to search for
264 require_once( 'FulltextStoplist.php' );
266 $lc = SearchEngine
::legalSearchChars() . '()';
267 $q = preg_replace( "/([()])/", " \\1 ", $this->filteredText
);
268 $q = preg_replace( "/\\s+/", " ", $q );
269 $w = explode( ' ', trim( $q ) );
272 foreach ( $w as $word ) {
273 $word = $wgLang->stripForSearch( $word );
274 if ( 'and' == $word ||
'or' == $word ||
'not' == $word
275 ||
'(' == $word ||
')' == $word ) {
276 $cond .= ' ' . strtoupper( $word );
278 } else if ( strlen( $word ) < $wgDBminWordLen ) {
280 } else if ( FulltextStoplist
::inList( $word ) ) {
283 if ( '' != $last ) { $cond .= ' AND'; }
284 $cond .= " (MATCH (##field##) AGAINST ('" .
285 $this->db
->strencode( $word ). "'))";
287 array_push( $this->searchTerms
, "\\b" . $word . "\\b" );
290 if ( 0 == count( $this->searchTerms
) ) {
291 return MW_SEARCH_BAD_QUERY
;
294 $this->titleCond
= '(' . str_replace( '##field##',
295 'si_title', $cond ) . ' )';
297 $this->textCond
= '(' . str_replace( '##field##',
298 'si_text', $cond ) . ' AND (cur_is_redirect=0) )';
/**
 * Boolean-mode query parser.
 *
 * Scans filteredText for terms of the form
 *   [modifier] word[*]    or    [modifier] "quoted phrase"
 * where the modifier is one of - + < > ~ and a word consists of
 * legalSearchChars(). Every term is appended to $searchon (modifier plus
 * the stripForSearch()'d term, space-separated) and contributes one regex
 * fragment to searchTerms:
 *  - word terms: preg_quote() of the word, plus "[0-9A-Za-z_]+" when the
 *    word carried a trailing '*';
 *  - phrase terms: preg_quote() of the phrase with its double quotes
 *    removed.
 * Finally $searchon is passed through the database's strencode() and used
 * in titleCond / textCond as AGAINST('...' IN BOOLEAN MODE); textCond also
 * excludes redirects.
 *
 * NOTE(review): not visible in this view are the declarations of $wgLang
 * and $searchon, the body of the strictMatching branch (per the
 * constructor's comment it adds '+' to bare terms), the test that separates
 * word terms from phrase terms, and the return statements.
 * NOTE(review): preg_quote() is called without a delimiter argument while
 * showHit() wraps the fragments in /.../; confirm a '/' can never reach
 * filteredText.
 */
303 function parseQuery4() {
305 $lc = SearchEngine
::legalSearchChars();
307 $this->searchTerms
= array();
309 # FIXME: This doesn't handle parenthetical expressions.
310 if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
311 $this->filteredText
, $m, PREG_SET_ORDER
) ) {
# Per match: [1] modifier, [2] whole term, [3] bare word, [4] '*' wildcard.
312 foreach( $m as $terms ) {
313 if( $searchon !== '' ) $searchon .= ' ';
314 if( $this->strictMatching
&& ($terms[1] == '') ) {
317 $searchon .= $terms[1] . $wgLang->stripForSearch( $terms[2] );
319 $regexp = preg_quote( $terms[3] );
320 if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
322 $regexp = preg_quote( str_replace( '"', '', $terms[2] ) );
324 $this->searchTerms
[] = $regexp;
326 wfDebug( "Would search with '$searchon'\n" );
327 wfDebug( "Match with /\b" . implode( '\b|\b', $this->searchTerms
) . "\b/\n" );
329 wfDebug( "Can't understand search query '{$this->filteredText}'\n" );
332 $searchon = $this->db
->strencode( $searchon );
333 $this->titleCond
= " MATCH(si_title) AGAINST('$searchon' IN BOOLEAN MODE)";
334 $this->textCond
= " (MATCH(si_text) AGAINST('$searchon' IN BOOLEAN MODE) AND cur_is_redirect=0)";
/**
 * Run one search query and fetch its rows.
 *
 * Selects cur_id, cur_namespace, cur_title and cur_text from the 'cur' and
 * 'searchindex' tables joined on cur_id = si_page, restricted by $cond, by
 * the namespace clause from queryNamespaces() and by the redirect clause
 * from searchRedirects(), followed by the database's limitResult( $limit,
 * $offset ). The rows are read with fetchObject() and the result set is
 * freed afterwards.
 *
 * NOTE(review): the loop body and the return statement are not visible in
 * this view. The method is declared to return by reference, and
 * showResults() applies count() to the result and hands it to
 * showMatches(), so it presumably returns an array of row objects; confirm.
 *
 * @param string $cond   SQL condition (titleCond or textCond)
 * @param int    $limit  Maximum number of rows
 * @param int    $offset Number of rows to skip
 */
338 function &getMatches( $cond, $limit, $offset = 0 ) {
339 $searchindex = $this->db
->tableName( 'searchindex' );
340 $cur = $this->db
->tableName( 'cur' );
341 $searchnamespaces = $this->queryNamespaces();
342 $redircond = $this->searchRedirects();
344 $sql = "SELECT cur_id,cur_namespace,cur_title," .
345 "cur_text FROM $cur,$searchindex " .
346 "WHERE cur_id=si_page AND {$cond} " .
347 "{$searchnamespaces} {$redircond} " .
348 $this->db
->limitResult( $limit, $offset );
350 $res = $this->db
->query( $sql, 'SearchEngine::getMatches' );
352 while ( $row = $this->db
->fetchObject( $res ) ) {
355 $this->db
->freeResult( $res );
/**
 * Output one list of hits (title matches or text matches).
 *
 * With no matches a heading built from the $msgEmpty message is written.
 * Otherwise a heading from $msgFound opens an ordered list that starts at
 * $off, every row is rendered through showHit(), and the list is closed.
 *
 * NOTE(review): not visible in this view are the global declaration for
 * $wgOut, the end of the empty-case addHTML() call, the assignment of $off
 * (presumably derived from $offset so numbering continues across pages),
 * and the return statements. showResults() combines two calls with '||'
 * into $foundsome, so the method presumably returns whether anything was
 * listed; confirm.
 *
 * @param array  $matches  Rows as produced by getMatches() (by reference)
 * @param int    $offset   Paging offset
 * @param string $msgEmpty Message key for the "nothing found" heading
 * @param string $msgFound Message key for the "matches" heading
 */
360 function showMatches( &$matches, $offset, $msgEmpty, $msgFound ) {
362 if ( 0 == count( $matches ) ) {
363 $wgOut->addHTML( "<h2>" . wfMsg( $msgEmpty ) .
368 $wgOut->addHTML( "<h2>" . wfMsg( $msgFound ) .
369 "</h2>\n<ol start='{$off}'>" );
371 foreach( $matches as $row ) {
372 $this->showHit( $row );
374 $wgOut->addHTML( "</ol>\n" );
/**
 * Output a single search hit as a list item.
 *
 * Writes a link to the page followed by its size (byte length of cur_text
 * through the 'nbytes' message), then goes through the page text line by
 * line. For a line that matches one of the searchTerms patterns, the text
 * before and after the match is shortened with $wgLang->truncate() to the
 * user's 'contextchars' option (50 when unset), the excerpt is HTML-escaped,
 * every term occurrence is wrapped in <span class='searchmatch'>, and the
 * excerpt is written with its line number. The user's 'contextlines' option
 * (5 when unset) is checked against 0 at the top of the loop.
 *
 * A row whose title cannot be built only produces an HTML comment.
 *
 * NOTE(review): not visible in this view are the handling after the broken
 * title comment, the assignments of $lineno and $found, and the bodies of
 * the '0 == $contextlines', no-match and 'count( $m ) < 3' branches.
 * Presumably $contextlines is decremented per excerpt and the loop stops at
 * 0; confirm.
 * NOTE(review): highlighting runs on the already HTML-escaped excerpt, so a
 * term that equals part of an entity name (for example "amp") would be
 * wrapped inside the entity; confirm whether that matters here.
 *
 * @param object $row Result row with cur_namespace, cur_title and cur_text
 */
379 function showHit( $row ) {
380 global $wgUser, $wgOut, $wgLang;
382 $t = Title
::makeName( $row->cur_namespace
, $row->cur_title
);
383 if( is_null( $t ) ) {
384 $wgOut->addHTML( "<!-- Broken link in search result -->\n" );
387 $sk = $wgUser->getSkin();
389 $contextlines = $wgUser->getOption( 'contextlines' );
390 if ( '' == $contextlines ) { $contextlines = 5; }
391 $contextchars = $wgUser->getOption( 'contextchars' );
392 if ( '' == $contextchars ) { $contextchars = 50; }
394 $link = $sk->makeKnownLink( $t, '' );
395 $size = wfMsg( 'nbytes', strlen( $row->cur_text
) );
396 $wgOut->addHTML( "<li>{$link} ({$size})" );
398 $lines = explode( "\n", $row->cur_text
);
# $pat1 splits a line into text before the match, the match, and the rest.
399 $pat1 = "/(.*)(" . implode( "|", $this->searchTerms
) . ")(.*)/i";
402 foreach ( $lines as $line ) {
403 if ( 0 == $contextlines ) {
408 if ( ! preg_match( $pat1, $line, $m ) ) {
# A negative length asks truncate() to shorten from the other end.
412 $pre = $wgLang->truncate( $m[1], -$contextchars, '...' );
414 if ( count( $m ) < 3 ) {
417 $post = $wgLang->truncate( $m[3], $contextchars, '...' );
422 $line = htmlspecialchars( $pre . $found . $post );
423 $pat2 = '/(' . implode( '|', $this->searchTerms
) . ")/i";
424 $line = preg_replace( $pat2,
425 "<span class='searchmatch'>\\1</span>", $line );
427 $wgOut->addHTML( "<br /><small>{$lineno}: {$line}</small>\n" );
429 $wgOut->addHTML( "</li>\n" );
/**
 * Look for a page whose title equals the query or differs only in case.
 *
 * Candidates are tried in this order:
 *  1. the query as typed - accepted when it is in the special namespace or
 *     has a non-zero article id;
 *  2. the query in lower case;
 *  3. the lower-cased query with every word capitalised (ucwords);
 *  4. the query in upper case;
 *  5. a dotted-quad IP address, which maps to the special page
 *     "Contributions/<ip>".
 *
 * NOTE(review): the return statements are not visible in this view.
 * goResult() tests the result with is_null() and then calls getFullURL() on
 * it, so the method presumably returns the matching Title or null; confirm.
 * NOTE(review): each Title::newFromText() result is dereferenced without a
 * null check. goResult() notes that a string may not be usable as a title,
 * so confirm the case-changed variants can never fail when the original
 * query succeeded.
 */
432 function getNearMatch() {
433 # Exact match? No need to look further.
434 $title = Title
::newFromText( $this->rawText
);
435 if ( $title->getNamespace() == NS_SPECIAL ||
0 != $title->getArticleID() ) {
439 # Now try all lower case (i.e. first letter capitalized)
441 $title = Title
::newFromText( strtolower( $this->rawText
) );
442 if ( 0 != $title->getArticleID() ) {
446 # Now try capitalized string
448 $title = Title
::newFromText( ucwords( strtolower( $this->rawText
) ) );
449 if ( 0 != $title->getArticleID() ) {
453 # Now try all upper case
455 $title = Title
::newFromText( strtoupper( $this->rawText
) );
456 if ( 0 != $title->getArticleID() ) {
460 # Entering an IP address goes to the contributions page
461 if ( preg_match( '/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $this->rawText
) ) {
462 $title = Title
::makeTitle( NS_SPECIAL
, "Contributions/" . $this->rawText
);
/**
 * Handle the "Go" action: jump to a page when possible, otherwise search.
 *
 * Visible sequence:
 *  1. Build a title from the raw query; when that is impossible the normal
 *     results page is shown.
 *  2. Ask getNearMatch(); a non-null result triggers a redirect to its full
 *     URL.
 *  3. Without a match, rebuild the title and either redirect straight to
 *     its edit page ("if the feature is enabled"; $wgGoToEdit is declared
 *     global for this) or show the 'nogomatch' message with an escaped
 *     edit URL.
 *  4. Unless $wgDisableFuzzySearch is set, run doFuzzyTitleSearch() for the
 *     main, project, user, image and MediaWiki namespaces, OR-ing the
 *     results into $anyhit.
 *  5. Finish with showResults().
 *
 * NOTE(review): the conditions and early returns that separate these steps
 * are not visible in this view; confirm the flow before changing it.
 * NOTE(review): doFuzzyTitleSearch() is invoked with static call syntax
 * although its callee chain ends in getTitlesByLength(), which uses $this;
 * this relies on the engine passing the calling object through - confirm
 * for the PHP version in use.
 */
469 function goResult() {
470 global $wgOut, $wgGoToEdit;
471 global $wgDisableTextSearch;
472 $fname = 'SearchEngine::goResult';
474 # Try to go to page as entered.
476 $t = Title
::newFromText( $this->rawText
);
478 # If the string cannot be used to create a title
480 $this->showResults();
484 # If there's an exact or very near match, jump right there.
485 $t = $this->getNearMatch();
486 if( !is_null( $t ) ) {
487 $wgOut->redirect( $t->getFullURL() );
491 # No match, generate an edit URL
492 $t = Title
::newFromText( $this->rawText
);
494 # If the feature is enabled, go straight to the edit page
496 $wgOut->redirect( $t->getFullURL( 'action=edit' ) );
501 $editurl = $t->escapeLocalURL( 'action=edit' );
505 $wgOut->addHTML( '<p>' . wfMsg('nogomatch', $editurl ) . "</p>\n" );
507 # Try a fuzzy title search
509 global $wgDisableFuzzySearch;
510 if(! $wgDisableFuzzySearch ){
511 foreach( array(NS_MAIN
, NS_PROJECT
, NS_USER
, NS_IMAGE
, NS_MEDIAWIKI
) as $namespace){
512 $anyhit |
= SearchEngine
::doFuzzyTitleSearch( $this->rawText
, $namespace );
517 return $this->showResults();
/**
 * List titles in one namespace that are spelled similarly to the query.
 *
 * The query gets an upper-case first letter and underscores instead of
 * spaces, then fuzzyTitles() supplies candidates, of which the first ten
 * are kept. Each candidate becomes a wikitext bullet with a link, a
 * closeness percentage ((length - distance) / length of the query) and up
 * to five stars. For namespaces other than the main one the list is
 * prefixed with a "=== <namespace> ===" heading. The wikitext is written
 * with $wgOut->addWikiText().
 *
 * NOTE(review): not visible in this view are the initialisation of
 * $wikitext, the assignment of $distance (fuzzyTitles() stores the distance
 * at index 0 of each entry), the statement guarded by the first NS_MAIN
 * test, any guard around the output, and the return. goResult() ORs the
 * result into $anyhit, so a truthy/falsy "found something" value is
 * presumably returned; confirm.
 * NOTE(review): closeness divides by strlen( $search ); confirm callers
 * never pass an empty string. $slen is computed but the visible code calls
 * strlen( $search ) again instead of using it.
 *
 * @param string $search    Query text
 * @param int    $namespace Namespace index to search
 */
524 function doFuzzyTitleSearch( $search, $namespace ){
525 global $wgLang, $wgOut;
529 $sstr = ucfirst($search);
530 $sstr = str_replace(' ', '_', $sstr);
531 $fuzzymatches = SearchEngine
::fuzzyTitles( $sstr, $namespace );
532 $fuzzymatches = array_slice($fuzzymatches, 0, 10);
533 $slen = strlen( $search );
535 foreach($fuzzymatches as $res){
536 $t = str_replace('_', ' ', $res[1]);
537 $tfull = $wgLang->getNsText( $namespace ) . ":$t|$t";
538 if( $namespace == NS_MAIN
)
541 $closeness = (strlen( $search ) - $distance) / strlen( $search );
542 $percent = intval( $closeness * 100 ) . '%';
543 $stars = str_repeat('*', ceil(5 * $closeness) );
544 $wikitext .= "* [[$tfull]] $percent ($stars)\n";
547 if( $namespace != NS_MAIN
)
548 $wikitext = '=== ' . $wgLang->getNsText( $namespace ) . " ===\n" . $wikitext;
549 $wgOut->addWikiText( $wikitext );
/**
 * Collect titles whose Levenshtein distance to $sstr is small.
 *
 * Only titles whose byte length lies within $span (10%) of the length of
 * $sstr are considered; they are fetched length by length through
 * getTitlesByLength(), both above and below the query length. A title is
 * kept when its distance is strictly below ceil( 0.35 * length ), and is
 * stored as array( distance, title ). The collected entries are sorted with
 * SearchEngine_pcmp, which compares index 0, i.e. ascending distance.
 *
 * NOTE(review): the initialisation of $result, the condition (if any)
 * around the "$slen - $i" lookup and the return are not visible in this
 * view. If the second lookup also runs for $i = 0, titles of exactly the
 * query length are examined twice; confirm.
 *
 * @param string $sstr      Query in title form (underscores, capitalised)
 * @param int    $namespace Namespace index, main namespace by default
 */
558 function fuzzyTitles( $sstr, $namespace = NS_MAIN
){
559 $span = 0.10; // weed on title length before doing levenshtein.
560 $tolerance = 0.35; // allowed percentage of erroneous characters
561 $slen = strlen($sstr);
562 $tolerance_count = ceil($tolerance * $slen);
563 $spanabs = ceil($slen * (1 +
$span)) - $slen;
564 # print "Word: $sstr, len = $slen, range = [$min, $max], tolerance_count = $tolerance_count<BR>\n";
567 for( $i=0; $i <= $spanabs; $i++
){
568 $titles = SearchEngine
::getTitlesByLength( $slen +
$i, $namespace );
570 $titles = array_merge($titles, SearchEngine
::getTitlesByLength( $slen - $i, $namespace ) );
572 foreach($titles as $t){
573 $d = levenshtein($sstr, $t);
574 if($d < $tolerance_count)
575 $result[] = array($d, $t);
579 usort($result, 'SearchEngine_pcmp');
/**
 * Return the titles that have a given byte length in a given namespace.
 *
 * Lookup order:
 *  1. $this->allTitles, the in-process copy of the last full scan;
 *  2. memcached: when the shared "createtime" stamp is less than 3600
 *     seconds old, the cached list for this length/namespace is returned,
 *     or an empty array when the entry is missing;
 *  3. a full scan: the stamp is refreshed, cur_title and cur_namespace are
 *     read for every row of 'cur', the titles are grouped as
 *     [length][namespace], every group is stored in memcached for 24 hours,
 *     the grouping is kept in $this->allTitles, and the requested group is
 *     returned when it exists.
 *
 * NOTE(review): the fall-through after the in-process lookup and the final
 * return for a missing group are not visible in this view; callers feed the
 * result to array_merge() and foreach, so an array is presumably returned
 * in every case; confirm.
 * NOTE(review): the method uses $this->allTitles and $this->db, yet
 * fuzzyTitles() reaches it with static call syntax; confirm the calling
 * object is available in that situation.
 * NOTE(review): the $fname label reads 'SearchEngin::...' (missing "e"),
 * which is what will appear wherever the database layer reports it.
 *
 * @param int $aLength    Title length in bytes
 * @param int $aNamespace Namespace index
 */
586 function getTitlesByLength($aLength, $aNamespace = 0){
587 global $wgMemc, $wgDBname;
588 $fname = 'SearchEngin::getTitlesByLength';
590 // to avoid multiple costly SELECTs in case of no memcached
591 if( $this->allTitles
){
592 if( isset( $this->allTitles
[$aLength][$aNamespace] ) ){
593 return $this->allTitles
[$aLength][$aNamespace];
599 $mkey = "$wgDBname:titlesbylength:$aLength:$aNamespace";
600 $mkeyts = "$wgDBname:titlesbylength:createtime";
601 $ts = $wgMemc->get( $mkeyts );
602 $result = $wgMemc->get( $mkey );
604 if( time() - $ts < 3600 ){
605 // note: in case of insufficient memcached space, we return
606 // an empty list instead of starting to hit the DB.
607 return is_array( $result ) ?
$result : array();
# Stamp first, then rebuild every length/namespace bucket from the database.
610 $wgMemc->set( $mkeyts, time() );
612 $res = $this->db
->select( 'cur', array( 'cur_title', 'cur_namespace' ), false, $fname );
613 $titles = array(); // length, ns, [titles]
614 while( $obj = $this->db
->fetchObject( $res ) ){
615 $title = $obj->cur_title
;
616 $ns = $obj->cur_namespace
;
617 $len = strlen( $title );
618 $titles[$len][$ns][] = $title;
620 foreach($titles as $length => $length_arr){
621 foreach($length_arr as $ns => $title_arr){
622 $mkey = "$wgDBname:titlesbylength:$length:$ns";
623 $wgMemc->set( $mkey, $title_arr, 3600 * 24 );
626 $this->allTitles
= $titles;
627 if( isset( $titles[$aLength][$aNamespace] ) )
628 return $titles[$aLength][$aNamespace];
/**
 * usort() callback: order two entries by the number stored at index 0.
 *
 * fuzzyTitles() uses it on array( distance, title ) pairs, so the closest
 * titles come first.
 *
 * @param array $a First entry
 * @param array $b Second entry
 * @return int Negative, zero or positive (difference of the two keys)
 */
function SearchEngine_pcmp( $a, $b ) {
	$leftKey = $a[0];
	$rightKey = $b[0];

	return $leftKey - $rightKey;
}