Silently drop unknown titles in completion search
authorErik Bernhardson <ebernhardson@wikimedia.org>
Thu, 14 Dec 2017 01:03:20 +0000 (17:03 -0800)
committerErik Bernhardson <ebernhardson@wikimedia.org>
Mon, 11 Jun 2018 20:56:19 +0000 (13:56 -0700)
This mimics how full text works by silenty dropping results returned from
search that no longer exist. This could be because the search index is slightly
out of sync with reality, or the search engine could simply be broken.

Only silent from the users perspective. We maintain a count in statsd of
the number of titles dropped. This can be monitored over time to
recognize any increases.

Bug: T115756
Change-Id: I2f29d73e258cd448a14d35a2b4902a4fb6f61c68

includes/search/SearchEngine.php
includes/search/SearchSuggestionSet.php
tests/phpunit/includes/api/ApiQueryPrefixSearchTest.php
tests/phpunit/includes/search/SearchEnginePrefixTest.php
tests/phpunit/includes/search/SearchEngineTest.php
tests/phpunit/mocks/search/MockCompletionSearchEngine.php
tests/phpunit/mocks/search/MockSearchEngine.php

index 4217ab0..b2bacda 100644 (file)
@@ -675,13 +675,20 @@ abstract class SearchEngine {
 
                $search = trim( $search );
                // preload the titles with LinkBatch
-               $titles = $suggestions->map( function ( SearchSuggestion $sugg ) {
+               $lb = new LinkBatch( $suggestions->map( function ( SearchSuggestion $sugg ) {
                        return $sugg->getSuggestedTitle();
-               } );
-               $lb = new LinkBatch( $titles );
+               } ) );
                $lb->setCaller( __METHOD__ );
                $lb->execute();
 
+               $diff = $suggestions->filter( function ( SearchSuggestion $sugg ) {
+                       return $sugg->getSuggestedTitle()->isKnown();
+               } );
+               if ( $diff > 0 ) {
+                       MediaWikiServices::getInstance()->getStatsdDataFactory()
+                               ->updateCount( 'search.completion.missing', $diff );
+               }
+
                $results = $suggestions->map( function ( SearchSuggestion $sugg ) {
                        return $sugg->getSuggestedTitle()->getPrefixedText();
                } );
index ab38420..cb1f831 100644 (file)
@@ -87,6 +87,18 @@ class SearchSuggestionSet {
                return array_map( $callback, $this->suggestions );
        }
 
+       /**
+        * Filter the suggestions array
+        * @param callback $callback Callable accepting single SearchSuggestion
+        *  instance returning bool false to remove the item.
+        * @return int The number of suggestions removed
+        */
+       public function filter( $callback ) {
+               $before = count( $this->suggestions );
+               $this->suggestions = array_values( array_filter( $this->suggestions, $callback ) );
+               return $before - count( $this->suggestions );
+       }
+
        /**
         * Add a new suggestion at the end.
         * If the score of the new suggestion is greater than the worst one,
index ae5924d..749f154 100644 (file)
@@ -7,6 +7,23 @@
  * @covers ApiQueryPrefixSearch
  */
 class ApiQueryPrefixSearchTest extends ApiTestCase {
+       const TEST_QUERY = 'unittest';
+
+       public function setUp() {
+               parent::setUp();
+               $this->setMwGlobals( [
+                       'wgSearchType' => MockCompletionSearchEngine::class,
+               ] );
+               MockCompletionSearchEngine::clearMockResults();
+               $results = [];
+               foreach ( range( 0, 10 ) as $i ) {
+                       $title = "Search_Result_$i";
+                       $results[] = $title;
+                       $this->editPage( $title, 'hi there' );
+               }
+               MockCompletionSearchEngine::addMockResults( self::TEST_QUERY, $results );
+       }
+
        public function offsetContinueProvider() {
                return [
                        'no offset' => [ 2, 2, 0, 2 ],
@@ -20,11 +37,10 @@ class ApiQueryPrefixSearchTest extends ApiTestCase {
         * @dataProvider offsetContinueProvider
         */
        public function testOffsetContinue( $expectedOffset, $expectedResults, $offset, $limit ) {
-               $this->registerMockSearchEngine();
                $response = $this->doApiRequest( [
                        'action' => 'query',
                        'list' => 'prefixsearch',
-                       'pssearch' => 'example query terms',
+                       'pssearch' => self::TEST_QUERY,
                        'psoffset' => $offset,
                        'pslimit' => $limit,
                ] );
@@ -39,10 +55,4 @@ class ApiQueryPrefixSearchTest extends ApiTestCase {
                        $this->assertEquals( $expectedOffset, $result['continue']['psoffset'] );
                }
        }
-
-       private function registerMockSearchEngine() {
-               $this->setMwGlobals( [
-                       'wgSearchType' => MockCompletionSearchEngine::class,
-               ] );
-       }
 }
index dd12590..83df61a 100644 (file)
@@ -45,6 +45,9 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
                $this->insertPage( 'Talk:Example' );
 
                $this->insertPage( 'User:Example' );
+               $this->insertPage( 'Barcelona' );
+               $this->insertPage( 'Barbara' );
+               $this->insertPage( 'External' );
        }
 
        protected function setUp() {
@@ -238,7 +241,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
                                ],
                        ] ],
                        [ [
-                               'Exact match not on top (T72958)',
+                               'Exact match not in first result should be moved to the first result (T72958)',
                                'provision' => [
                                        'Barcelona',
                                        'Bar',
@@ -252,7 +255,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
                                ],
                        ] ],
                        [ [
-                               'Exact match missing (T72958)',
+                               'Exact match missing from results should be added as first result (T72958)',
                                'provision' => [
                                        'Barcelona',
                                        'Barbara',
@@ -266,7 +269,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
                                ],
                        ] ],
                        [ [
-                               'Exact match missing and not existing',
+                               'Exact match missing and not existing pages should be dropped',
                                'provision' => [
                                        'Exile',
                                        'Exist',
@@ -274,8 +277,6 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase {
                                ],
                                'query' => 'Ex',
                                'results' => [
-                                       'Exile',
-                                       'Exist',
                                        'External',
                                ],
                        ] ],
index 2561fd0..5884d19 100644 (file)
@@ -307,4 +307,37 @@ class SearchEngineTest extends MediaWikiLangTestCase {
                        } );
                $rowAugmentors['testRow'] = $rowAugmentor;
        }
+
+       public function testFiltersMissing() {
+               $availableResults = [];
+               foreach ( range( 0, 11 ) as $i ) {
+                       $title = "Search_Result_$i";
+                       $availableResults[] = $title;
+                       // pages not created must be filtered
+                       if ( $i % 2 == 0 ) {
+                               $this->editPage( $title );
+                       }
+               }
+               MockCompletionSearchEngine::addMockResults( 'foo', $availableResults );
+
+               $engine = new MockCompletionSearchEngine();
+               $engine->setLimitOffset( 10, 0 );
+               $results = $engine->completionSearch( 'foo' );
+               $this->assertEquals( 5, $results->getSize() );
+               $this->assertTrue( $results->hasMoreResults() );
+
+               $engine->setLimitOffset( 10, 10 );
+               $results = $engine->completionSearch( 'foo' );
+               $this->assertEquals( 1, $results->getSize() );
+               $this->assertFalse( $results->hasMoreResults() );
+       }
+
+       private function editPage( $title ) {
+               $page = WikiPage::factory( Title::newFromText( $title ) );
+               $page->doEditContent(
+                       new WikitextContent( 'UTContent' ),
+                       'UTPageSummary',
+                       EDIT_NEW | EDIT_SUPPRESS_RC
+               );
+       }
 }
index ae3d3ec..ac8a5dc 100644 (file)
@@ -1,27 +1,42 @@
 <?php
 
-use MediaWiki\MediaWikiServices;
-
 /**
  * SearchEngine implementation for returning mocked completion search results.
  */
 class MockCompletionSearchEngine extends SearchEngine {
-       private static $completionSearchResult = [];
+       /** @var string[][] */
+       private static $results = [];
+
+       /**
+        * Reset any mocked results
+        */
+       public static function clearMockResults() {
+               self::$results = [];
+       }
+
+       /**
+        * Allows returning arbitrary lists of titles for completion search.
+        * Provided results will be sliced based on offset/limit of query.
+        *
+        * For results to exit the search engine they must pass Title::isKnown.
+        * Injecting into link cache is not enough, as LinkBatch will mark them
+        * bad, they need to be injected into the DB.
+        *
+        * @param string $query Search term as seen in completionSearchBackend
+        * @param string[] $result List of titles to respond to query with
+        */
+       public static function addMockResults( $query, array $result ) {
+               // Leading : ensures we don't treat another : as a namespace separator
+               $normalized = Title::newFromText( ":$query" )->getText();
+               self::$results[$normalized] = $result;
+       }
 
        public function completionSearchBackend( $search ) {
-               if ( self::$completionSearchResult == null ) {
-                       self::$completionSearchResult = [];
-                       // TODO: Or does this have to be setup per-test?
-                       $lc = MediaWikiServices::getInstance()->getLinkCache();
-                       foreach ( range( 0, 10 ) as $i ) {
-                               $dbkey = "Search_Result_$i";
-                               $lc->addGoodLinkObj( 6543 + $i, new TitleValue( NS_MAIN, $dbkey ) );
-                               self::$completionSearchResult[] = "Search Result $i";
-                       }
+               if ( !isset( self::$results[$search] ) ) {
+                       return SearchSuggestionSet::emptySuggestionSet();
                }
-               $results = array_slice( self::$completionSearchResult, $this->offset, $this->limit );
+               $results = array_slice( self::$results[$search], $this->offset, $this->limit );
 
                return SearchSuggestionSet::fromStrings( $results );
        }
-
 }
index 4d7c78a..2b7ea47 100644 (file)
@@ -22,7 +22,7 @@ class MockSearchEngine extends SearchEngine {
                self::$results[$query] = $results;
                $lc = MediaWikiServices::getInstance()->getLinkCache();
                foreach ( $results as $result ) {
-                       // TODO: better page ids?
+                       // TODO: better page ids? Does it matter?
                        $lc->addGoodLinkObj( mt_rand(), $result->getTitle() );
                }
        }