From 83bae78c3a78a56eb4bcbabdb2cd0e1670af89a1 Mon Sep 17 00:00:00 2001 From: Erik Bernhardson Date: Wed, 13 Dec 2017 17:03:20 -0800 Subject: [PATCH] Silently drop unknown titles in completion search This mimics how full text works by silenty dropping results returned from search that no longer exist. This could be because the search index is slightly out of sync with reality, or the search engine could simply be broken. Only silent from the users perspective. We maintain a count in statsd of the number of titles dropped. This can be monitored over time to recognize any increases. Bug: T115756 Change-Id: I2f29d73e258cd448a14d35a2b4902a4fb6f61c68 --- includes/search/SearchEngine.php | 13 ++++-- includes/search/SearchSuggestionSet.php | 12 ++++++ .../includes/api/ApiQueryPrefixSearchTest.php | 26 +++++++---- .../search/SearchEnginePrefixTest.php | 11 ++--- .../includes/search/SearchEngineTest.php | 33 ++++++++++++++ .../search/MockCompletionSearchEngine.php | 43 +++++++++++++------ .../phpunit/mocks/search/MockSearchEngine.php | 2 +- 7 files changed, 109 insertions(+), 31 deletions(-) diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index 4217ab0998..b2bacdafc7 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -675,13 +675,20 @@ abstract class SearchEngine { $search = trim( $search ); // preload the titles with LinkBatch - $titles = $suggestions->map( function ( SearchSuggestion $sugg ) { + $lb = new LinkBatch( $suggestions->map( function ( SearchSuggestion $sugg ) { return $sugg->getSuggestedTitle(); - } ); - $lb = new LinkBatch( $titles ); + } ) ); $lb->setCaller( __METHOD__ ); $lb->execute(); + $diff = $suggestions->filter( function ( SearchSuggestion $sugg ) { + return $sugg->getSuggestedTitle()->isKnown(); + } ); + if ( $diff > 0 ) { + MediaWikiServices::getInstance()->getStatsdDataFactory() + ->updateCount( 'search.completion.missing', $diff ); + } + $results = $suggestions->map( function ( SearchSuggestion $sugg ) { return $sugg->getSuggestedTitle()->getPrefixedText(); } ); diff --git a/includes/search/SearchSuggestionSet.php b/includes/search/SearchSuggestionSet.php index ab384206d3..cb1f831711 100644 --- a/includes/search/SearchSuggestionSet.php +++ b/includes/search/SearchSuggestionSet.php @@ -87,6 +87,18 @@ class SearchSuggestionSet { return array_map( $callback, $this->suggestions ); } + /** + * Filter the suggestions array + * @param callback $callback Callable accepting single SearchSuggestion + * instance returning bool false to remove the item. + * @return int The number of suggestions removed + */ + public function filter( $callback ) { + $before = count( $this->suggestions ); + $this->suggestions = array_values( array_filter( $this->suggestions, $callback ) ); + return $before - count( $this->suggestions ); + } + /** * Add a new suggestion at the end. * If the score of the new suggestion is greater than the worst one, diff --git a/tests/phpunit/includes/api/ApiQueryPrefixSearchTest.php b/tests/phpunit/includes/api/ApiQueryPrefixSearchTest.php index ae5924d800..749f154604 100644 --- a/tests/phpunit/includes/api/ApiQueryPrefixSearchTest.php +++ b/tests/phpunit/includes/api/ApiQueryPrefixSearchTest.php @@ -7,6 +7,23 @@ * @covers ApiQueryPrefixSearch */ class ApiQueryPrefixSearchTest extends ApiTestCase { + const TEST_QUERY = 'unittest'; + + public function setUp() { + parent::setUp(); + $this->setMwGlobals( [ + 'wgSearchType' => MockCompletionSearchEngine::class, + ] ); + MockCompletionSearchEngine::clearMockResults(); + $results = []; + foreach ( range( 0, 10 ) as $i ) { + $title = "Search_Result_$i"; + $results[] = $title; + $this->editPage( $title, 'hi there' ); + } + MockCompletionSearchEngine::addMockResults( self::TEST_QUERY, $results ); + } + public function offsetContinueProvider() { return [ 'no offset' => [ 2, 2, 0, 2 ], @@ -20,11 +37,10 @@ class ApiQueryPrefixSearchTest extends ApiTestCase { * @dataProvider offsetContinueProvider */ public function testOffsetContinue( $expectedOffset, $expectedResults, $offset, $limit ) { - $this->registerMockSearchEngine(); $response = $this->doApiRequest( [ 'action' => 'query', 'list' => 'prefixsearch', - 'pssearch' => 'example query terms', + 'pssearch' => self::TEST_QUERY, 'psoffset' => $offset, 'pslimit' => $limit, ] ); @@ -39,10 +55,4 @@ class ApiQueryPrefixSearchTest extends ApiTestCase { $this->assertEquals( $expectedOffset, $result['continue']['psoffset'] ); } } - - private function registerMockSearchEngine() { - $this->setMwGlobals( [ - 'wgSearchType' => MockCompletionSearchEngine::class, - ] ); - } } diff --git a/tests/phpunit/includes/search/SearchEnginePrefixTest.php b/tests/phpunit/includes/search/SearchEnginePrefixTest.php index dd12590554..83df61a4bc 100644 --- a/tests/phpunit/includes/search/SearchEnginePrefixTest.php +++ b/tests/phpunit/includes/search/SearchEnginePrefixTest.php @@ -45,6 +45,9 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase { $this->insertPage( 'Talk:Example' ); $this->insertPage( 'User:Example' ); + $this->insertPage( 'Barcelona' ); + $this->insertPage( 'Barbara' ); + $this->insertPage( 'External' ); } protected function setUp() { @@ -238,7 +241,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase { ], ] ], [ [ - 'Exact match not on top (T72958)', + 'Exact match not in first result should be moved to the first result (T72958)', 'provision' => [ 'Barcelona', 'Bar', @@ -252,7 +255,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase { ], ] ], [ [ - 'Exact match missing (T72958)', + 'Exact match missing from results should be added as first result (T72958)', 'provision' => [ 'Barcelona', 'Barbara', @@ -266,7 +269,7 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase { ], ] ], [ [ - 'Exact match missing and not existing', + 'Exact match missing and not existing pages should be dropped', 'provision' => [ 'Exile', 'Exist', @@ -274,8 +277,6 @@ class SearchEnginePrefixTest extends MediaWikiLangTestCase { ], 'query' => 'Ex', 'results' => [ - 'Exile', - 'Exist', 'External', ], ] ], diff --git a/tests/phpunit/includes/search/SearchEngineTest.php b/tests/phpunit/includes/search/SearchEngineTest.php index 2561fd060f..5884d19343 100644 --- a/tests/phpunit/includes/search/SearchEngineTest.php +++ b/tests/phpunit/includes/search/SearchEngineTest.php @@ -307,4 +307,37 @@ class SearchEngineTest extends MediaWikiLangTestCase { } ); $rowAugmentors['testRow'] = $rowAugmentor; } + + public function testFiltersMissing() { + $availableResults = []; + foreach ( range( 0, 11 ) as $i ) { + $title = "Search_Result_$i"; + $availableResults[] = $title; + // pages not created must be filtered + if ( $i % 2 == 0 ) { + $this->editPage( $title ); + } + } + MockCompletionSearchEngine::addMockResults( 'foo', $availableResults ); + + $engine = new MockCompletionSearchEngine(); + $engine->setLimitOffset( 10, 0 ); + $results = $engine->completionSearch( 'foo' ); + $this->assertEquals( 5, $results->getSize() ); + $this->assertTrue( $results->hasMoreResults() ); + + $engine->setLimitOffset( 10, 10 ); + $results = $engine->completionSearch( 'foo' ); + $this->assertEquals( 1, $results->getSize() ); + $this->assertFalse( $results->hasMoreResults() ); + } + + private function editPage( $title ) { + $page = WikiPage::factory( Title::newFromText( $title ) ); + $page->doEditContent( + new WikitextContent( 'UTContent' ), + 'UTPageSummary', + EDIT_NEW | EDIT_SUPPRESS_RC + ); + } } diff --git a/tests/phpunit/mocks/search/MockCompletionSearchEngine.php b/tests/phpunit/mocks/search/MockCompletionSearchEngine.php index ae3d3eccd5..ac8a5dc702 100644 --- a/tests/phpunit/mocks/search/MockCompletionSearchEngine.php +++ b/tests/phpunit/mocks/search/MockCompletionSearchEngine.php @@ -1,27 +1,42 @@ getText(); + self::$results[$normalized] = $result; + } public function completionSearchBackend( $search ) { - if ( self::$completionSearchResult == null ) { - self::$completionSearchResult = []; - // TODO: Or does this have to be setup per-test? - $lc = MediaWikiServices::getInstance()->getLinkCache(); - foreach ( range( 0, 10 ) as $i ) { - $dbkey = "Search_Result_$i"; - $lc->addGoodLinkObj( 6543 + $i, new TitleValue( NS_MAIN, $dbkey ) ); - self::$completionSearchResult[] = "Search Result $i"; - } + if ( !isset( self::$results[$search] ) ) { + return SearchSuggestionSet::emptySuggestionSet(); } - $results = array_slice( self::$completionSearchResult, $this->offset, $this->limit ); + $results = array_slice( self::$results[$search], $this->offset, $this->limit ); return SearchSuggestionSet::fromStrings( $results ); } - } diff --git a/tests/phpunit/mocks/search/MockSearchEngine.php b/tests/phpunit/mocks/search/MockSearchEngine.php index 4d7c78a017..2b7ea4760a 100644 --- a/tests/phpunit/mocks/search/MockSearchEngine.php +++ b/tests/phpunit/mocks/search/MockSearchEngine.php @@ -22,7 +22,7 @@ class MockSearchEngine extends SearchEngine { self::$results[$query] = $results; $lc = MediaWikiServices::getInstance()->getLinkCache(); foreach ( $results as $result ) { - // TODO: better page ids? + // TODO: better page ids? Does it matter? $lc->addGoodLinkObj( mt_rand(), $result->getTitle() ); } } -- 2.20.1