From 92c20832f14439629a0f4028946b06ff5bcaab3a Mon Sep 17 00:00:00 2001 From: David Causse Date: Wed, 12 Jun 2019 11:02:10 +0200 Subject: [PATCH] Deprecate SearchResult::termMatches() And start indicating that hooks relying on this data might become unreliable as this data is only populated by SearchDatabase search engines. This information was only populated by SearchDatabase implementations and due to bad initial design of SearchResult[Set] (now fixed) it forced users of these classes to carry this information for the sole purpose of highlighting. Because SearchEngine can now own their SearchResult[Set] implementations nothing that is engine specific should be exposed outside of these specific implementations. If there are some logic that still requires access to such list of terms they should be made engine specific by guarding their code against instanceof SqlSearchResult. Change-Id: I38b82c5e4c35309ee447edc3ded60ca6a18b247a Depends-On: I53fe37c65c7940f696c1e184125e01e592a976e4 --- RELEASE-NOTES-1.34 | 12 ++++ autoload.php | 1 + docs/hooks.txt | 2 +- includes/api/ApiQuerySearch.php | 21 ++---- includes/search/SearchDatabase.php | 5 ++ includes/search/SearchEngine.php | 5 +- includes/search/SearchResult.php | 21 +----- includes/search/SearchResultSet.php | 1 + includes/search/SqlSearchResult.php | 69 +++++++++++++++++++ includes/search/SqlSearchResultSet.php | 21 +++++- .../search/BasicSearchResultSetWidget.php | 5 +- .../widget/search/FullSearchResultWidget.php | 17 +++-- .../search/InterwikiSearchResultWidget.php | 5 +- includes/widget/search/SearchResultWidget.php | 3 +- .../search/SimpleSearchResultWidget.php | 3 +- .../includes/search/SearchEngineTest.php | 2 +- 16 files changed, 137 insertions(+), 56 deletions(-) create mode 100644 includes/search/SqlSearchResult.php diff --git a/RELEASE-NOTES-1.34 b/RELEASE-NOTES-1.34 index 549f7e8228..bba7df1455 100644 --- a/RELEASE-NOTES-1.34 +++ b/RELEASE-NOTES-1.34 @@ -308,6 +308,18 @@ because of Phabricator reports. instead. * Sanitizer::attributeWhitelist() and Sanitizer::setupAttributeWhitelist() have been deprecated; they will be made private in the future. +* SearchResult::termMatches() method is deprecated. It was unreliable because + only populated by few search engine implementations. Use + SqlSearchResult::getTermMatches() if really needed. +* SearchResult::getTextSnippet( $terms ) the $terms param is being deprecated + and should no longer be passed. Search engine implemenations should be + responsible for carrying relevant information needed for highlighting with + their own SearchResultSet/SearchResult sub-classes. +* SearchEngine::$searchTerms protected field is deprecated. Moved to + SearchDatabase. +* The use of the $terms param in the ShowSearchHit and ShowSearchHitTitle + hooks is highly discouraged as it's only populated by SearchDatabase search + engines. === Other changes in 1.34 === * … diff --git a/autoload.php b/autoload.php index 0b93f49758..b01827aa3f 100644 --- a/autoload.php +++ b/autoload.php @@ -1438,6 +1438,7 @@ $wgAutoloadLocalClasses = [ 'SpecialWatchlist' => __DIR__ . '/includes/specials/SpecialWatchlist.php', 'SpecialWhatLinksHere' => __DIR__ . '/includes/specials/SpecialWhatLinksHere.php', 'SqlBagOStuff' => __DIR__ . '/includes/objectcache/SqlBagOStuff.php', + 'SqlSearchResult' => __DIR__ . '/includes/search/SqlSearchResult.php', 'SqlSearchResultSet' => __DIR__ . '/includes/search/SqlSearchResultSet.php', 'Sqlite' => __DIR__ . '/maintenance/sqlite.inc', 'SqliteInstaller' => __DIR__ . '/includes/installer/SqliteInstaller.php', diff --git a/docs/hooks.txt b/docs/hooks.txt index 99a3d1abc7..f0b720b4f3 100644 --- a/docs/hooks.txt +++ b/docs/hooks.txt @@ -2986,7 +2986,7 @@ $article: The article object corresponding to the page 'ShowSearchHit': Customize display of search hit. $searchPage: The SpecialSearch instance. $result: The SearchResult to show -$terms: Search terms, for highlighting +$terms: Search terms, for highlighting (unreliable as search engine dependent). &$link: HTML of link to the matching page. May be modified. &$redirect: HTML of redirect info. May be modified. &$section: HTML of matching section. May be modified. diff --git a/includes/api/ApiQuerySearch.php b/includes/api/ApiQuerySearch.php index 98c65516c9..23f702cc96 100644 --- a/includes/api/ApiQuerySearch.php +++ b/includes/api/ApiQuerySearch.php @@ -20,8 +20,6 @@ * @file */ -use MediaWiki\MediaWikiServices; - /** * Query module to perform full text search within wiki titles and content * @@ -145,9 +143,6 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { } } - // Add the search results to the result - $terms = MediaWikiServices::getInstance()->getContentLanguage()-> - convertForSearchResult( $matches->termMatches() ); $titles = []; $count = 0; @@ -163,7 +158,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { } if ( $resultPageSet === null ) { - $vals = $this->getSearchResultData( $result, $prop, $terms ); + $vals = $this->getSearchResultData( $result, $prop ); if ( $vals ) { // Add item to results and see whether it fits $fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals ); @@ -184,13 +179,13 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { // Interwiki results inside main result set $canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null ); if ( $canAddInterwiki ) { - $this->addInterwikiResults( $matches, $apiResult, $prop, $terms, 'additional', + $this->addInterwikiResults( $matches, $apiResult, $prop, 'additional', SearchResultSet::INLINE_RESULTS ); } // Interwiki results outside main result set if ( $interwiki && $resultPageSet === null ) { - $this->addInterwikiResults( $matches, $apiResult, $prop, $terms, 'interwiki', + $this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki', SearchResultSet::SECONDARY_RESULTS ); } @@ -217,10 +212,9 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { * Assemble search result data. * @param SearchResult $result Search result * @param array $prop Props to extract (as keys) - * @param array $terms Terms list * @return array|null Result data or null if result is broken in some way. */ - private function getSearchResultData( SearchResult $result, $prop, $terms ) { + private function getSearchResultData( SearchResult $result, $prop ) { // Silently skip broken and missing titles if ( $result->isBrokenTitle() || $result->isMissingRevision() ) { return null; @@ -239,7 +233,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { $vals['wordcount'] = $result->getWordCount(); } if ( isset( $prop['snippet'] ) ) { - $vals['snippet'] = $result->getTextSnippet( $terms ); + $vals['snippet'] = $result->getTextSnippet(); } if ( isset( $prop['timestamp'] ) ) { $vals['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() ); @@ -287,14 +281,13 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { * @param SearchResultSet $matches * @param ApiResult $apiResult * @param array $prop Props to extract (as keys) - * @param array $terms Terms list * @param string $section Section name where results would go * @param int $type Interwiki result type * @return int|null Number of total hits in the data or null if none was produced */ private function addInterwikiResults( SearchResultSet $matches, ApiResult $apiResult, $prop, - $terms, $section, $type + $section, $type ) { $totalhits = null; if ( $matches->hasInterwikiResults( $type ) ) { @@ -304,7 +297,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { foreach ( $interwikiMatches as $result ) { $title = $result->getTitle(); - $vals = $this->getSearchResultData( $result, $prop, $terms ); + $vals = $this->getSearchResultData( $result, $prop ); $vals['namespace'] = $result->getInterwikiNamespaceText(); $vals['title'] = $title->getText(); diff --git a/includes/search/SearchDatabase.php b/includes/search/SearchDatabase.php index 6da8f98dc6..8ea356f77a 100644 --- a/includes/search/SearchDatabase.php +++ b/includes/search/SearchDatabase.php @@ -35,6 +35,11 @@ abstract class SearchDatabase extends SearchEngine { /** @var IDatabase (backwards compatibility) */ protected $db; + /** + * @var string[] search terms + */ + protected $searchTerms = []; + /** * @param ILoadBalancer $lb The load balancer for the DB cluster to search on */ diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index fa6e7fd096..2fb4585c92 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -46,7 +46,10 @@ abstract class SearchEngine { /** @var int */ protected $offset = 0; - /** @var string[] */ + /** + * @var string[] + * @deprecated since 1.34 + */ protected $searchTerms = []; /** @var bool */ diff --git a/includes/search/SearchResult.php b/includes/search/SearchResult.php index a27d71933f..7703e38dd0 100644 --- a/includes/search/SearchResult.php +++ b/includes/search/SearchResult.php @@ -147,26 +147,11 @@ class SearchResult { } /** - * @param string[] $terms Terms to highlight + * @param string[] $terms Terms to highlight (this parameter is deprecated and ignored) * @return string Highlighted text snippet, null (and not '') if not supported */ - function getTextSnippet( $terms ) { - global $wgAdvancedSearchHighlighting; - $this->initText(); - - // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter. - list( $contextlines, $contextchars ) = $this->searchEngine->userHighlightPrefs(); - - $h = new SearchHighlighter(); - if ( count( $terms ) > 0 ) { - if ( $wgAdvancedSearchHighlighting ) { - return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars ); - } else { - return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars ); - } - } else { - return $h->highlightNone( $this->mText, $contextlines, $contextchars ); - } + function getTextSnippet( $terms = [] ) { + return ''; } /** diff --git a/includes/search/SearchResultSet.php b/includes/search/SearchResultSet.php index 92e2a17d6d..5ee96cba65 100644 --- a/includes/search/SearchResultSet.php +++ b/includes/search/SearchResultSet.php @@ -96,6 +96,7 @@ class SearchResultSet implements Countable, IteratorAggregate { * STUB * * @return string[] + * @deprecated since 1.34 (use SqlSearchResult) */ function termMatches() { return []; diff --git a/includes/search/SqlSearchResult.php b/includes/search/SqlSearchResult.php new file mode 100644 index 0000000000..25e87e70ff --- /dev/null +++ b/includes/search/SqlSearchResult.php @@ -0,0 +1,69 @@ +initFromTitle( $title ); + $this->terms = $terms; + } + + /** + * return string[] + */ + public function getTermMatches(): array { + return $this->terms; + } + + /** + * @param array $terms Terms to highlight (this parameter is deprecated) + * @return string Highlighted text snippet, null (and not '') if not supported + */ + function getTextSnippet( $terms = [] ) { + global $wgAdvancedSearchHighlighting; + $this->initText(); + + // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter. + list( $contextlines, $contextchars ) = $this->searchEngine->userHighlightPrefs(); + + $h = new SearchHighlighter(); + if ( count( $this->terms ) > 0 ) { + if ( $wgAdvancedSearchHighlighting ) { + return $h->highlightText( $this->mText, $this->terms, $contextlines, $contextchars ); + } else { + return $h->highlightSimple( $this->mText, $this->terms, $contextlines, $contextchars ); + } + } else { + return $h->highlightNone( $this->mText, $contextlines, $contextchars ); + } + } + +} diff --git a/includes/search/SqlSearchResultSet.php b/includes/search/SqlSearchResultSet.php index f4e4a23abe..87068ca3d3 100644 --- a/includes/search/SqlSearchResultSet.php +++ b/includes/search/SqlSearchResultSet.php @@ -16,12 +16,22 @@ class SqlSearchResultSet extends SearchResultSet { /** @var int|null Total number of hits for $terms */ protected $totalHits; - function __construct( IResultWrapper $resultSet, $terms, $total = null ) { + /** + * @param IResultWrapper $resultSet + * @param string[] $terms + * @param int|null $total + */ + function __construct( IResultWrapper $resultSet, array $terms, $total = null ) { + parent::__construct(); $this->resultSet = $resultSet; $this->terms = $terms; $this->totalHits = $total; } + /** + * @return string[] + * @deprecated since 1.34 + */ function termMatches() { return $this->terms; } @@ -42,10 +52,15 @@ class SqlSearchResultSet extends SearchResultSet { if ( $this->results === null ) { $this->results = []; $this->resultSet->rewind(); + $terms = \MediaWiki\MediaWikiServices::getInstance()->getContentLanguage() + ->convertForSearchResult( $this->terms ); while ( ( $row = $this->resultSet->fetchObject() ) !== false ) { - $this->results[] = SearchResult::newFromTitle( - Title::makeTitle( $row->page_namespace, $row->page_title ), $this + $result = new SqlSearchResult( + Title::makeTitle( $row->page_namespace, $row->page_title ), + $terms ); + $this->augmentResult( $result ); + $this->results[] = $result; } } return $this->results; diff --git a/includes/widget/search/BasicSearchResultSetWidget.php b/includes/widget/search/BasicSearchResultSetWidget.php index 1a885b0136..0e102878bc 100644 --- a/includes/widget/search/BasicSearchResultSetWidget.php +++ b/includes/widget/search/BasicSearchResultSetWidget.php @@ -117,12 +117,9 @@ class BasicSearchResultSetWidget { * @return string HTML */ protected function renderResultSet( SearchResultSet $resultSet, $offset ) { - $terms = MediaWikiServices::getInstance()->getContentLanguage()-> - convertForSearchResult( $resultSet->termMatches() ); - $hits = []; foreach ( $resultSet as $result ) { - $hits[] = $this->resultWidget->render( $result, $terms, $offset++ ); + $hits[] = $this->resultWidget->render( $result, $offset++ ); } return ""; diff --git a/includes/widget/search/FullSearchResultWidget.php b/includes/widget/search/FullSearchResultWidget.php index f648535c01..7212dc0498 100644 --- a/includes/widget/search/FullSearchResultWidget.php +++ b/includes/widget/search/FullSearchResultWidget.php @@ -31,11 +31,10 @@ class FullSearchResultWidget implements SearchResultWidget { /** * @param SearchResult $result The result to render - * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet) * @param int $position The result position, including offset * @return string HTML */ - public function render( SearchResult $result, $terms, $position ) { + public function render( SearchResult $result, $position ) { // If the page doesn't *exist*... our search index is out of date. // The least confusing at this point is to drop the result. // You may get less results, but... on well. :P @@ -43,7 +42,7 @@ class FullSearchResultWidget implements SearchResultWidget { return ''; } - $link = $this->generateMainLinkHtml( $result, $terms, $position ); + $link = $this->generateMainLinkHtml( $result, $position ); // If page content is not readable, just return ths title. // This is not quite safe, but better than showing excerpts from // non-readable pages. Note that hiding the entry entirely would @@ -63,7 +62,7 @@ class FullSearchResultWidget implements SearchResultWidget { $this->specialPage->getUser() ); list( $file, $desc, $thumb ) = $this->generateFileHtml( $result ); - $snippet = $result->getTextSnippet( $terms ); + $snippet = $result->getTextSnippet(); if ( $snippet ) { $extract = "
$snippet
"; } else { @@ -80,6 +79,9 @@ class FullSearchResultWidget implements SearchResultWidget { $html = null; $score = ''; $related = ''; + // TODO: remove this instanceof and always pass [], let implementors do the cast if + // they want to be SearchDatabase specific + $terms = $result instanceof \SqlSearchResult ? $result->getTermMatches() : []; if ( !Hooks::run( 'ShowSearchHit', [ $this->specialPage, $result, $terms, &$link, &$redirect, &$section, &$extract, @@ -121,11 +123,10 @@ class FullSearchResultWidget implements SearchResultWidget { * title with highlighted words). * * @param SearchResult $result - * @param string[] $terms * @param int $position * @return string HTML */ - protected function generateMainLinkHtml( SearchResult $result, $terms, $position ) { + protected function generateMainLinkHtml( SearchResult $result, $position ) { $snippet = $result->getTitleSnippet(); if ( $snippet === '' ) { $snippet = null; @@ -139,7 +140,9 @@ class FullSearchResultWidget implements SearchResultWidget { $attributes = [ 'data-serp-pos' => $position ]; Hooks::run( 'ShowSearchHitTitle', - [ &$title, &$snippet, $result, $terms, $this->specialPage, &$query, &$attributes ] ); + [ &$title, &$snippet, $result, + $result instanceof \SqlSearchResult ? $result->getTermMatches() : [], + $this->specialPage, &$query, &$attributes ] ); $link = $this->linkRenderer->makeLink( $title, diff --git a/includes/widget/search/InterwikiSearchResultWidget.php b/includes/widget/search/InterwikiSearchResultWidget.php index 745bc12c61..3f758db5bb 100644 --- a/includes/widget/search/InterwikiSearchResultWidget.php +++ b/includes/widget/search/InterwikiSearchResultWidget.php @@ -24,14 +24,13 @@ class InterwikiSearchResultWidget implements SearchResultWidget { /** * @param SearchResult $result The result to render - * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet) * @param int $position The result position, including offset * @return string HTML */ - public function render( SearchResult $result, $terms, $position ) { + public function render( SearchResult $result, $position ) { $title = $result->getTitle(); $titleSnippet = $result->getTitleSnippet(); - $snippet = $result->getTextSnippet( $terms ); + $snippet = $result->getTextSnippet(); if ( $titleSnippet ) { $titleSnippet = new HtmlArmor( $titleSnippet ); diff --git a/includes/widget/search/SearchResultWidget.php b/includes/widget/search/SearchResultWidget.php index 4f0a271e5b..e001395541 100644 --- a/includes/widget/search/SearchResultWidget.php +++ b/includes/widget/search/SearchResultWidget.php @@ -10,9 +10,8 @@ use SearchResult; interface SearchResultWidget { /** * @param SearchResult $result The result to render - * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet) * @param int $position The zero indexed result position, including offset * @return string HTML */ - public function render( SearchResult $result, $terms, $position ); + public function render( SearchResult $result, $position ); } diff --git a/includes/widget/search/SimpleSearchResultWidget.php b/includes/widget/search/SimpleSearchResultWidget.php index 86a04b1839..fe8b4d5092 100644 --- a/includes/widget/search/SimpleSearchResultWidget.php +++ b/includes/widget/search/SimpleSearchResultWidget.php @@ -26,11 +26,10 @@ class SimpleSearchResultWidget implements SearchResultWidget { /** * @param SearchResult $result The result to render - * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet) * @param int $position The result position, including offset * @return string HTML */ - public function render( SearchResult $result, $terms, $position ) { + public function render( SearchResult $result, $position ) { $title = $result->getTitle(); $titleSnippet = $result->getTitleSnippet(); if ( $titleSnippet ) { diff --git a/tests/phpunit/includes/search/SearchEngineTest.php b/tests/phpunit/includes/search/SearchEngineTest.php index 1a0393e546..2772b0d897 100644 --- a/tests/phpunit/includes/search/SearchEngineTest.php +++ b/tests/phpunit/includes/search/SearchEngineTest.php @@ -190,7 +190,7 @@ class SearchEngineTest extends MediaWikiLangTestCase { $match = $res->getIterator()->current(); $snippet = "A " . $phrase . ""; $this->assertStringStartsWith( $snippet, - $match->getTextSnippet( $res->termMatches() ), + $match->getTextSnippet(), "Highlight a phrase search" ); } -- 2.20.1