From 7e18cfc3b5ed4e957a9ed8ac7af4a68d1a95576c Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Tue, 26 Apr 2016 15:19:58 -0700 Subject: [PATCH] Infrastructure for augmenting search results Bug: T117493 Change-Id: Ia5413a7846cc961026a2dc3542b619493bc76a23 --- autoload.php | 4 + docs/hooks.txt | 7 ++ includes/search/AugmentPageProps.php | 20 ++++ includes/search/PerRowAugmentor.php | 38 ++++++++ includes/search/ResultAugmentor.php | 14 +++ includes/search/ResultSetAugmentor.php | 14 +++ includes/search/SearchEngine.php | 31 ++++++ includes/search/SearchNearMatchResultSet.php | 2 +- includes/search/SearchResult.php | 34 ++++++- includes/search/SearchResultSet.php | 94 ++++++++++++++++++- includes/search/SqlSearchResultSet.php | 2 +- includes/specials/SpecialSearch.php | 7 +- .../includes/search/SearchEngineTest.php | 46 +++++++++ 13 files changed, 303 insertions(+), 10 deletions(-) create mode 100644 includes/search/AugmentPageProps.php create mode 100644 includes/search/PerRowAugmentor.php create mode 100644 includes/search/ResultAugmentor.php create mode 100644 includes/search/ResultSetAugmentor.php diff --git a/autoload.php b/autoload.php index 96c8190d7d..9fd83ebeb8 100644 --- a/autoload.php +++ b/autoload.php @@ -153,6 +153,7 @@ $wgAutoloadLocalClasses = [ 'AtomFeed' => __DIR__ . '/includes/Feed.php', 'AtomicSectionUpdate' => __DIR__ . '/includes/deferred/AtomicSectionUpdate.php', 'AttachLatest' => __DIR__ . '/maintenance/attachLatest.php', + 'AugmentPageProps' => __DIR__ . '/includes/search/AugmentPageProps.php', 'AuthManagerSpecialPage' => __DIR__ . '/includes/specialpage/AuthManagerSpecialPage.php', 'AuthPlugin' => __DIR__ . '/includes/AuthPlugin.php', 'AuthPluginUser' => __DIR__ . '/includes/AuthPlugin.php', @@ -1043,6 +1044,7 @@ $wgAutoloadLocalClasses = [ 'PatrolLog' => __DIR__ . '/includes/logging/PatrolLog.php', 'PatrolLogFormatter' => __DIR__ . '/includes/logging/PatrolLogFormatter.php', 'Pbkdf2Password' => __DIR__ . 
'/includes/password/Pbkdf2Password.php', + 'PerRowAugmentor' => __DIR__ . '/includes/search/PerRowAugmentor.php', 'PermissionsError' => __DIR__ . '/includes/exception/PermissionsError.php', 'PhpHttpRequest' => __DIR__ . '/includes/HttpFunctions.php', 'PhpXmlBugTester' => __DIR__ . '/includes/installer/PhpBugTests.php', @@ -1182,6 +1184,8 @@ $wgAutoloadLocalClasses = [ 'ResourceLoaderUserTokensModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderUserTokensModule.php', 'ResourceLoaderWikiModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderWikiModule.php', 'RestbaseVirtualRESTService' => __DIR__ . '/includes/libs/virtualrest/RestbaseVirtualRESTService.php', + 'ResultAugmentor' => __DIR__ . '/includes/search/ResultAugmentor.php', + 'ResultSetAugmentor' => __DIR__ . '/includes/search/ResultSetAugmentor.php', 'ResultWrapper' => __DIR__ . '/includes/libs/rdbms/database/resultwrapper/ResultWrapper.php', 'RevDelArchiveItem' => __DIR__ . '/includes/revisiondelete/RevDelArchiveItem.php', 'RevDelArchiveList' => __DIR__ . '/includes/revisiondelete/RevDelArchiveList.php', diff --git a/docs/hooks.txt b/docs/hooks.txt index a7fb873bf8..ae0770b07f 100644 --- a/docs/hooks.txt +++ b/docs/hooks.txt @@ -2699,6 +2699,13 @@ $page: WikiPage that is being indexed $output: ParserOutput that is produced from the page $engine: SearchEngine for which the indexing is intended +'SearchResultsAugment': Allows extensions to add their code to the list of search +result augmentors. +&$setAugmentors: List of whole-set augmentor objects, must implement ResultSetAugmentor. +&$rowAugmentors: List of per-row augmentor objects, must implement ResultAugmentor. +Note that both lists should be in the format name => object, and the names must be +distinct across the two lists. + 'SecondaryDataUpdates': Allows modification of the list of DataUpdates to perform when page content is modified. Currently called by AbstractContent::getSecondaryDataUpdates. 
diff --git a/includes/search/AugmentPageProps.php b/includes/search/AugmentPageProps.php new file mode 100644 index 0000000000..29bd463d23 --- /dev/null +++ b/includes/search/AugmentPageProps.php @@ -0,0 +1,20 @@ +propnames = $propnames; + } + + public function augmentAll( SearchResultSet $resultSet ) { + $titles = $resultSet->extractTitles(); + return PageProps::getInstance()->getProperties( $titles, $this->propnames ); + } +} diff --git a/includes/search/PerRowAugmentor.php b/includes/search/PerRowAugmentor.php new file mode 100644 index 0000000000..8eb8b17c11 --- /dev/null +++ b/includes/search/PerRowAugmentor.php @@ -0,0 +1,38 @@ +rowAugmentor = $augmentor; + } + + /** + * Produce data to augment search result set. + * @param SearchResultSet $resultSet + * @return array Data for all results + */ + public function augmentAll( SearchResultSet $resultSet ) { + $data = []; + foreach ( $resultSet->extractResults() as $result ) { + $id = $result->getTitle()->getArticleID(); + if ( !$id ) { + continue; + } + $data[$id] = $this->rowAugmentor->augment( $result ); + } + return $data; + } +} diff --git a/includes/search/ResultAugmentor.php b/includes/search/ResultAugmentor.php new file mode 100644 index 0000000000..350b780953 --- /dev/null +++ b/includes/search/ResultAugmentor.php @@ -0,0 +1,14 @@ + $row ) { + if ( isset( $setAugmentors[$name] ) ) { + throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" ); + } + $setAugmentors[$name] = new PerRowAugmentor( $row ); + } + + foreach ( $setAugmentors as $name => $augmentor ) { + $data = $augmentor->augmentAll( $resultSet ); + if ( $data ) { + $resultSet->setAugmentedData( $name, $data ); + } + } + } } /** diff --git a/includes/search/SearchNearMatchResultSet.php b/includes/search/SearchNearMatchResultSet.php index 6d667074ee..31417974d9 100644 --- a/includes/search/SearchNearMatchResultSet.php +++ b/includes/search/SearchNearMatchResultSet.php @@ -21,7 +21,7 @@ class SearchNearMatchResultSet 
extends SearchResultSet { return false; } $this->fetched = true; - return SearchResult::newFromTitle( $this->result ); + return SearchResult::newFromTitle( $this->result, $this ); } public function rewind() { diff --git a/includes/search/SearchResult.php b/includes/search/SearchResult.php index 21effbbc98..50db84b034 100644 --- a/includes/search/SearchResult.php +++ b/includes/search/SearchResult.php @@ -56,15 +56,25 @@ class SearchResult { */ protected $searchEngine; + /** + * A set of extension data, as augmentor name => data; empty until set. + * @var array[] + */ + protected $extensionData = []; + /** * Return a new SearchResult and initializes it with a title. * - * @param Title $title + * @param Title $title + * @param SearchResultSet $parentSet * @return SearchResult */ - public static function newFromTitle( $title ) { + public static function newFromTitle( $title, SearchResultSet $parentSet = null ) { $result = new static(); $result->initFromTitle( $title ); + if ( $parentSet ) { + $parentSet->augmentResult( $result ); + } return $result; } @@ -250,4 +260,24 @@ class SearchResult { function isFileMatch() { return false; } + + /** + * Get the extension data as: + * augmentor name => data + * @return array[] Empty array if no extension data was set + */ + public function getExtensionData() { + return $this->extensionData; + } + + /** + * Set extension data for this result. + * The data is: + * augmentor name => data + * @param array[] $extensionData + */ + public function setExtensionData( array $extensionData ) { + $this->extensionData = $extensionData; + } + } diff --git a/includes/search/SearchResultSet.php b/includes/search/SearchResultSet.php index 69795e792b..978db2707f 100644 --- a/includes/search/SearchResultSet.php +++ b/includes/search/SearchResultSet.php @@ -42,6 +42,29 @@ class SearchResultSet { protected $containedSyntax = false; + /** + * Cache of titles. + * Lists titles of the result set, in the same order as results. 
+ * @var Title[] + */ + private $titles; + + /** + * Cache of results - serialization of the result iterator + * as an array. + * @var SearchResult[] + */ + private $results; + + /** + * Set of result's extra data, indexed per result id + * and then per data item name. + * The structure is: + * PAGE_ID => [ augmentor name => data, ... ] + * @var array[] + */ + protected $extraData = []; + public function __construct( $containedSyntax = false ) { $this->containedSyntax = $containedSyntax; } @@ -147,15 +170,15 @@ class SearchResultSet { /** * Fetches next search result, or false. * STUB - * - * @return SearchResult + * FIXME: refactor as iterator, so we could use nicer interfaces. + * @return SearchResult|false */ function next() { return false; } /** - * Rewind result set back to begining + * Rewind result set back to beginning */ function rewind() { } @@ -176,4 +199,69 @@ class SearchResultSet { public function searchContainedSyntax() { return $this->containedSyntax; } + + /** + * Extract all the results in the result set as array. + * @return SearchResult[] + */ + public function extractResults() { + if ( is_null( $this->results ) ) { + $this->results = []; + if ( $this->numRows() == 0 ) { + // Don't bother if we've got empty result + return $this->results; + } + $this->rewind(); + while ( ( $result = $this->next() ) != false ) { + $this->results[] = $result; + } + $this->rewind(); + } + return $this->results; + } + + /** + * Extract all the titles in the result set. + * @return Title[] + */ + public function extractTitles() { + if ( is_null( $this->titles ) ) { + if ( $this->numRows() == 0 ) { + // Don't bother if we've got empty result + $this->titles = []; + } else { + $this->titles = array_map( + function ( SearchResult $result ) { + return $result->getTitle(); + }, + $this->extractResults() ); + } + } + return $this->titles; + } + + /** + * Sets augmented data for result set. 
+ * @param string $name Extra data item name + * @param array[] $data Extra data as PAGEID => data + */ + public function setAugmentedData( $name, $data ) { + foreach ( $data as $id => $resultData ) { + $this->extraData[$id][$name] = $resultData; + } + } + + /** + * Returns extra data for specific result and store it in SearchResult object. + * @param SearchResult $result + * @return array|null List of data as name => value or null if none present. + */ + public function augmentResult( SearchResult $result ) { + $id = $result->getTitle()->getArticleID(); + if ( !$id || !isset( $this->extraData[$id] ) ) { + return null; + } + $result->setExtensionData( $this->extraData[$id] ); + return $this->extraData[$id]; + } } diff --git a/includes/search/SqlSearchResultSet.php b/includes/search/SqlSearchResultSet.php index 6b60899f10..c3985d1852 100644 --- a/includes/search/SqlSearchResultSet.php +++ b/includes/search/SqlSearchResultSet.php @@ -37,7 +37,7 @@ class SqlSearchResultSet extends SearchResultSet { } return SearchResult::newFromTitle( - Title::makeTitle( $row->page_namespace, $row->page_title ) + Title::makeTitle( $row->page_namespace, $row->page_title ), $this ); } diff --git a/includes/specials/SpecialSearch.php b/includes/specials/SpecialSearch.php index 26b86f9762..6daf19f5c3 100644 --- a/includes/specials/SpecialSearch.php +++ b/includes/specials/SpecialSearch.php @@ -403,6 +403,7 @@ class SpecialSearch extends SpecialPage { // show results if ( $numTextMatches > 0 ) { + $search->augmentSearchResults( $textMatches ); $out->addHTML( $this->showMatches( $textMatches ) ); } @@ -716,7 +717,7 @@ class SpecialSearch extends SpecialPage { * * @return string */ - protected function showMatches( &$matches, $interwiki = null ) { + protected function showMatches( $matches, $interwiki = null ) { global $wgContLang; $terms = $wgContLang->convertForSearchResult( $matches->termMatches() ); @@ -725,7 +726,7 @@ class SpecialSearch extends SpecialPage { $pos = $this->offset; if ( 
$result && $interwiki ) { - $out .= $this->interwikiHeader( $interwiki, $result ); + $out .= $this->interwikiHeader( $interwiki, $matches ); } $out .= "