From: dcausse Date: Thu, 31 Mar 2016 09:13:21 +0000 (+0200) Subject: Expose SearchEngine specific profiles X-Git-Tag: 1.31.0-rc.0~6733^2 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/exercices/?a=commitdiff_plain;h=31680aaddc8ddde150aa5ab2370a019a550f259d;p=lhc%2Fweb%2Fwiklou.git Expose SearchEngine specific profiles This patch introduces a way for SearchEngine implementations to expose specific search profiles useful to fine-tune the various behaviors related to search. A SearchEngine can expose a list of profiles by overriding SearchEngine::getProfiles( $profileType ), profileType stands for the type of profile being customized. Two types are added in this patch: - completion: exposed by ApiQueryPrefixSearch and ApiOpenSearch to control the behavior of the algorithm behind "search as you type" suggestions. - fulltext query independent profiles: exposed by ApiQuerySearch to customize query independent ranking profiles (e.g. boost by templates/incoming links/popularity/...) This patch allows api consumers that might have been confused by fuzzy suggestions to switch to stricter profiles and to officialize the behavior behind the hidden param cirrusUseCompletionSuggester. Or to control the fulltext ranking behaviors like cirrusBoostLinks=(yes|no). The list of profiles can be discovered by using ApiSandbox/ApiHelp and is totally controlled by search engine implementations. Bug: T132477 Change-Id: I66be724d8975976c98c91badbf421f237e014f89 --- diff --git a/autoload.php b/autoload.php index f635bc1a92..982a162a3b 100644 --- a/autoload.php +++ b/autoload.php @@ -1199,6 +1199,7 @@ $wgAutoloadLocalClasses = [ 'SavepointPostgres' => __DIR__ . '/includes/db/DatabasePostgres.php', 'ScopedCallback' => __DIR__ . '/includes/libs/ScopedCallback.php', 'ScopedLock' => __DIR__ . '/includes/filebackend/lockmanager/ScopedLock.php', + 'SearchApi' => __DIR__ . '/includes/api/SearchApi.php', 'SearchDatabase' => __DIR__ . 
'/includes/search/SearchDatabase.php', 'SearchDump' => __DIR__ . '/maintenance/dumpIterator.php', 'SearchEngine' => __DIR__ . '/includes/search/SearchEngine.php', diff --git a/includes/api/ApiOpenSearch.php b/includes/api/ApiOpenSearch.php index 058e0a3909..066aaa3bca 100644 --- a/includes/api/ApiOpenSearch.php +++ b/includes/api/ApiOpenSearch.php @@ -30,10 +30,14 @@ use MediaWiki\MediaWikiServices; * @ingroup API */ class ApiOpenSearch extends ApiBase { + use SearchApi; private $format = null; private $fm = null; + /** @var array list of api allowed params */ + private $allowedParams = null; + /** * Get the output format * @@ -80,24 +84,13 @@ class ApiOpenSearch extends ApiBase { public function execute() { $params = $this->extractRequestParams(); $search = $params['search']; - $limit = $params['limit']; - $namespaces = $params['namespace']; $suggest = $params['suggest']; - - if ( $params['redirects'] === null ) { - // Backwards compatibility, don't resolve for JSON. - $resolveRedir = $this->getFormat() !== 'json'; - } else { - $resolveRedir = $params['redirects'] === 'resolve'; - } - $results = []; - if ( !$suggest || $this->getConfig()->get( 'EnableOpenSearchSuggest' ) ) { // Open search results may be stored for a very long time $this->getMain()->setCacheMaxAge( $this->getConfig()->get( 'SearchSuggestCacheExpiry' ) ); $this->getMain()->setCacheMode( 'public' ); - $this->search( $search, $limit, $namespaces, $resolveRedir, $results ); + $results = $this->search( $search, $params ); // Allow hooks to populate extracts and images Hooks::run( 'ApiOpenSearchSuggest', [ &$results ] ); @@ -117,21 +110,17 @@ class ApiOpenSearch extends ApiBase { /** * Perform the search - * - * @param string $search Text to search - * @param int $limit Maximum items to return - * @param array $namespaces Namespaces to search - * @param bool $resolveRedir Whether to resolve redirects - * @param array &$results Put results here. Keys have to be integers. 
+ * @param string $search the search query + * @param array $params api request params + * @return array search results. Keys are integers. */ - protected function search( $search, $limit, $namespaces, $resolveRedir, &$results ) { - $searchEngine = MediaWikiServices::getInstance()->newSearchEngine(); - $searchEngine->setLimitOffset( $limit ); - $searchEngine->setNamespaces( $namespaces ); + private function search( $search, array $params ) { + $searchEngine = $this->buildSearchEngine( $params ); $titles = $searchEngine->extractTitles( $searchEngine->completionSearchWithVariants( $search ) ); + $results = []; if ( !$titles ) { - return; + return $results; } // Special pages need unique integer ids in the return list, so we just @@ -139,6 +128,13 @@ class ApiOpenSearch extends ApiBase { // always positive articleIds that non-special pages get. $nextSpecialPageId = -1; + if ( $params['redirects'] === null ) { + // Backwards compatibility, don't resolve for JSON. + $resolveRedir = $this->getFormat() !== 'json'; + } else { + $resolveRedir = $params['redirects'] === 'resolve'; + } + if ( $resolveRedir ) { // Query for redirects $redirects = []; @@ -206,6 +202,8 @@ class ApiOpenSearch extends ApiBase { ]; } } + + return $results; } /** @@ -271,7 +269,10 @@ class ApiOpenSearch extends ApiBase { } public function getAllowedParams() { - return [ + if ( $this->allowedParams !== null ) { + return $this->allowedParams; + } + $this->allowedParams = [ 'search' => null, 'limit' => [ ApiBase::PARAM_DFLT => $this->getConfig()->get( 'OpenSearchDefaultLimit' ), @@ -295,6 +296,20 @@ class ApiOpenSearch extends ApiBase { ], 'warningsaserror' => false, ]; + + $profileParam = $this->buildProfileApiParam( SearchEngine::COMPLETION_PROFILE_TYPE, + 'apihelp-query+prefixsearch-param-profile' ); + if ( $profileParam ) { + $this->allowedParams['profile'] = $profileParam; + } + return $this->allowedParams; + } + + public function getSearchProfileParams() { + if ( isset( 
$this->getAllowedParams()['profile'] ) ) { + return [ SearchEngine::COMPLETION_PROFILE_TYPE => 'profile' ]; + } + return []; } protected function getExamplesMessages() { diff --git a/includes/api/ApiQueryPrefixSearch.php b/includes/api/ApiQueryPrefixSearch.php index 5c50273261..46538e0eb1 100644 --- a/includes/api/ApiQueryPrefixSearch.php +++ b/includes/api/ApiQueryPrefixSearch.php @@ -25,6 +25,11 @@ use MediaWiki\MediaWikiServices; * @ingroup API */ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase { + use SearchApi; + + /** @var array list of api allowed params */ + private $allowedParams; + public function __construct( $query, $moduleName ) { parent::__construct( $query, $moduleName, 'ps' ); } @@ -44,12 +49,9 @@ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase { $params = $this->extractRequestParams(); $search = $params['search']; $limit = $params['limit']; - $namespaces = $params['namespace']; $offset = $params['offset']; - $searchEngine = MediaWikiServices::getInstance()->newSearchEngine(); - $searchEngine->setLimitOffset( $limit + 1, $offset ); - $searchEngine->setNamespaces( $namespaces ); + $searchEngine = $this->buildSearchEngine( $params ); $titles = $searchEngine->extractTitles( $searchEngine->completionSearchWithVariants( $search ) ); if ( $resultPageSet ) { @@ -60,7 +62,7 @@ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase { return $current; } ); if ( count( $titles ) > $limit ) { - $this->setContinueEnumParameter( 'offset', $offset + $params['limit'] ); + $this->setContinueEnumParameter( 'offset', $offset + $limit ); array_pop( $titles ); } $resultPageSet->populateFromTitles( $titles ); @@ -72,7 +74,7 @@ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase { $count = 0; foreach ( $titles as $title ) { if ( ++$count > $limit ) { - $this->setContinueEnumParameter( 'offset', $offset + $params['limit'] ); + $this->setContinueEnumParameter( 'offset', $offset + $limit ); break; } $vals = [ @@ -101,29 +103,45 @@ class 
ApiQueryPrefixSearch extends ApiQueryGeneratorBase { } public function getAllowedParams() { - return [ - 'search' => [ - ApiBase::PARAM_TYPE => 'string', - ApiBase::PARAM_REQUIRED => true, - ], - 'namespace' => [ - ApiBase::PARAM_DFLT => NS_MAIN, - ApiBase::PARAM_TYPE => 'namespace', - ApiBase::PARAM_ISMULTI => true, - ], - 'limit' => [ - ApiBase::PARAM_DFLT => 10, - ApiBase::PARAM_TYPE => 'limit', - ApiBase::PARAM_MIN => 1, - // Non-standard value for compatibility with action=opensearch - ApiBase::PARAM_MAX => 100, - ApiBase::PARAM_MAX2 => 200, - ], - 'offset' => [ - ApiBase::PARAM_DFLT => 0, - ApiBase::PARAM_TYPE => 'integer', - ], - ]; + if ( $this->allowedParams !== null ) { + return $this->allowedParams; + } + $this->allowedParams = [ + 'search' => [ + ApiBase::PARAM_TYPE => 'string', + ApiBase::PARAM_REQUIRED => true, + ], + 'namespace' => [ + ApiBase::PARAM_DFLT => NS_MAIN, + ApiBase::PARAM_TYPE => 'namespace', + ApiBase::PARAM_ISMULTI => true, + ], + 'limit' => [ + ApiBase::PARAM_DFLT => 10, + ApiBase::PARAM_TYPE => 'limit', + ApiBase::PARAM_MIN => 1, + // Non-standard value for compatibility with action=opensearch + ApiBase::PARAM_MAX => 100, + ApiBase::PARAM_MAX2 => 200, + ], + 'offset' => [ + ApiBase::PARAM_DFLT => 0, + ApiBase::PARAM_TYPE => 'integer', + ], + ]; + $profileParam = $this->buildProfileApiParam( SearchEngine::COMPLETION_PROFILE_TYPE, + 'apihelp-query+prefixsearch-param-profile' ); + if ( $profileParam ) { + $this->allowedParams['profile'] = $profileParam; + } + return $this->allowedParams; + } + + public function getSearchProfileParams() { + if ( isset( $this->getAllowedParams()['profile'] ) ) { + return [ SearchEngine::COMPLETION_PROFILE_TYPE => 'profile' ]; + } + return []; } protected function getExamplesMessages() { diff --git a/includes/api/ApiQuerySearch.php b/includes/api/ApiQuerySearch.php index f57d3a30cf..80798a10cd 100644 --- a/includes/api/ApiQuerySearch.php +++ b/includes/api/ApiQuerySearch.php @@ -32,6 +32,10 @@ use 
MediaWiki\MediaWikiServices; * @ingroup API */ class ApiQuerySearch extends ApiQueryGeneratorBase { + use SearchApi; + + /** @var array list of api allowed params */ + private $allowedParams; /** * When $wgSearchType is null, $wgSearchAlternatives[0] is null. Null isn't @@ -61,8 +65,11 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { global $wgContLang; $params = $this->extractRequestParams(); + if ( isset( $params['backend'] ) && $params['backend'] == self::BACKEND_NULL_PARAM ) { + unset( $params['backend'] ); + } + // Extract parameters - $limit = $params['limit']; $query = $params['search']; $what = $params['what']; $interwiki = $params['interwiki']; @@ -80,11 +87,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { } // Create search engine instance and set options - $type = isset( $params['backend'] ) && $params['backend'] != self::BACKEND_NULL_PARAM ? - $params['backend'] : null; - $search = MediaWikiServices::getInstance()->getSearchEngineFactory()->create( $type ); - $search->setLimitOffset( $limit + 1, $params['offset'] ); - $search->setNamespaces( $params['namespace'] ); + $search = $this->buildSearchEngine( $params ); $search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] ); $query = $search->transformSearchTerm( $query ); @@ -152,6 +155,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { $titles = []; $count = 0; $result = $matches->next(); + $limit = $params['limit']; while ( $result ) { if ( ++$count > $limit ) { @@ -301,7 +305,11 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { } public function getAllowedParams() { - $params = [ + if ( $this->allowedParams !== null ) { + return $this->allowedParams; + } + + $this->allowedParams = [ 'search' => [ ApiBase::PARAM_TYPE => 'string', ApiBase::PARAM_REQUIRED => true @@ -368,13 +376,31 @@ class ApiQuerySearch extends ApiQueryGeneratorBase { if ( $alternatives[0] === null ) { $alternatives[0] = self::BACKEND_NULL_PARAM; } - $params['backend'] = [ + 
$this->allowedParams['backend'] = [ ApiBase::PARAM_DFLT => $searchConfig->getSearchType(), ApiBase::PARAM_TYPE => $alternatives, ]; + // @todo: support profile selection when multiple + // backends are available. The solution could be to + // merge all possible profiles and let ApiBase + // subclasses do the check. Making ApiHelp and ApiSandbox + // comprehensive might be more difficult. + } else { + $profileParam = $this->buildProfileApiParam( SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE, + 'apihelp-query+search-param-qiprofile' ); + if ( $profileParam ) { + $this->allowedParams['qiprofile'] = $profileParam; + } } - return $params; + return $this->allowedParams; + } + + public function getSearchProfileParams() { + if ( isset( $this->getAllowedParams()['qiprofile'] ) ) { + return [ SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE => 'qiprofile' ]; + } + return []; } protected function getExamplesMessages() { diff --git a/includes/api/SearchApi.php b/includes/api/SearchApi.php new file mode 100644 index 0000000000..26d7a0ea94 --- /dev/null +++ b/includes/api/SearchApi.php @@ -0,0 +1,116 @@ +getSearchEngineFactory()->create( $backendType ); + } else { + $searchEngine = MediaWikiServices::getInstance()->newSearchEngine(); + } + + $profiles = $searchEngine->getProfiles( $profileType ); + if ( $profiles ) { + $types = []; + $helpMessages = []; + $defaultProfile = null; + foreach ( $profiles as $profile ) { + $types[] = $profile['name']; + if ( isset ( $profile['desc-message'] ) ) { + $helpMessages[$profile['name']] = $profile['desc-message']; + } + if ( !empty( $profile['default'] ) ) { + $defaultProfile = $profile['name']; + } + } + return [ + ApiBase::PARAM_TYPE => $types, + ApiBase::PARAM_HELP_MSG => $helpMsg, + ApiBase::PARAM_HELP_MSG_PER_VALUE => $helpMessages, + ApiBase::PARAM_DFLT => $defaultProfile, + ]; + } + return null; + } + + /** + * Build the search engine to use. 
+ * If $params is provided then the following searchEngine options + * will be set: + * - limit: mandatory + * - offset: optional, if set limit will be incremented by + * one ( to support the continue parameter ) + * - namespace: mandatory + * - search engine profiles defined by SearchApi::getSearchProfileParams() + * @param string[]|null $params API request params (must be sanitized by + * ApiBase::extractRequestParams() before) + * @return SearchEngine the search engine + */ + public function buildSearchEngine( array $params = null ) { + if ( $params != null ) { + $type = isset( $params['backend'] ) ? $params['backend'] : null; + $searchEngine = MediaWikiServices::getInstance()->getSearchEngineFactory()->create( $type ); + $limit = $params['limit']; + $namespaces = $params['namespace']; + $offset = null; + if ( isset( $params['offset'] ) ) { + // If the API supports offset then it probably + // wants to fetch limit+1 so it can check if + // more results are available to properly set + // the continue param + $offset = $params['offset']; + $limit += 1; + } + // Apply the namespace restriction here: API modules + // using this trait rely on buildSearchEngine() to + // set it instead of calling setNamespaces() themselves. + $searchEngine->setNamespaces( $namespaces ); + $searchEngine->setLimitOffset( $limit, $offset ); + foreach ( $this->getSearchProfileParams() as $type => $param ) { + if ( isset( $params[$param] ) ) { + $searchEngine->setFeatureData( $type, $params[$param] ); + } + } + } else { + $searchEngine = MediaWikiServices::getInstance()->newSearchEngine(); + } + return $searchEngine; + } + + /** + * @return string[] the list of supported search profile types. Key is + * the profile type and its associated value is the request param. 
+ */ + abstract public function getSearchProfileParams(); +} diff --git a/includes/api/i18n/en.json b/includes/api/i18n/en.json index 4e9309e699..29bfce997c 100644 --- a/includes/api/i18n/en.json +++ b/includes/api/i18n/en.json @@ -962,6 +962,7 @@ "apihelp-query+prefixsearch-param-limit": "Maximum number of results to return.", "apihelp-query+prefixsearch-param-offset": "Number of results to skip.", "apihelp-query+prefixsearch-example-simple": "Search for page titles beginning with meaning.", + "apihelp-query+prefixsearch-param-profile": "Search profile to use.", "apihelp-query+protectedtitles-description": "List all titles protected from creation.", "apihelp-query+protectedtitles-param-namespace": "Only list titles in these namespaces.", @@ -1082,6 +1083,7 @@ "apihelp-query+search-param-what": "Which type of search to perform.", "apihelp-query+search-param-info": "Which metadata to return.", "apihelp-query+search-param-prop": "Which properties to return:", + "apihelp-query+search-param-qiprofile": "Query independent profile to use (affects ranking algorithm).", "apihelp-query+search-paramvalue-prop-size": "Adds the size of the page in bytes.", "apihelp-query+search-paramvalue-prop-wordcount": "Adds the word count of the page.", "apihelp-query+search-paramvalue-prop-timestamp": "Adds the timestamp of when the page was last edited.", diff --git a/includes/api/i18n/qqq.json b/includes/api/i18n/qqq.json index 6137457c1b..a7284ff197 100644 --- a/includes/api/i18n/qqq.json +++ b/includes/api/i18n/qqq.json @@ -894,6 +894,7 @@ "apihelp-query+prefixsearch-param-limit": "{{doc-apihelp-param|query+prefixsearch|limit}}", "apihelp-query+prefixsearch-param-offset": "{{doc-apihelp-param|query+prefixsearch|offset}}", "apihelp-query+prefixsearch-example-simple": "{{doc-apihelp-example|query+prefixsearch}}", + "apihelp-query+prefixsearch-param-profile": "{{doc-apihelp-param|query+prefixsearch|profile|paramvalues=1}}", "apihelp-query+protectedtitles-description": 
"{{doc-apihelp-description|query+protectedtitles}}", "apihelp-query+protectedtitles-param-namespace": "{{doc-apihelp-param|query+protectedtitles|namespace}}", "apihelp-query+protectedtitles-param-level": "{{doc-apihelp-param|query+protectedtitles|level}}", @@ -1006,6 +1007,7 @@ "apihelp-query+search-param-what": "{{doc-apihelp-param|query+search|what}}", "apihelp-query+search-param-info": "{{doc-apihelp-param|query+search|info}}", "apihelp-query+search-param-prop": "{{doc-apihelp-param|query+search|prop|paramvalues=1}}", + "apihelp-query+search-param-qiprofile": "{{doc-apihelp-param|query+search|qiprofile|paramvalues=1}}", "apihelp-query+search-paramvalue-prop-size": "{{doc-apihelp-paramvalue|query+search|prop|size}}", "apihelp-query+search-paramvalue-prop-wordcount": "{{doc-apihelp-paramvalue|query+search|prop|wordcount}}", "apihelp-query+search-paramvalue-prop-timestamp": "{{doc-apihelp-paramvalue|query+search|prop|timestamp}}", diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index dcef95c8f9..0171ed9c81 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -54,6 +54,12 @@ abstract class SearchEngine { /** @var array Feature values */ protected $features = []; + /** @const string profile type for completionSearch */ + const COMPLETION_PROFILE_TYPE = 'completionSearchProfile'; + + /** @const string profile type for query independent ranking features */ + const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile'; + /** * Perform a full text search query and return a result set. * If full text searches are not supported or disabled, return null. @@ -631,6 +637,24 @@ abstract class SearchEngine { return MediaWikiServices::getInstance()->getSearchEngineConfig()->getSearchTypes(); } + /** + * Get a list of supported profiles. + * Some search engine implementations may expose specific profiles to fine-tune + * its behaviors. 
+ * The profile can be passed as feature data with setFeatureData( $profileType, $profileName ) + * Each profile in the array returned by this function is itself an array with the following keys: + * - name: the profile name to use with setFeatureData + * - desc-message: (optional) the i18n description + * - default: set to true if this profile is the default + * + * @since 1.28 + * @param string $profileType the type of profiles + * @return array|null the list of profiles or null if none available + */ + public function getProfiles( $profileType ) { + return null; + } + } /**