Merge OpenSearchXml extension into core
authorBrad Jorsch <bjorsch@wikimedia.org>
Wed, 5 Nov 2014 22:16:43 +0000 (17:16 -0500)
committerKunal Mehta <legoktm@gmail.com>
Thu, 27 Nov 2014 05:07:22 +0000 (21:07 -0800)
There's really no reason for the extension to exist separately from
core, and merging it reduces the risks of bitrot in both the extension
(lots of deprecated functions there) and core (missing integration with
PageImages and TextExtracts, for example).

Change-Id: Ie0ab90902ede9499879402290006466efba479e9

RELEASE-NOTES-1.25
docs/hooks.txt
includes/DefaultSettings.php
includes/api/ApiOpenSearch.php
includes/api/i18n/en.json
includes/api/i18n/qqq.json
includes/search/SearchEngine.php
opensearch_desc.php

index bc35e5c..7181e11 100644 (file)
@@ -23,6 +23,10 @@ production.
   See StartProfiler.sample for details.
 * $wgMangleFlashPolicy was added to make MediaWiki's mangling of anything that
   might be a flash policy directive configurable.
+* ApiOpenSearch now supports XML output. The OpenSearchXml extension should no
+  longer be used. If extracts and page images are desired, the TextExtracts and
+  PageImages extensions are required.
+* $wgOpenSearchTemplate is deprecated in favor of $wgOpenSearchTemplates.
 
 === New features in 1.25 ===
 * (bug 62861) Updated plural rules to CLDR 26. Includes incompatible changes
@@ -52,6 +56,9 @@ production.
 * The debug logging internals have been overhauled, and are now using the
   PSR-3 interfaces.
 * Update CSSJanus to v1.1.1.
+* Added a hook, "ApiOpenSearchSuggest", to allow extensions to provide extracts
+  and images for ApiOpenSearch output. The semantics are identical to the
+  "OpenSearchXml" hook provided by the OpenSearchXml extension.
 
 === Bug fixes in 1.25 ===
 * (bug 71003) No additional code will be generated to try to load CSS-embedded
@@ -106,6 +113,9 @@ production.
 * Query page data for generator=search and generator=prefixsearch will now
   include an "index" field, which may be used by the client for sorting the
   search results.
+* ApiOpenSearch now supports XML output.
+* ApiOpenSearch will now output descriptions and URLs as array indexes 2 and 3
+  in JSON format.
 
 === Action API internal changes in 1.25 ===
 * ApiHelp has been rewritten to support i18n and paginated HTML output.
index 1e4be55..0146b86 100644 (file)
@@ -407,6 +407,18 @@ $module: ApiBase Module object
 &$help: Array of HTML strings to be joined for the output.
 $options: Array Options passed to ApiHelp::getHelp
 
+'ApiOpenSearchSuggest': Called when constructing the OpenSearch results. Hooks
+can alter or append to the array.
+&$results: Array of associative arrays, indexed by page ID. The keys of each
+  associative array are:
+  - title: Title object.
+  - redirect from: Title or null.
+  - extract: Description for this result.
+  - extract trimmed: If truthy, the extract will not be trimmed to
+    $wgOpenSearchDescriptionLength.
+  - image: Thumbnail for this result. Value is an array with subkeys 'source'
+    (url), 'width', 'height', 'alt', 'align'.
+  - url: URL for the given title.
+
 'APIQueryAfterExecute': After calling the execute() method of an
 action=query submodule. Use this to extend core API modules.
 &$module: Module object
index f83c402..85f25c2 100644 (file)
@@ -5528,9 +5528,24 @@ $wgSearchHighlightBoundaries = '[\p{Z}\p{P}\p{C}]';
  * PHP wrapper to avoid firing up mediawiki for every keystroke
  *
  * Placeholders: {searchTerms}
+ *
+ * @deprecated since 1.25 Use $wgOpenSearchTemplates['application/x-suggestions+json'] instead
  */
 $wgOpenSearchTemplate = false;
 
+/**
+ * Templates for OpenSearch suggestions, defaults to API action=opensearch
+ *
+ * Sites with heavy load would typically have these point to a custom
+ * PHP wrapper to avoid firing up mediawiki for every keystroke
+ *
+ * Placeholders: {searchTerms}
+ */
+$wgOpenSearchTemplates = array(
+       'application/x-suggestions+json' => false,
+       'application/x-suggestions+xml' => false,
+);
+
 /**
  * Enable OpenSearch suggestions requested by MediaWiki. Set this to
  * false if you've disabled scripts that use api?action=opensearch and
@@ -5545,6 +5560,11 @@ $wgEnableOpenSearchSuggest = true;
  */
 $wgOpenSearchDefaultLimit = 10;
 
+/**
+ * Minimum length of the extract in <Description>. The extract is cut at the
+ * first end of sentence found at or after this length.
+ */
+$wgOpenSearchDescriptionLength = 100;
+
 /**
  * Expiry time for search suggestion responses
  */
index 8fa495c..4a9e216 100644 (file)
@@ -3,6 +3,8 @@
  * Created on Oct 13, 2006
  *
  * Copyright © 2006 Yuri Astrakhan "<Firstname><Lastname>@gmail.com"
+ * Copyright © 2008 Brion Vibber <brion@wikimedia.org>
+ * Copyright © 2014 Brad Jorsch <bjorsch@wikimedia.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  */
 class ApiOpenSearch extends ApiBase {
 
+       private $format = null;
+       private $fm = null;
+
        /**
-        * Override built-in handling of format parameter.
-        * Only JSON is supported.
+        * Get the output format
         *
-        * @return ApiFormatBase
+        * @return string
         */
-       public function getCustomPrinter() {
-               $params = $this->extractRequestParams();
-               $format = $params['format'];
-               $allowed = array( 'json', 'jsonfm' );
-               if ( in_array( $format, $allowed ) ) {
-                       return $this->getMain()->createPrinterByName( $format );
+       protected function getFormat() {
+               // Already resolved by an earlier call; reuse the stored value.
+               if ( $this->format !== null ) {
+                       return $this->format;
+               }
+
+               $request = $this->extractRequestParams();
+               $requested = $request['format'];
+
+               // Fall back to the declared default for anything not in the allowed list.
+               $spec = $this->getAllowedParams();
+               $formatSpec = $spec['format'];
+               if ( !in_array( $requested, $formatSpec[ApiBase::PARAM_TYPE] ) ) {
+                       $requested = $formatSpec[ApiBase::PARAM_DFLT];
+               }
+
+               // Split a trailing "fm" off the name: $this->format holds the base
+               // format, $this->fm holds either 'fm' or the empty string.
+               $hasFmSuffix = substr( $requested, -2 ) === 'fm';
+               $this->fm = $hasFmSuffix ? 'fm' : '';
+               $this->format = $hasFmSuffix ? substr( $requested, 0, -2 ) : $requested;
+
+               return $this->format;
+       }
+
+       /**
+        * Create the printer for the format resolved by getFormat().
+        *
+        * The 'fm' suffix recorded by getFormat() is appended to the printer name.
+        * For XML, the printer's root element is set to 'SearchSuggestion'.
+        *
+        * @return ApiFormatBase
+        */
+       public function getCustomPrinter() {
+               switch( $this->getFormat() ) {
+                       case 'json':
+                               return $this->getMain()->createPrinterByName( 'json' . $this->fm );
 
-               return $this->getMain()->createPrinterByName( $allowed[0] );
+                       case 'xml':
+                               $printer = $this->getMain()->createPrinterByName( 'xml' . $this->fm );
+                               $printer->setRootElement( 'SearchSuggestion' );
+                               return $printer;
+
+                       default:
+                               // Not expected with the formats listed in getAllowedParams(),
+                               // which all reduce to 'json' or 'xml'.
+                               ApiBase::dieDebug( __METHOD__, "Unsupported format '{$this->getFormat()}'" );
+               }
        }
 
        public function execute() {
@@ -51,21 +80,166 @@ class ApiOpenSearch extends ApiBase {
                $namespaces = $params['namespace'];
                $suggest = $params['suggest'];
 
-               // Some script that was loaded regardless of wgEnableOpenSearchSuggest, likely cached.
-               if ( $suggest && !$this->getConfig()->get( 'EnableOpenSearchSuggest' ) ) {
-                       $searches = array();
+               if ( $params['redirects'] === null ) {
+                       // Backwards compatibility, don't resolve for JSON.
+                       $resolveRedir = $this->getFormat() !== 'json';
                } else {
+                       $resolveRedir = $params['redirects'] === 'resolve';
+               }
+
+               $results = array();
+
+               if ( !$suggest || $this->getConfig()->get( 'EnableOpenSearchSuggest' ) ) {
                        // Open search results may be stored for a very long time
                        $this->getMain()->setCacheMaxAge( $this->getConfig()->get( 'SearchSuggestCacheExpiry' ) );
                        $this->getMain()->setCacheMode( 'public' );
+                       $this->search( $search, $limit, $namespaces, $resolveRedir, $results );
+
+                       // Allow hooks to populate extracts and images
+                       wfRunHooks( 'ApiOpenSearchSuggest', array( &$results ) );
 
-                       $searcher = new StringPrefixSearch;
-                       $searches = $searcher->searchWithVariants( $search, $limit, $namespaces );
+                       // Trim extracts, if necessary
+                       $length = $this->getConfig()->get( 'OpenSearchDescriptionLength' );
+                       foreach ( $results as &$r ) {
+                               if ( is_string( $r['extract'] ) && !$r['extract trimmed'] ) {
+                                       $r['extract'] = self::trimExtract( $r['extract'], $length );
+                               }
+                       }
                }
-               // Set top level elements
+
+               // Populate result object
+               $this->populateResult( $search, $results );
+       }
+
+       /**
+        * Perform the search
+        *
+        * Finds titles matching the search text and appends one entry per distinct
+        * page to $results, in the shape handed to the ApiOpenSearchSuggest hook.
+        * Entries are keyed by page ID. A title whose page ID is 0 gets a unique
+        * negative key instead, so that several such titles do not overwrite each
+        * other.
+        *
+        * @param string $search Text to search
+        * @param int $limit Maximum items to return
+        * @param array $namespaces Namespaces to search
+        * @param bool $resolveRedir Whether to resolve redirects
+        * @param array &$results Put results here
+        */
+       protected function search( $search, $limit, $namespaces, $resolveRedir, &$results ) {
+               // Find matching titles as Title objects
+               $searcher = new TitlePrefixSearch;
+               $titles = $searcher->searchWithVariants( $search, $limit, $namespaces );
+               if ( !$titles ) {
+                       // No matches: nothing to add, and no titles to build a redirect
+                       // query from.
+                       return;
+               }
+
+               // [namespace][dbkey] => array( target namespace, target dbkey ).
+               // Stays empty when redirects are returned as-is, so the loop below
+               // then keeps every title unchanged.
+               $redirects = array();
+               if ( $resolveRedir ) {
+                       // Query for redirects
+                       $db = $this->getDb();
+                       $lb = new LinkBatch( $titles );
+                       $res = $db->select(
+                               array( 'page', 'redirect' ),
+                               array( 'page_namespace', 'page_title', 'rd_namespace', 'rd_title' ),
+                               array(
+                                       'rd_from = page_id',
+                                       'rd_interwiki IS NULL OR rd_interwiki = ' . $db->addQuotes( '' ),
+                                       $lb->constructSet( 'page', $db ),
+                               ),
+                               __METHOD__
+                       );
+                       foreach ( $res as $row ) {
+                               $redirects[$row->page_namespace][$row->page_title] =
+                                       array( $row->rd_namespace, $row->rd_title );
+                       }
+               }
+
+               $seen = array();
+               $placeholderId = -1;
+               foreach ( $titles as $title ) {
+                       $ns = $title->getNamespace();
+                       $dbkey = $title->getDBkey();
+                       $from = null;
+                       if ( isset( $redirects[$ns][$dbkey] ) ) {
+                               // Bypass the redirect, remembering where we came from
+                               $target = $redirects[$ns][$dbkey];
+                               list( $ns, $dbkey ) = $target;
+                               $from = $title;
+                               $title = Title::makeTitle( $ns, $dbkey );
+                       }
+                       if ( isset( $seen[$ns][$dbkey] ) ) {
+                               // Several matches led to the same page; keep the first
+                               continue;
+                       }
+                       $seen[$ns][$dbkey] = true;
+
+                       $id = $title->getArticleID();
+                       if ( !$id ) {
+                               // No page ID: use the next free negative key rather than
+                               // piling every such title onto key 0.
+                               while ( isset( $results[$placeholderId] ) ) {
+                                       $placeholderId--;
+                               }
+                               $id = $placeholderId;
+                               $placeholderId--;
+                       }
+
+                       $results[$id] = array(
+                               'title' => $title,
+                               'redirect from' => $from,
+                               'extract' => false,
+                               'extract trimmed' => false,
+                               'image' => false,
+                               'url' => wfExpandUrl( $title->getFullUrl(), PROTO_CURRENT ),
+                       );
+               }
+       }
+
+       /**
+        * Add the search results to the API result, laid out for the current format.
+        *
+        * @param string $search Search text, echoed back in the output
+        * @param array &$results Result entries, each using the keys documented for
+        *  the ApiOpenSearchSuggest hook ('title', 'extract', 'image', 'url', ...)
+        */
+       protected function populateResult( $search, &$results ) {
                $result = $this->getResult();
-               $result->addValue( null, 0, $search );
-               $result->addValue( null, 1, $searches );
+
+               switch ( $this->getFormat() ) {
+                       case 'json':
+                               // http://www.opensearch.org/Specifications/OpenSearch/Extensions/Suggestions/1.1
+                               // Four top-level values: the search text at index 0, then three
+                               // lists (titles, descriptions, URLs) built in the same order.
+                               $result->addValue( null, 0, strval( $search ) );
+                               $terms = array();
+                               $descriptions = array();
+                               $urls = array();
+                               foreach ( $results as $r ) {
+                                       $terms[] = $r['title']->getPrefixedText();
+                                       $descriptions[] = strval( $r['extract'] );
+                                       $urls[] = $r['url'];
+                               }
+                               $result->addValue( null, 1, $terms );
+                               $result->addValue( null, 2, $descriptions );
+                               $result->addValue( null, 3, $urls );
+                               break;
+
+                       case 'xml':
+                               // http://msdn.microsoft.com/en-us/library/cc891508%28v=vs.85%29.aspx
+                               // Only these keys of a result's 'image' array are copied to the output
+                               $imageKeys = array(
+                                       'source' => true,
+                                       'alt' => true,
+                                       'width' => true,
+                                       'height' => true,
+                                       'align' => true,
+                               );
+                               $items = array();
+                               foreach ( $results as $r ) {
+                                       $item = array();
+                                       $result->setContent( $item, $r['title']->getPrefixedText(), 'Text' );
+                                       $result->setContent( $item, $r['url'], 'Url' );
+                                       // Description is left out unless there is a non-empty extract
+                                       if ( is_string( $r['extract'] ) && $r['extract'] !== '' ) {
+                                               $result->setContent( $item, $r['extract'], 'Description' );
+                                       }
+                                       // Image is left out unless the image array has a 'source'
+                                       if ( is_array( $r['image'] ) && isset( $r['image']['source'] ) ) {
+                                               $item['Image'] = array_intersect_key( $r['image'], $imageKeys );
+                                       }
+                                       $items[] = $item;
+                               }
+                               $result->setIndexedTagName( $items, 'Item' );
+                               $result->addValue( null, 'version', '2.0' );
+                               $result->addValue( null, 'xmlns', 'http://opensearch.org/searchsuggest2' );
+                               $query = array();
+                               $result->setContent( $query, strval( $search ) );
+                               $result->addValue( null, 'Query', $query );
+                               $result->addValue( null, 'Section', $items );
+                               break;
+
+                       default:
+                               ApiBase::dieDebug( __METHOD__, "Unsupported format '{$this->getFormat()}'" );
+               }
        }
 
        public function getAllowedParams() {
@@ -84,9 +258,12 @@ class ApiOpenSearch extends ApiBase {
                                ApiBase::PARAM_ISMULTI => true
                        ),
                        'suggest' => false,
+                       'redirects' => array(
+                               ApiBase::PARAM_TYPE => array( 'return', 'resolve' ),
+                       ),
                        'format' => array(
                                ApiBase::PARAM_DFLT => 'json',
-                               ApiBase::PARAM_TYPE => array( 'json', 'jsonfm' ),
+                               ApiBase::PARAM_TYPE => array( 'json', 'jsonfm', 'xml', 'xmlfm' ),
                        )
                );
        }
@@ -101,4 +278,72 @@ class ApiOpenSearch extends ApiBase {
        public function getHelpUrls() {
                return 'https://www.mediawiki.org/wiki/API:Opensearch';
        }
+
+       /**
+        * Trim an extract to a sensible length.
+        *
+        * The result is the shortest prefix of $text that is at least $length
+        * characters long and ends at a sentence terminator. If no such prefix
+        * exists on the first line, the whole first line is returned.
+        *
+        * Adapted from Extension:OpenSearchXml, which adapted it from
+        * Extension:ActiveAbstract.
+        *
+        * @param string $text
+        * @param int $length Target length; actual result will continue to the end of a sentence.
+        * @return string
+        */
+       public static function trimExtract( $text, $length ) {
+               // The pattern embeds the target length, so cache one pattern per
+               // length rather than a single pattern for all calls.
+               static $regexes = array();
+
+               // Keep the interpolated quantifier a valid non-negative integer
+               $length = max( 0, (int)$length );
+
+               if ( !isset( $regexes[$length] ) ) {
+                       // Non-ASCII terminators are written as \x{...} escapes (valid
+                       // under the /u modifier) so they cannot be confused with the
+                       // regex metacharacters '.' and '?'.
+                       $endchars = array(
+                               '([^\d])\.\s', '\!\s', '\?\s', // regular ASCII
+                               '\x{3002}', // full-width ideographic full-stop
+                               '\x{FF0E}', '\x{FF01}', '\x{FF1F}', // double-width roman forms
+                               '\x{FF61}', // half-width ideographic full stop
+                       );
+                       $endgroup = implode( '|', $endchars );
+                       $end = "(?:$endgroup)";
+                       $sentence = ".{{$length},}?$end+";
+                       $regexes[$length] = "/^($sentence)/u";
+               }
+
+               $matches = array();
+               if ( preg_match( $regexes[$length], $text, $matches ) ) {
+                       return trim( $matches[1] );
+               } else {
+                       // Just return the first line
+                       $lines = explode( "\n", $text );
+                       return trim( $lines[0] );
+               }
+       }
+
+       /**
+        * Fetch the template for a type.
+        *
+        * Order of precedence: a non-empty $wgOpenSearchTemplates entry for the
+        * type, then the deprecated $wgOpenSearchTemplate (JSON only), then a URL
+        * pointing at this API module.
+        *
+        * @param string $type MIME type
+        * @return string
+        * @throws MWException If nothing is configured for $type and it is not one
+        *  of the types this module can serve
+        */
+       public static function getOpenSearchTemplate( $type ) {
+               global $wgOpenSearchTemplate, $wgOpenSearchTemplates, $wgCanonicalServer;
+
+               // Explicit per-type configuration wins, so that callers of this method
+               // (including the deprecated SearchEngine wrapper) see the same template
+               // as the OpenSearch description document.
+               if ( !empty( $wgOpenSearchTemplates[$type] ) ) {
+                       return $wgOpenSearchTemplates[$type];
+               }
+
+               // Deprecated single-template setting; it only ever covered JSON
+               if ( $wgOpenSearchTemplate && $type === 'application/x-suggestions+json' ) {
+                       return $wgOpenSearchTemplate;
+               }
+
+               $ns = implode( '|', SearchEngine::defaultNamespaces() );
+               if ( !$ns ) {
+                       $ns = "0";
+               }
+
+               switch ( $type ) {
+                       case 'application/x-suggestions+json':
+                               return $wgCanonicalServer . wfScript( 'api' )
+                                       . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
+
+                       case 'application/x-suggestions+xml':
+                               return $wgCanonicalServer . wfScript( 'api' )
+                                       . '?action=opensearch&format=xml&search={searchTerms}&namespace=' . $ns;
+
+                       default:
+                               throw new MWException( __METHOD__ . ": Unknown type '$type'" );
+               }
+       }
 }
index 17b79ff..bb5e5db 100644 (file)
        "apihelp-opensearch-param-limit": "Maximum number of results to return.",
        "apihelp-opensearch-param-namespace": "Namespaces to search.",
        "apihelp-opensearch-param-suggest": "Do nothing if [https://www.mediawiki.org/wiki/Manual:$wgEnableOpenSearchSuggest $wgEnableOpenSearchSuggest] is false.",
+       "apihelp-opensearch-param-redirects": "How to handle redirects:\n;return:Return the redirect itself.\n;resolve:Return the target page. May return fewer than $1limit results.\nFor historical reasons, the default is \"return\" for $1format=json and \"resolve\" for other formats.",
        "apihelp-opensearch-param-format": "The format of the output.",
        "apihelp-opensearch-example-te": "Find pages beginning with \"Te\"",
 
index fb368e3..8382edc 100644 (file)
        "apihelp-opensearch-param-limit": "{{doc-apihelp-param|opensearch|limit}}",
        "apihelp-opensearch-param-namespace": "{{doc-apihelp-param|opensearch|namespace}}",
        "apihelp-opensearch-param-suggest": "{{doc-apihelp-param|opensearch|suggest}}",
+       "apihelp-opensearch-param-redirects": "{{doc-apihelp-param|opensearch|redirects}}",
        "apihelp-opensearch-param-format": "{{doc-apihelp-param|opensearch|format}}",
        "apihelp-opensearch-example-te": "{{doc-apihelp-example|opensearch}}",
        "apihelp-options-description": "{{doc-apihelp-description|options}}",
index 0eb87e4..c6cbfbe 100644 (file)
@@ -500,22 +500,12 @@ class SearchEngine {
        /**
         * Get OpenSearch suggestion template
         *
+        * @deprecated since 1.25 Use ApiOpenSearch::getOpenSearchTemplate() instead
         * @return string
         */
        public static function getOpenSearchTemplate() {
-               global $wgOpenSearchTemplate, $wgCanonicalServer;
-
-               if ( $wgOpenSearchTemplate ) {
-                       return $wgOpenSearchTemplate;
-               } else {
-                       $ns = implode( '|', SearchEngine::defaultNamespaces() );
-                       if ( !$ns ) {
-                               $ns = "0";
-                       }
-
-                       return $wgCanonicalServer . wfScript( 'api' )
-                               . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
-               }
+               wfDeprecated( __METHOD__, '1.25' );
+               return ApiOpenSearch::getOpenSearchTemplate( 'application/x-suggestions+json' );
        }
 
        /**
index ecd5051..5e5e35d 100644 (file)
@@ -78,13 +78,18 @@ $urls[] = array(
        'method' => 'get',
        'template' => $searchPage->getCanonicalURL( 'search={searchTerms}' ) );
 
-if ( $wgEnableAPI ) {
-       // JSON interface for search suggestions.
-       // Supported in Firefox 2 and later.
-       $urls[] = array(
-               'type' => 'application/x-suggestions+json',
-               'method' => 'get',
-               'template' => SearchEngine::getOpenSearchTemplate() );
+// Advertise one suggestion URL per configured MIME type. An unset template
+// falls back to the API-based one when the API is enabled; a type that still
+// has no template is not advertised.
+foreach ( $wgOpenSearchTemplates as $type => $template ) {
+       if ( !$template && $wgEnableAPI ) {
+               $template = ApiOpenSearch::getOpenSearchTemplate( $type );
+       }
+
+       if ( !$template ) {
+               continue;
+       }
+
+       $urls[] = array(
+               'type' => $type,
+               'method' => 'get',
+               'template' => $template,
+       );
 }
 
 // Allow hooks to override the suggestion URL settings in a more