Unify SearchEngine normalizeNamespaces and parseNamespacePrefixes
[lhc/web/wiklou.git] / includes / search / SearchEngine.php
index bd48e21..d46918f 100644 (file)
@@ -77,7 +77,9 @@ abstract class SearchEngine {
         * @return SearchResultSet|Status|null
         */
        public function searchText( $term ) {
-               return $this->doSearchText( $term );
+               return $this->maybePaginate( function () use ( $term ) {
+                       return $this->doSearchText( $term );
+               } );
        }
 
        /**
@@ -132,7 +134,9 @@ abstract class SearchEngine {
         * @return SearchResultSet|null
         */
        public function searchTitle( $term ) {
-               return $this->doSearchTitle( $term );
+               return $this->maybePaginate( function () use ( $term ) {
+                       return $this->doSearchTitle( $term );
+               } );
        }
 
        /**
@@ -146,6 +150,40 @@ abstract class SearchEngine {
                return null;
        }
 
+       /**
+        * Performs an overfetch and shrink operation to determine if
+        * the next page is available for search engines that do not
+        * explicitly implement their own pagination.
+        *
+        * @param Closure $fn Takes no arguments
+        * @return SearchResultSet|Status<SearchResultSet>|null Result of calling $fn
+        */
+       private function maybePaginate( Closure $fn ) {
+               if ( $this instanceof PaginatingSearchEngine ) {
+                       return $fn();
+               }
+               $this->limit++;
+               try {
+                       $resultSetOrStatus = $fn();
+               } finally {
+                       $this->limit--;
+               }
+
+               $resultSet = null;
+               if ( $resultSetOrStatus instanceof SearchResultSet ) {
+                       $resultSet = $resultSetOrStatus;
+               } elseif ( $resultSetOrStatus instanceof Status &&
+                       $resultSetOrStatus->getValue() instanceof SearchResultSet
+               ) {
+                       $resultSet = $resultSetOrStatus->getValue();
+               }
+               if ( $resultSet ) {
+                       $resultSet->shrink( $this->limit );
+               }
+
+               return $resultSetOrStatus;
+       }
+
        /**
         * @since 1.18
         * @param string $feature
@@ -206,6 +244,8 @@ abstract class SearchEngine {
         * search=test&prefix=Main_Page/Archive -> test prefix:Main Page/Archive
         * @param string $term
         * @return string
+        * @deprecated since 1.32; this should now be handled internally by the
+        * search engine.
         */
        public function transformSearchTerm( $term ) {
                return $term;
@@ -364,11 +404,21 @@ abstract class SearchEngine {
         * or namespace names
         *
         * @param string $query
+        * @param bool $withAllKeyword enable support for the "all:" keyword and its
+        * translations, which request a search across all namespaces.
+        * @param bool $withPrefixSearchExtractNamespaceHook call the PrefixSearchExtractNamespace hook
+        *  if classic namespace identification did not match.
         * @return false|array false if no namespace was extracted, an array
         * with the parsed query at index 0 and an array of namespaces at index
         * 1 (or null for all namespaces).
-        */
-       public static function parseNamespacePrefixes( $query ) {
+        * @throws FatalError
+        * @throws MWException
+        */
+       public static function parseNamespacePrefixes(
+               $query,
+               $withAllKeyword = true,
+               $withPrefixSearchExtractNamespaceHook = false
+       ) {
                global $wgContLang;
 
                $parsed = $query;
@@ -376,40 +426,48 @@ abstract class SearchEngine {
                        return false;
                }
                $extractedNamespace = null;
-               $allkeywords = [];
-
-               $allkeywords[] = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
-               // force all: so that we have a common syntax for all the wikis
-               if ( !in_array( 'all:', $allkeywords ) ) {
-                       $allkeywords[] = 'all:';
-               }
 
                $allQuery = false;
-               foreach ( $allkeywords as $kw ) {
-                       if ( strncmp( $query, $kw, strlen( $kw ) ) == 0 ) {
-                               $extractedNamespace = null;
-                               $parsed = substr( $query, strlen( $kw ) );
-                               $allQuery = true;
-                               break;
+               if ( $withAllKeyword ) {
+                       $allkeywords = [];
+
+                       $allkeywords[] = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
+                       // force all: so that we have a common syntax for all the wikis
+                       if ( !in_array( 'all:', $allkeywords ) ) {
+                               $allkeywords[] = 'all:';
+                       }
+
+                       foreach ( $allkeywords as $kw ) {
+                               if ( strncmp( $query, $kw, strlen( $kw ) ) == 0 ) {
+                                       $extractedNamespace = null;
+                                       $parsed = substr( $query, strlen( $kw ) );
+                                       $allQuery = true;
+                                       break;
+                               }
                        }
                }
 
                if ( !$allQuery && strpos( $query, ':' ) !== false ) {
-                       // TODO: should we unify with PrefixSearch::extractNamespace ?
                        $prefix = str_replace( ' ', '_', substr( $query, 0, strpos( $query, ':' ) ) );
                        $index = $wgContLang->getNsIndex( $prefix );
                        if ( $index !== false ) {
                                $extractedNamespace = [ $index ];
                                $parsed = substr( $query, strlen( $prefix ) + 1 );
+                       } elseif ( $withPrefixSearchExtractNamespaceHook ) {
+                               $hookNamespaces = [ NS_MAIN ];
+                               $hookQuery = $query;
+                               Hooks::run( 'PrefixSearchExtractNamespace', [ &$hookNamespaces, &$hookQuery ] );
+                               if ( $hookQuery !== $query ) {
+                                       $parsed = $hookQuery;
+                                       $extractedNamespace = $hookNamespaces;
+                               } else {
+                                       return false;
+                               }
                        } else {
                                return false;
                        }
                }
 
-               if ( trim( $parsed ) == '' ) {
-                       $parsed = $query; // prefix was the whole query
-               }
-
                return [ $parsed, $extractedNamespace ];
        }
 
@@ -467,7 +525,7 @@ abstract class SearchEngine {
         *
         * @todo This isn't ideal, we'd really like to have content-specific handling here
         * @param Title $t Title we're indexing
-        * @param Content $c Content of the page to index
+        * @param Content|null $c Content of the page to index
         * @return string
         */
        public function getTextFromContent( Title $t, Content $c = null ) {
@@ -492,37 +550,30 @@ abstract class SearchEngine {
         * @return string Simplified search string
         */
        protected function normalizeNamespaces( $search ) {
-               // Find a Title which is not an interwiki and is in NS_MAIN
-               $title = Title::newFromText( $search );
-               $ns = $this->namespaces;
-               if ( $title && !$title->isExternal() ) {
-                       $ns = [ $title->getNamespace() ];
-                       $search = $title->getText();
-                       if ( $ns[0] == NS_MAIN ) {
-                               $ns = $this->namespaces; // no explicit prefix, use default namespaces
-                               Hooks::run( 'PrefixSearchExtractNamespace', [ &$ns, &$search ] );
-                       }
-               } else {
-                       $title = Title::newFromText( $search . 'Dummy' );
-                       if ( $title && $title->getText() == 'Dummy'
-                                       && $title->getNamespace() != NS_MAIN
-                                       && !$title->isExternal()
-                       ) {
-                               $ns = [ $title->getNamespace() ];
-                               $search = '';
-                       } else {
-                               Hooks::run( 'PrefixSearchExtractNamespace', [ &$ns, &$search ] );
-                       }
+               $queryAndNs = self::parseNamespacePrefixes( $search, false, true );
+               if ( $queryAndNs !== false ) {
+                       $this->setNamespaces( $queryAndNs[1] );
+                       return $queryAndNs[0];
                }
-
-               $ns = array_map( function ( $space ) {
-                       return $space == NS_MEDIA ? NS_FILE : $space;
-               }, $ns );
-
-               $this->setNamespaces( $ns );
                return $search;
        }
 
+       /**
+        * Perform an overfetch of completion search results. This allows
+        * determining if another page of results is available.
+        *
+        * @param string $search
+        * @return SearchSuggestionSet
+        */
+       protected function completionSearchBackendOverfetch( $search ) {
+               $this->limit++;
+               try {
+                       return $this->completionSearchBackend( $search );
+               } finally {
+                       $this->limit--;
+               }
+       }
+
        /**
         * Perform a completion search.
         * Does not resolve namespaces and does not check variants.
@@ -560,7 +611,8 @@ abstract class SearchEngine {
                        return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
                }
                $search = $this->normalizeNamespaces( $search );
-               return $this->processCompletionResults( $search, $this->completionSearchBackend( $search ) );
+               $suggestions = $this->completionSearchBackendOverfetch( $search );
+               return $this->processCompletionResults( $search, $suggestions );
        }
 
        /**
@@ -574,8 +626,8 @@ abstract class SearchEngine {
                }
                $search = $this->normalizeNamespaces( $search );
 
-               $results = $this->completionSearchBackend( $search );
-               $fallbackLimit = $this->limit - $results->getSize();
+               $results = $this->completionSearchBackendOverfetch( $search );
+               $fallbackLimit = 1 + $this->limit - $results->getSize();
                if ( $fallbackLimit > 0 ) {
                        global $wgContLang;
 
@@ -614,15 +666,26 @@ abstract class SearchEngine {
         * @return SearchSuggestionSet
         */
        protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
+               // We over-fetched to determine pagination. Shrink back down if we have extra results
+               // and mark if pagination is possible
+               $suggestions->shrink( $this->limit );
+
                $search = trim( $search );
                // preload the titles with LinkBatch
-               $titles = $suggestions->map( function ( SearchSuggestion $sugg ) {
+               $lb = new LinkBatch( $suggestions->map( function ( SearchSuggestion $sugg ) {
                        return $sugg->getSuggestedTitle();
-               } );
-               $lb = new LinkBatch( $titles );
+               } ) );
                $lb->setCaller( __METHOD__ );
                $lb->execute();
 
+               $diff = $suggestions->filter( function ( SearchSuggestion $sugg ) {
+                       return $sugg->getSuggestedTitle()->isKnown();
+               } );
+               if ( $diff > 0 ) {
+                       MediaWikiServices::getInstance()->getStatsdDataFactory()
+                               ->updateCount( 'search.completion.missing', $diff );
+               }
+
                $results = $suggestions->map( function ( SearchSuggestion $sugg ) {
                        return $sugg->getSuggestedTitle()->getPrefixedText();
                } );
@@ -830,7 +893,6 @@ abstract class SearchEngine {
                $setAugmentors = [];
                $rowAugmentors = [];
                Hooks::run( "SearchResultsAugment", [ &$setAugmentors, &$rowAugmentors ] );
-
                if ( !$setAugmentors && !$rowAugmentors ) {
                        // We're done here
                        return;