From: Umherirrender Date: Wed, 27 Mar 2019 20:40:33 +0000 (+0100) Subject: Move PrefixSearch classes to own files X-Git-Tag: 1.34.0-rc.0~2267^2 X-Git-Url: http://git.cyclocoop.org/%7B%7B%20url_for%28%27admin_vote_add%27%29%20%7D%7D?a=commitdiff_plain;h=8d5a8fb1819925d955b6f23f610ecf9f90d5bb54;p=lhc%2Fweb%2Fwiklou.git Move PrefixSearch classes to own files Move all into search subfolder Change-Id: I4097b9745d22545afe6761da1fa67cf98af5b811 --- diff --git a/.phpcs.xml b/.phpcs.xml index d1e54a706c..33b66bf778 100644 --- a/.phpcs.xml +++ b/.phpcs.xml @@ -228,7 +228,6 @@ */includes/parser/Preprocessor_Hash\.php */includes/parser/Preprocessor\.php */includes/PathRouter\.php - */includes/PrefixSearch\.php */includes/profiler/SectionProfiler\.php */includes/search/SearchEngine\.php */includes/specialpage/LoginSignupSpecialPage\.php diff --git a/autoload.php b/autoload.php index 528b7fe372..5af1b1b8b3 100644 --- a/autoload.php +++ b/autoload.php @@ -1138,7 +1138,7 @@ $wgAutoloadLocalClasses = [ 'PreferencesForm' => __DIR__ . '/includes/specials/forms/PreferencesFormLegacy.php', 'PreferencesFormLegacy' => __DIR__ . '/includes/specials/forms/PreferencesFormLegacy.php', 'PreferencesFormOOUI' => __DIR__ . '/includes/specials/forms/PreferencesFormOOUI.php', - 'PrefixSearch' => __DIR__ . '/includes/PrefixSearch.php', + 'PrefixSearch' => __DIR__ . '/includes/search/PrefixSearch.php', 'PrefixingStatsdDataFactoryProxy' => __DIR__ . '/includes/libs/stats/PrefixingStatsdDataFactoryProxy.php', 'PreprocessDump' => __DIR__ . '/maintenance/preprocessDump.php', 'Preprocessor' => __DIR__ . '/includes/parser/Preprocessor.php', @@ -1450,7 +1450,7 @@ $wgAutoloadLocalClasses = [ 'StorageTypeStats' => __DIR__ . '/maintenance/storage/storageTypeStats.php', 'StoreFileOp' => __DIR__ . '/includes/libs/filebackend/fileop/StoreFileOp.php', 'StreamFile' => __DIR__ . '/includes/StreamFile.php', - 'StringPrefixSearch' => __DIR__ . '/includes/PrefixSearch.php', + 'StringPrefixSearch' => __DIR__ . '/includes/search/StringPrefixSearch.php', 'StringUtils' => __DIR__ . '/includes/libs/StringUtils.php', 'StripState' => __DIR__ . '/includes/parser/StripState.php', 'StubObject' => __DIR__ . '/includes/StubObject.php', @@ -1491,7 +1491,7 @@ $wgAutoloadLocalClasses = [ 'TitleCleanup' => __DIR__ . '/maintenance/cleanupTitles.php', 'TitleFormatter' => __DIR__ . '/includes/title/TitleFormatter.php', 'TitleParser' => __DIR__ . '/includes/title/TitleParser.php', - 'TitlePrefixSearch' => __DIR__ . '/includes/PrefixSearch.php', + 'TitlePrefixSearch' => __DIR__ . '/includes/search/TitlePrefixSearch.php', 'TitleValue' => __DIR__ . '/includes/title/TitleValue.php', 'TrackBlobs' => __DIR__ . '/maintenance/storage/trackBlobs.php', 'TrackingCategories' => __DIR__ . '/includes/TrackingCategories.php', diff --git a/includes/PrefixSearch.php b/includes/PrefixSearch.php deleted file mode 100644 index 7bc7a084a5..0000000000 --- a/includes/PrefixSearch.php +++ /dev/null @@ -1,365 +0,0 @@ -search( $search, $limit, $namespaces, $offset ); - } - - /** - * Do a prefix search of titles and return a list of matching page names. - * - * @param string $search - * @param int $limit - * @param array $namespaces Used if query is not explicitly prefixed - * @param int $offset How many results to offset from the beginning - * @return array Array of strings or Title objects - */ - public function search( $search, $limit, $namespaces = [], $offset = 0 ) { - $search = trim( $search ); - if ( $search == '' ) { - return []; // Return empty result - } - - $hasNamespace = SearchEngine::parseNamespacePrefixes( $search, false, true ); - if ( $hasNamespace !== false ) { - list( $search, $namespaces ) = $hasNamespace; - } - - return $this->searchBackend( $namespaces, $search, $limit, $offset ); - } - - /** - * Do a prefix search for all possible variants of the prefix - * @param string $search - * @param int $limit - * @param array $namespaces - * @param int $offset How many results to offset from the beginning - * - * @return array - */ - public function searchWithVariants( $search, $limit, array $namespaces, $offset = 0 ) { - $searches = $this->search( $search, $limit, $namespaces, $offset ); - - // if the content language has variants, try to retrieve fallback results - $fallbackLimit = $limit - count( $searches ); - if ( $fallbackLimit > 0 ) { - $fallbackSearches = MediaWikiServices::getInstance()->getContentLanguage()-> - autoConvertToAllVariants( $search ); - $fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] ); - - foreach ( $fallbackSearches as $fbs ) { - $fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces ); - $searches = array_merge( $searches, $fallbackSearchResult ); - $fallbackLimit -= count( $fallbackSearchResult ); - - if ( $fallbackLimit == 0 ) { - break; - } - } - } - return $searches; - } - - /** - * When implemented in a descendant class, receives an array of Title objects and returns - * either an unmodified array or an array of strings corresponding to titles passed to it. - * - * @param array $titles - * @return array - */ - abstract protected function titles( array $titles ); - - /** - * When implemented in a descendant class, receives an array of titles as strings and returns - * either an unmodified array or an array of Title objects corresponding to strings received. - * - * @param array $strings - * - * @return array - */ - abstract protected function strings( array $strings ); - - /** - * Do a prefix search of titles and return a list of matching page names. - * @param array $namespaces - * @param string $search - * @param int $limit - * @param int $offset How many results to offset from the beginning - * @return array Array of strings - */ - protected function searchBackend( $namespaces, $search, $limit, $offset ) { - if ( count( $namespaces ) == 1 ) { - $ns = $namespaces[0]; - if ( $ns == NS_MEDIA ) { - $namespaces = [ NS_FILE ]; - } elseif ( $ns == NS_SPECIAL ) { - return $this->titles( $this->specialSearch( $search, $limit, $offset ) ); - } - } - $srchres = []; - if ( Hooks::run( - 'PrefixSearchBackend', - [ $namespaces, $search, $limit, &$srchres, $offset ] - ) ) { - return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit, $offset ) ); - } - return $this->strings( - $this->handleResultFromHook( $srchres, $namespaces, $search, $limit, $offset ) ); - } - - private function handleResultFromHook( $srchres, $namespaces, $search, $limit, $offset ) { - if ( $offset === 0 ) { - // Only perform exact db match if offset === 0 - // This is still far from perfect but at least we avoid returning the - // same title afain and again when the user is scrolling with a query - // that matches a title in the db. - $rescorer = new SearchExactMatchRescorer(); - $srchres = $rescorer->rescore( $search, $namespaces, $srchres, $limit ); - } - return $srchres; - } - - /** - * Prefix search special-case for Special: namespace. - * - * @param string $search Term - * @param int $limit Max number of items to return - * @param int $offset Number of items to offset - * @return array - */ - protected function specialSearch( $search, $limit, $offset ) { - $searchParts = explode( '/', $search, 2 ); - $searchKey = $searchParts[0]; - $subpageSearch = $searchParts[1] ?? null; - - // Handle subpage search separately. - $spFactory = MediaWikiServices::getInstance()->getSpecialPageFactory(); - if ( $subpageSearch !== null ) { - // Try matching the full search string as a page name - $specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey ); - if ( !$specialTitle ) { - return []; - } - $special = $spFactory->getPage( $specialTitle->getText() ); - if ( $special ) { - $subpages = $special->prefixSearchSubpages( $subpageSearch, $limit, $offset ); - return array_map( function ( $sub ) use ( $specialTitle ) { - return $specialTitle->getSubpage( $sub ); - }, $subpages ); - } else { - return []; - } - } - - # normalize searchKey, so aliases with spaces can be found - T27675 - $contLang = MediaWikiServices::getInstance()->getContentLanguage(); - $searchKey = str_replace( ' ', '_', $searchKey ); - $searchKey = $contLang->caseFold( $searchKey ); - - // Unlike SpecialPage itself, we want the canonical forms of both - // canonical and alias title forms... - $keys = []; - foreach ( $spFactory->getNames() as $page ) { - $keys[$contLang->caseFold( $page )] = [ 'page' => $page, 'rank' => 0 ]; - } - - foreach ( $contLang->getSpecialPageAliases() as $page => $aliases ) { - if ( !in_array( $page, $spFactory->getNames() ) ) {# T22885 - continue; - } - - foreach ( $aliases as $key => $alias ) { - $keys[$contLang->caseFold( $alias )] = [ 'page' => $alias, 'rank' => $key ]; - } - } - ksort( $keys ); - - $matches = []; - foreach ( $keys as $pageKey => $page ) { - if ( $searchKey === '' || strpos( $pageKey, $searchKey ) === 0 ) { - // T29671: Don't use SpecialPage::getTitleFor() here because it - // localizes its input leading to searches for e.g. Special:All - // returning Spezial:MediaWiki-Systemnachrichten and returning - // Spezial:Alle_Seiten twice when $wgLanguageCode == 'de' - $matches[$page['rank']][] = Title::makeTitleSafe( NS_SPECIAL, $page['page'] ); - - if ( isset( $matches[0] ) && count( $matches[0] ) >= $limit + $offset ) { - // We have enough items in primary rank, no use to continue - break; - } - } - - } - - // Ensure keys are in order - ksort( $matches ); - // Flatten the array - $matches = array_reduce( $matches, 'array_merge', [] ); - - return array_slice( $matches, $offset, $limit ); - } - - /** - * Unless overridden by PrefixSearchBackend hook... - * This is case-sensitive (First character may - * be automatically capitalized by Title::secureAndSpit() - * later on depending on $wgCapitalLinks) - * - * @param array|null $namespaces Namespaces to search in - * @param string $search Term - * @param int $limit Max number of items to return - * @param int $offset Number of items to skip - * @return Title[] Array of Title objects - */ - public function defaultSearchBackend( $namespaces, $search, $limit, $offset ) { - // Backwards compatability with old code. Default to NS_MAIN if no namespaces provided. - if ( $namespaces === null ) { - $namespaces = []; - } - if ( !$namespaces ) { - $namespaces[] = NS_MAIN; - } - - // Construct suitable prefix for each namespace. They differ in cases where - // some namespaces always capitalize and some don't. - $prefixes = []; - foreach ( $namespaces as $namespace ) { - // For now, if special is included, ignore the other namespaces - if ( $namespace == NS_SPECIAL ) { - return $this->specialSearch( $search, $limit, $offset ); - } - - $title = Title::makeTitleSafe( $namespace, $search ); - // Why does the prefix default to empty? - $prefix = $title ? $title->getDBkey() : ''; - $prefixes[$prefix][] = $namespace; - } - - $dbr = wfGetDB( DB_REPLICA ); - // Often there is only one prefix that applies to all requested namespaces, - // but sometimes there are two if some namespaces do not always capitalize. - $conds = []; - foreach ( $prefixes as $prefix => $namespaces ) { - $condition = [ - 'page_namespace' => $namespaces, - 'page_title' . $dbr->buildLike( $prefix, $dbr->anyString() ), - ]; - $conds[] = $dbr->makeList( $condition, LIST_AND ); - } - - $table = 'page'; - $fields = [ 'page_id', 'page_namespace', 'page_title' ]; - $conds = $dbr->makeList( $conds, LIST_OR ); - $options = [ - 'LIMIT' => $limit, - 'ORDER BY' => [ 'page_title', 'page_namespace' ], - 'OFFSET' => $offset - ]; - - $res = $dbr->select( $table, $fields, $conds, __METHOD__, $options ); - - return iterator_to_array( TitleArray::newFromResult( $res ) ); - } - - /** - * Validate an array of numerical namespace indexes - * - * @param array $namespaces - * @return array (default: contains only NS_MAIN) - */ - protected function validateNamespaces( $namespaces ) { - // We will look at each given namespace against content language namespaces - $validNamespaces = MediaWikiServices::getInstance()->getContentLanguage()->getNamespaces(); - if ( is_array( $namespaces ) && count( $namespaces ) > 0 ) { - $valid = []; - foreach ( $namespaces as $ns ) { - if ( is_numeric( $ns ) && array_key_exists( $ns, $validNamespaces ) ) { - $valid[] = $ns; - } - } - if ( count( $valid ) > 0 ) { - return $valid; - } - } - - return [ NS_MAIN ]; - } -} - -/** - * Performs prefix search, returning Title objects - * @deprecated Since 1.27, Use SearchEngine::defaultPrefixSearch or SearchEngine::completionSearch - * @ingroup Search - */ -class TitlePrefixSearch extends PrefixSearch { - - protected function titles( array $titles ) { - return $titles; - } - - protected function strings( array $strings ) { - $titles = array_map( 'Title::newFromText', $strings ); - $lb = new LinkBatch( $titles ); - $lb->setCaller( __METHOD__ ); - $lb->execute(); - return $titles; - } -} - -/** - * Performs prefix search, returning strings - * @deprecated Since 1.27, Use SearchEngine::prefixSearchSubpages or SearchEngine::completionSearch - * @ingroup Search - */ -class StringPrefixSearch extends PrefixSearch { - - protected function titles( array $titles ) { - return array_map( function ( Title $t ) { - return $t->getPrefixedText(); - }, $titles ); - } - - protected function strings( array $strings ) { - return $strings; - } -} diff --git a/includes/search/PrefixSearch.php b/includes/search/PrefixSearch.php new file mode 100644 index 0000000000..aa429b269d --- /dev/null +++ b/includes/search/PrefixSearch.php @@ -0,0 +1,327 @@ +search( $search, $limit, $namespaces, $offset ); + } + + /** + * Do a prefix search of titles and return a list of matching page names. + * + * @param string $search + * @param int $limit + * @param array $namespaces Used if query is not explicitly prefixed + * @param int $offset How many results to offset from the beginning + * @return array Array of strings or Title objects + */ + public function search( $search, $limit, $namespaces = [], $offset = 0 ) { + $search = trim( $search ); + if ( $search == '' ) { + return []; // Return empty result + } + + $hasNamespace = SearchEngine::parseNamespacePrefixes( $search, false, true ); + if ( $hasNamespace !== false ) { + list( $search, $namespaces ) = $hasNamespace; + } + + return $this->searchBackend( $namespaces, $search, $limit, $offset ); + } + + /** + * Do a prefix search for all possible variants of the prefix + * @param string $search + * @param int $limit + * @param array $namespaces + * @param int $offset How many results to offset from the beginning + * + * @return array + */ + public function searchWithVariants( $search, $limit, array $namespaces, $offset = 0 ) { + $searches = $this->search( $search, $limit, $namespaces, $offset ); + + // if the content language has variants, try to retrieve fallback results + $fallbackLimit = $limit - count( $searches ); + if ( $fallbackLimit > 0 ) { + $fallbackSearches = MediaWikiServices::getInstance()->getContentLanguage()-> + autoConvertToAllVariants( $search ); + $fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] ); + + foreach ( $fallbackSearches as $fbs ) { + $fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces ); + $searches = array_merge( $searches, $fallbackSearchResult ); + $fallbackLimit -= count( $fallbackSearchResult ); + + if ( $fallbackLimit == 0 ) { + break; + } + } + } + return $searches; + } + + /** + * When implemented in a descendant class, receives an array of Title objects and returns + * either an unmodified array or an array of strings corresponding to titles passed to it. + * + * @param array $titles + * @return array + */ + abstract protected function titles( array $titles ); + + /** + * When implemented in a descendant class, receives an array of titles as strings and returns + * either an unmodified array or an array of Title objects corresponding to strings received. + * + * @param array $strings + * + * @return array + */ + abstract protected function strings( array $strings ); + + /** + * Do a prefix search of titles and return a list of matching page names. + * @param array $namespaces + * @param string $search + * @param int $limit + * @param int $offset How many results to offset from the beginning + * @return array Array of strings + */ + protected function searchBackend( $namespaces, $search, $limit, $offset ) { + if ( count( $namespaces ) == 1 ) { + $ns = $namespaces[0]; + if ( $ns == NS_MEDIA ) { + $namespaces = [ NS_FILE ]; + } elseif ( $ns == NS_SPECIAL ) { + return $this->titles( $this->specialSearch( $search, $limit, $offset ) ); + } + } + $srchres = []; + if ( Hooks::run( + 'PrefixSearchBackend', + [ $namespaces, $search, $limit, &$srchres, $offset ] + ) ) { + return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit, $offset ) ); + } + return $this->strings( + $this->handleResultFromHook( $srchres, $namespaces, $search, $limit, $offset ) ); + } + + private function handleResultFromHook( $srchres, $namespaces, $search, $limit, $offset ) { + if ( $offset === 0 ) { + // Only perform exact db match if offset === 0 + // This is still far from perfect but at least we avoid returning the + // same title afain and again when the user is scrolling with a query + // that matches a title in the db. + $rescorer = new SearchExactMatchRescorer(); + $srchres = $rescorer->rescore( $search, $namespaces, $srchres, $limit ); + } + return $srchres; + } + + /** + * Prefix search special-case for Special: namespace. + * + * @param string $search Term + * @param int $limit Max number of items to return + * @param int $offset Number of items to offset + * @return array + */ + protected function specialSearch( $search, $limit, $offset ) { + $searchParts = explode( '/', $search, 2 ); + $searchKey = $searchParts[0]; + $subpageSearch = $searchParts[1] ?? null; + + // Handle subpage search separately. + $spFactory = MediaWikiServices::getInstance()->getSpecialPageFactory(); + if ( $subpageSearch !== null ) { + // Try matching the full search string as a page name + $specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey ); + if ( !$specialTitle ) { + return []; + } + $special = $spFactory->getPage( $specialTitle->getText() ); + if ( $special ) { + $subpages = $special->prefixSearchSubpages( $subpageSearch, $limit, $offset ); + return array_map( function ( $sub ) use ( $specialTitle ) { + return $specialTitle->getSubpage( $sub ); + }, $subpages ); + } else { + return []; + } + } + + # normalize searchKey, so aliases with spaces can be found - T27675 + $contLang = MediaWikiServices::getInstance()->getContentLanguage(); + $searchKey = str_replace( ' ', '_', $searchKey ); + $searchKey = $contLang->caseFold( $searchKey ); + + // Unlike SpecialPage itself, we want the canonical forms of both + // canonical and alias title forms... + $keys = []; + foreach ( $spFactory->getNames() as $page ) { + $keys[$contLang->caseFold( $page )] = [ 'page' => $page, 'rank' => 0 ]; + } + + foreach ( $contLang->getSpecialPageAliases() as $page => $aliases ) { + if ( !in_array( $page, $spFactory->getNames() ) ) {# T22885 + continue; + } + + foreach ( $aliases as $key => $alias ) { + $keys[$contLang->caseFold( $alias )] = [ 'page' => $alias, 'rank' => $key ]; + } + } + ksort( $keys ); + + $matches = []; + foreach ( $keys as $pageKey => $page ) { + if ( $searchKey === '' || strpos( $pageKey, $searchKey ) === 0 ) { + // T29671: Don't use SpecialPage::getTitleFor() here because it + // localizes its input leading to searches for e.g. Special:All + // returning Spezial:MediaWiki-Systemnachrichten and returning + // Spezial:Alle_Seiten twice when $wgLanguageCode == 'de' + $matches[$page['rank']][] = Title::makeTitleSafe( NS_SPECIAL, $page['page'] ); + + if ( isset( $matches[0] ) && count( $matches[0] ) >= $limit + $offset ) { + // We have enough items in primary rank, no use to continue + break; + } + } + + } + + // Ensure keys are in order + ksort( $matches ); + // Flatten the array + $matches = array_reduce( $matches, 'array_merge', [] ); + + return array_slice( $matches, $offset, $limit ); + } + + /** + * Unless overridden by PrefixSearchBackend hook... + * This is case-sensitive (First character may + * be automatically capitalized by Title::secureAndSpit() + * later on depending on $wgCapitalLinks) + * + * @param array|null $namespaces Namespaces to search in + * @param string $search Term + * @param int $limit Max number of items to return + * @param int $offset Number of items to skip + * @return Title[] Array of Title objects + */ + public function defaultSearchBackend( $namespaces, $search, $limit, $offset ) { + // Backwards compatability with old code. Default to NS_MAIN if no namespaces provided. + if ( $namespaces === null ) { + $namespaces = []; + } + if ( !$namespaces ) { + $namespaces[] = NS_MAIN; + } + + // Construct suitable prefix for each namespace. They differ in cases where + // some namespaces always capitalize and some don't. + $prefixes = []; + foreach ( $namespaces as $namespace ) { + // For now, if special is included, ignore the other namespaces + if ( $namespace == NS_SPECIAL ) { + return $this->specialSearch( $search, $limit, $offset ); + } + + $title = Title::makeTitleSafe( $namespace, $search ); + // Why does the prefix default to empty? + $prefix = $title ? $title->getDBkey() : ''; + $prefixes[$prefix][] = $namespace; + } + + $dbr = wfGetDB( DB_REPLICA ); + // Often there is only one prefix that applies to all requested namespaces, + // but sometimes there are two if some namespaces do not always capitalize. + $conds = []; + foreach ( $prefixes as $prefix => $namespaces ) { + $condition = [ + 'page_namespace' => $namespaces, + 'page_title' . $dbr->buildLike( $prefix, $dbr->anyString() ), + ]; + $conds[] = $dbr->makeList( $condition, LIST_AND ); + } + + $table = 'page'; + $fields = [ 'page_id', 'page_namespace', 'page_title' ]; + $conds = $dbr->makeList( $conds, LIST_OR ); + $options = [ + 'LIMIT' => $limit, + 'ORDER BY' => [ 'page_title', 'page_namespace' ], + 'OFFSET' => $offset + ]; + + $res = $dbr->select( $table, $fields, $conds, __METHOD__, $options ); + + return iterator_to_array( TitleArray::newFromResult( $res ) ); + } + + /** + * Validate an array of numerical namespace indexes + * + * @param array $namespaces + * @return array (default: contains only NS_MAIN) + */ + protected function validateNamespaces( $namespaces ) { + // We will look at each given namespace against content language namespaces + $validNamespaces = MediaWikiServices::getInstance()->getContentLanguage()->getNamespaces(); + if ( is_array( $namespaces ) && count( $namespaces ) > 0 ) { + $valid = []; + foreach ( $namespaces as $ns ) { + if ( is_numeric( $ns ) && array_key_exists( $ns, $validNamespaces ) ) { + $valid[] = $ns; + } + } + if ( count( $valid ) > 0 ) { + return $valid; + } + } + + return [ NS_MAIN ]; + } +} diff --git a/includes/search/StringPrefixSearch.php b/includes/search/StringPrefixSearch.php new file mode 100644 index 0000000000..517518e7fa --- /dev/null +++ b/includes/search/StringPrefixSearch.php @@ -0,0 +1,39 @@ +getPrefixedText(); + }, $titles ); + } + + protected function strings( array $strings ) { + return $strings; + } +} diff --git a/includes/search/TitlePrefixSearch.php b/includes/search/TitlePrefixSearch.php new file mode 100644 index 0000000000..a548dbf920 --- /dev/null +++ b/includes/search/TitlePrefixSearch.php @@ -0,0 +1,41 @@ +setCaller( __METHOD__ ); + $lb->execute(); + return $titles; + } +}