From 353ae719635b5b0d6e69d973f56366a2dd620d65 Mon Sep 17 00:00:00 2001 From: dcausse Date: Fri, 11 Sep 2015 17:32:15 +0200 Subject: [PATCH] Extracted some code from PrefixSearch for re-usability by other search engines. (This patch is needed for Ida9b9f8) Bug: T112028 Change-Id: I35aece88333a65f6b1f55f7a87e2d14de4f5bea7 --- autoload.php | 1 + includes/PrefixSearch.php | 107 +------------- includes/search/SearchExactMatchRescorer.php | 144 +++++++++++++++++++ 3 files changed, 147 insertions(+), 105 deletions(-) create mode 100644 includes/search/SearchExactMatchRescorer.php diff --git a/autoload.php b/autoload.php index 2844dc7306..1e21a00865 100644 --- a/autoload.php +++ b/autoload.php @@ -1086,6 +1086,7 @@ $wgAutoloadLocalClasses = array( 'SearchDump' => __DIR__ . '/maintenance/dumpIterator.php', 'SearchEngine' => __DIR__ . '/includes/search/SearchEngine.php', 'SearchEngineDummy' => __DIR__ . '/includes/search/SearchEngine.php', + 'SearchExactMatchRescorer' => __DIR__ . '/includes/search/SearchExactMatchRescorer.php', 'SearchHighlighter' => __DIR__ . '/includes/search/SearchHighlighter.php', 'SearchMssql' => __DIR__ . '/includes/search/SearchMssql.php', 'SearchMySQL' => __DIR__ . '/includes/search/SearchMySQL.php', diff --git a/includes/PrefixSearch.php b/includes/PrefixSearch.php index e328e9f95a..c6f187d2b7 100644 --- a/includes/PrefixSearch.php +++ b/includes/PrefixSearch.php @@ -164,112 +164,9 @@ abstract class PrefixSearch { return $this->strings( $this->handleResultFromHook( $srchres, $namespaces, $search, $limit ) ); } - /** - * Default search backend does proper prefix searching, but custom backends - * may sort based on other algorythms that may cause the exact title match - * to not be in the results or be lower down the list. - * @param array $srchres results from the hook - * @return array munged results from the hook - */ private function handleResultFromHook( $srchres, $namespaces, $search, $limit ) { - // Pick namespace (based on PrefixSearch::defaultSearchBackend) - $ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0]; - $t = Title::newFromText( $search, $ns ); - if ( !$t || !$t->exists() ) { - // No exact match so just return the search results - return $srchres; - } - $string = $t->getPrefixedText(); - $key = array_search( $string, $srchres ); - if ( $key !== false ) { - // Exact match was in the results so just move it to the front - return $this->pullFront( $key, $srchres ); - } - // Exact match not in the search results so check for some redirect handling cases - if ( $t->isRedirect() ) { - $target = $this->getRedirectTarget( $t ); - $key = array_search( $target, $srchres ); - if ( $key !== false ) { - // Exact match is a redirect to one of the returned matches so pull the - // returned match to the front. This might look odd but the alternative - // is to put the redirect in front and drop the match. The name of the - // found match is often more descriptive/better formed than the name of - // the redirect AND by definition they share a prefix. Hopefully this - // choice is less confusing and more helpful. But it might not be. But - // it is the choice we're going with for now. - return $this->pullFront( $key, $srchres ); - } - $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres ); - if ( isset( $redirectTargetsToRedirect[$target] ) ) { - // The exact match and something in the results list are both redirects - // to the same thing! In this case we'll pull the returned match to the - // top following the same logic above. Again, it might not be a perfect - // choice but it'll do. - return $this->pullFront( $redirectTargetsToRedirect[$target], $srchres ); - } - } else { - $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres ); - if ( isset( $redirectTargetsToRedirect[$string] ) ) { - // The exact match is the target of a redirect already in the results list so remove - // the redirect from the results list and push the exact match to the front - array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 ); - array_unshift( $srchres, $string ); - return $srchres; - } - } - - // Exact match is totally unique from the other results so just add it to the front - array_unshift( $srchres, $string ); - // And roll one off the end if the results are too long - if ( count( $srchres ) > $limit ) { - array_pop( $srchres ); - } - return $srchres; - } - - /** - * @param Array(string) $titles as strings - * @return Array(string => int) redirect target prefixedText to index of title in titles - * that is a redirect to it. - */ - private function redirectTargetsToRedirect( $titles ) { - $result = array(); - foreach ( $titles as $key => $titleText ) { - $title = Title::newFromText( $titleText ); - if ( !$title || !$title->isRedirect() ) { - continue; - } - $target = $this->getRedirectTarget( $title ); - if ( !$target ) { - continue; - } - $result[$target] = $key; - } - return $result; - } - - /** - * @param int $key key to pull to the front - * @return array $array with the item at $key pulled to the front - */ - private function pullFront( $key, $array ) { - $cut = array_splice( $array, $key, 1 ); - array_unshift( $array, $cut[0] ); - return $array; - } - - /** - * Get a redirect's destination from a title - * @param Title $title A title to redirect. It may not redirect or even exist - * @return null|string If title exists and redirects, get the destination's prefixed name - */ - private function getRedirectTarget( $title ) { - $page = WikiPage::factory( $title ); - if ( !$page->exists() ) { - return null; - } - $redir = $page->getRedirectTarget(); - return $redir ? $redir->getPrefixedText() : null; + $rescorer = new SearchExactMatchRescorer(); + return $rescorer->rescore( $search, $namespaces, $srchres, $limit ); } /** diff --git a/includes/search/SearchExactMatchRescorer.php b/includes/search/SearchExactMatchRescorer.php new file mode 100644 index 0000000000..0ff628def0 --- /dev/null +++ b/includes/search/SearchExactMatchRescorer.php @@ -0,0 +1,144 @@ +exists() ) { + // No exact match so just return the search results + return $srchres; + } + $string = $t->getPrefixedText(); + $key = array_search( $string, $srchres ); + if ( $key !== false ) { + // Exact match was in the results so just move it to the front + return $this->pullFront( $key, $srchres ); + } + // Exact match not in the search results so check for some redirect handling cases + if ( $t->isRedirect() ) { + $target = $this->getRedirectTarget( $t ); + $key = array_search( $target, $srchres ); + if ( $key !== false ) { + // Exact match is a redirect to one of the returned matches so pull the + // returned match to the front. This might look odd but the alternative + // is to put the redirect in front and drop the match. The name of the + // found match is often more descriptive/better formed than the name of + // the redirect AND by definition they share a prefix. Hopefully this + // choice is less confusing and more helpful. But it might not be. But + // it is the choice we're going with for now. + return $this->pullFront( $key, $srchres ); + } + $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres ); + if ( isset( $redirectTargetsToRedirect[$target] ) ) { + // The exact match and something in the results list are both redirects + // to the same thing! In this case we'll pull the returned match to the + // top following the same logic above. Again, it might not be a perfect + // choice but it'll do. + return $this->pullFront( $redirectTargetsToRedirect[$target], $srchres ); + } + } else { + $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres ); + if ( isset( $redirectTargetsToRedirect[$string] ) ) { + // The exact match is the target of a redirect already in the results list so remove + // the redirect from the results list and push the exact match to the front + array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 ); + array_unshift( $srchres, $string ); + return $srchres; + } + } + + // Exact match is totally unique from the other results so just add it to the front + array_unshift( $srchres, $string ); + // And roll one off the end if the results are too long + if ( count( $srchres ) > $limit ) { + array_pop( $srchres ); + } + return $srchres; + } + + /** + * @param string[] $titles as strings + * @return array redirect target prefixedText to index of title in titles + * that is a redirect to it. + */ + private function redirectTargetsToRedirect( $titles ) { + $result = array(); + foreach ( $titles as $key => $titleText ) { + $title = Title::newFromText( $titleText ); + if ( !$title || !$title->isRedirect() ) { + continue; + } + $target = $this->getRedirectTarget( $title ); + if ( !$target ) { + continue; + } + $result[$target] = $key; + } + return $result; + } + + /** + * Returns an array where the element of $array at index $key becomes + * the first element. + * @param int $key key to pull to the front + * @return array $array with the item at $key pulled to the front + */ + private function pullFront( $key, $array ) { + $cut = array_splice( $array, $key, 1 ); + array_unshift( $array, $cut[0] ); + return $array; + } + + /** + * Get a redirect's destination from a title + * @param Title $title A title to redirect. It may not redirect or even exist + * @return null|string If title exists and redirects, get the destination's prefixed name + */ + private function getRedirectTarget( $title ) { + $page = WikiPage::factory( $title ); + if ( !$page->exists() ) { + return null; + } + $redir = $page->getRedirectTarget(); + return $redir ? $redir->getPrefixedText() : null; + } +} -- 2.20.1