From 2ec9915816577dbd749625d083e4ee590bf49f6f Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 21 Nov 2014 14:47:32 -0500 Subject: [PATCH] Handle redirects during prefix search exact match Prefix search attempts to find exact matches to the user's query that aren't returned by plugins. In some cases, like when the exact match is a redirect and the target of the redirect is also in the search results, it would result in multiple results in prefix search going the same place. This looks really silly when the top hit is "Barack obama" (a redirect) and the next one is "Barack Obama" (the target of the redirect). This handles a bunch of cases to do with redirects in the matches and when the exact match is a redirect. Bug: 736731 Change-Id: I49fe1ccec84bd5d1f44c6b91b260abf50f2cc3a1 --- includes/PrefixSearch.php | 124 ++++++++++++++++---- tests/phpunit/MediaWikiTestCase.php | 3 +- tests/phpunit/includes/PrefixSearchTest.php | 56 +++++++++ 3 files changed, 158 insertions(+), 25 deletions(-) diff --git a/includes/PrefixSearch.php b/includes/PrefixSearch.php index 839fedcd15..955313b3eb 100644 --- a/includes/PrefixSearch.php +++ b/includes/PrefixSearch.php @@ -155,34 +155,110 @@ abstract class PrefixSearch { $srchres = array(); if ( wfRunHooks( 'PrefixSearchBackend', array( $namespaces, $search, $limit, &$srchres ) ) ) { return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit ) ); + } + return $this->strings( $this->handleResultFromHook( $srchres, $namespaces, $search, $limit ) ); + } + + /** + * Default search backend does proper prefix searching, but custom backends + * may sort based on other algorythms that may cause the exact title match + * to not be in the results or be lower down the list. + * @param array $srchres results from the hook + * @return array munged results from the hook + */ + private function handleResultFromHook( $srchres, $namespaces, $search, $limit ) { + // Pick namespace (based on PrefixSearch::defaultSearchBackend) + $ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0]; + $t = Title::newFromText( $search, $ns ); + if ( !$t || !$t->exists() ) { + // No exact match so just return the search results + return $srchres; + } + $string = $t->getPrefixedText(); + $key = array_search( $string, $srchres ); + if ( $key !== false ) { + // Exact match was in the results so just move it to the front + return $this->pullFront( $key, $srchres ); + } + // Exact match not in the search results so check for some redirect handling cases + if ( $t->isRedirect() ) { + $target = $this->getRedirectTarget( $t ); + $key = array_search( $target, $srchres ); + if ( $key !== false ) { + // Exact match is a redirect to one of the returned matches so pull the + // returned match to the front. This might look odd but the alternative + // is to put the redirect in front and drop the match. The name of the + // found match is often more descriptive/better formed than the name of + // the redirec AND by definition they share a prefix. Hopefully this + // choice is less confusing and more helpful. But it might now be. But + // it is the choice we're going with for now. + return $this->pullFront( $key, $srchres ); + } + $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres ); + if ( isset( $redirectTargetsToRedirect[ $target ] ) ) { + // The exact match and something in the results list are both redirects + // to the same thing! In this case we'll pull the returned match to the + // top following the same logic above. Again, it might not be a perfect + // choice but it'll do. + return $this->pullFront( $redirectTargetsToRedirect[ $target ], $srchres ); + } } else { - // Default search backend does proper prefix searching, but custom backends - // may sort based on other algorythms that may cause the exact title match - // to not be in the results or be lower down the list. - - // Pick namespace (based on PrefixSearch::defaultSearchBackend) - $ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0]; - $t = Title::newFromText( $search, $ns ); - if ( $t ) { - // If text is a valid title and is in the search results - $string = $t->getPrefixedText(); - $key = array_search( $string, $srchres ); - if ( $key !== false ) { - // Move it to the front - $cut = array_splice( $srchres, $key, 1 ); - array_unshift( $srchres, $cut[0] ); - } elseif ( $t->exists() ) { - // Add it in front - array_unshift( $srchres, $string ); - - if ( count( $srchres ) > $limit ) { - array_pop( $srchres ); - } - } + $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres ); + if ( isset( $redirectTargetsToRedirect[ $string ] ) ) { + // The exact match is the target of a redirect already in the results list so remove + // the redirect from the results list and push the exact match to the front + array_splice( $srchres, $redirectTargetsToRedirect[ $string ], 1 ); + array_unshift( $srchres, $string ); + return $srchres; } } - return $this->strings( $srchres ); + // Exact match is totally unique from the other results so just add it to the front + array_unshift( $srchres, $string ); + // And roll one off the end if the results are too long + if ( count( $srchres ) > $limit ) { + array_pop( $srchres ); + } + return $srchres; + } + + /** + * @param Array(string) $titles as strings + * @return Array(string => int) redirect target prefixedText to index of title in titles + * that is a redirect to it. + */ + private function redirectTargetsToRedirect( $titles ) { + $result = array(); + foreach ( $titles as $key => $titleText ) { + $title = Title::newFromText( $titleText ); + if ( !$title || !$title->isRedirect() ) { + continue; + } + $target = $this->getRedirectTarget( $title ); + if ( !$target ) { + continue; + } + $result[ $target ] = $key; + } + return $result; + } + + /** + * @param int $key key to pull to the front + * @return array $array with the item at $key pulled to the front + */ + private function pullFront( $key, $array ) { + $cut = array_splice( $array, $key, 1 ); + array_unshift( $array, $cut[0] ); + return $array; + } + + private function getRedirectTarget( $title ) { + $page = WikiPage::factory( $title ); + if ( !$page->exists() ) { + return null; + } + return $page->getRedirectTarget()->getPrefixedText(); } /** diff --git a/tests/phpunit/MediaWikiTestCase.php b/tests/phpunit/MediaWikiTestCase.php index 05275c8c1e..2cd549ee3b 100644 --- a/tests/phpunit/MediaWikiTestCase.php +++ b/tests/phpunit/MediaWikiTestCase.php @@ -447,7 +447,8 @@ abstract class MediaWikiTestCase extends PHPUnit_Framework_TestCase { $comment = __METHOD__ . ': Sample page for unit test.'; // Avoid memory leak...? - LinkCache::singleton()->clear(); + // LinkCache::singleton()->clear(); + // Maybe. But doing this absolutely breaks $title->isRedirect() when called during unit tests.... $page = WikiPage::factory( $title ); $page->doEditContent( ContentHandler::makeContent( $text, $title ), $comment, 0, false, $user ); diff --git a/tests/phpunit/includes/PrefixSearchTest.php b/tests/phpunit/includes/PrefixSearchTest.php index a33f6a6bf4..411d40618d 100644 --- a/tests/phpunit/includes/PrefixSearchTest.php +++ b/tests/phpunit/includes/PrefixSearchTest.php @@ -41,6 +41,13 @@ class PrefixSearchTest extends MediaWikiLangTestCase { $this->insertPage( 'Example Foo' ); $this->insertPage( 'Example Foo/Bar' ); $this->insertPage( 'Example/Baz' ); + $this->insertPage( 'Redirect test', '#REDIRECT [[Redirect Test]]' ); + $this->insertPage( 'Redirect Test'); + $this->insertPage( 'Redirect Test Worse Result'); + $this->insertPage( 'Redirect test2', '#REDIRECT [[Redirect Test2]]' ); + $this->insertPage( 'Redirect TEST2', '#REDIRECT [[Redirect Test2]]' ); + $this->insertPage( 'Redirect Test2'); + $this->insertPage( 'Redirect Test2 Worse Result'); $this->insertPage( 'Talk:Sandbox' ); $this->insertPage( 'Talk:Example' ); @@ -196,6 +203,55 @@ class PrefixSearchTest extends MediaWikiLangTestCase { 'External', ), ) ), + array( array( + "Exact match shouldn't override already found match if " . + "exact is redirect and found isn't", + 'provision' => array( + // Target of the exact match is low in the list + 'Redirect Test Worse Result', + 'Redirect Test', + ), + 'query' => 'redirect test', + 'results' => array( + // Redirect target is pulled up and exact match isn't added + 'Redirect Test', + 'Redirect Test Worse Result', + ), + ) ), + array( array( + "Exact match shouldn't override already found match if " . + "both exact match and found match are redirect", + 'provision' => array( + // Another redirect to the same target as the exact match + // is low in the list + 'Redirect Test2 Worse Result', + 'Redirect test2', + ), + 'query' => 'redirect TEST2', + 'results' => array( + // Found redirect is pulled to the top and exact match isn't + // added + 'Redirect test2', + 'Redirect Test2 Worse Result', + ), + ) ), + array( array( + "Exact match should override any already found matches that " . + "are redirects to it", + 'provision' => array( + // Another redirect to the same target as the exact match + // is low in the list + 'Redirect Test Worse Result', + 'Redirect test', + ), + 'query' => 'Redirect Test', + 'results' => array( + // Found redirect is pulled to the top and exact match isn't + // added + 'Redirect Test', + 'Redirect Test Worse Result', + ), + ) ), ); } -- 2.20.1