From 155d555b83eca6403e07d2094b074a8ed2f301ae Mon Sep 17 00:00:00 2001 From: Timo Tijhof Date: Fri, 19 Jun 2015 21:07:24 +0100 Subject: [PATCH] MediaWiki.php: Redirect non-standard title urls to canonical Urls that use the page's title and no extra query parameters now redirect to the standard url format. Previously we only did this for variations of the title value (e.g. "Foo%20Bar"), not for variations of the overall url structure (like title=Foo -> /wiki/Foo). Existing redirect (unchanged): /wiki/Foo%20Bar /w/index.php?title=Foo%20Bar New redirects: /wiki/Foo_Bar?action=view /w/index.php?title=Foo_Bar /w/index.php?title=Foo_Bar&action=view Any intentional (or unintentional) ways a url can be rewritten by the server, such as "/?title=Foo_Bar" in case of Wikimedia, are redirected as well. While this has been a problem for many years, it went unnoticed until recently when Google started to index significantly more results of the "/?title=" form. This query returns "About 3,220,000 results": https://google.com/search?q=site:en.wikipedia.org+inurl:title+-intitle:title The only change in logic is that the titlekey comparison is now no longer a factor in deciding whether to redirect. Instead the existing comparison for the entire url is used to cover this. However I kept titlekey comparison in the redirect-loop check as otherwise this check would throw on all canonical page views where no redirect can be made. Added a comment explaining how this redirect loop was possible. Bug: T67402 Change-Id: I88ed3525141c765910e66188427b9aab36b958a9 --- includes/MediaWiki.php | 22 ++++++++++++++++------ tests/phpunit/includes/MediaWikiTest.php | 10 +++++----- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/includes/MediaWiki.php b/includes/MediaWiki.php index 932dea20d1..5510d35913 100644 --- a/includes/MediaWiki.php +++ b/includes/MediaWiki.php @@ -279,6 +279,8 @@ class MediaWiki { * - Normalise empty title: * /wiki/ -> /wiki/Main * /w/index.php?title= -> /wiki/Main + * - Normalise non-standard title urls: + * /w/index.php?title=Foo_Bar -> /wiki/Foo_Bar * - Don't redirect anything with query parameters other than 'title' or 'action=view'. * * @return bool True if a redirect was set. @@ -289,8 +291,6 @@ class MediaWiki { if ( $request->getVal( 'action', 'view' ) != 'view' || $request->wasPosted() - || ( $request->getVal( 'title' ) !== null - && $title->getPrefixedDBkey() == $request->getVal( 'title' ) ) || count( $request->getValueNames( array( 'action', 'title' ) ) ) || !Hooks::run( 'TestCanonicalRedirect', array( $request, $title, $output ) ) ) { @@ -305,7 +305,19 @@ class MediaWiki { } // Redirect to canonical url, make it a 301 to allow caching $targetUrl = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT ); - if ( $targetUrl == $request->getFullRequestURL() ) { + + if ( $targetUrl != $request->getFullRequestURL() ) { + $output->setSquidMaxage( 1200 ); + $output->redirect( $targetUrl, '301' ); + return true; + } + + // If there is no title, or the title is in a non-standard encoding, we demand + // a redirect. If cgi somehow changed the 'title' query to be non-standard while + // the url is standard, the server is misconfigured. + if ( $request->getVal( 'title' ) === null + || $title->getPrefixedDBkey() != $request->getVal( 'title' ) + ) { $message = "Redirect loop detected!\n\n" . "This means the wiki got confused about what page was " . "requested; this sometimes happens when moving a wiki " . @@ -327,9 +339,7 @@ class MediaWiki { } throw new HttpError( 500, $message ); } - $output->setSquidMaxage( 1200 ); - $output->redirect( $targetUrl, '301' ); - return true; + return false; } /** diff --git a/tests/phpunit/includes/MediaWikiTest.php b/tests/phpunit/includes/MediaWikiTest.php index df94d3e30c..e1962436ec 100644 --- a/tests/phpunit/includes/MediaWikiTest.php +++ b/tests/phpunit/includes/MediaWikiTest.php @@ -34,7 +34,7 @@ class MediaWikiTest extends MediaWikiTestCase { 'url' => 'http://example.org/w/index.php?title=Foo_Bar', 'query' => array( 'title' => 'Foo_Bar' ), 'title' => 'Foo_Bar', - 'redirect' => false, + 'redirect' => 'http://example.org/wiki/Foo_Bar', ), array( // View: Script path with implicit title from page id @@ -76,21 +76,21 @@ class MediaWikiTest extends MediaWikiTestCase { 'url' => 'http://example.org/w/?title=Foo_Bar', 'query' => array( 'title' => 'Foo_Bar' ), 'title' => 'Foo_Bar', - 'redirect' => false, + 'redirect' => 'http://example.org/wiki/Foo_Bar', ), array( // View: Root path with escaped title 'url' => 'http://example.org/?title=Foo_Bar', 'query' => array( 'title' => 'Foo_Bar' ), 'title' => 'Foo_Bar', - 'redirect' => false, + 'redirect' => 'http://example.org/wiki/Foo_Bar', ), array( // View: Canonical with redundant query 'url' => 'http://example.org/wiki/Foo_Bar?action=view', 'query' => array( 'action' => 'view' ), 'title' => 'Foo_Bar', - 'redirect' => false, + 'redirect' => 'http://example.org/wiki/Foo_Bar', ), array( // Edit: Canonical view url with action query @@ -104,7 +104,7 @@ class MediaWikiTest extends MediaWikiTestCase { 'url' => 'http://example.org/w/index.php?title=Foo_Bar&action=view', 'query' => array( 'title' => 'Foo_Bar', 'action' => 'view' ), 'title' => 'Foo_Bar', - 'redirect' => false, + 'redirect' => 'http://example.org/wiki/Foo_Bar', ), array( // Edit: Index with action query -- 2.20.1