From ade24dadcd262fe6a421c65afc1f231c5c158532 Mon Sep 17 00:00:00 2001 From: Ori Livneh Date: Sun, 21 Jun 2015 21:26:22 -0700 Subject: [PATCH] Construct clean canonical URLs for wiki pages, ignoring request URL Canonical URLs allow webmasters to indicate the preferred URL form for accessing some content that can be reached via a multitude of URL patterns. A canonical URL is usually (but not always) distinct from the request URL, which may feature things like aliases and session-specific query parameters. We currently derive canonical URLs from request URLs, which is backwards: it is the web application, not the client, that ought to know the canonical way to refer to some content. This patch ensures MediaWiki derives a clean canonical URL for all wiki pages from the request context's title object and action. For some assurance that this is the correct approach, see: http://googlewebmastercentral.blogspot.com/2009/02/specify-your-canonical.html This Google blog post identifies Wikia as exemplary in its usage of canonical URLs. Wikia disregards things like the requested revision ID (oldid=NNN) when constructing the canonical URL. See, for example: http://fallout.wikia.com/wiki/Aqua_Pura_delivery_program?oldid=2171222 Wikia goes as far as canonicalizing action=history URLs to the page view URL. I think that this is incorrect, because the history and info actions are not views of the page content, but rather of its associated metadata. This affects all requests where "setArticleRelated" is true. This is typically all URLs that show content (title query, curid, oldid, diff), and all actions thereof (edit, delete, purge, info, history, etc.). It does not apply to File pages and Special pages. 
Bug: T67402 Change-Id: I1549ca056637981a0d751020c634b9fab387f7bc --- includes/OutputPage.php | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/includes/OutputPage.php b/includes/OutputPage.php index f02f752785..0ed847e137 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -3524,8 +3524,25 @@ class OutputPage extends ContextSource { if ( $canonicalUrl !== false ) { $canonicalUrl = wfExpandUrl( $canonicalUrl, PROTO_CANONICAL ); } else { - $reqUrl = $this->getRequest()->getRequestURL(); - $canonicalUrl = wfExpandUrl( $reqUrl, PROTO_CANONICAL ); + if ( $this->isArticleRelated() ) { + // This affects all requests where "setArticleRelated" is true. This is + // typically all requests that show content (query title, curid, oldid, diff), + // and all wikipage actions (edit, delete, purge, info, history etc.). + // It does not apply to File pages and Special pages. + // 'history' and 'info' actions address page metadata rather than the page + // content itself, so they may not be canonicalized to the view page url. + // TODO: this ought to be better encapsulated in the Action class. + $action = Action::getActionName( $this->getContext() ); + if ( in_array( $action, array( 'history', 'info' ) ) ) { + $query = "action={$action}"; + } else { + $query = ''; + } + $canonicalUrl = $this->getTitle()->getCanonicalURL( $query ); + } else { + $reqUrl = $this->getRequest()->getRequestURL(); + $canonicalUrl = wfExpandUrl( $reqUrl, PROTO_CANONICAL ); + } } } if ( $canonicalUrl !== false ) { -- 2.20.1