From 0235ed688a3bd8caad6b55d4bd6842c6fc4bd802 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 22 Dec 2008 23:38:58 +0000 Subject: [PATCH] * (bug 2585) HTTP 404 return code is now given for a page view if the page does not exist, allowing spiders and link checkers to detect broken links. This is less expansive than the old 2005 implementation (r11307), hitting only page views (won't affect action=edit) and doesn't attempt to cover error conditions either (many of which should probably return a different code). Pages which exist in the DB or return true for Title::isAlwaysKnown() such as file pages for existing files, as well as category pages that exist, are treated as existing by returning true for Article::hasViewableContent(). --- RELEASE-NOTES | 3 ++- includes/Article.php | 23 +++++++++++++++++++++++ includes/CategoryPage.php | 13 +++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index e6cc2ae7f9..5b7fff3654 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -241,7 +241,8 @@ The following extensions are migrated into MediaWiki 1.14: * (bug 16760) Add CSS-class to action links of Special:Log * (bug 505) Time zones can now be specified by location in user preferences, avoiding the need to manually update for DST. Patch by Brad Jorsch. - +* (bug 2585) HTTP 404 return code is now given for a page view if the page + does not exist, allowing spiders and link checkers to detect broken links. === Bug fixes in 1.14 === diff --git a/includes/Article.php b/includes/Article.php index 527638d9ad..1b4bcce954 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -508,6 +508,18 @@ class Article { public function exists() { return $this->getId() > 0; } + + /** + * Check if this page is something we're going to be showing + * some sort of sensible content for. 
If we return false, page + * views (plain action=view) will return an HTTP 404 response, + * so spiders and robots can know they're following a bad link. + * + * @return bool + */ + public function hasViewableContent() { + return $this->exists() || $this->mTitle->isAlwaysKnown(); + } /** * @return int The view count for the page @@ -714,6 +726,7 @@ class Article { $rdfrom = $wgRequest->getVal( 'rdfrom' ); $diffOnly = $wgRequest->getBool( 'diffonly', $wgUser->getOption( 'diffonly' ) ); $purge = $wgRequest->getVal( 'action' ) == 'purge'; + $return404 = false; $wgOut->setArticleFlag( true ); @@ -813,12 +826,22 @@ class Article { $text = wfMsg( 'noarticletext' ); } } + # Non-existent pages if( $this->getID() === 0 ) { $wgOut->setRobotPolicy( 'noindex,nofollow' ); $text = "<div class='noarticletext'>\n$text\n</div>"; + if( !$this->hasViewableContent() ) { + // If there's no backing content, send a 404 Not Found + // for better machine handling of broken links. + $return404 = true; + } } + if( $return404 ) { + $wgRequest->response()->header( "HTTP/1.x 404 Not Found" ); + } + # Another whitelist check in case oldid is altering the title if( !$this->mTitle->userCanRead() ) { $wgOut->loginToUse(); diff --git a/includes/CategoryPage.php b/includes/CategoryPage.php index f150e372e9..f5c2540830 100644 --- a/includes/CategoryPage.php +++ b/includes/CategoryPage.php @@ -36,6 +36,19 @@ class CategoryPage extends Article { $this->closeShowCategory(); } } + + /** + * Don't return a 404 for categories in use. + */ + function hasViewableContent() { + if( parent::hasViewableContent() ) { + return true; + } else { + $cat = Category::newFromTitle( $this->mTitle ); + return $cat->getId() != 0; + } + + } function openShowCategory() { # For overloading -- 2.20.1