From 9bf022910a77d8e107077abaef1309c98d2a4bf8 Mon Sep 17 00:00:00 2001 From: Aryeh Gregor Date: Wed, 23 Jul 2008 19:49:46 +0000 Subject: [PATCH] (bug 8068) New __INDEX__ and __NOINDEX__ magic words allow control of search engine indexing on a per-article basis. Remarks: * Currently __NOINDEX__ will override __INDEX__ regardless of their relative positions, due to the way things are written. Instead, the last one on the page should win. This should be pretty easy to fix. * __INDEX__ and __NOINDEX__ override $wgArticleRobotPolicies. This is almost certainly incorrect, but it's not totally obvious how to fix it, because of the way the code is structured. Probably not a big deal, but should probably be fixed at some point. * Anyone can add and remove the magic words, and there's no config option to disable them. It's not obvious whether this is okay or not. It would be a one-line change to OutputPage.php to have a config option to ignore the magic words, maybe per-namespace or who knows what. --- RELEASE-NOTES | 3 ++- includes/MagicWord.php | 4 ++++ includes/OutputPage.php | 2 ++ includes/parser/Parser.php | 9 +++++++++ includes/parser/ParserOutput.php | 3 +++ languages/messages/MessagesEn.php | 2 ++ 6 files changed, 22 insertions(+), 1 deletion(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 03a4a6dba7..7850c0f9f2 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -24,7 +24,8 @@ None yet === New features in 1.14 === -None yet +* (bug 8068) New __INDEX__ and __NOINDEX__ magic words allow control of search +engine indexing on a per-article basis. 
=== Bug fixes in 1.14 === diff --git a/includes/MagicWord.php b/includes/MagicWord.php index 5284e6211b..ca2a113307 100644 --- a/includes/MagicWord.php +++ b/includes/MagicWord.php @@ -105,6 +105,8 @@ class MagicWord { 'numberofadmins', 'defaultsort', 'pagesincategory', + 'index', + 'noindex', ); /* Array of caching hints for ParserCache */ @@ -153,6 +155,8 @@ class MagicWord { 'noeditsection', 'newsectionlink', 'hiddencat', + 'index', + 'noindex', ); diff --git a/includes/OutputPage.php b/includes/OutputPage.php index bc7a567ab7..baacbfd7de 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -475,6 +475,8 @@ class OutputPage { $this->mLanguageLinks += $parserOutput->getLanguageLinks(); $this->addCategoryLinks( $parserOutput->getCategories() ); $this->mNewSectionLink = $parserOutput->getNewSection(); + # FIXME: This probably overrides $wgArticleRobotPolicies, is that wise? + $this->setIndexPolicy( $parserOutput->getIndexPolicy() ); $this->addKeywords( $parserOutput ); $this->mParseWarnings = $parserOutput->getWarnings(); if ( $parserOutput->getCacheTime() == -1 ) { diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index b9e0ceef98..404f94aeed 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -3380,6 +3380,15 @@ class Parser wfDebug( __METHOD__.": [[MediaWiki:hidden-category-category]] is not a valid title!\n" ); } } + # (bug 8068) Allow control over whether robots index a page. FIXME: + # __NOINDEX__ always overrides __INDEX__ here! This is not desirable, + # the last one on the page should win. 
+ if( isset( $this->mDoubleUnderscores['noindex'] ) ) { + $this->mOutput->setIndexPolicy( 'noindex' ); + } elseif( isset( $this->mDoubleUnderscores['index'] ) ) { + $this->mOutput->setIndexPolicy( 'index' ); + } + return $text; } diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index f98d56418d..a15a71d4ac 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -24,6 +24,7 @@ class ParserOutput $mWarnings, # Warning text to be returned to the user. Wikitext formatted, in the key only $mSections, # Table of contents $mProperties; # Name/value pairs to be cached in the DB + private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. /** * Overridden title for display @@ -69,6 +70,7 @@ class ParserOutput function getSubtitle() { return $this->mSubtitle; } function getOutputHooks() { return (array)$this->mOutputHooks; } function getWarnings() { return array_keys( $this->mWarnings ); } + function getIndexPolicy() { return $this->mIndexPolicy; } function containsOldMagic() { return $this->mContainsOldMagic; } function setText( $text ) { return wfSetVar( $this->mText, $text ); } @@ -78,6 +80,7 @@ class ParserOutput function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); } function setTitleText( $t ) { return wfSetVar( $this->mTitleText, $t ); } function setSections( $toc ) { return wfSetVar( $this->mSections, $toc ); } + function setIndexPolicy( $policy ) { return wfSetVar( $this->mIndexPolicy, $policy ); } function addCategory( $c, $sort ) { $this->mCategories[$c] = $sort; } function addLanguageLink( $t ) { $this->mLanguageLinks[] = $t; } diff --git a/languages/messages/MessagesEn.php b/languages/messages/MessagesEn.php index 2361b84b62..1ec6886532 100644 --- a/languages/messages/MessagesEn.php +++ b/languages/messages/MessagesEn.php @@ -340,6 +340,8 @@ $magicWords = array( 'hiddencat' => array( 1, '__HIDDENCAT__' ), 'pagesincategory' => array( 1, 
'PAGESINCATEGORY', 'PAGESINCAT' ), 'pagesize' => array( 1, 'PAGESIZE' ), + 'index' => array( 1, '__INDEX__' ), + 'noindex' => array( 1, '__NOINDEX__' ), ); /** -- 2.20.1