Include parsed revision ID in parser cache
authorBrad Jorsch <bjorsch@wikimedia.org>
Tue, 1 Apr 2014 14:52:24 +0000 (10:52 -0400)
committerBrad Jorsch <bjorsch@wikimedia.org>
Tue, 1 Apr 2014 16:15:34 +0000 (12:15 -0400)
One theory for what's behind bug 46014 is that the vandal submits the
edit, then someone (maybe the vandal) gets into the branch of
Article::view that uses PoolWorkArticleView, then ClueBot comes along
and reverts before the PoolWorkArticleView actually executes. Once that
PoolWorkArticleView actually does execute, it overwrites the parser
cache entry from ClueBot's revert with the one from the old edit.

To detect this sort of thing, let's include the revision id in the
parser cache entry and consider it expired if that doesn't match. Which
makes sense to do anyway.

And for good measure, let's have PoolWorkArticleView not save to the
parser cache if !$isCurrent.

Bug: 46014
Change-Id: Ifcc4d2f67f3b77f990eb2fa45417a25bd6c7b790

includes/WikiPage.php
includes/jobqueue/jobs/RefreshLinksJob.php
includes/parser/CacheTime.php
includes/parser/ParserCache.php

index af74f47..18409b0 100644 (file)
@@ -2070,6 +2070,7 @@ class WikiPage implements Page, IDBAccessObject {
 
                $edit = (object)array();
                $edit->revid = $revid;
+               $edit->timestamp = wfTimestampNow();
 
                $edit->pstContent = $content ? $content->preSaveTransform( $this->mTitle, $user, $popts ) : null;
 
@@ -2127,7 +2128,9 @@ class WikiPage implements Page, IDBAccessObject {
                // Save it to the parser cache
                if ( $wgEnableParserCache ) {
                        $parserCache = ParserCache::singleton();
-                       $parserCache->save( $editInfo->output, $this, $editInfo->popts );
+                       $parserCache->save(
+                               $editInfo->output, $this, $editInfo->popts, $editInfo->timestamp, $editInfo->revid
+                       );
                }
 
                // Update the links tables and other secondary data
@@ -3605,9 +3608,9 @@ class PoolWorkArticleView extends PoolCounterWork {
                                $this->page->getTitle()->getPrefixedDBkey() ) );
                }
 
-               if ( $this->cacheable && $this->parserOutput->isCacheable() ) {
+               if ( $this->cacheable && $this->parserOutput->isCacheable() && $isCurrent ) {
                        ParserCache::singleton()->save(
-                               $this->parserOutput, $this->page, $this->parserOptions, $cacheTime );
+                               $this->parserOutput, $this->page, $this->parserOptions, $cacheTime, $this->revid );
                }
 
                // Make sure file cache is not used on uncacheable content.
index 3bcb4fc..f82af27 100644 (file)
@@ -163,7 +163,9 @@ class RefreshLinksJob extends Job {
                                && $parserOutput->isCacheable()
                        ) {
                                $ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
-                               ParserCache::singleton()->save( $parserOutput, $page, $parserOptions, $ctime );
+                               ParserCache::singleton()->save(
+                                       $parserOutput, $page, $parserOptions, $ctime, $revision->getId()
+                               );
                        }
                }
 
index e1b3f28..6dda64c 100644 (file)
@@ -35,7 +35,8 @@ class CacheTime {
        var     $mVersion = Parser::VERSION,  # Compatibility check
                $mCacheTime = '',             # Time when this object was generated, or -1 for uncacheable. Used in ParserCache.
                $mCacheExpiry = null,         # Seconds after which the object should expire, use 0 for uncachable. Used in ParserCache.
-               $mContainsOldMagic;           # Boolean variable indicating if the input contained variables like {{CURRENTDAY}}
+               $mContainsOldMagic,           # Boolean variable indicating if the input contained variables like {{CURRENTDAY}}
+               $mCacheRevisionId = null;     # Revision ID that was parsed
 
        /**
         * @return string TS_MW timestamp
@@ -69,6 +70,22 @@ class CacheTime {
                return wfSetVar( $this->mCacheTime, $t );
        }
 
+       /**
+        * @since 1.23
+        * @return int|null Revision id, if any was set
+        */
+       function getCacheRevisionId() {
+               return $this->mCacheRevisionId;
+       }
+
+       /**
+        * @since 1.23
+        * @param $id int Revision id
+        */
+       function setCacheRevisionId( $id ) {
+               $this->mCacheRevisionId = $id;
+       }
+
        /**
         * Sets the number of seconds after which this object should expire.
         * This value is used with the ParserCache.
@@ -152,4 +169,20 @@ class CacheTime {
                        version_compare( $this->mVersion, Parser::VERSION, "lt" );
        }
 
+       /**
+        * Return true if this cached output object is for a different revision of
+        * the page.
+        *
+        * @todo We always return false if $this->getCacheRevisionId() is null;
+        * this prevents invalidating the whole parser cache when this change is
+        * deployed. Someday that should probably be changed.
+        *
+        * @since 1.23
+        * @param int $id the affected article's current revision id
+        * @return Boolean
+        */
+       public function isDifferentRevision( $id ) {
+               $cached = $this->getCacheRevisionId();
+               return $cached !== null && $id !== $cached;
+       }
 }
index 040c74f..4c7084e 100644 (file)
@@ -146,6 +146,12 @@ class ParserCache {
                                $cacheTime = $optionsKey->getCacheTime();
                                wfDebug( "Parser options key expired, touched " . $article->getTouched() . ", epoch $wgCacheEpoch, cached $cacheTime\n" );
                                return false;
+                       } elseif ( $optionsKey->isDifferentRevision( $article->getLatest() ) ) {
+                               wfIncrStats( "pcache_miss_revid" );
+                               $revId = $article->getLatest();
+                               $cachedRevId = $optionsKey->getCacheRevisionId();
+                               wfDebug( "ParserOutput key is for an old revision, latest $revId, cached $cachedRevId\n" );
+                               return false;
                        }
 
                        // $optionsKey->mUsedOptions is set by save() by calling ParserOutput::getUsedOptions()
@@ -211,6 +217,12 @@ class ParserCache {
                        $cacheTime = $value->getCacheTime();
                        wfDebug( "ParserOutput key expired, touched $touched, epoch $wgCacheEpoch, cached $cacheTime\n" );
                        $value = false;
+               } elseif ( $value->isDifferentRevision( $article->getLatest() ) ) {
+                       wfIncrStats( "pcache_miss_revid" );
+                       $revId = $article->getLatest();
+                       $cachedRevId = $value->getCacheRevisionId();
+                       wfDebug( "ParserOutput key is for an old revision, latest $revId, cached $cachedRevId\n" );
+                       $value = false;
                } else {
                        wfIncrStats( "pcache_hit" );
                }
@@ -224,11 +236,16 @@ class ParserCache {
         * @param WikiPage $page
         * @param ParserOptions $popts
         * @param string $cacheTime Time when the cache was generated
+        * @param int $revId Revision ID that was parsed
         */
-       public function save( $parserOutput, $page, $popts, $cacheTime = null ) {
+       public function save( $parserOutput, $page, $popts, $cacheTime = null, $revId = null ) {
                $expire = $parserOutput->getCacheExpiry();
                if ( $expire > 0 ) {
                        $cacheTime = $cacheTime ?: wfTimestampNow();
+                       if ( !$revId ) {
+                               $revision = $page->getRevision();
+                               $revId = $revision ? $revision->getId() : null;
+                       }
 
                        $optionsKey = new CacheTime;
                        $optionsKey->mUsedOptions = $parserOutput->getUsedOptions();
@@ -236,6 +253,8 @@ class ParserCache {
 
                        $optionsKey->setCacheTime( $cacheTime );
                        $parserOutput->setCacheTime( $cacheTime );
+                       $optionsKey->setCacheRevisionId( $revId );
+                       $parserOutput->setCacheRevisionId( $revId );
 
                        $optionsKey->setContainsOldMagic( $parserOutput->containsOldMagic() );
 
@@ -245,8 +264,13 @@ class ParserCache {
                        // Save the timestamp so that we don't have to load the revision row on view
                        $parserOutput->setTimestamp( $page->getTimestamp() );
 
-                       $parserOutput->mText .= "\n<!-- Saved in parser cache with key $parserOutputKey and timestamp $cacheTime\n -->\n";
-                       wfDebug( "Saved in parser cache with key $parserOutputKey and timestamp $cacheTime\n" );
+                       $msg = "Saved in parser cache with key $parserOutputKey" .
+                               " and timestamp $cacheTime" .
+                               " and revision id $revId" .
+                               "\n";
+
+                       $parserOutput->mText .= "\n<!-- $msg -->\n";
+                       wfDebug( $msg );
 
                        // Save the parser output
                        $this->mMemc->set( $parserOutputKey, $parserOutput, $expire );