parser: inject the time for {{REVISIONTIMESTAMP}} on pre-save parse
authorAaron Schulz <aschulz@wikimedia.org>
Fri, 19 Apr 2019 01:36:40 +0000 (18:36 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Sun, 9 Jun 2019 12:12:57 +0000 (13:12 +0100)
DerivedPageDataUpdater::prepareContent already locks in the revision
timestamp before insertion, so inject that into the parser options
used for any pre-save parse (e.g for edit filters).

This means that a reparse is no longer needed within in the same save
request to get the post-save canonical output. A parse will still be
required if the edit filter output used an edit stash output, since
the revision timestamp is not set at stash time.

Instead of using vary-revision, add a vary-revision-timestamp flag
for the revision timestamp words. The month/day/hour variants retain
their prior optimizations for allowing edit stash output reuse for
the post-save canonical output.

Change-Id: Ic2c13db4d21197c79a89de0de56745ca32918eb6

includes/GlobalFunctions.php
includes/Revision/RenderedRevision.php
includes/Revision/RevisionRenderer.php
includes/Storage/DerivedPageDataUpdater.php
includes/Storage/PageEditStash.php
includes/page/WikiPage.php
includes/parser/Parser.php
includes/parser/ParserOptions.php
includes/parser/ParserOutput.php

index 7256eab..16c63da 100644 (file)
@@ -1882,10 +1882,9 @@ function wfTimestampOrNull( $outputtype = TS_UNIX, $ts = null ) {
 /**
  * Convenience function; returns MediaWiki timestamp for the present time.
  *
- * @return string
+ * @return string TS_MW timestamp
  */
 function wfTimestampNow() {
-       # return NOW
        return MWTimestamp::now( TS_MW );
 }
 
index c4a0054..4acb9c0 100644 (file)
@@ -291,19 +291,28 @@ class RenderedRevision implements SlotRenderingProvider {
 
                $this->setRevisionInternal( $rev );
 
-               $this->pruneRevisionSensitiveOutput( $this->revision->getId() );
+               $this->pruneRevisionSensitiveOutput(
+                       $this->revision->getId(),
+                       $this->revision->getTimestamp()
+               );
        }
 
        /**
         * Prune any output that depends on the revision ID.
         *
-        * @param int|bool  $actualRevId The actual rev id, to check the used speculative rev ID
+        * @param int|bool $actualRevId The actual rev id, to check the used speculative rev ID
         *        against, or false to not purge on vary-revision-id, or true to purge on
         *        vary-revision-id unconditionally.
+        * @param string|bool $actualRevTimestamp The actual rev timestamp, to check against the
+        *        parser output revision timestamp, or false to not purge on vary-revision-timestamp
         */
-       private function pruneRevisionSensitiveOutput( $actualRevId ) {
+       private function pruneRevisionSensitiveOutput( $actualRevId, $actualRevTimestamp ) {
                if ( $this->revisionOutput ) {
-                       if ( $this->outputVariesOnRevisionMetaData( $this->revisionOutput, $actualRevId ) ) {
+                       if ( $this->outputVariesOnRevisionMetaData(
+                               $this->revisionOutput,
+                               $actualRevId,
+                               $actualRevTimestamp
+                       ) ) {
                                $this->revisionOutput = null;
                        }
                } else {
@@ -311,7 +320,11 @@ class RenderedRevision implements SlotRenderingProvider {
                }
 
                foreach ( $this->slotsOutput as $role => $output ) {
-                       if ( $this->outputVariesOnRevisionMetaData( $output, $actualRevId ) ) {
+                       if ( $this->outputVariesOnRevisionMetaData(
+                               $output,
+                               $actualRevId,
+                               $actualRevTimestamp
+                       ) ) {
                                unset( $this->slotsOutput[$role] );
                        }
                }
@@ -372,19 +385,24 @@ class RenderedRevision implements SlotRenderingProvider {
        /**
         * @param ParserOutput $out
         * @param int|bool  $actualRevId The actual rev id, to check the used speculative rev ID
-        *        against, or false to not purge on vary-revision-id, or true to purge on
+        *        against, false to not purge on vary-revision-id, or true to purge on
         *        vary-revision-id unconditionally.
+        * @param string|bool $actualRevTimestamp The actual rev timestamp, to check against the
+        *        parser output revision timestamp, false to not purge on vary-revision-timestamp,
+        *        or true to purge on vary-revision-timestamp unconditionally.
         * @return bool
         */
-       private function outputVariesOnRevisionMetaData( ParserOutput $out, $actualRevId ) {
+       private function outputVariesOnRevisionMetaData(
+               ParserOutput $out,
+               $actualRevId,
+               $actualRevTimestamp
+       ) {
                $method = __METHOD__;
 
                if ( $out->getFlag( 'vary-revision' ) ) {
-                       // If {{PAGEID}} resolved to 0 or {{REVISIONTIMESTAMP}} used the current
-                       // timestamp rather than that of an actual revision, then those words need
-                       // to resolve to the actual page ID or revision timestamp, respectively.
+                       // If {{PAGEID}} resolved to 0, then that word need to resolve to the actual page ID
                        $this->saveParseLogger->info(
-                               "$method: Prepared output has vary-revision...\n"
+                               "$method: Prepared output has vary-revision..."
                        );
                        return true;
                } elseif ( $out->getFlag( 'vary-revision-id' )
@@ -392,7 +410,16 @@ class RenderedRevision implements SlotRenderingProvider {
                        && ( $actualRevId === true || $out->getSpeculativeRevIdUsed() !== $actualRevId )
                ) {
                        $this->saveParseLogger->info(
-                               "$method: Prepared output has vary-revision-id with wrong ID...\n"
+                               "$method: Prepared output has vary-revision-id with wrong ID..."
+                       );
+                       return true;
+               } elseif ( $out->getFlag( 'vary-revision-timestamp' )
+                       && $actualRevTimestamp !== false
+                       && ( $actualRevTimestamp === true ||
+                               $out->getRevisionTimestampUsed() !== $actualRevTimestamp )
+               ) {
+                       $this->saveParseLogger->info(
+                               "$method: Prepared output has vary-revision-timestamp with wrong timestamp..."
                        );
                        return true;
                } elseif ( $out->getFlag( 'vary-revision-exists' ) ) {
@@ -400,7 +427,7 @@ class RenderedRevision implements SlotRenderingProvider {
                        // Note that edit stashing always uses '-', which can be used for both
                        // edit filter checks and canonical parser cache.
                        $this->saveParseLogger->info(
-                               "$method: Prepared output has vary-revision-exists...\n"
+                               "$method: Prepared output has vary-revision-exists..."
                        );
                        return true;
                } else {
@@ -412,7 +439,7 @@ class RenderedRevision implements SlotRenderingProvider {
                        // constructs the ParserOptions: For a null-edit, setCurrentRevisionCallback is called
                        // with the old, existing revision.
 
-                       wfDebug( "$method: Keeping prepared output...\n" );
+                       $this->saveParseLogger->debug( "$method: Keeping prepared output..." );
                        return false;
                }
        }
index f97390a..a63e4f1 100644 (file)
@@ -132,6 +132,13 @@ class RevisionRenderer {
                        return $this->getSpeculativeRevId( $dbIndex );
                } );
 
+               if ( !$rev->getId() && $rev->getTimestamp() ) {
+                       // This is an unsaved revision with an already determined timestamp.
+                       // Make the "current" time used during parsing match that of the revision.
+                       // Any REVISION* parser variables will match up if the revision is saved.
+                       $options->setTimestamp( $rev->getTimestamp() );
+               }
+
                $title = Title::newFromLinkTarget( $rev->getPageAsLinkTarget() );
 
                $renderedRevision = new RenderedRevision(
index ff5541d..5f7401a 100644 (file)
@@ -51,6 +51,9 @@ use MessageCache;
 use ParserCache;
 use ParserOptions;
 use ParserOutput;
+use Psr\Log\LoggerAwareInterface;
+use Psr\Log\LoggerInterface;
+use Psr\Log\NullLogger;
 use RecentChangesUpdateJob;
 use ResourceLoaderWikiModule;
 use Revision;
@@ -93,7 +96,7 @@ use WikiPage;
  * @since 1.32
  * @ingroup Page
  */
-class DerivedPageDataUpdater implements IDBAccessObject {
+class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface {
 
        /**
         * @var UserIdentity|null
@@ -135,6 +138,11 @@ class DerivedPageDataUpdater implements IDBAccessObject {
         */
        private $loadbalancerFactory;
 
+       /**
+        * @var LoggerInterface
+        */
+       private $logger;
+
        /**
         * @var string see $wgArticleCountMethod
         */
@@ -292,6 +300,11 @@ class DerivedPageDataUpdater implements IDBAccessObject {
                // XXX only needed for waiting for replicas to catch up; there should be a narrower
                // interface for that.
                $this->loadbalancerFactory = $loadbalancerFactory;
+               $this->logger = new NullLogger();
+       }
+
+       public function setLogger( LoggerInterface $logger ) {
+               $this->logger = $logger;
        }
 
        /**
@@ -849,11 +862,12 @@ class DerivedPageDataUpdater implements IDBAccessObject {
                if ( $stashedEdit ) {
                        /** @var ParserOutput $output */
                        $output = $stashedEdit->output;
-
                        // TODO: this should happen when stashing the ParserOutput, not now!
                        $output->setCacheTime( $stashedEdit->timestamp );
 
                        $renderHints['known-revision-output'] = $output;
+
+                       $this->logger->debug( __METHOD__ . ': using stashed edit output...' );
                }
 
                // NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
index 46f957f..bda4286 100644 (file)
@@ -280,6 +280,12 @@ class PageEditStash {
                                "Cache for key '{key}' has vary_revision_id; post-insertion parse possible.",
                                $context
                        );
+               } elseif ( $editInfo->output->getFlag( 'vary-revision-timestamp' ) ) {
+                       // Similar to the above if we didn't guess the timestamp correctly.
+                       $logger->debug(
+                               "Cache for key '{key}' has vary_revision_timestamp; post-insertion parse possible.",
+                               $context
+                       );
                }
 
                return $editInfo;
index 863a3f3..ee6adf1 100644 (file)
@@ -1697,6 +1697,7 @@ class WikiPage implements Page, IDBAccessObject {
                        MediaWikiServices::getInstance()->getDBLoadBalancerFactory()
                );
 
+               $derivedDataUpdater->setLogger( LoggerFactory::getInstance( 'SaveParse' ) );
                $derivedDataUpdater->setRcWatchCategoryMembership( $wgRCWatchCategoryMembership );
                $derivedDataUpdater->setArticleCountMethod( $wgArticleCountMethod );
 
index 27c34a6..04c9c9a 100644 (file)
@@ -2785,7 +2785,7 @@ class Parser {
                                        # The vary-revision flag must be set, because the magic word
                                        # will have a different value once the page is saved.
                                        $this->mOutput->setFlag( 'vary-revision' );
-                                       wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
+                                       wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision" );
                                }
                                $value = $pageid ?: null;
                                break;
@@ -2802,13 +2802,14 @@ class Parser {
                                                $value = '-';
                                        } else {
                                                $this->mOutput->setFlag( 'vary-revision-exists' );
+                                               wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-exists" );
                                                $value = '';
                                        }
                                } else {
                                        # Inform the edit saving system that getting the canonical output after
                                        # revision insertion requires another parse using the actual revision ID
                                        $this->mOutput->setFlag( 'vary-revision-id' );
-                                       wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
+                                       wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id" );
                                        $value = $this->getRevisionId();
                                        if ( $value === 0 ) {
                                                $rev = $this->getRevisionObject();
@@ -2838,17 +2839,13 @@ class Parser {
                                $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
                                break;
                        case 'revisiontimestamp':
-                               # Let the edit saving system know we should parse the page
-                               # *after* a revision ID has been assigned. This is for null edits.
-                               $this->mOutput->setFlag( 'vary-revision' );
-                               wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
-                               $value = $this->getRevisionTimestamp();
+                               $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
                                break;
                        case 'revisionuser':
-                               # Let the edit saving system know we should parse the page
-                               # *after* a revision ID has been assigned for null edits.
+                               # Inform the edit saving system that getting the canonical output after
+                               # revision insertion requires a parse that used the actual user ID
                                $this->mOutput->setFlag( 'vary-user' );
-                               wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
+                               wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user" );
                                $value = $this->getRevisionUser();
                                break;
                        case 'revisionsize':
@@ -2996,7 +2993,7 @@ class Parser {
        /**
         * @param int $start
         * @param int $len
-        * @param int $mtts Max time-till-save; sets vary-revision if result might change by then
+        * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
         * @param string $variable Parser variable name
         * @return string
         */
@@ -3005,7 +3002,10 @@ class Parser {
                $resNow = substr( $this->getRevisionTimestamp(), $start, $len );
                # Possibly set vary-revision if there is not yet an associated revision
                if ( !$this->getRevisionObject() ) {
-                       # Get the timezone-adjusted timestamp $mtts seconds in the future
+                       # Get the timezone-adjusted timestamp $mtts seconds in the future.
+                       # This future is relative to the current time and not that of the
+                       # parser options. The rendered timestamp can be compared to that
+                       # of the timestamp specified by the parser options.
                        $resThen = substr(
                                $this->contLang->userAdjust( wfTimestamp( TS_MW, time() + $mtts ), '' ),
                                $start,
@@ -3013,10 +3013,10 @@ class Parser {
                        );
 
                        if ( $resNow !== $resThen ) {
-                               # Let the edit saving system know we should parse the page
-                               # *after* a revision ID has been assigned. This is for null edits.
-                               $this->mOutput->setFlag( 'vary-revision' );
-                               wfDebug( __METHOD__ . ": $variable used, setting vary-revision...\n" );
+                               # Inform the edit saving system that getting the canonical output after
+                               # revision insertion requires a parse that used an actual revision timestamp
+                               $this->mOutput->setFlag( 'vary-revision-timestamp' );
+                               wfDebug( __METHOD__ . ": $variable used, setting vary-revision-timestamp" );
                        }
                }
 
@@ -3738,6 +3738,7 @@ class Parser {
                                        // If we transclude ourselves, the final result
                                        // will change based on the new version of the page
                                        $this->mOutput->setFlag( 'vary-revision' );
+                                       wfDebug( __METHOD__ . ": self transclusion, setting vary-revision" );
                                }
                        }
                }
@@ -5915,7 +5916,7 @@ class Parser {
         *
         * The return value will be either:
         *   - a) Positive, indicating a specific revision ID (current or old)
-        *   - b) Zero, meaning the revision ID specified by getCurrentRevisionCallback()
+        *   - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
         *   - c) Null, meaning the parse is for preview mode and there is no revision
         *
         * @return int|null
@@ -5970,20 +5971,25 @@ class Parser {
        /**
         * Get the timestamp associated with the current revision, adjusted for
         * the default server-local timestamp
-        * @return string
+        * @return string TS_MW timestamp
         */
        public function getRevisionTimestamp() {
-               if ( is_null( $this->mRevisionTimestamp ) ) {
-                       $revObject = $this->getRevisionObject();
-                       $timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();
-
-                       # The cryptic '' timezone parameter tells to use the site-default
-                       # timezone offset instead of the user settings.
-                       # Since this value will be saved into the parser cache, served
-                       # to other users, and potentially even used inside links and such,
-                       # it needs to be consistent for all visitors.
-                       $this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
+               if ( $this->mRevisionTimestamp !== null ) {
+                       return $this->mRevisionTimestamp;
                }
+
+               # Use specified revision timestamp, falling back to the current timestamp
+               $revObject = $this->getRevisionObject();
+               $timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
+               $this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone
+
+               # The cryptic '' timezone parameter tells to use the site-default
+               # timezone offset instead of the user settings.
+               # Since this value will be saved into the parser cache, served
+               # to other users, and potentially even used inside links and such,
+               # it needs to be consistent for all visitors.
+               $this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
+
                return $this->mRevisionTimestamp;
        }
 
index 66b1612..2954b13 100644 (file)
@@ -895,7 +895,7 @@ class ParserOptions {
 
        /**
         * Timestamp used for {{CURRENTDAY}} etc.
-        * @return string
+        * @return string TS_MW timestamp
         */
        public function getTimestamp() {
                if ( !isset( $this->mTimestamp ) ) {
index ef22a1f..ccc6b37 100644 (file)
@@ -207,6 +207,9 @@ class ParserOutput extends CacheTime {
        /** @var int|null Assumed rev ID for {{REVISIONID}} if no revision is set */
        private $mSpeculativeRevId;
 
+       /** @var int|null Assumed rev timestamp for {{REVISIONTIMESTAMP}} if no revision is set */
+       private $revisionTimestampUsed;
+
        /** string CSS classes to use for the wrapping div, stored in the array keys.
         * If no class is given, no wrapper is added.
         */
@@ -439,6 +442,22 @@ class ParserOutput extends CacheTime {
                return $this->mSpeculativeRevId;
        }
 
+       /**
+        * @param string $timestamp TS_MW timestamp
+        * @since 1.34
+        */
+       public function setRevisionTimestampUsed( $timestamp ) {
+               $this->revisionTimestampUsed = $timestamp;
+       }
+
+       /**
+        * @return string|null TS_MW timestamp or null if not used
+        * @since 1.34
+        */
+       public function getRevisionTimestampUsed() {
+               return $this->revisionTimestampUsed;
+       }
+
        public function &getLanguageLinks() {
                return $this->mLanguageLinks;
        }