parser: add speculative page IDs to use with {{PAGEID}}
authorAaron Schulz <aschulz@wikimedia.org>
Thu, 27 Jun 2019 04:30:35 +0000 (21:30 -0700)
committerTim Starling <tstarling@wikimedia.org>
Fri, 26 Jul 2019 06:41:00 +0000 (16:41 +1000)
This works similarly to speculative rev IDs with {{REVISIONID}}.
Re-parses can be avoided if the page ID is correctly guessed.

Also make the {{PAGEID:X}} parser function set vary-page-id.

Bug: T226785
Change-Id: I0b19be45e6ddd6cde330bfcd09d243e4e5beda01

RELEASE-NOTES-1.34
includes/Revision/RenderedRevision.php
includes/Revision/RevisionRenderer.php
includes/Storage/PageEditStash.php
includes/content/WikitextContent.php
includes/parser/CoreParserFunctions.php
includes/parser/Parser.php
includes/parser/ParserOptions.php
includes/parser/ParserOutput.php

index 9330d10..368d2a4 100644 (file)
@@ -309,6 +309,8 @@ because of Phabricator reports.
   deprecated since 1.33.
 * The static properties mw.Api.errors and mw.Api.warnings, deprecated in 1.29,
   have been removed.
+* ParserOption::getSpeculativeRevIdCallback(), deprecated in 1.28, has been
+  removed.
 * The UploadVerification hook, deprecated in 1.28, has been removed. Instead,
   use the UploadVerifyFile hook.
 * UploadBase:: and UploadFromChunks::stashFileGetKey() and stashSession(),
index 818494a..a913244 100644 (file)
@@ -292,6 +292,7 @@ class RenderedRevision implements SlotRenderingProvider {
                $this->setRevisionInternal( $rev );
 
                $this->pruneRevisionSensitiveOutput(
+                       $this->revision->getPageId(),
                        $this->revision->getId(),
                        $this->revision->getTimestamp()
                );
@@ -300,16 +301,25 @@ class RenderedRevision implements SlotRenderingProvider {
        /**
         * Prune any output that depends on the revision ID.
         *
+        * @param int|bool $actualPageId The actual page id, to check the used speculative page ID
+        *        against; false, to not purge on vary-page-id; true, to purge on vary-page-id
+        *        unconditionally.
         * @param int|bool $actualRevId The actual rev id, to check the used speculative rev ID
-        *        against, or false to not purge on vary-revision-id, or true to purge on
+        *        against,; false, to not purge on vary-revision-id; true, to purge on
         *        vary-revision-id unconditionally.
         * @param string|bool $actualRevTimestamp The actual rev timestamp, to check against the
-        *        parser output revision timestamp, or false to not purge on vary-revision-timestamp
+        *        parser output revision timestamp; false, to not purge on vary-revision-timestamp;
+        *        true, to purge on vary-revision-timestamp unconditionally.
         */
-       private function pruneRevisionSensitiveOutput( $actualRevId, $actualRevTimestamp ) {
+       private function pruneRevisionSensitiveOutput(
+               $actualPageId,
+               $actualRevId,
+               $actualRevTimestamp
+       ) {
                if ( $this->revisionOutput ) {
                        if ( $this->outputVariesOnRevisionMetaData(
                                $this->revisionOutput,
+                               $actualPageId,
                                $actualRevId,
                                $actualRevTimestamp
                        ) ) {
@@ -322,6 +332,7 @@ class RenderedRevision implements SlotRenderingProvider {
                foreach ( $this->slotsOutput as $role => $output ) {
                        if ( $this->outputVariesOnRevisionMetaData(
                                $output,
+                               $actualPageId,
                                $actualRevId,
                                $actualRevTimestamp
                        ) ) {
@@ -384,16 +395,20 @@ class RenderedRevision implements SlotRenderingProvider {
 
        /**
         * @param ParserOutput $out
-        * @param int|bool  $actualRevId The actual rev id, to check the used speculative rev ID
-        *        against, false to not purge on vary-revision-id, or true to purge on
+        * @param int|bool $actualPageId The actual page id, to check the used speculative page ID
+        *        against; false, to not purge on vary-page-id; true, to purge on vary-page-id
+        *        unconditionally.
+        * @param int|bool $actualRevId The actual rev id, to check the used speculative rev ID
+        *        against,; false, to not purge on vary-revision-id; true, to purge on
         *        vary-revision-id unconditionally.
         * @param string|bool $actualRevTimestamp The actual rev timestamp, to check against the
-        *        parser output revision timestamp, false to not purge on vary-revision-timestamp,
-        *        or true to purge on vary-revision-timestamp unconditionally.
+        *        parser output revision timestamp; false, to not purge on vary-revision-timestamp;
+        *        true, to purge on vary-revision-timestamp unconditionally.
         * @return bool
         */
        private function outputVariesOnRevisionMetaData(
                ParserOutput $out,
+               $actualPageId,
                $actualRevId,
                $actualRevTimestamp
        ) {
@@ -420,6 +435,13 @@ class RenderedRevision implements SlotRenderingProvider {
                ) {
                        $logger->info( "$varyMsg (vary-revision-timestamp and wrong timestamp)", $context );
                        return true;
+               } elseif (
+                       $out->getFlag( 'vary-page-id' )
+                       && $actualPageId !== false
+                       && ( $actualPageId === true || $out->getSpeculativePageIdUsed() !== $actualPageId )
+               ) {
+                       $logger->info( "$varyMsg (vary-page-id and wrong ID)", $context );
+                       return true;
                } elseif ( $out->getFlag( 'vary-revision-exists' ) ) {
                        // If {{REVISIONID}} resolved to '', it now needs to resolve to '-'.
                        // Note that edit stashing always uses '-', which can be used for both
index 99150c1..bbda0e5 100644 (file)
@@ -130,6 +130,9 @@ class RevisionRenderer {
                $options->setSpeculativeRevIdCallback( function () use ( $dbIndex ) {
                        return $this->getSpeculativeRevId( $dbIndex );
                } );
+               $options->setSpeculativePageIdCallback( function () use ( $dbIndex ) {
+                       return $this->getSpeculativePageId( $dbIndex );
+               } );
 
                if ( !$rev->getId() && $rev->getTimestamp() ) {
                        // This is an unsaved revision with an already determined timestamp.
@@ -166,7 +169,8 @@ class RevisionRenderer {
                // HACK: But don't use a fresh connection in unit tests, since it would not have
                // the fake tables. This should be handled by the LoadBalancer!
                $flags = defined( 'MW_PHPUNIT_TEST' ) || $dbIndex === DB_REPLICA
-                       ? 0 : ILoadBalancer::CONN_TRX_AUTOCOMMIT;
+                       ? 0
+                       : ILoadBalancer::CONN_TRX_AUTOCOMMIT;
 
                $db = $this->loadBalancer->getConnectionRef( $dbIndex, [], $this->dbDomain, $flags );
 
@@ -178,6 +182,25 @@ class RevisionRenderer {
                );
        }
 
+       private function getSpeculativePageId( $dbIndex ) {
+               // Use a fresh master connection in order to see the latest data, by avoiding
+               // stale data from REPEATABLE-READ snapshots.
+               // HACK: But don't use a fresh connection in unit tests, since it would not have
+               // the fake tables. This should be handled by the LoadBalancer!
+               $flags = defined( 'MW_PHPUNIT_TEST' ) || $dbIndex === DB_REPLICA
+                       ? 0
+                       : ILoadBalancer::CONN_TRX_AUTOCOMMIT;
+
+               $db = $this->loadBalancer->getConnectionRef( $dbIndex, [], $this->wikiId, $flags );
+
+               return 1 + (int)$db->selectField(
+                       'page',
+                       'MAX(page_id)',
+                       [],
+                       __METHOD__
+               );
+       }
+
        /**
         * This implements the layout for combining the output of multiple slots.
         *
index 4671d99..a0ef07d 100644 (file)
@@ -271,7 +271,7 @@ class PageEditStash {
                        // This can be used for the initial parse, e.g. for filters or doEditContent(),
                        // but a second parse will be triggered in doEditUpdates() no matter what
                        $logger->info(
-                               "Cache for key '{key}' has 'vary-revision'; post-insertion parse inevitable.",
+                               "Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
                                $context
                        );
                } else {
@@ -281,7 +281,9 @@ class PageEditStash {
                                // Similar to the above if we didn't guess the timestamp correctly
                                'vary-revision-timestamp',
                                // Similar to the above if we didn't guess the content correctly
-                               'vary-revision-sha1'
+                               'vary-revision-sha1',
+                               // Similar to the above if we didn't guess page ID correctly
+                               'vary-page-id'
                        ];
                        foreach ( $flagsMaybeReparse as $flag ) {
                                if ( $editInfo->output->getFlag( $flag ) ) {
index 455eb0d..8e5e0a8 100644 (file)
@@ -329,7 +329,7 @@ class WikitextContent extends TextContent {
         * using the global Parser service.
         *
         * @param Title $title
-        * @param int $revId Revision to pass to the parser (default: null)
+        * @param int|null $revId Revision to pass to the parser (default: null)
         * @param ParserOptions $options (default: null)
         * @param bool $generateHtml (default: true)
         * @param ParserOutput &$output ParserOutput representing the HTML form of the text,
index 5aa1a69..a7916c5 100644 (file)
@@ -1187,39 +1187,44 @@ class CoreParserFunctions {
         */
        public static function pageid( $parser, $title = null ) {
                $t = Title::newFromText( $title );
-               if ( is_null( $t ) ) {
+               if ( !$t ) {
                        return '';
+               } elseif ( !$t->canExist() || $t->isExternal() ) {
+                       return 0; // e.g. special page or interwiki link
                }
-               // Use title from parser to have correct pageid after edit
+
+               $parserOutput = $parser->getOutput();
+
                if ( $t->equals( $parser->getTitle() ) ) {
-                       $t = $parser->getTitle();
-                       return $t->getArticleID();
-               }
+                       // Revision is for the same title that is currently being parsed.
+                       // Use the title from Parser in case a new page ID was injected into it.
+                       $parserOutput->setFlag( 'vary-page-id' );
+                       $id = $parser->getTitle()->getArticleID();
+                       if ( $id ) {
+                               $parserOutput->setSpeculativePageIdUsed( $id );
+                       }
 
-               // These can't have ids
-               if ( !$t->canExist() || $t->isExternal() ) {
-                       return 0;
+                       return $id;
                }
 
-               // Check the link cache, maybe something already looked it up.
+               // Check the link cache for the title
                $linkCache = MediaWikiServices::getInstance()->getLinkCache();
                $pdbk = $t->getPrefixedDBkey();
                $id = $linkCache->getGoodLinkID( $pdbk );
-               if ( $id != 0 ) {
-                       $parser->mOutput->addLink( $t, $id );
-                       return $id;
-               }
-               if ( $linkCache->isBadLink( $pdbk ) ) {
-                       $parser->mOutput->addLink( $t, 0 );
+               if ( $id != 0 || $linkCache->isBadLink( $pdbk ) ) {
+                       $parserOutput->addLink( $t, $id );
+
                        return $id;
                }
 
                // We need to load it from the DB, so mark expensive
                if ( $parser->incrementExpensiveFunctionCount() ) {
                        $id = $t->getArticleID();
-                       $parser->mOutput->addLink( $t, $id );
+                       $parserOutput->addLink( $t, $id );
+
                        return $id;
                }
+
                return null;
        }
 
index 6600f8f..e5bf94a 100644 (file)
@@ -2783,15 +2783,16 @@ class Parser {
                                $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
                                break;
                        case 'pageid': // requested in T25427
-                               $pageid = $this->getTitle()->getArticleID();
-                               if ( $pageid == 0 ) {
-                                       # 0 means the page doesn't exist in the database,
-                                       # which means the user is previewing a new page.
-                                       # The vary-revision flag must be set, because the magic word
-                                       # will have a different value once the page is saved.
-                                       $this->setOutputFlag( 'vary-revision', '{{PAGEID}} on new page' );
+                               # Inform the edit saving system that getting the canonical output
+                               # after page insertion requires a parse that used that exact page ID
+                               $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
+                               $value = $this->mTitle->getArticleID();
+                               if ( !$value ) {
+                                       $value = $this->mOptions->getSpeculativePageId();
+                                       if ( $value ) {
+                                               $this->mOutput->setSpeculativePageIdUsed( $value );
+                                       }
                                }
-                               $value = $pageid ?: null;
                                break;
                        case 'revisionid':
                                if (
@@ -2810,7 +2811,7 @@ class Parser {
                                        }
                                } else {
                                        # Inform the edit saving system that getting the canonical output after
-                                       # revision insertion requires another parse using the actual revision ID
+                                       # revision insertion requires a parse that used that exact revision ID
                                        $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
                                        $value = $this->getRevisionId();
                                        if ( $value === 0 ) {
@@ -5934,19 +5935,21 @@ class Parser {
         * @since 1.23 (public since 1.23)
         */
        public function getRevisionObject() {
-               if ( !is_null( $this->mRevisionObject ) ) {
+               if ( $this->mRevisionObject ) {
                        return $this->mRevisionObject;
                }
 
                // NOTE: try to get the RevisionObject even if mRevisionId is null.
-               // This is useful when parsing revision that has not yet been saved.
+               // This is useful when parsing revision that has not yet been saved.
                // However, if we get back a saved revision even though we are in
                // preview mode, we'll have to ignore it, see below.
                // NOTE: This callback may be used to inject an OLD revision that was
                // already loaded, so "current" is a bit of a misnomer. We can't just
                // skip it if mRevisionId is set.
                $rev = call_user_func(
-                       $this->mOptions->getCurrentRevisionCallback(), $this->getTitle(), $this
+                       $this->mOptions->getCurrentRevisionCallback(),
+                       $this->getTitle(),
+                       $this
                );
 
                if ( $this->mRevisionId === null && $rev && $rev->getId() ) {
index 709f159..69ee1c5 100644 (file)
@@ -62,6 +62,7 @@ class ParserOptions {
        private static $lazyOptions = [
                'dateformat' => [ __CLASS__, 'initDateFormat' ],
                'speculativeRevId' => [ __CLASS__, 'initSpeculativeRevId' ],
+               'speculativePageId' => [ __CLASS__, 'initSpeculativePageId' ],
        ];
 
        /**
@@ -117,11 +118,6 @@ class ParserOptions {
         */
        private $mExtraKey = '';
 
-       /**
-        * @name Option accessors
-        * @{
-        */
-
        /**
         * Fetch an option and track that is was accessed
         * @since 1.30
@@ -856,6 +852,20 @@ class ParserOptions {
                return $this->getOption( 'speculativeRevId' );
        }
 
+       /**
+        * A guess for {{PAGEID}}, calculated using the callback provided via
+        * setSpeculativeRevPageCallback(). For consistency, the value will be calculated upon the
+        * first call of this method, and re-used for subsequent calls.
+        *
+        * If no callback was defined via setSpeculativePageIdCallback(), this method will return false.
+        *
+        * @since 1.34
+        * @return int|false
+        */
+       public function getSpeculativePageId() {
+               return $this->getOption( 'speculativePageId' );
+       }
+
        /**
         * Callback registered with ParserOptions::$lazyOptions, triggered by getSpeculativeRevId().
         *
@@ -871,27 +881,40 @@ class ParserOptions {
        }
 
        /**
-        * Callback to generate a guess for {{REVISIONID}}
-        * @since 1.28
-        * @deprecated since 1.32, use getSpeculativeRevId() instead!
-        * @return callable|null
+        * Callback registered with ParserOptions::$lazyOptions, triggered by getSpeculativePageId().
+        *
+        * @param ParserOptions $popt
+        * @return bool|false
         */
-       public function getSpeculativeRevIdCallback() {
-               return $this->getOption( 'speculativeRevIdCallback' );
+       private static function initSpeculativePageId( ParserOptions $popt ) {
+               $cb = $popt->getOption( 'speculativePageIdCallback' );
+               $id = $cb ? $cb() : null;
+
+               // returning null would result in this being re-called every access
+               return $id ?? false;
        }
 
        /**
         * Callback to generate a guess for {{REVISIONID}}
-        * @since 1.28
-        * @param callable|null $x New value (null is no change)
+        * @param callable|null $x New value
         * @return callable|null Old value
+        * @since 1.28
         */
        public function setSpeculativeRevIdCallback( $x ) {
                $this->setOption( 'speculativeRevId', null ); // reset
-               return $this->setOptionLegacy( 'speculativeRevIdCallback', $x );
+               return $this->setOption( 'speculativeRevIdCallback', $x );
        }
 
-       /**@}*/
+       /**
+        * Callback to generate a guess for {{PAGEID}}
+        * @param callable|null $x New value
+        * @return callable|null Old value
+        * @since 1.34
+        */
+       public function setSpeculativePageIdCallback( $x ) {
+               $this->setOption( 'speculativePageId', null ); // reset
+               return $this->setOption( 'speculativePageIdCallback', $x );
+       }
 
        /**
         * Timestamp used for {{CURRENTDAY}} etc.
@@ -1102,6 +1125,8 @@ class ParserOptions {
                                'templateCallback' => [ Parser::class, 'statelessFetchTemplate' ],
                                'speculativeRevIdCallback' => null,
                                'speculativeRevId' => null,
+                               'speculativePageIdCallback' => null,
+                               'speculativePageId' => null,
                        ];
 
                        Hooks::run( 'ParserOptionsRegister', [
index 23e5911..dbe609a 100644 (file)
@@ -210,9 +210,16 @@ class ParserOutput extends CacheTime {
         */
        private $mFlags = [];
 
+       /** @var string[] */
+       private static $speculativeFields = [
+               'speculativePageIdUsed',
+               'speculativeRevIdUsed',
+               'revisionTimestampUsed'
+       ];
        /** @var int|null Assumed rev ID for {{REVISIONID}} if no revision is set */
-       private $mSpeculativeRevId;
-
+       private $speculativeRevIdUsed;
+       /** @var int|null Assumed page ID for {{PAGEID}} if no revision is set */
+       private $speculativePageIdUsed;
        /** @var int|null Assumed rev timestamp for {{REVISIONTIMESTAMP}} if no revision is set */
        private $revisionTimestampUsed;
 
@@ -440,7 +447,7 @@ class ParserOutput extends CacheTime {
         * @since 1.28
         */
        public function setSpeculativeRevIdUsed( $id ) {
-               $this->mSpeculativeRevId = $id;
+               $this->speculativeRevIdUsed = $id;
        }
 
        /**
@@ -448,7 +455,23 @@ class ParserOutput extends CacheTime {
         * @since 1.28
         */
        public function getSpeculativeRevIdUsed() {
-               return $this->mSpeculativeRevId;
+               return $this->speculativeRevIdUsed;
+       }
+
+       /**
+        * @param int $id
+        * @since 1.34
+        */
+       public function setSpeculativePageIdUsed( $id ) {
+               $this->speculativePageIdUsed = $id;
+       }
+
+       /**
+        * @return int|null
+        * @since 1.34
+        */
+       public function getSpeculativePageIdUsed() {
+               return $this->speculativePageIdUsed;
        }
 
        /**
@@ -1320,18 +1343,13 @@ class ParserOutput extends CacheTime {
                $this->mWarnings = self::mergeMap( $this->mWarnings, $source->mWarnings ); // don't use getter
                $this->mTimestamp = $this->useMaxValue( $this->mTimestamp, $source->getTimestamp() );
 
-               if ( $this->mSpeculativeRevId && $source->mSpeculativeRevId
-                       && $this->mSpeculativeRevId !== $source->mSpeculativeRevId
-               ) {
-                       wfLogWarning(
-                               'Inconsistent speculative revision ID encountered while merging parser output!'
-                       );
+               foreach ( self::$speculativeFields as $field ) {
+                       if ( $this->$field && $source->$field && $this->$field !== $source->$field ) {
+                               wfLogWarning( __METHOD__ . ": inconsistent '$field' properties!" );
+                       }
+                       $this->$field = $this->useMaxValue( $this->$field, $source->$field );
                }
 
-               $this->mSpeculativeRevId = $this->useMaxValue(
-                       $this->mSpeculativeRevId,
-                       $source->getSpeculativeRevIdUsed()
-               );
                $this->mParseStartTime = $this->useEachMinValue(
                        $this->mParseStartTime,
                        $source->mParseStartTime