From: Aaron Schulz Date: Thu, 27 Jun 2019 04:30:35 +0000 (-0700) Subject: parser: add speculative page IDs to use with {{PAGEID}} X-Git-Tag: 1.34.0-rc.0~871 X-Git-Url: http://git.cyclocoop.org/%22%20.%20generer_url_ecrire%28%22config_fonctions%22%2C%20%22image_process=%24process%22%29%20.%20%22?a=commitdiff_plain;h=5099ee9f7273ea156eea8afa033548b800828639;p=lhc%2Fweb%2Fwiklou.git parser: add speculative page IDs to use with {{PAGEID}} This works similarly to speculative rev IDs with {{REVISIONID}}. Re-parses can be avoided if the page ID is correctly guessed. Also make the {{PAGEID:X}} parser function set vary-page-id. Bug: T226785 Change-Id: I0b19be45e6ddd6cde330bfcd09d243e4e5beda01 --- diff --git a/RELEASE-NOTES-1.34 b/RELEASE-NOTES-1.34 index 9330d105f4..368d2a4731 100644 --- a/RELEASE-NOTES-1.34 +++ b/RELEASE-NOTES-1.34 @@ -309,6 +309,8 @@ because of Phabricator reports. deprecated since 1.33. * The static properties mw.Api.errors and mw.Api.warnings, deprecated in 1.29, have been removed. +* ParserOption::getSpeculativeRevIdCallback(), deprecated in 1.28, has been + removed. * The UploadVerification hook, deprecated in 1.28, has been removed. Instead, use the UploadVerifyFile hook. * UploadBase:: and UploadFromChunks::stashFileGetKey() and stashSession(), diff --git a/includes/Revision/RenderedRevision.php b/includes/Revision/RenderedRevision.php index 818494a092..a9132445a5 100644 --- a/includes/Revision/RenderedRevision.php +++ b/includes/Revision/RenderedRevision.php @@ -292,6 +292,7 @@ class RenderedRevision implements SlotRenderingProvider { $this->setRevisionInternal( $rev ); $this->pruneRevisionSensitiveOutput( + $this->revision->getPageId(), $this->revision->getId(), $this->revision->getTimestamp() ); @@ -300,16 +301,25 @@ class RenderedRevision implements SlotRenderingProvider { /** * Prune any output that depends on the revision ID. * + * @param int|bool $actualPageId The actual page id, to check the used speculative page ID + * against; false, to not purge on vary-page-id; true, to purge on vary-page-id + * unconditionally. * @param int|bool $actualRevId The actual rev id, to check the used speculative rev ID - * against, or false to not purge on vary-revision-id, or true to purge on + * against,; false, to not purge on vary-revision-id; true, to purge on * vary-revision-id unconditionally. * @param string|bool $actualRevTimestamp The actual rev timestamp, to check against the - * parser output revision timestamp, or false to not purge on vary-revision-timestamp + * parser output revision timestamp; false, to not purge on vary-revision-timestamp; + * true, to purge on vary-revision-timestamp unconditionally. */ - private function pruneRevisionSensitiveOutput( $actualRevId, $actualRevTimestamp ) { + private function pruneRevisionSensitiveOutput( + $actualPageId, + $actualRevId, + $actualRevTimestamp + ) { if ( $this->revisionOutput ) { if ( $this->outputVariesOnRevisionMetaData( $this->revisionOutput, + $actualPageId, $actualRevId, $actualRevTimestamp ) ) { @@ -322,6 +332,7 @@ class RenderedRevision implements SlotRenderingProvider { foreach ( $this->slotsOutput as $role => $output ) { if ( $this->outputVariesOnRevisionMetaData( $output, + $actualPageId, $actualRevId, $actualRevTimestamp ) ) { @@ -384,16 +395,20 @@ class RenderedRevision implements SlotRenderingProvider { /** * @param ParserOutput $out - * @param int|bool $actualRevId The actual rev id, to check the used speculative rev ID - * against, false to not purge on vary-revision-id, or true to purge on + * @param int|bool $actualPageId The actual page id, to check the used speculative page ID + * against; false, to not purge on vary-page-id; true, to purge on vary-page-id + * unconditionally. + * @param int|bool $actualRevId The actual rev id, to check the used speculative rev ID + * against,; false, to not purge on vary-revision-id; true, to purge on * vary-revision-id unconditionally. * @param string|bool $actualRevTimestamp The actual rev timestamp, to check against the - * parser output revision timestamp, false to not purge on vary-revision-timestamp, - * or true to purge on vary-revision-timestamp unconditionally. + * parser output revision timestamp; false, to not purge on vary-revision-timestamp; + * true, to purge on vary-revision-timestamp unconditionally. * @return bool */ private function outputVariesOnRevisionMetaData( ParserOutput $out, + $actualPageId, $actualRevId, $actualRevTimestamp ) { @@ -420,6 +435,13 @@ class RenderedRevision implements SlotRenderingProvider { ) { $logger->info( "$varyMsg (vary-revision-timestamp and wrong timestamp)", $context ); return true; + } elseif ( + $out->getFlag( 'vary-page-id' ) + && $actualPageId !== false + && ( $actualPageId === true || $out->getSpeculativePageIdUsed() !== $actualPageId ) + ) { + $logger->info( "$varyMsg (vary-page-id and wrong ID)", $context ); + return true; } elseif ( $out->getFlag( 'vary-revision-exists' ) ) { // If {{REVISIONID}} resolved to '', it now needs to resolve to '-'. // Note that edit stashing always uses '-', which can be used for both diff --git a/includes/Revision/RevisionRenderer.php b/includes/Revision/RevisionRenderer.php index 99150c1368..bbda0e5f43 100644 --- a/includes/Revision/RevisionRenderer.php +++ b/includes/Revision/RevisionRenderer.php @@ -130,6 +130,9 @@ class RevisionRenderer { $options->setSpeculativeRevIdCallback( function () use ( $dbIndex ) { return $this->getSpeculativeRevId( $dbIndex ); } ); + $options->setSpeculativePageIdCallback( function () use ( $dbIndex ) { + return $this->getSpeculativePageId( $dbIndex ); + } ); if ( !$rev->getId() && $rev->getTimestamp() ) { // This is an unsaved revision with an already determined timestamp. @@ -166,7 +169,8 @@ class RevisionRenderer { // HACK: But don't use a fresh connection in unit tests, since it would not have // the fake tables. This should be handled by the LoadBalancer! $flags = defined( 'MW_PHPUNIT_TEST' ) || $dbIndex === DB_REPLICA - ? 0 : ILoadBalancer::CONN_TRX_AUTOCOMMIT; + ? 0 + : ILoadBalancer::CONN_TRX_AUTOCOMMIT; $db = $this->loadBalancer->getConnectionRef( $dbIndex, [], $this->dbDomain, $flags ); @@ -178,6 +182,25 @@ class RevisionRenderer { ); } + private function getSpeculativePageId( $dbIndex ) { + // Use a fresh master connection in order to see the latest data, by avoiding + // stale data from REPEATABLE-READ snapshots. + // HACK: But don't use a fresh connection in unit tests, since it would not have + // the fake tables. This should be handled by the LoadBalancer! + $flags = defined( 'MW_PHPUNIT_TEST' ) || $dbIndex === DB_REPLICA + ? 0 + : ILoadBalancer::CONN_TRX_AUTOCOMMIT; + + $db = $this->loadBalancer->getConnectionRef( $dbIndex, [], $this->wikiId, $flags ); + + return 1 + (int)$db->selectField( + 'page', + 'MAX(page_id)', + [], + __METHOD__ + ); + } + /** * This implements the layout for combining the output of multiple slots. * diff --git a/includes/Storage/PageEditStash.php b/includes/Storage/PageEditStash.php index 4671d99f15..a0ef07d651 100644 --- a/includes/Storage/PageEditStash.php +++ b/includes/Storage/PageEditStash.php @@ -271,7 +271,7 @@ class PageEditStash { // This can be used for the initial parse, e.g. for filters or doEditContent(), // but a second parse will be triggered in doEditUpdates() no matter what $logger->info( - "Cache for key '{key}' has 'vary-revision'; post-insertion parse inevitable.", + "Cache for key '{key}' has vary-revision; post-insertion parse inevitable.", $context ); } else { @@ -281,7 +281,9 @@ class PageEditStash { // Similar to the above if we didn't guess the timestamp correctly 'vary-revision-timestamp', // Similar to the above if we didn't guess the content correctly - 'vary-revision-sha1' + 'vary-revision-sha1', + // Similar to the above if we didn't guess page ID correctly + 'vary-page-id' ]; foreach ( $flagsMaybeReparse as $flag ) { if ( $editInfo->output->getFlag( $flag ) ) { diff --git a/includes/content/WikitextContent.php b/includes/content/WikitextContent.php index 455eb0de4f..8e5e0a8305 100644 --- a/includes/content/WikitextContent.php +++ b/includes/content/WikitextContent.php @@ -329,7 +329,7 @@ class WikitextContent extends TextContent { * using the global Parser service. * * @param Title $title - * @param int $revId Revision to pass to the parser (default: null) + * @param int|null $revId Revision to pass to the parser (default: null) * @param ParserOptions $options (default: null) * @param bool $generateHtml (default: true) * @param ParserOutput &$output ParserOutput representing the HTML form of the text, diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index 5aa1a691b0..a7916c5796 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -1187,39 +1187,44 @@ class CoreParserFunctions { */ public static function pageid( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) { + if ( !$t ) { return ''; + } elseif ( !$t->canExist() || $t->isExternal() ) { + return 0; // e.g. special page or interwiki link } - // Use title from parser to have correct pageid after edit + + $parserOutput = $parser->getOutput(); + if ( $t->equals( $parser->getTitle() ) ) { - $t = $parser->getTitle(); - return $t->getArticleID(); - } + // Revision is for the same title that is currently being parsed. + // Use the title from Parser in case a new page ID was injected into it. + $parserOutput->setFlag( 'vary-page-id' ); + $id = $parser->getTitle()->getArticleID(); + if ( $id ) { + $parserOutput->setSpeculativePageIdUsed( $id ); + } - // These can't have ids - if ( !$t->canExist() || $t->isExternal() ) { - return 0; + return $id; } - // Check the link cache, maybe something already looked it up. + // Check the link cache for the title $linkCache = MediaWikiServices::getInstance()->getLinkCache(); $pdbk = $t->getPrefixedDBkey(); $id = $linkCache->getGoodLinkID( $pdbk ); - if ( $id != 0 ) { - $parser->mOutput->addLink( $t, $id ); - return $id; - } - if ( $linkCache->isBadLink( $pdbk ) ) { - $parser->mOutput->addLink( $t, 0 ); + if ( $id != 0 || $linkCache->isBadLink( $pdbk ) ) { + $parserOutput->addLink( $t, $id ); + return $id; } // We need to load it from the DB, so mark expensive if ( $parser->incrementExpensiveFunctionCount() ) { $id = $t->getArticleID(); - $parser->mOutput->addLink( $t, $id ); + $parserOutput->addLink( $t, $id ); + return $id; } + return null; } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 6600f8f72b..e5bf94a602 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -2783,15 +2783,16 @@ class Parser { $value = wfEscapeWikiText( $subjPage->getPrefixedURL() ); break; case 'pageid': // requested in T25427 - $pageid = $this->getTitle()->getArticleID(); - if ( $pageid == 0 ) { - # 0 means the page doesn't exist in the database, - # which means the user is previewing a new page. - # The vary-revision flag must be set, because the magic word - # will have a different value once the page is saved. - $this->setOutputFlag( 'vary-revision', '{{PAGEID}} on new page' ); + # Inform the edit saving system that getting the canonical output + # after page insertion requires a parse that used that exact page ID + $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' ); + $value = $this->mTitle->getArticleID(); + if ( !$value ) { + $value = $this->mOptions->getSpeculativePageId(); + if ( $value ) { + $this->mOutput->setSpeculativePageIdUsed( $value ); + } } - $value = $pageid ?: null; break; case 'revisionid': if ( @@ -2810,7 +2811,7 @@ class Parser { } } else { # Inform the edit saving system that getting the canonical output after - # revision insertion requires another parse using the actual revision ID + # revision insertion requires a parse that used that exact revision ID $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' ); $value = $this->getRevisionId(); if ( $value === 0 ) { @@ -5934,19 +5935,21 @@ class Parser { * @since 1.23 (public since 1.23) */ public function getRevisionObject() { - if ( !is_null( $this->mRevisionObject ) ) { + if ( $this->mRevisionObject ) { return $this->mRevisionObject; } // NOTE: try to get the RevisionObject even if mRevisionId is null. - // This is useful when parsing revision that has not yet been saved. + // This is useful when parsing a revision that has not yet been saved. // However, if we get back a saved revision even though we are in // preview mode, we'll have to ignore it, see below. // NOTE: This callback may be used to inject an OLD revision that was // already loaded, so "current" is a bit of a misnomer. We can't just // skip it if mRevisionId is set. $rev = call_user_func( - $this->mOptions->getCurrentRevisionCallback(), $this->getTitle(), $this + $this->mOptions->getCurrentRevisionCallback(), + $this->getTitle(), + $this ); if ( $this->mRevisionId === null && $rev && $rev->getId() ) { diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 709f159bb0..69ee1c502e 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -62,6 +62,7 @@ class ParserOptions { private static $lazyOptions = [ 'dateformat' => [ __CLASS__, 'initDateFormat' ], 'speculativeRevId' => [ __CLASS__, 'initSpeculativeRevId' ], + 'speculativePageId' => [ __CLASS__, 'initSpeculativePageId' ], ]; /** @@ -117,11 +118,6 @@ class ParserOptions { */ private $mExtraKey = ''; - /** - * @name Option accessors - * @{ - */ - /** * Fetch an option and track that is was accessed * @since 1.30 @@ -856,6 +852,20 @@ class ParserOptions { return $this->getOption( 'speculativeRevId' ); } + /** + * A guess for {{PAGEID}}, calculated using the callback provided via + * setSpeculativeRevPageCallback(). For consistency, the value will be calculated upon the + * first call of this method, and re-used for subsequent calls. + * + * If no callback was defined via setSpeculativePageIdCallback(), this method will return false. + * + * @since 1.34 + * @return int|false + */ + public function getSpeculativePageId() { + return $this->getOption( 'speculativePageId' ); + } + /** * Callback registered with ParserOptions::$lazyOptions, triggered by getSpeculativeRevId(). * @@ -871,27 +881,40 @@ class ParserOptions { } /** - * Callback to generate a guess for {{REVISIONID}} - * @since 1.28 - * @deprecated since 1.32, use getSpeculativeRevId() instead! - * @return callable|null + * Callback registered with ParserOptions::$lazyOptions, triggered by getSpeculativePageId(). + * + * @param ParserOptions $popt + * @return bool|false */ - public function getSpeculativeRevIdCallback() { - return $this->getOption( 'speculativeRevIdCallback' ); + private static function initSpeculativePageId( ParserOptions $popt ) { + $cb = $popt->getOption( 'speculativePageIdCallback' ); + $id = $cb ? $cb() : null; + + // returning null would result in this being re-called every access + return $id ?? false; } /** * Callback to generate a guess for {{REVISIONID}} - * @since 1.28 - * @param callable|null $x New value (null is no change) + * @param callable|null $x New value * @return callable|null Old value + * @since 1.28 */ public function setSpeculativeRevIdCallback( $x ) { $this->setOption( 'speculativeRevId', null ); // reset - return $this->setOptionLegacy( 'speculativeRevIdCallback', $x ); + return $this->setOption( 'speculativeRevIdCallback', $x ); } - /**@}*/ + /** + * Callback to generate a guess for {{PAGEID}} + * @param callable|null $x New value + * @return callable|null Old value + * @since 1.34 + */ + public function setSpeculativePageIdCallback( $x ) { + $this->setOption( 'speculativePageId', null ); // reset + return $this->setOption( 'speculativePageIdCallback', $x ); + } /** * Timestamp used for {{CURRENTDAY}} etc. @@ -1102,6 +1125,8 @@ class ParserOptions { 'templateCallback' => [ Parser::class, 'statelessFetchTemplate' ], 'speculativeRevIdCallback' => null, 'speculativeRevId' => null, + 'speculativePageIdCallback' => null, + 'speculativePageId' => null, ]; Hooks::run( 'ParserOptionsRegister', [ diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 23e5911574..dbe609a5b7 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -210,9 +210,16 @@ class ParserOutput extends CacheTime { */ private $mFlags = []; + /** @var string[] */ + private static $speculativeFields = [ + 'speculativePageIdUsed', + 'speculativeRevIdUsed', + 'revisionTimestampUsed' + ]; /** @var int|null Assumed rev ID for {{REVISIONID}} if no revision is set */ - private $mSpeculativeRevId; - + private $speculativeRevIdUsed; + /** @var int|null Assumed page ID for {{PAGEID}} if no revision is set */ + private $speculativePageIdUsed; /** @var int|null Assumed rev timestamp for {{REVISIONTIMESTAMP}} if no revision is set */ private $revisionTimestampUsed; @@ -440,7 +447,7 @@ class ParserOutput extends CacheTime { * @since 1.28 */ public function setSpeculativeRevIdUsed( $id ) { - $this->mSpeculativeRevId = $id; + $this->speculativeRevIdUsed = $id; } /** @@ -448,7 +455,23 @@ class ParserOutput extends CacheTime { * @since 1.28 */ public function getSpeculativeRevIdUsed() { - return $this->mSpeculativeRevId; + return $this->speculativeRevIdUsed; + } + + /** + * @param int $id + * @since 1.34 + */ + public function setSpeculativePageIdUsed( $id ) { + $this->speculativePageIdUsed = $id; + } + + /** + * @return int|null + * @since 1.34 + */ + public function getSpeculativePageIdUsed() { + return $this->speculativePageIdUsed; } /** @@ -1320,18 +1343,13 @@ class ParserOutput extends CacheTime { $this->mWarnings = self::mergeMap( $this->mWarnings, $source->mWarnings ); // don't use getter $this->mTimestamp = $this->useMaxValue( $this->mTimestamp, $source->getTimestamp() ); - if ( $this->mSpeculativeRevId && $source->mSpeculativeRevId - && $this->mSpeculativeRevId !== $source->mSpeculativeRevId - ) { - wfLogWarning( - 'Inconsistent speculative revision ID encountered while merging parser output!' - ); + foreach ( self::$speculativeFields as $field ) { + if ( $this->$field && $source->$field && $this->$field !== $source->$field ) { + wfLogWarning( __METHOD__ . ": inconsistent '$field' properties!" ); + } + $this->$field = $this->useMaxValue( $this->$field, $source->$field ); } - $this->mSpeculativeRevId = $this->useMaxValue( - $this->mSpeculativeRevId, - $source->getSpeculativeRevIdUsed() - ); $this->mParseStartTime = $this->useEachMinValue( $this->mParseStartTime, $source->mParseStartTime