From a1f7fd3adaa380b276aefa467ec90fce4c916ce6 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Fri, 12 Oct 2018 13:45:23 -0700 Subject: [PATCH] Split out new RefreshSecondaryDataUpdate class Make DerivedPageDataUpdater bundle all the related DataUpdate tasks on page change with a RefreshSecondaryDataUpdate wrapper. If one of the DataUpdate tasks fails, then the entire bundle of updates can be re-run in the form of enqueueing a RefreshLinksJob instance (these jobs are idempotent). If several of the bundled tasks fail, it is easy for DeferredUpdates to know that only one RefreshLinksJob should be enqueued. The goal is to make DataUpdate tasks more reliable and resilient. Most of these deferred update failures are due to ephemeral problems like lock contention. Since the job queue is already able to reliably store and retry jobs, and the time that a regular web request can spend in post-send is more limited, it makes the most sense to just enqueue tasks as jobs if they fail post-send. Make LinkUpdate no longer defined as enqueueable as RefreshLinksJob since they are not very congruent (LinksUpdate only does some of the work that RefreshLinksJob does). Only the wrapper, with the bundle of DataUpdate instances, is congruent to RefreshLinksJob. This change does not itself implement the enqueue-on-failure logic in DeferredUpdates, but is merely a prerequisite. Bug: T206288 Change-Id: I191103c1aeff4c9fedbf524ee387dad9bdf5fab8 --- autoload.php | 1 + includes/Storage/DerivedPageDataUpdater.php | 26 +++- includes/deferred/LinksUpdate.php | 37 +----- .../deferred/RefreshSecondaryDataUpdate.php | 117 ++++++++++++++++++ 4 files changed, 141 insertions(+), 40 deletions(-) create mode 100644 includes/deferred/RefreshSecondaryDataUpdate.php diff --git a/autoload.php b/autoload.php index 9dad6f2b1f..bb4de22cce 100644 --- a/autoload.php +++ b/autoload.php @@ -1215,6 +1215,7 @@ $wgAutoloadLocalClasses = [ 'RefreshImageMetadata' => __DIR__ . '/maintenance/refreshImageMetadata.php', 'RefreshLinks' => __DIR__ . '/maintenance/refreshLinks.php', 'RefreshLinksJob' => __DIR__ . '/includes/jobqueue/jobs/RefreshLinksJob.php', + 'RefreshSecondaryDataUpdate' => __DIR__ . '/includes/deferred/RefreshSecondaryDataUpdate.php', 'RegexlikeReplacer' => __DIR__ . '/includes/libs/replacers/RegexlikeReplacer.php', 'RemexStripTagHandler' => __DIR__ . '/includes/parser/RemexStripTagHandler.php', 'RemoveInvalidEmails' => __DIR__ . '/maintenance/removeInvalidEmails.php', diff --git a/includes/Storage/DerivedPageDataUpdater.php b/includes/Storage/DerivedPageDataUpdater.php index 9ce12b4b13..8dedc70211 100644 --- a/includes/Storage/DerivedPageDataUpdater.php +++ b/includes/Storage/DerivedPageDataUpdater.php @@ -24,6 +24,7 @@ namespace MediaWiki\Storage; use ApiStashEdit; use CategoryMembershipChangeJob; +use RefreshSecondaryDataUpdate; use Content; use ContentHandler; use DataUpdate; @@ -1590,14 +1591,31 @@ class DerivedPageDataUpdater implements IDBAccessObject { $update->setRevision( $legacyRevision ); $update->setTriggeringUser( $triggeringUser ); } - if ( $options['defer'] === false ) { - if ( $options['transactionTicket'] !== null ) { + } + + if ( $options['defer'] === false ) { + foreach ( $updates as $update ) { + if ( $update instanceof DataUpdate && $options['transactionTicket'] !== null ) { $update->setTransactionTicket( $options['transactionTicket'] ); } $update->doUpdate(); - } else { - DeferredUpdates::addUpdate( $update, $options['defer'] ); } + } else { + $cacheTime = $this->getCanonicalParserOutput()->getCacheTime(); + // Bundle all of the data updates into a single deferred update wrapper so that + // any failure will cause at most one refreshLinks job to be enqueued by + // DeferredUpdates::doUpdates(). This is hard to do when there are many separate + // updates that are not defined as being related. + $update = new RefreshSecondaryDataUpdate( + $this->wikiPage, + $updates, + $options, + $cacheTime, + $this->loadbalancerFactory->getLocalDomainID() + ); + $update->setRevision( $legacyRevision ); + $update->setTriggeringUser( $triggeringUser ); + DeferredUpdates::addUpdate( $update, $options['defer'] ); } } diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index 7a31e26253..101a1e296b 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -32,7 +32,7 @@ use Wikimedia\ScopedCallback; * * See docs/deferred.txt */ -class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate { +class LinksUpdate extends DataUpdate { // @todo make members protected, but make sure extensions don't break /** @var int Page ID of the article linked from */ @@ -1187,39 +1187,4 @@ class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate { return $this->db; } - - public function getAsJobSpecification() { - if ( $this->user ) { - $userInfo = [ - 'userId' => $this->user->getId(), - 'userName' => $this->user->getName(), - ]; - } else { - $userInfo = false; - } - - if ( $this->mRevision ) { - $triggeringRevisionId = $this->mRevision->getId(); - } else { - $triggeringRevisionId = false; - } - - return [ - 'wiki' => WikiMap::getWikiIdFromDbDomain( $this->getDB()->getDomainID() ), - 'job' => new JobSpecification( - 'refreshLinksPrioritized', - [ - // Reuse the parser cache if it was saved - 'rootJobTimestamp' => $this->mParserOutput->getCacheTime(), - 'useRecursiveLinksUpdate' => $this->mRecursive, - 'triggeringUser' => $userInfo, - 'triggeringRevisionId' => $triggeringRevisionId, - 'causeAction' => $this->getCauseAction(), - 'causeAgent' => $this->getCauseAgent() - ], - [ 'removeDuplicates' => true ], - $this->getTitle() - ) - ]; - } } diff --git a/includes/deferred/RefreshSecondaryDataUpdate.php b/includes/deferred/RefreshSecondaryDataUpdate.php new file mode 100644 index 0000000000..8086a7050d --- /dev/null +++ b/includes/deferred/RefreshSecondaryDataUpdate.php @@ -0,0 +1,117 @@ +page = $page; + $this->updates = $updates; + $this->causeAction = $options['causeAction'] ?? 'unknown'; + $this->causeAgent = $options['causeAgent'] ?? 'unknown'; + $this->recursive = !empty( $options['recursive'] ); + $this->cacheTimestamp = $cacheTime; + $this->domain = $domain; + } + + public function doUpdate() { + foreach ( $this->updates as $update ) { + $update->doUpdate(); + } + } + + /** + * Set the revision corresponding to this LinksUpdate + * @param Revision $revision + */ + public function setRevision( Revision $revision ) { + $this->revision = $revision; + } + + /** + * Set the User who triggered this LinksUpdate + * @param User $user + */ + public function setTriggeringUser( User $user ) { + $this->user = $user; + } + + public function getAsJobSpecification() { + return [ + 'wiki' => WikiMap::getWikiIdFromDomain( $this->domain ), + 'job' => new JobSpecification( + 'refreshLinksPrioritized', + [ + // Reuse the parser cache if it was saved + 'rootJobTimestamp' => $this->cacheTimestamp, + 'useRecursiveLinksUpdate' => $this->recursive, + 'triggeringUser' => $this->user + ? [ + 'userId' => $this->user->getId(), + 'userName' => $this->user->getName() + ] + : false, + 'triggeringRevisionId' => $this->revision ? $this->revision->getId() : false, + 'causeAction' => $this->getCauseAction(), + 'causeAgent' => $this->getCauseAgent() + ], + [ 'removeDuplicates' => true ], + $this->page->getTitle() + ) + ]; + } +} -- 2.20.1