Split out new RefreshSecondaryDataUpdate class
authorAaron Schulz <aschulz@wikimedia.org>
Fri, 12 Oct 2018 20:45:23 +0000 (13:45 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Fri, 15 Mar 2019 17:14:50 +0000 (17:14 +0000)
Make DerivedPageDataUpdater bundle all the related DataUpdate tasks
on page change with a RefreshSecondaryDataUpdate wrapper. If one of
the DataUpdate tasks fails, then the entire bundle of updates can be
re-run in the form of enqueueing a RefreshLinksJob instance (these
jobs are idempotent). If several of the bundled tasks fail, it is easy
for DeferredUpdates to know that only one RefreshLinksJob should be
enqueued.

The goal is to make DataUpdate tasks more reliable and resilient.
Most of these deferred update failures are due to ephemeral problems
like lock contention. Since the job queue is already able to reliably
store and retry jobs, and the time that a regular web request can spend
in post-send is more limited, it makes the most sense to just enqueue
tasks as jobs if they fail post-send.

Make LinkUpdate no longer defined as enqueueable as RefreshLinksJob
since they are not very congruent (LinksUpdate only does some of the
work that RefreshLinksJob does). Only the wrapper, with the bundle of
DataUpdate instances, is congruent to RefreshLinksJob.

This change does not itself implement the enqueue-on-failure logic
in DeferredUpdates, but is merely a prerequisite.

Bug: T206288
Change-Id: I191103c1aeff4c9fedbf524ee387dad9bdf5fab8

autoload.php
includes/Storage/DerivedPageDataUpdater.php
includes/deferred/LinksUpdate.php
includes/deferred/RefreshSecondaryDataUpdate.php [new file with mode: 0644]

index 9dad6f2..bb4de22 100644 (file)
@@ -1215,6 +1215,7 @@ $wgAutoloadLocalClasses = [
        'RefreshImageMetadata' => __DIR__ . '/maintenance/refreshImageMetadata.php',
        'RefreshLinks' => __DIR__ . '/maintenance/refreshLinks.php',
        'RefreshLinksJob' => __DIR__ . '/includes/jobqueue/jobs/RefreshLinksJob.php',
+       'RefreshSecondaryDataUpdate' => __DIR__ . '/includes/deferred/RefreshSecondaryDataUpdate.php',
        'RegexlikeReplacer' => __DIR__ . '/includes/libs/replacers/RegexlikeReplacer.php',
        'RemexStripTagHandler' => __DIR__ . '/includes/parser/RemexStripTagHandler.php',
        'RemoveInvalidEmails' => __DIR__ . '/maintenance/removeInvalidEmails.php',
index 9ce12b4..8dedc70 100644 (file)
@@ -24,6 +24,7 @@ namespace MediaWiki\Storage;
 
 use ApiStashEdit;
 use CategoryMembershipChangeJob;
+use RefreshSecondaryDataUpdate;
 use Content;
 use ContentHandler;
 use DataUpdate;
@@ -1590,14 +1591,31 @@ class DerivedPageDataUpdater implements IDBAccessObject {
                                $update->setRevision( $legacyRevision );
                                $update->setTriggeringUser( $triggeringUser );
                        }
-                       if ( $options['defer'] === false ) {
-                               if ( $options['transactionTicket'] !== null ) {
+               }
+
+               if ( $options['defer'] === false ) {
+                       foreach ( $updates as $update ) {
+                               if ( $update instanceof DataUpdate && $options['transactionTicket'] !== null ) {
                                        $update->setTransactionTicket( $options['transactionTicket'] );
                                }
                                $update->doUpdate();
-                       } else {
-                               DeferredUpdates::addUpdate( $update, $options['defer'] );
                        }
+               } else {
+                       $cacheTime = $this->getCanonicalParserOutput()->getCacheTime();
+                       // Bundle all of the data updates into a single deferred update wrapper so that
+                       // any failure will cause at most one refreshLinks job to be enqueued by
+                       // DeferredUpdates::doUpdates(). This is hard to do when there are many separate
+                       // updates that are not defined as being related.
+                       $update = new RefreshSecondaryDataUpdate(
+                               $this->wikiPage,
+                               $updates,
+                               $options,
+                               $cacheTime,
+                               $this->loadbalancerFactory->getLocalDomainID()
+                       );
+                       $update->setRevision( $legacyRevision );
+                       $update->setTriggeringUser( $triggeringUser );
+                       DeferredUpdates::addUpdate( $update, $options['defer'] );
                }
        }
 
index 7a31e26..101a1e2 100644 (file)
@@ -32,7 +32,7 @@ use Wikimedia\ScopedCallback;
  *
  * See docs/deferred.txt
  */
-class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate {
+class LinksUpdate extends DataUpdate {
        // @todo make members protected, but make sure extensions don't break
 
        /** @var int Page ID of the article linked from */
@@ -1187,39 +1187,4 @@ class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate {
 
                return $this->db;
        }
-
-       public function getAsJobSpecification() {
-               if ( $this->user ) {
-                       $userInfo = [
-                               'userId' => $this->user->getId(),
-                               'userName' => $this->user->getName(),
-                       ];
-               } else {
-                       $userInfo = false;
-               }
-
-               if ( $this->mRevision ) {
-                       $triggeringRevisionId = $this->mRevision->getId();
-               } else {
-                       $triggeringRevisionId = false;
-               }
-
-               return [
-                       'wiki' => WikiMap::getWikiIdFromDbDomain( $this->getDB()->getDomainID() ),
-                       'job'  => new JobSpecification(
-                               'refreshLinksPrioritized',
-                               [
-                                       // Reuse the parser cache if it was saved
-                                       'rootJobTimestamp' => $this->mParserOutput->getCacheTime(),
-                                       'useRecursiveLinksUpdate' => $this->mRecursive,
-                                       'triggeringUser' => $userInfo,
-                                       'triggeringRevisionId' => $triggeringRevisionId,
-                                       'causeAction' => $this->getCauseAction(),
-                                       'causeAgent' => $this->getCauseAgent()
-                               ],
-                               [ 'removeDuplicates' => true ],
-                               $this->getTitle()
-                       )
-               ];
-       }
 }
diff --git a/includes/deferred/RefreshSecondaryDataUpdate.php b/includes/deferred/RefreshSecondaryDataUpdate.php
new file mode 100644 (file)
index 0000000..8086a70
--- /dev/null
@@ -0,0 +1,117 @@
+<?php
+/**
+ * Updater for secondary data after a page edit.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Update object handling the cleanup of secondary data after a page was edited.
+ *
+ * This makes makes it possible for DeferredUpdates to have retry logic using a single
+ * refreshLinks job if any of the bundled updates fail.
+ */
+class RefreshSecondaryDataUpdate extends DataUpdate implements EnqueueableDataUpdate {
+       /** @var WikiPage */
+       private $page;
+       /** @var DeferrableUpdate[] */
+       private $updates;
+       /** @var bool */
+       private $recursive;
+       /** @var string */
+       private $cacheTimestamp;
+       /** @var string Database domain ID */
+       private $domain;
+
+       /** @var Revision|null */
+       private $revision;
+       /** @var User|null */
+       private $user;
+
+       /**
+        * @param WikiPage $page Page we are updating
+        * @param DeferrableUpdate[] $updates Updates from DerivedPageDataUpdater::getSecondaryUpdates()
+        * @param array $options Options map (causeAction, causeAgent, recursive)
+        * @param string $cacheTime Result of ParserOutput::getCacheTime() for the source output
+        * @param string $domain The database domain ID of the wiki the update is for
+        */
+       function __construct(
+               WikiPage $page,
+               array $updates,
+               array $options,
+               $cacheTime,
+               $domain
+       ) {
+               parent::__construct();
+
+               $this->page = $page;
+               $this->updates = $updates;
+               $this->causeAction = $options['causeAction'] ?? 'unknown';
+               $this->causeAgent = $options['causeAgent'] ?? 'unknown';
+               $this->recursive = !empty( $options['recursive'] );
+               $this->cacheTimestamp = $cacheTime;
+               $this->domain = $domain;
+       }
+
+       public function doUpdate() {
+               foreach ( $this->updates as $update ) {
+                       $update->doUpdate();
+               }
+       }
+
+       /**
+        * Set the revision corresponding to this LinksUpdate
+        * @param Revision $revision
+        */
+       public function setRevision( Revision $revision ) {
+               $this->revision = $revision;
+       }
+
+       /**
+        * Set the User who triggered this LinksUpdate
+        * @param User $user
+        */
+       public function setTriggeringUser( User $user ) {
+               $this->user = $user;
+       }
+
+       public function getAsJobSpecification() {
+               return [
+                       'wiki' => WikiMap::getWikiIdFromDomain( $this->domain ),
+                       'job'  => new JobSpecification(
+                               'refreshLinksPrioritized',
+                               [
+                                       // Reuse the parser cache if it was saved
+                                       'rootJobTimestamp' => $this->cacheTimestamp,
+                                       'useRecursiveLinksUpdate' => $this->recursive,
+                                       'triggeringUser' => $this->user
+                                               ? [
+                                                       'userId' => $this->user->getId(),
+                                                       'userName' => $this->user->getName()
+                                               ]
+                                               : false,
+                                       'triggeringRevisionId' => $this->revision ? $this->revision->getId() : false,
+                                       'causeAction' => $this->getCauseAction(),
+                                       'causeAgent' => $this->getCauseAgent()
+                               ],
+                               [ 'removeDuplicates' => true ],
+                               $this->page->getTitle()
+                       )
+               ];
+       }
+}