Merge "Improvements to RefreshLinksJob/DeleteLinksJob locking"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 19 Jul 2016 21:43:17 +0000 (21:43 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Tue, 19 Jul 2016 21:43:17 +0000 (21:43 +0000)
includes/deferred/LinksUpdate.php
includes/jobqueue/jobs/DeleteLinksJob.php
includes/jobqueue/jobs/RefreshLinksJob.php

diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php
index d4a61fa..22944eb 100644
@@ -168,18 +168,16 @@ class LinksUpdate extends SqlDataUpdate implements EnqueueableDataUpdate {
         *
         * @param IDatabase $dbw
         * @param integer $pageId
-        * @return ScopedCallback|null Returns null on failure
+        * @param string $why One of (job, atomicity)
+        * @return ScopedCallback
         * @throws RuntimeException
         * @since 1.27
         */
-       public static function acquirePageLock( IDatabase $dbw, $pageId ) {
-               $scopedLock = $dbw->getScopedLockAndFlush(
-                       "LinksUpdate:pageid:$pageId",
-                       __METHOD__,
-                       15
-               );
+       public static function acquirePageLock( IDatabase $dbw, $pageId, $why = 'atomicity' ) {
+               $key = "LinksUpdate:$why:pageid:$pageId";
+               $scopedLock = $dbw->getScopedLockAndFlush( $key, __METHOD__, 15 );
                if ( !$scopedLock ) {
-                       throw new RuntimeException( "Could not acquire lock on page #$pageId." );
+                       throw new RuntimeException( "Could not acquire lock '$key'." );
                }
 
                return $scopedLock;
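
The $why component splits the lock keyspace. A minimal sketch of the two kinds of callers, assuming a master IDatabase handle $dbw and a valid $pageId (the surrounding code is illustrative, not part of this change):

    // Long-lived lock taken by the link-update jobs for the whole job run:
    $jobLock = LinksUpdate::acquirePageLock( $dbw, $pageId, 'job' );
    // key: "LinksUpdate:job:pageid:$pageId"

    // Short-lived lock taken with the default $why, guarding only the update itself:
    $atomicLock = LinksUpdate::acquirePageLock( $dbw, $pageId );
    // key: "LinksUpdate:atomicity:pageid:$pageId"

    // Each call returns a ScopedCallback; the named lock is released when the value
    // falls out of scope, or explicitly:
    ScopedCallback::consume( $atomicLock );
    ScopedCallback::consume( $jobLock );

Keeping the two keys distinct presumably lets a job hold its serialization lock for its whole run while code using the default 'atomicity' key still takes its own short-lived lock without the two contending on the same name.
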
diff --git a/includes/jobqueue/jobs/DeleteLinksJob.php b/includes/jobqueue/jobs/DeleteLinksJob.php
index ca5d534..f39f8fd 100644
@@ -42,6 +42,10 @@ class DeleteLinksJob extends Job {
                }
 
                $pageId = $this->params['pageId'];
+
+               // Serialize links updates by page ID so they see each other's changes
+               $scopedLock = LinksUpdate::acquirePageLock( wfGetDB( DB_MASTER ), $pageId, 'job' );
+
                if ( WikiPage::newFromID( $pageId, WikiPage::READ_LATEST ) ) {
                        // The page was restored somehow or something went wrong
                        $this->setLastError( "deleteLinks: Page #$pageId exists" );
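
With both DeleteLinksJob and RefreshLinksJob (below) taking the 'job' lock for the same page ID, their link-table work is serialized against each other. A sketch of the failure path, assuming $pageId is the job's page ID (the try/catch is only for illustration; the jobs themselves do not catch here):

    try {
        $scopedLock = LinksUpdate::acquirePageLock( wfGetDB( DB_MASTER ), $pageId, 'job' );
    } catch ( RuntimeException $e ) {
        // After the 15 second timeout the message reads e.g.
        // "Could not acquire lock 'LinksUpdate:job:pageid:12345'."
        // Uncaught, it propagates to the job runner, so the job fails rather than
        // running without the lock.
        throw $e;
    }
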
diff --git a/includes/jobqueue/jobs/RefreshLinksJob.php b/includes/jobqueue/jobs/RefreshLinksJob.php
index c76ea4f..8fba728 100644
@@ -128,8 +128,18 @@ class RefreshLinksJob extends Job {
         * @return bool
         */
        protected function runForTitle( Title $title ) {
+               $stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
+
                $page = WikiPage::factory( $title );
                $page->loadPageData( WikiPage::READ_LATEST );
+
+               // Serialize links updates by page ID so they see each other's changes
+               $scopedLock = LinksUpdate::acquirePageLock( wfGetDB( DB_MASTER ), $page->getId(), 'job' );
+               // Get the latest ID *after* acquirePageLock() flushed the transaction.
+               // This is used to detect edits/moves after loadPageData() but before the scoped lock.
+               // This works around the chicken/egg problem of determining the scoped lock key.
+               $latest = $title->getLatestRevID( Title::GAID_FOR_UPDATE );
+
                if ( !empty( $this->params['triggeringRevisionId'] ) ) {
                        // Fetch the specified revision; lockAndGetLatest() below detects if the page
                        // was edited since and aborts in order to avoid corrupting the link tables
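
The ordering above is the point of the "chicken/egg" remark: the page must be loaded first to learn the page ID that goes into the lock key, but anything read before the lock may already be stale. An annotated restatement of the sequence (same calls as in the hunk, comments added for illustration):

    // 1. Load page data to learn the page ID (needed to build the lock key).
    $page = WikiPage::factory( $title );
    $page->loadPageData( WikiPage::READ_LATEST );

    // 2. Take the lock; getScopedLockAndFlush() flushes the pending transaction first,
    //    so later reads see anything committed before the lock was granted.
    $scopedLock = LinksUpdate::acquirePageLock( wfGetDB( DB_MASTER ), $page->getId(), 'job' );

    // 3. Re-read page_latest *after* the flush; an edit or move that slipped in between
    //    steps 1 and 2 is visible here and caught by the staleness check further down.
    $latest = $title->getLatestRevID( Title::GAID_FOR_UPDATE );
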
@@ -142,15 +152,15 @@ class RefreshLinksJob extends Job {
                        $revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
                }
 
-               $stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
-
                if ( !$revision ) {
                        $stats->increment( 'refreshlinks.rev_not_found' );
                        $this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
                        return false; // just deleted?
-               } elseif ( !$revision->isCurrent() || $revision->getPage() != $page->getId() ) {
-                       // If the revision isn't current, there's no point in doing a bunch
-                       // of work just to fail at the lockAndGetLatest() check later.
+               } elseif ( $revision->getId() != $latest || $revision->getPage() !== $page->getId() ) {
+                       // Do not clobber newer updates with older ones. If all jobs were FIFO and
+                       // serialized, it would be OK to update links based on older revisions since it
+                       // would eventually get to the latest. Since that is not the case (by design),
+                       // only update the link tables to a state matching the current revision's output.
                        $stats->increment( 'refreshlinks.rev_not_current' );
                        $this->setLastError( "Revision {$revision->getId()} is not current" );
                        return false;
@@ -250,17 +260,6 @@ class RefreshLinksJob extends Job {
                        }
                }
 
-               $latestNow = $page->lockAndGetLatest();
-               if ( !$latestNow || $revision->getId() != $latestNow ) {
-                       // Do not clobber over newer updates with older ones. If all jobs where FIFO and
-                       // serialized, it would be OK to update links based on older revisions since it
-                       // would eventually get to the latest. Since that is not the case (by design),
-                       // only update the link tables to a state matching the current revision's output.
-                       $stats->increment( 'refreshlinks.rev_cas_failure' );
-                       $this->setLastError( "page_latest changed from {$revision->getId()} to $latestNow" );
-                       return false;
-               }
-
                DataUpdate::runUpdates( $updates );
 
                InfoAction::invalidateCache( $title );
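
Taken together, the old lockAndGetLatest() compare-and-set at the end is no longer needed: the staleness check moved to the top, and the per-page lock is held from before that check until after the updates run. A condensed sketch of the resulting runForTitle() flow (error handling, statsd counters and the parser-output work are omitted; this is a summary, not the full method):

    protected function runForTitle( Title $title ) {
        $page = WikiPage::factory( $title );
        $page->loadPageData( WikiPage::READ_LATEST );

        // Hold the per-page 'job' lock for the rest of the method.
        $scopedLock = LinksUpdate::acquirePageLock( wfGetDB( DB_MASTER ), $page->getId(), 'job' );
        $latest = $title->getLatestRevID( Title::GAID_FOR_UPDATE );

        // (Or Revision::newFromId() when a triggering revision ID was passed in.)
        $revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
        if ( !$revision || $revision->getId() != $latest || $revision->getPage() !== $page->getId() ) {
            return false; // missing or stale; a newer job handles the newer state
        }

        // ... parse $revision and collect $updates ...

        DataUpdate::runUpdates( $updates ); // still under $scopedLock
        InfoAction::invalidateCache( $title );
        return true;
    }
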