Better handle already-used rev_id when restoring
authorMatthew Flaschen <mflaschen@wikimedia.org>
Mon, 23 May 2016 18:57:10 +0000 (11:57 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Fri, 12 Aug 2016 05:29:12 +0000 (22:29 -0700)
Leave it in the archive table, rather than the data being gone
from both the archive and revision table.

Bug: T135852
Change-Id: I202cea7910be82f1073b4365209e3b320e85d62e

includes/page/WikiPage.php
includes/specials/SpecialUndelete.php
languages/i18n/en.json
languages/i18n/qqq.json

index bded84d..8eba1cc 100644 (file)
@@ -1144,7 +1144,9 @@ class WikiPage implements Page, IDBAccessObject {
         * @param IDatabase $dbw
         * @param int|null $pageId Custom page ID that will be used for the insert statement
         *
-        * @return bool|int The newly created page_id key; false if the title already existed
+        * @return bool|int The newly created page_id key; false if the row was not
+        *   inserted, e.g. because the title already existed or because the specified
+        *   page ID is already in use.
         */
        public function insertOn( $dbw, $pageId = null ) {
                $pageIdForInsert = $pageId ?: $dbw->nextSequenceValue( 'page_page_id_seq' );
index 5d230c0..65f0680 100644 (file)
@@ -354,6 +354,9 @@ class PageArchive {
         * Once restored, the items will be removed from the archive tables.
         * The deletion log will be updated with an undeletion notice.
         *
+        * This also sets Status objects, $this->fileStatus and $this->revisionStatus
+        * (depending what operations are attempted).
+        *
         * @param array $timestamps Pass an empty array to restore all revisions,
         *   otherwise list the ones to undelete.
         * @param string $comment
@@ -439,9 +442,8 @@ class PageArchive {
        }
 
        /**
-        * This is the meaty bit -- restores archived revisions of the given page
-        * to the cur/old tables. If the page currently exists, all revisions will
-        * be stuffed into old, otherwise the most recent will go into cur.
+        * This is the meaty bit -- It restores archived revisions of the given page
+        * to the revision table.
         *
         * @param array $timestamps Pass an empty array to restore all revisions,
         *   otherwise list the ones to undelete.
@@ -455,8 +457,10 @@ class PageArchive {
                        throw new ReadOnlyError();
                }
 
-               $restoreAll = empty( $timestamps );
                $dbw = wfGetDB( DB_MASTER );
+               $dbw->startAtomic( __METHOD__ );
+
+               $restoreAll = empty( $timestamps );
 
                # Does this page already exist? We'll have to update it...
                $article = WikiPage::factory( $this->title );
@@ -477,11 +481,9 @@ class PageArchive {
                        # Page already exists. Import the history, and if necessary
                        # we'll update the latest revision field in the record.
 
-                       $previousRevId = $page->page_latest;
-
                        # Get the time span of this page
                        $previousTimestamp = $dbw->selectField( 'revision', 'rev_timestamp',
-                               [ 'rev_id' => $previousRevId ],
+                               [ 'rev_id' => $page->page_latest ],
                                __METHOD__ );
 
                        if ( $previousTimestamp === false ) {
@@ -489,13 +491,13 @@ class PageArchive {
 
                                $status = Status::newGood( 0 );
                                $status->warning( 'undeleterevision-missing' );
+                               $dbw->endAtomic( __METHOD__ );
 
                                return $status;
                        }
                } else {
                        # Have to create a new article...
                        $makepage = true;
-                       $previousRevId = 0;
                        $previousTimestamp = 0;
                }
 
@@ -508,7 +510,9 @@ class PageArchive {
                }
 
                $fields = [
+                       'ar_id',
                        'ar_rev_id',
+                       'rev_id',
                        'ar_text',
                        'ar_comment',
                        'ar_user',
@@ -531,11 +535,14 @@ class PageArchive {
                /**
                 * Select each archived revision...
                 */
-               $result = $dbw->select( 'archive',
+               $result = $dbw->select(
+                       [ 'archive', 'revision' ],
                        $fields,
                        $oldWhere,
                        __METHOD__,
-                       /* options */ [ 'ORDER BY' => 'ar_timestamp' ]
+                       /* options */
+                       [ 'ORDER BY' => 'ar_timestamp' ],
+                       [ 'revision' => [ 'LEFT JOIN', 'ar_rev_id=rev_id' ] ]
                );
 
                $rev_count = $result->numRows();
@@ -544,116 +551,182 @@ class PageArchive {
 
                        $status = Status::newGood( 0 );
                        $status->warning( "undelete-no-results" );
+                       $dbw->endAtomic( __METHOD__ );
 
                        return $status;
                }
 
-               $result->seek( $rev_count - 1 ); // move to last
-               $row = $result->fetchObject(); // get newest archived rev
-               $oldPageId = (int)$row->ar_page_id; // pass this to ArticleUndelete hook
-               $result->seek( 0 ); // move back
+               // We use ar_id because there can be duplicate ar_rev_id even for the same
+               // page.  In this case, we may be able to restore the first one.
+               $restoreFailedArIds = [];
 
-               // grab the content to check consistency with global state before restoring the page.
-               $revision = Revision::newFromArchiveRow( $row,
-                       [
-                               'title' => $article->getTitle(), // used to derive default content model
-                       ]
-               );
-               $user = User::newFromName( $revision->getUserText( Revision::RAW ), false );
-               $content = $revision->getContent( Revision::RAW );
+               // Map rev_id to the ar_id that is allowed to use it.  When checking later,
+               // if it doesn't match, the current ar_id can not be restored.
 
-               // NOTE: article ID may not be known yet. prepareSave() should not modify the database.
-               $status = $content->prepareSave( $article, 0, -1, $user );
+               // Value can be an ar_id or -1 (-1 means no ar_id can use it, since the
+               // rev_id is taken before we even start the restore).
+               $allowedRevIdToArIdMap = [];
 
-               if ( !$status->isOK() ) {
-                       return $status;
-               }
+               $latestRestorableRow = null;
 
-               if ( $makepage ) {
-                       // Check the state of the newest to-be version...
-                       if ( !$unsuppress && ( $row->ar_deleted & Revision::DELETED_TEXT ) ) {
-                               return Status::newFatal( "undeleterevdel" );
+               foreach ( $result as $row ) {
+                       if ( $row->ar_rev_id ) {
+                               // rev_id is taken even before we start restoring.
+                               if ( $row->ar_rev_id === $row->rev_id ) {
+                                       $restoreFailedArIds[] = $row->ar_id;
+                                       $allowedRevIdToArIdMap[$row->ar_rev_id] = -1;
+                               } else {
+                                       // rev_id is not taken yet in the DB, but it might be taken
+                                       // by a prior revision in the same restore operation. If
+                                       // not, we need to reserve it.
+                                       if ( isset( $allowedRevIdToArIdMap[$row->ar_rev_id] ) ) {
+                                               $restoreFailedArIds[] = $row->ar_id;
+                                       } else {
+                                               $allowedRevIdToArIdMap[$row->ar_rev_id] = $row->ar_id;
+                                               $latestRestorableRow = $row;
+                                       }
+                               }
+                       } else {
+                               // If ar_rev_id is null, there can't be a collision, and a
+                               // rev_id will be chosen automatically.
+                               $latestRestorableRow = $row;
                        }
-                       // Safe to insert now...
-                       $newid = $article->insertOn( $dbw, $row->ar_page_id );
-                       if ( $newid === false ) {
-                               // The old ID is reserved; let's pick another
-                               $newid = $article->insertOn( $dbw );
+               }
+
+               $result->seek( 0 ); // move back
+
+               $oldPageId = 0;
+               if ( $latestRestorableRow !== null ) {
+                       $oldPageId = (int)$latestRestorableRow->ar_page_id; // pass this to ArticleUndelete hook
+
+                       // grab the content to check consistency with global state before restoring the page.
+                       $revision = Revision::newFromArchiveRow( $latestRestorableRow,
+                               [
+                                       'title' => $article->getTitle(), // used to derive default content model
+                               ]
+                       );
+                       $user = User::newFromName( $revision->getUserText( Revision::RAW ), false );
+                       $content = $revision->getContent( Revision::RAW );
+
+                       // NOTE: article ID may not be known yet. prepareSave() should not modify the database.
+                       $status = $content->prepareSave( $article, 0, -1, $user );
+                       if ( !$status->isOK() ) {
+                               $dbw->endAtomic( __METHOD__ );
+
+                               return $status;
                        }
-                       $pageId = $newid;
+               }
+
+               $newid = false; // newly created page ID
+               $restored = 0; // number of revisions restored
+               /** @var Revision $revision */
+               $revision = null;
+
+               // If there are no restorable revisions, we can skip most of the steps.
+               if ( $latestRestorableRow === null ) {
+                       $failedRevisionCount = $rev_count;
                } else {
-                       // Check if a deleted revision will become the current revision...
-                       if ( $row->ar_timestamp > $previousTimestamp ) {
+                       if ( $makepage ) {
                                // Check the state of the newest to-be version...
-                               if ( !$unsuppress && ( $row->ar_deleted & Revision::DELETED_TEXT ) ) {
+                               if ( !$unsuppress
+                                       && ( $latestRestorableRow->ar_deleted & Revision::DELETED_TEXT )
+                               ) {
+                                       $dbw->endAtomic( __METHOD__ );
+
                                        return Status::newFatal( "undeleterevdel" );
                                }
+                               // Safe to insert now...
+                               $newid = $article->insertOn( $dbw, $latestRestorableRow->ar_page_id );
+                               if ( $newid === false ) {
+                                       // The old ID is reserved; let's pick another
+                                       $newid = $article->insertOn( $dbw );
+                               }
+                               $pageId = $newid;
+                       } else {
+                               // Check if a deleted revision will become the current revision...
+                               if ( $latestRestorableRow->ar_timestamp > $previousTimestamp ) {
+                                       // Check the state of the newest to-be version...
+                                       if ( !$unsuppress
+                                               && ( $latestRestorableRow->ar_deleted & Revision::DELETED_TEXT )
+                                       ) {
+                                               $dbw->endAtomic( __METHOD__ );
+
+                                               return Status::newFatal( "undeleterevdel" );
+                                       }
+                               }
+
+                               $newid = false;
+                               $pageId = $article->getId();
                        }
 
-                       $newid = false;
-                       $pageId = $article->getId();
-               }
+                       foreach ( $result as $row ) {
+                               // Check for key dupes due to needed archive integrity.
+                               if ( $row->ar_rev_id && $allowedRevIdToArIdMap[$row->ar_rev_id] !== $row->ar_id ) {
+                                       continue;
+                               }
+                               // Insert one revision at a time...maintaining deletion status
+                               // unless we are specifically removing all restrictions...
+                               $revision = Revision::newFromArchiveRow( $row,
+                                       [
+                                               'page' => $pageId,
+                                               'title' => $this->title,
+                                               'deleted' => $unsuppress ? 0 : $row->ar_deleted
+                                       ] );
 
-               $revision = null;
-               $restored = 0;
+                               $revision->insertOn( $dbw );
+                               $restored++;
 
-               foreach ( $result as $row ) {
-                       // Check for key dupes due to needed archive integrity.
-                       if ( $row->ar_rev_id ) {
-                               $exists = $dbw->selectField( 'revision', '1',
-                                       [ 'rev_id' => $row->ar_rev_id ], __METHOD__ );
-                               if ( $exists ) {
-                                       continue; // don't throw DB errors
-                               }
+                               Hooks::run( 'ArticleRevisionUndeleted',
+                                       [ &$this->title, $revision, $row->ar_page_id ] );
                        }
-                       // Insert one revision at a time...maintaining deletion status
-                       // unless we are specifically removing all restrictions...
-                       $revision = Revision::newFromArchiveRow( $row,
-                               [
-                                       'page' => $pageId,
-                                       'title' => $this->title,
-                                       'deleted' => $unsuppress ? 0 : $row->ar_deleted
-                               ] );
-
-                       $revision->insertOn( $dbw );
-                       $restored++;
 
-                       Hooks::run( 'ArticleRevisionUndeleted', [ &$this->title, $revision, $row->ar_page_id ] );
-               }
-               # Now that it's safely stored, take it out of the archive
-               $dbw->delete( 'archive',
-                       $oldWhere,
-                       __METHOD__ );
+                       // Now that it's safely stored, take it out of the archive
+                       // Don't delete rows that we failed to restore
+                       $toDeleteConds = $oldWhere;
+                       $failedRevisionCount = count( $restoreFailedArIds );
+                       if ( $failedRevisionCount > 0 ) {
+                               $toDeleteConds[] = 'ar_id NOT IN ( ' . $dbw->makeList( $restoreFailedArIds ) . ' )';
+                       }
 
-               // Was anything restored at all?
-               if ( $restored == 0 ) {
-                       return Status::newGood( 0 );
+                       $dbw->delete( 'archive',
+                               $toDeleteConds,
+                               __METHOD__ );
                }
 
-               $created = (bool)$newid;
+               $status = Status::newGood( $restored );
 
-               // Attach the latest revision to the page...
-               $wasnew = $article->updateIfNewerOn( $dbw, $revision, $previousRevId );
-               if ( $created || $wasnew ) {
-                       // Update site stats, link tables, etc
-                       $article->doEditUpdates(
-                               $revision,
-                               User::newFromName( $revision->getUserText( Revision::RAW ), false ),
-                               [
-                                       'created' => $created,
-                                       'oldcountable' => $oldcountable,
-                                       'restored' => true
-                               ]
-                       );
+               if ( $failedRevisionCount > 0 ) {
+                       $status->warning(
+                               wfMessage( 'undeleterevision-duplicate-revid', $failedRevisionCount ) );
                }
 
-               Hooks::run( 'ArticleUndelete', [ &$this->title, $created, $comment, $oldPageId ] );
+               // Was anything restored at all?
+               if ( $restored ) {
+                       $created = (bool)$newid;
+                       // Attach the latest revision to the page...
+                       $wasnew = $article->updateIfNewerOn( $dbw, $revision );
+                       if ( $created || $wasnew ) {
+                               // Update site stats, link tables, etc
+                               $article->doEditUpdates(
+                                       $revision,
+                                       User::newFromName( $revision->getUserText( Revision::RAW ), false ),
+                                       [
+                                               'created' => $created,
+                                               'oldcountable' => $oldcountable,
+                                               'restored' => true
+                                       ]
+                               );
+                       }
 
-               if ( $this->title->getNamespace() == NS_FILE ) {
-                       DeferredUpdates::addUpdate( new HTMLCacheUpdate( $this->title, 'imagelinks' ) );
+                       Hooks::run( 'ArticleUndelete', [ &$this->title, $created, $comment, $oldPageId ] );
+                       if ( $this->title->getNamespace() == NS_FILE ) {
+                               DeferredUpdates::addUpdate( new HTMLCacheUpdate( $this->title, 'imagelinks' ) );
+                       }
                }
 
-               return Status::newGood( $restored );
+               $dbw->endAtomic( __METHOD__ );
+
+               return $status;
        }
 
        /**
index b55cb76..51eaf01 100644 (file)
        "undeletehistorynoadmin": "This page has been deleted.\nThe reason for deletion is shown in the summary below, along with details of the users who had edited this page before deletion.\nThe actual text of these deleted revisions is only available to administrators.",
        "undelete-revision": "Deleted revision of $1 (as of $4, at $5) by $3:",
        "undeleterevision-missing": "Invalid or missing revision.\nYou may have a bad link, or the revision may have been restored or removed from the archive.",
+       "undeleterevision-duplicate-revid": "{{PLURAL:$1|One revision|$1 revisions}} could not be restored, because {{PLURAL:$1|its|their}} <code>rev_id</code> was already in use.",
        "undelete-nodiff": "No previous revision found.",
        "undeletebtn": "Restore",
        "undeletelink": "view/restore",
        "undeletedrevisions": "{{PLURAL:$1|1 revision|$1 revisions}} restored",
        "undeletedrevisions-files": "{{PLURAL:$1|1 revision|$1 revisions}} and {{PLURAL:$2|1 file|$2 files}} restored",
        "undeletedfiles": "{{PLURAL:$1|1 file|$1 files}} restored",
-       "cannotundelete": "Undelete failed:\n$1",
+       "cannotundelete": "Some or all of the undeletion failed:\n$1",
        "undeletedpage": "<strong>$1 has been restored</strong>\n\nConsult the [[Special:Log/delete|deletion log]] for a record of recent deletions and restorations.",
        "undelete-header": "See [[Special:Log/delete|the deletion log]] for recently deleted pages.",
        "undelete-search-title": "Search deleted pages",
index 953ebbb..6b5bab3 100644 (file)
        "undeletehistorynoadmin": "Used in [[Special:Undelete]].\n\nSee also:\n* {{msg-mw|Undeletehistory}}\n* {{msg-mw|Undeleterevdel}}",
        "undelete-revision": "Shown in \"View and restore deleted pages\" ([[Special:Undelete/$1]]).\nParameters:\n* $1 - deleted page name\n* $2 - (unused)\n* $3 - username (author of revision, not who deleted it)\n* $4 - date of the revision (localized)\n* $5 - time of the revision (localized)\nExample (in English):\n* Deleted revision of [[Main Page]] (as of 14 September 2013, at 08:17) by [[User:Username|Username]]:",
        "undeleterevision-missing": "Used as warning when undeleting the revision.",
+       "undeleterevision-duplicate-revid": "Used as warning when some revisions could not be undeleted due to <code>rev_id</code> collisions.  Parameters:\n* - Number of revisions that could not be restored for this reason.",
        "undelete-nodiff": "Used in [[Special:Undelete]].",
        "undeletebtn": "Shown on [[Special:Undelete]] as button caption and on [[Special:Log/delete|deletion log]] after each entry (for sysops).\n\n{{Identical|Restore}}",
        "undeletelink": "Display name of link to undelete a page used on [[Special:Log/delete]]\n\n{{Identical|View}}\n{{Identical|Restore}}",