[MCR] RevisionStore, enable insertions for new schema
[lhc/web/wiklou.git] / includes / Storage / RevisionStore.php
index ce09a6e..3f852b0 100644 (file)
@@ -69,6 +69,8 @@ use Wikimedia\Rdbms\LoadBalancer;
 class RevisionStore
        implements IDBAccessObject, RevisionFactory, RevisionLookup, LoggerAwareInterface {
 
+       const ROW_CACHE_KEY = 'revision-row-1.29';
+
        /**
         * @var SqlBlobStore
         */
@@ -109,6 +111,19 @@ class RevisionStore
         */
        private $logger;
 
+       /**
+        * @var NameTableStore
+        */
+       private $contentModelStore;
+
+       /**
+        * @var NameTableStore
+        */
+       private $slotRoleStore;
+
+       /** @var int One of the MIGRATION_* constants */
+       private $mcrMigrationStage;
+
        /**
         * @todo $blobStore should be allowed to be any BlobStore!
         *
@@ -116,6 +131,9 @@ class RevisionStore
         * @param SqlBlobStore $blobStore
         * @param WANObjectCache $cache
         * @param CommentStore $commentStore
+        * @param NameTableStore $contentModelStore
+        * @param NameTableStore $slotRoleStore
+        * @param int $migrationStage
         * @param ActorMigration $actorMigration
         * @param bool|string $wikiId
         */
@@ -124,15 +142,26 @@ class RevisionStore
                SqlBlobStore $blobStore,
                WANObjectCache $cache,
                CommentStore $commentStore,
+               NameTableStore $contentModelStore,
+               NameTableStore $slotRoleStore,
+               $migrationStage,
                ActorMigration $actorMigration,
                $wikiId = false
        ) {
                Assert::parameterType( 'string|boolean', $wikiId, '$wikiId' );
+               Assert::parameterType( 'integer', $migrationStage, '$migrationStage' );
+
+               if ( $migrationStage > MIGRATION_WRITE_BOTH ) {
+                       throw new InvalidArgumentException( 'New schema is not fully supported yet' );
+               }
 
                $this->loadBalancer = $loadBalancer;
                $this->blobStore = $blobStore;
                $this->cache = $cache;
                $this->commentStore = $commentStore;
+               $this->contentModelStore = $contentModelStore;
+               $this->slotRoleStore = $slotRoleStore;
+               $this->mcrMigrationStage = $migrationStage;
                $this->actorMigration = $actorMigration;
                $this->wikiId = $wikiId;
                $this->logger = new NullLogger();
@@ -158,8 +187,14 @@ class RevisionStore
 
        /**
         * @param bool $contentHandlerUseDB
+        * @throws MWException
         */
        public function setContentHandlerUseDB( $contentHandlerUseDB ) {
+               if ( !$contentHandlerUseDB && $this->mcrMigrationStage > MIGRATION_OLD ) {
+                       throw new MWException(
+                               'Content model must be stored in the database for multi content revision migration.'
+                       );
+               }
                $this->contentHandlerUseDB = $contentHandlerUseDB;
        }
 
@@ -334,6 +369,7 @@ class RevisionStore
                        throw new InvalidArgumentException( 'At least one slot needs to be defined!' );
                }
 
+               // RevisionStore currently only supports writing a single slot
                if ( $rev->getSlotRoles() !== [ 'main' ] ) {
                        throw new InvalidArgumentException( 'Only the main slot is supported for now!' );
                }
@@ -347,13 +383,15 @@ class RevisionStore
                        : $rev->getParentId();
 
                // Record the text (or external storage URL) to the blob store
-               $slot = $rev->getSlot( 'main', RevisionRecord::RAW );
+               $mainSlot = $rev->getSlot( 'main', RevisionRecord::RAW );
 
                $size = $this->failOnNull( $rev->getSize(), 'size field' );
                $sha1 = $this->failOnEmpty( $rev->getSha1(), 'sha1 field' );
 
-               if ( !$slot->hasAddress() ) {
-                       $content = $slot->getContent();
+               $dbw->startAtomic( __METHOD__ );
+
+               if ( !$mainSlot->hasAddress() ) {
+                       $content = $mainSlot->getContent();
                        $format = $content->getDefaultFormat();
                        $model = $content->getModel();
 
@@ -368,18 +406,18 @@ class RevisionStore
                        $blobHints = [
                                BlobStore::DESIGNATION_HINT => 'page-content', // BlobStore may be used for other things too.
                                BlobStore::PAGE_HINT => $pageId,
-                               BlobStore::ROLE_HINT => $slot->getRole(),
+                               BlobStore::ROLE_HINT => $mainSlot->getRole(),
                                BlobStore::PARENT_HINT => $parentId,
-                               BlobStore::SHA1_HINT => $slot->getSha1(),
+                               BlobStore::SHA1_HINT => $mainSlot->getSha1(),
                                BlobStore::MODEL_HINT => $model,
                                BlobStore::FORMAT_HINT => $format,
                        ];
 
                        $blobAddress = $this->blobStore->storeBlob( $data, $blobHints );
                } else {
-                       $blobAddress = $slot->getAddress();
-                       $model = $slot->getModel();
-                       $format = $slot->getFormat();
+                       $blobAddress = $mainSlot->getAddress();
+                       $model = $mainSlot->getModel();
+                       $format = $mainSlot->getFormat();
                }
 
                $textId = $this->blobStore->getTextIdFromAddress( $blobAddress );
@@ -402,11 +440,10 @@ class RevisionStore
                $this->failOnNull( $user->getId(), 'user field' );
                $this->failOnEmpty( $user->getName(), 'user_text field' );
 
-               # Record the edit in revisions
-               $row = [
+               // Record the edit in revisions
+               $revisionRow = [
                        'rev_page'       => $pageId,
                        'rev_parent_id'  => $parentId,
-                       'rev_text_id'    => $textId,
                        'rev_minor_edit' => $rev->isMinor() ? 1 : 0,
                        'rev_timestamp'  => $dbw->timestamp( $timestamp ),
                        'rev_deleted'    => $rev->getVisibility(),
@@ -416,54 +453,106 @@ class RevisionStore
 
                if ( $rev->getId() !== null ) {
                        // Needed to restore revisions with their original ID
-                       $row['rev_id'] = $rev->getId();
+                       $revisionRow['rev_id'] = $rev->getId();
                }
 
                list( $commentFields, $commentCallback ) =
                        $this->commentStore->insertWithTempTable( $dbw, 'rev_comment', $comment );
-               $row += $commentFields;
+               $revisionRow += $commentFields;
 
                list( $actorFields, $actorCallback ) =
                        $this->actorMigration->getInsertValuesWithTempTable( $dbw, 'rev_user', $user );
-               $row += $actorFields;
+               $revisionRow += $actorFields;
+
+               if ( $this->mcrMigrationStage <= MIGRATION_WRITE_BOTH ) {
+                       $revisionRow['rev_text_id'] = $textId;
 
-               if ( $this->contentHandlerUseDB ) {
                        // MCR migration note: rev_content_model and rev_content_format will go away
+                       if ( $this->contentHandlerUseDB ) {
+                               $defaultModel = ContentHandler::getDefaultModelFor( $title );
+                               $defaultFormat = ContentHandler::getForModelID( $defaultModel )->getDefaultFormat();
 
-                       $defaultModel = ContentHandler::getDefaultModelFor( $title );
-                       $defaultFormat = ContentHandler::getForModelID( $defaultModel )->getDefaultFormat();
+                               $revisionRow['rev_content_model'] = ( $model === $defaultModel ) ? null : $model;
+                               $revisionRow['rev_content_format'] = ( $format === $defaultFormat ) ? null : $format;
+                       }
+               } else {
+                       /**
+                        * rev_text_id has NOT NULL and no DEFAULT, so set to 0 when we are not writing to it.
+                        * WARNING: This should NOT be removed after migration until a schema change has been
+                        * made in WMF production giving rev_text_id a DEFAULT value of 0 (otherwise inserts
+                        * will fail)
+                        * Task: https://phabricator.wikimedia.org/T190148#4064625
+                        */
+                       $revisionRow['rev_text_id'] = 0;
+               }
+
+               $dbw->insert( 'revision', $revisionRow, __METHOD__ );
+
+               $hasSlots = false;
+               $contentId = false;
+
+               if ( isset( $revisionRow['rev_id'] ) ) {
+                       // Restoring a revision, slots should already exist,
+                       // unless the archive row wasn't migrated yet.
+                       if ( $this->mcrMigrationStage === MIGRATION_NEW ) {
+                               $hasSlots = true;
+                       } else {
+                               $contentId = $this->findSlotContentId( $dbw, $revisionRow['rev_id'], 'main' );
+                               $hasSlots = (bool)$contentId;
+                       }
+               } else {
+                       // not restoring a revision, use auto-increment value
+                       $revisionRow['rev_id'] = intval( $dbw->insertId() );
+               }
 
-                       $row['rev_content_model'] = ( $model === $defaultModel ) ? null : $model;
-                       $row['rev_content_format'] = ( $format === $defaultFormat ) ? null : $format;
+               if ( $this->mcrMigrationStage > MIGRATION_OLD && $mainSlot->hasContentId() ) {
+                       // re-use content row of inherited slot!
+                       $contentId = $mainSlot->getContentId();
                }
 
-               $dbw->insert( 'revision', $row, __METHOD__ );
+               $revisionId = $revisionRow['rev_id'];
 
-               if ( !isset( $row['rev_id'] ) ) {
-                       // only if auto-increment was used
-                       $row['rev_id'] = intval( $dbw->insertId() );
-               }
-               $commentCallback( $row['rev_id'] );
-               $actorCallback( $row['rev_id'], $row );
+               $commentCallback( $revisionId );
+               $actorCallback( $revisionId, $revisionRow );
 
                // Insert IP revision into ip_changes for use when querying for a range.
                if ( $user->getId() === 0 && IP::isValid( $user->getName() ) ) {
                        $ipcRow = [
-                               'ipc_rev_id'        => $row['rev_id'],
-                               'ipc_rev_timestamp' => $row['rev_timestamp'],
+                               'ipc_rev_id'        => $revisionId,
+                               'ipc_rev_timestamp' => $revisionRow['rev_timestamp'],
                                'ipc_hex'           => IP::toHex( $user->getName() ),
                        ];
                        $dbw->insert( 'ip_changes', $ipcRow, __METHOD__ );
                }
 
-               $newSlot = SlotRecord::newSaved( $row['rev_id'], $textId, $blobAddress, $slot );
+               if ( $this->mcrMigrationStage >= MIGRATION_WRITE_BOTH ) {
+
+                       // Only insert slot rows for new revisions (not restored revisions).
+                       // Also, never insert content rows if not inserting slot rows.
+                       if ( !$hasSlots ) {
+
+                               // Only insert content rows for new content (not inherited content)
+                               if ( !$contentId ) {
+                                       Assert::invariant( !$hasSlots, 'Re-using slots, but not content ID is known' );
+                                       $contentId = $this->insertContentRowOn( $mainSlot, $dbw, $blobAddress );
+                               }
+
+                               $this->insertSlotRowOn( $mainSlot, $dbw, $revisionId, $contentId );
+                       }
+               } else {
+                       $contentId = null;
+               }
+
+               $dbw->endAtomic( __METHOD__ );
+
+               $newSlot = SlotRecord::newSaved( $revisionId, $contentId, $blobAddress, $mainSlot );
                $slots = new RevisionSlots( [ 'main' => $newSlot ] );
 
                $rev = new RevisionStoreRecord(
                        $title,
                        $user,
                        $comment,
-                       (object)$row,
+                       (object)$revisionRow,
                        $slots,
                        $this->wikiId
                );
@@ -485,7 +574,7 @@ class RevisionStore
                Assert::postcondition( $newSlot !== null, 'revision must have a main slot' );
                Assert::postcondition(
                        $newSlot->getAddress() !== null,
-                       'main slot must have an addess'
+                       'main slot must have an address'
                );
 
                Hooks::run( 'RevisionRecordInserted', [ $rev ] );
@@ -497,6 +586,41 @@ class RevisionStore
                return $rev;
        }
 
+       /**
+        * @param SlotRecord $slot
+        * @param IDatabase $dbw
+        * @param int $revisionId
+        * @param int $contentId
+        */
+       private function insertSlotRowOn( SlotRecord $slot, IDatabase $dbw, $revisionId, $contentId ) {
+               $slotRow = [
+                       'slot_revision_id' => $revisionId,
+                       'slot_role_id' => $this->slotRoleStore->acquireId( $slot->getRole() ),
+                       'slot_content_id' => $contentId,
+                       // If the slot has a specific origin use that ID, otherwise use the ID of the revision
+                       // that we just inserted.
+                       'slot_origin' => $slot->hasOrigin() ? $slot->getOrigin() : $revisionId,
+               ];
+               $dbw->insert( 'slots', $slotRow, __METHOD__ );
+       }
+
+       /**
+        * @param SlotRecord $slot
+        * @param IDatabase $dbw
+        * @param string $blobAddress
+        * @return int content row ID
+        */
+       private function insertContentRowOn( SlotRecord $slot, IDatabase $dbw, $blobAddress ) {
+               $contentRow = [
+                       'content_size' => $slot->getSize(),
+                       'content_sha1' => $slot->getSha1(),
+                       'content_model' => $this->contentModelStore->acquireId( $slot->getModel() ),
+                       'content_address' => $blobAddress,
+               ];
+               $dbw->insert( 'content', $contentRow, __METHOD__ );
+               return intval( $dbw->insertId() );
+       }
+
        /**
         * MCR migration note: this corresponds to Revision::checkContentModel
         *
@@ -581,20 +705,25 @@ class RevisionStore
                $fields = [ 'page_latest', 'page_namespace', 'page_title',
                        'rev_id', 'rev_text_id', 'rev_len', 'rev_sha1' ];
 
-               if ( $this->contentHandlerUseDB ) {
-                       $fields[] = 'rev_content_model';
-                       $fields[] = 'rev_content_format';
+               if ( $this->mcrMigrationStage < MIGRATION_NEW ) {
+                       $fields[] = 'rev_text_id';
+                       if ( $this->contentHandlerUseDB ) {
+                               $fields[] = 'rev_content_model';
+                               $fields[] = 'rev_content_format';
+                       }
                }
 
+               // Don't use getQueryInfo() top avoid locking extra tables, compare T191892.
+               // XXX: perhaps getQueryInfo() could support 'no-comment' and 'no-actor' as options.
                $current = $dbw->selectRow(
                        [ 'page', 'revision' ],
                        $fields,
                        [
                                'page_id' => $title->getArticleID(),
-                               'page_latest=rev_id',
                        ],
                        __METHOD__,
-                       [ 'FOR UPDATE' ] // T51581
+                       [ 'FOR UPDATE' ], // T51581
+                       [ 'page' => [ 'JOIN', 'page_latest=rev_id' ] ]
                );
 
                if ( $current ) {
@@ -605,19 +734,20 @@ class RevisionStore
                                'actor'       => $user->getActorId(),
                                'comment'     => $comment,
                                'minor_edit'  => $minor,
-                               'text_id'     => $current->rev_text_id,
                                'parent_id'   => $current->page_latest,
                                'slot_origin' => $current->page_latest,
                                'len'         => $current->rev_len,
                                'sha1'        => $current->rev_sha1
                        ];
 
-                       if ( $this->contentHandlerUseDB ) {
-                               $fields['content_model'] = $current->rev_content_model;
-                               $fields['content_format'] = $current->rev_content_format;
-                       }
+                       if ( $this->mcrMigrationStage < MIGRATION_NEW ) {
+                               $fields['text_id'] = $current->rev_text_id;
 
-                       $fields['title'] = Title::makeTitle( $current->page_namespace, $current->page_title );
+                               if ( $this->contentHandlerUseDB ) {
+                                       $fields['content_model'] = $current->rev_content_model;
+                                       $fields['content_format'] = $current->rev_content_format;
+                               }
+                       }
 
                        $mainSlot = $this->emulateMainSlot_1_29( $fields, self::READ_LATEST, $title );
                        $revision = new MutableRevisionRecord( $title, $this->wikiId );
@@ -755,7 +885,6 @@ class RevisionStore
                $mainSlotRow->model_name = null;
                $mainSlotRow->slot_revision_id = null;
                $mainSlotRow->content_address = null;
-               $mainSlotRow->slot_content_id = null;
 
                $content = null;
                $blobData = null;
@@ -768,7 +897,6 @@ class RevisionStore
                        }
 
                        if ( isset( $row->rev_text_id ) && $row->rev_text_id > 0 ) {
-                               $mainSlotRow->slot_content_id = $row->rev_text_id;
                                $mainSlotRow->content_address = SqlBlobStore::makeAddressFromTextId(
                                        $row->rev_text_id
                                );
@@ -803,9 +931,6 @@ class RevisionStore
                } elseif ( is_array( $row ) ) {
                        $mainSlotRow->slot_revision_id = isset( $row['id'] ) ? intval( $row['id'] ) : null;
 
-                       $mainSlotRow->slot_content_id = isset( $row['text_id'] )
-                               ? intval( $row['text_id'] )
-                               : null;
                        $mainSlotRow->slot_origin = isset( $row['slot_origin'] )
                                ? intval( $row['slot_origin'] )
                                : null;
@@ -874,7 +999,18 @@ class RevisionStore
                        };
                }
 
-               $mainSlotRow->slot_id = $mainSlotRow->slot_revision_id;
+               // NOTE: this callback will be looped through RevisionSlot::newInherited(), allowing
+               // the inherited slot to have the same content_id as the original slot. In that case,
+               // $slot will be the inherited slot, while $mainSlotRow still refers to the original slot.
+               $mainSlotRow->slot_content_id =
+                       function ( SlotRecord $slot ) use ( $queryFlags, $mainSlotRow ) {
+                               list( $dbMode, ) = DBAccessObjectUtils::getDBOptions( $queryFlags );
+                               $db = $this->getDBConnectionRef( $dbMode );
+                               return $this->findSlotContentId( $db, $mainSlotRow->slot_revision_id, 'main' );
+                       };
+
+               // use negative IDs for fake slot records.
+               $mainSlotRow->slot_id = -( $mainSlotRow->slot_revision_id );
                return new SlotRecord( $mainSlotRow, $content );
        }
 
@@ -1580,7 +1716,7 @@ class RevisionStore
        private function fetchRevisionRowFromConds( IDatabase $db, $conditions, $flags = 0 ) {
                $this->checkDatabaseWikiId( $db );
 
-               $revQuery = self::getQueryInfo( [ 'page', 'user' ] );
+               $revQuery = $this->getQueryInfo( [ 'page', 'user' ] );
                $options = [];
                if ( ( $flags & self::READ_LOCKING ) == self::READ_LOCKING ) {
                        $options[] = 'FOR UPDATE';
@@ -1595,12 +1731,50 @@ class RevisionStore
                );
        }
 
+       /**
+        * Finds the ID of a content row for a given revision and slot role.
+        * This can be used to re-use content rows even while the content ID
+        * is still missing from SlotRecords, in MIGRATION_WRITE_BOTH mode.
+        *
+        * @todo remove after MCR schema migration is complete.
+        *
+        * @param IDatabase $db
+        * @param int $revId
+        * @param string $role
+        *
+        * @return int|null
+        */
+       private function findSlotContentId( IDatabase $db, $revId, $role ) {
+               if ( $this->mcrMigrationStage < MIGRATION_WRITE_BOTH ) {
+                       return null;
+               }
+
+               try {
+                       $roleId = $this->slotRoleStore->getId( $role );
+                       $conditions = [
+                               'slot_revision_id' => $revId,
+                               'slot_role_id' => $roleId,
+                       ];
+
+                       $contentId = $db->selectField( 'slots', 'slot_content_id', $conditions, __METHOD__ );
+
+                       return $contentId ?: null;
+               } catch ( NameTableAccessException $ex ) {
+                       // If the role is missing from the slot_roles table,
+                       // the corresponding row in slots cannot exist.
+                       return null;
+               }
+       }
+
        /**
         * Return the tables, fields, and join conditions to be selected to create
         * a new revision object.
         *
         * MCR migration note: this replaces Revision::getQueryInfo
         *
+        * If the format of fields returned changes in any way then the cache key provided by
+        * self::getRevisionRowCacheKey should be updated.
+        *
         * @since 1.31
         *
         * @param array $options Any combination of the following strings
@@ -1624,7 +1798,6 @@ class RevisionStore
                $ret['fields'] = array_merge( $ret['fields'], [
                        'rev_id',
                        'rev_page',
-                       'rev_text_id',
                        'rev_timestamp',
                        'rev_minor_edit',
                        'rev_deleted',
@@ -1643,9 +1816,13 @@ class RevisionStore
                $ret['fields'] = array_merge( $ret['fields'], $actorQuery['fields'] );
                $ret['joins'] = array_merge( $ret['joins'], $actorQuery['joins'] );
 
-               if ( $this->contentHandlerUseDB ) {
-                       $ret['fields'][] = 'rev_content_format';
-                       $ret['fields'][] = 'rev_content_model';
+               if ( $this->mcrMigrationStage < MIGRATION_NEW ) {
+                       $ret['fields'][] = 'rev_text_id';
+
+                       if ( $this->contentHandlerUseDB ) {
+                               $ret['fields'][] = 'rev_content_format';
+                               $ret['fields'][] = 'rev_content_model';
+                       }
                }
 
                if ( in_array( 'page', $options, true ) ) {
@@ -1671,6 +1848,10 @@ class RevisionStore
                }
 
                if ( in_array( 'text', $options, true ) ) {
+                       if ( $this->mcrMigrationStage === MIGRATION_NEW ) {
+                               throw new InvalidArgumentException( 'text table can no longer be joined directly' );
+                       }
+
                        $ret['tables'][] = 'text';
                        $ret['fields'] = array_merge( $ret['fields'], [
                                'old_text',
@@ -1706,7 +1887,6 @@ class RevisionStore
                                        'ar_namespace',
                                        'ar_title',
                                        'ar_rev_id',
-                                       'ar_text_id',
                                        'ar_timestamp',
                                        'ar_minor_edit',
                                        'ar_deleted',
@@ -1717,9 +1897,13 @@ class RevisionStore
                        'joins' => $commentQuery['joins'] + $actorQuery['joins'],
                ];
 
-               if ( $this->contentHandlerUseDB ) {
-                       $ret['fields'][] = 'ar_content_format';
-                       $ret['fields'][] = 'ar_content_model';
+               if ( $this->mcrMigrationStage < MIGRATION_NEW ) {
+                       $ret['fields'][] = 'ar_text_id';
+
+                       if ( $this->contentHandlerUseDB ) {
+                               $ret['fields'][] = 'ar_content_format';
+                               $ret['fields'][] = 'ar_content_model';
+                       }
                }
 
                return $ret;
@@ -1937,7 +2121,7 @@ class RevisionStore
                        return false;
                }
 
-               $revQuery = self::getQueryInfo();
+               $revQuery = $this->getQueryInfo();
                $res = $db->select(
                        $revQuery['tables'],
                        [
@@ -1995,7 +2179,7 @@ class RevisionStore
 
                $row = $this->cache->getWithSetCallback(
                        // Page/rev IDs passed in from DB to reflect history merges
-                       $this->cache->makeGlobalKey( 'revision-row-1.29', $db->getDomainID(), $pageId, $revId ),
+                       $this->getRevisionRowCacheKey( $db, $pageId, $revId ),
                        WANObjectCache::TTL_WEEK,
                        function ( $curValue, &$ttl, array &$setOpts ) use ( $db, $pageId, $revId ) {
                                $setOpts += Database::getCacheSetOptions( $db );
@@ -2019,6 +2203,26 @@ class RevisionStore
                }
        }
 
+       /**
+        * Get a cache key for use with a row as selected with getQueryInfo( [ 'page', 'user' ] )
+        * Caching rows without 'page' or 'user' could lead to issues.
+        * If the format of the rows returned by the query provided by getQueryInfo changes the
+        * cache key should be updated to avoid conflicts.
+        *
+        * @param IDatabase $db
+        * @param int $pageId
+        * @param int $revId
+        * @return string
+        */
+       private function getRevisionRowCacheKey( IDatabase $db, $pageId, $revId ) {
+               return $this->cache->makeGlobalKey(
+                       self::ROW_CACHE_KEY,
+                       $db->getDomainID(),
+                       $pageId,
+                       $revId
+               );
+       }
+
        // TODO: move relevant methods from Title here, e.g. getFirstRevision, isBigDeletion, etc.
 
 }