From e61a1caaddb58cc26bf5f912940afbb2a6f65355 Mon Sep 17 00:00:00 2001 From: daniel Date: Sun, 27 Aug 2017 17:29:18 +0200 Subject: [PATCH] [MCR] Break Revision into RevisionRecord and RevisionStore Bug: T174025 Change-Id: I140f43a6fb443b38483f41f268c906b9cea64cf7 --- autoload.php | 17 + includes/Storage/BlobAccessException.php | 34 + includes/Storage/BlobStore.php | 113 + .../Storage/IncompleteRevisionException.php | 32 + includes/Storage/MutableRevisionRecord.php | 328 +++ includes/Storage/MutableRevisionSlots.php | 137 ++ includes/Storage/RevisionAccessException.php | 34 + includes/Storage/RevisionArchiveRecord.php | 165 ++ includes/Storage/RevisionFactory.php | 94 + includes/Storage/RevisionLookup.php | 118 + includes/Storage/RevisionRecord.php | 479 +++++ includes/Storage/RevisionSlots.php | 189 ++ includes/Storage/RevisionStore.php | 1914 +++++++++++++++++ includes/Storage/RevisionStoreRecord.php | 207 ++ includes/Storage/SlotRecord.php | 430 ++++ includes/Storage/SqlBlobStore.php | 580 +++++ includes/Storage/SuppressedDataException.php | 33 + includes/user/UserIdentityValue.php | 70 + .../includes/Storage/RevisionRecordTest.php | 15 + .../Storage/RevisionStoreRecordTest.php | 685 ++++++ 20 files changed, 5674 insertions(+) create mode 100644 includes/Storage/BlobAccessException.php create mode 100644 includes/Storage/BlobStore.php create mode 100644 includes/Storage/IncompleteRevisionException.php create mode 100644 includes/Storage/MutableRevisionRecord.php create mode 100644 includes/Storage/MutableRevisionSlots.php create mode 100644 includes/Storage/RevisionAccessException.php create mode 100644 includes/Storage/RevisionArchiveRecord.php create mode 100644 includes/Storage/RevisionFactory.php create mode 100644 includes/Storage/RevisionLookup.php create mode 100644 includes/Storage/RevisionRecord.php create mode 100644 includes/Storage/RevisionSlots.php create mode 100644 includes/Storage/RevisionStore.php create mode 100644 
includes/Storage/RevisionStoreRecord.php create mode 100644 includes/Storage/SlotRecord.php create mode 100644 includes/Storage/SqlBlobStore.php create mode 100644 includes/Storage/SuppressedDataException.php create mode 100644 includes/user/UserIdentityValue.php create mode 100644 tests/phpunit/includes/Storage/RevisionRecordTest.php create mode 100644 tests/phpunit/includes/Storage/RevisionStoreRecordTest.php diff --git a/autoload.php b/autoload.php index 2661fd7ed3..483cbb0370 100644 --- a/autoload.php +++ b/autoload.php @@ -945,6 +945,22 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Shell\\Result' => __DIR__ . '/includes/shell/Result.php', 'MediaWiki\\Shell\\Shell' => __DIR__ . '/includes/shell/Shell.php', 'MediaWiki\\Site\\MediaWikiPageNameNormalizer' => __DIR__ . '/includes/site/MediaWikiPageNameNormalizer.php', + 'MediaWiki\\Storage\\BlobAccessException' => __DIR__ . '/includes/Storage/BlobAccessException.php', + 'MediaWiki\\Storage\\BlobStore' => __DIR__ . '/includes/Storage/BlobStore.php', + 'MediaWiki\\Storage\\IncompleteRevisionException' => __DIR__ . '/includes/Storage/IncompleteRevisionException.php', + 'MediaWiki\\Storage\\MutableRevisionRecord' => __DIR__ . '/includes/Storage/MutableRevisionRecord.php', + 'MediaWiki\\Storage\\MutableRevisionSlots' => __DIR__ . '/includes/Storage/MutableRevisionSlots.php', + 'MediaWiki\\Storage\\RevisionAccessException' => __DIR__ . '/includes/Storage/RevisionAccessException.php', + 'MediaWiki\\Storage\\RevisionArchiveRecord' => __DIR__ . '/includes/Storage/RevisionArchiveRecord.php', + 'MediaWiki\\Storage\\RevisionFactory' => __DIR__ . '/includes/Storage/RevisionFactory.php', + 'MediaWiki\\Storage\\RevisionLookup' => __DIR__ . '/includes/Storage/RevisionLookup.php', + 'MediaWiki\\Storage\\RevisionRecord' => __DIR__ . '/includes/Storage/RevisionRecord.php', + 'MediaWiki\\Storage\\RevisionSlots' => __DIR__ . '/includes/Storage/RevisionSlots.php', + 'MediaWiki\\Storage\\RevisionStore' => __DIR__ . 
'/includes/Storage/RevisionStore.php', + 'MediaWiki\\Storage\\RevisionStoreRecord' => __DIR__ . '/includes/Storage/RevisionStoreRecord.php', + 'MediaWiki\\Storage\\SlotRecord' => __DIR__ . '/includes/Storage/SlotRecord.php', + 'MediaWiki\\Storage\\SqlBlobStore' => __DIR__ . '/includes/Storage/SqlBlobStore.php', + 'MediaWiki\\Storage\\SuppressedDataException' => __DIR__ . '/includes/Storage/SuppressedDataException.php', 'MediaWiki\\Tidy\\BalanceActiveFormattingElements' => __DIR__ . '/includes/tidy/Balancer.php', 'MediaWiki\\Tidy\\BalanceElement' => __DIR__ . '/includes/tidy/Balancer.php', 'MediaWiki\\Tidy\\BalanceMarker' => __DIR__ . '/includes/tidy/Balancer.php', @@ -964,6 +980,7 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Tidy\\RemexMungerData' => __DIR__ . '/includes/tidy/RemexMungerData.php', 'MediaWiki\\Tidy\\TidyDriverBase' => __DIR__ . '/includes/tidy/TidyDriverBase.php', 'MediaWiki\\User\\UserIdentity' => __DIR__ . '/includes/user/UserIdentity.php', + 'MediaWiki\\User\\UserIdentityValue' => __DIR__ . '/includes/user/UserIdentityValue.php', 'MediaWiki\\Widget\\ComplexNamespaceInputWidget' => __DIR__ . '/includes/widget/ComplexNamespaceInputWidget.php', 'MediaWiki\\Widget\\ComplexTitleInputWidget' => __DIR__ . '/includes/widget/ComplexTitleInputWidget.php', 'MediaWiki\\Widget\\DateInputWidget' => __DIR__ . 
'/includes/widget/DateInputWidget.php', diff --git a/includes/Storage/BlobAccessException.php b/includes/Storage/BlobAccessException.php new file mode 100644 index 0000000000..ffc5ecabf4 --- /dev/null +++ b/includes/Storage/BlobAccessException.php @@ -0,0 +1,34 @@ +getPageAsLinkTarget() ); + $rev = new MutableRevisionRecord( $title, $parent->getWikiId() ); + + $rev->setComment( $comment ); + $rev->setUser( $user ); + $rev->setTimestamp( $timestamp ); + + foreach ( $parent->getSlotRoles() as $role ) { + $slot = $parent->getSlot( $role, self::RAW ); + $rev->inheritSlot( $slot ); + } + + $rev->setPageId( $parent->getPageId() ); + $rev->setParentId( $parent->getId() ); + + return $rev; + } + + /** + * Creates a new record with an empty, mutable set of slots. + * + * @note Avoid calling this constructor directly. Use the appropriate methods + * in RevisionStore instead. + * + * @param Title $title The title of the page this Revision is associated with. + * @param bool|string $wikiId the wiki ID of the site this Revision belongs to, + * or false for the local site. + * + * @throws MWException + */ + public function __construct( Title $title, $wikiId = false ) { + $slots = new MutableRevisionSlots(); + + parent::__construct( $title, $slots, $wikiId ); + + $this->mSlots = $slots; // redundant, but nice for static analysis + } + + /** + * Sets the ID of the parent revision. + * + * @param int $parentId + */ + public function setParentId( $parentId ) { + Assert::parameterType( 'integer', $parentId, '$parentId' ); + + $this->mParentId = $parentId; + } + + /** + * Sets the given slot. If a slot with the same role is already present in the revision, + * it is replaced. + * + * @note This can only be used with a fresh "unattached" SlotRecord. Calling code that has a + * SlotRecord from another revision should use inheritSlot(). Calling code that has access to + * a Content object can use setContent(). + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. 
+ * + * @note Calling this method will cause the revision size and hash to be re-calculated upon + * the next call to getSize() and getSha1(), respectively. + * + * @param SlotRecord $slot + * + * @throws InvalidArgumentException if the slot is already attached to a revision other + * than this one. + */ + public function setSlot( SlotRecord $slot ) { + if ( $slot->hasRevision() && $slot->getRevision() !== $this->getId() ) { + throw new InvalidArgumentException( + 'The given slot must be an unsaved, unattached one. ' + . 'This slot is already attached to revision ' . $slot->getRevision() . '. ' + . 'Use inheritSlot() instead to preserve a slot from a previous revision.' + ); + } + + $this->mSlots->setSlot( $slot ); + $this->resetAggregateValues(); + } + + /** + * "Inherits" the given slot's content. + * + * If a slot with the same role is already present in the revision, it is replaced. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @note Like setSlot(), calling this method will cause the revision size and hash to be + * re-calculated upon the next call to getSize() and getSha1(), respectively. + * + * @param SlotRecord $parentSlot + */ + public function inheritSlot( SlotRecord $parentSlot ) { + $slot = SlotRecord::newInherited( $parentSlot ); + $this->setSlot( $slot ); + } + + /** + * Sets the content for the slot with the given role. + * + * If a slot with the same role is already present in the revision, it is replaced. + * Calling code that has access to a SlotRecord can use inheritSlot() instead. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @note Calling this method will cause the revision size and hash to be re-calculated upon + * the next call to getSize() and getSha1(), respectively. + * + * @param string $role + * @param Content $content + */ + public function setContent( $role, Content $content ) { + $this->mSlots->setContent( $role, $content ); + $this->resetAggregateValues(); + } + + /** + * Removes the slot with the given role from this revision. + * This effectively ends the "stream" with that role on the revision's page. + * Future revisions will no longer inherit this slot, unless it is added back explicitly. 
+ * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @note Calling this method will cause the revision size and hash to be re-calculated upon + * the next call to getSize() and getSha1(), respectively. + * + * @param string $role + */ + public function removeSlot( $role ) { + $this->mSlots->removeSlot( $role ); + $this->resetAggregateValues(); + } + + /** + * Sets the comment associated with the revision. + * + * @param CommentStoreComment $comment + */ + public function setComment( CommentStoreComment $comment ) { + $this->mComment = $comment; + } + + /** + * Set revision hash, for optimization. Prevents getSha1() from re-calculating the hash. + * + * @note This should only be used if the calling code is sure that the given hash is correct + * for the revision's content, and there is no chance of the content being manipulated + * later. When in doubt, this method should not be called. + * + * @param string $sha1 SHA1 hash as a base36 string. + */ + public function setSha1( $sha1 ) { + Assert::parameterType( 'string', $sha1, '$sha1' ); + + $this->mSha1 = $sha1; + } + + /** + * Set nominal revision size, for optimization. Prevents getSize() from re-calculating the size. + * + * @note This should only be used if the calling code is sure that the given size is correct + * for the revision's content, and there is no chance of the content being manipulated + * later. When in doubt, this method should not be called. 
+ * + * @param int $size nominal size in bogo-bytes + */ + public function setSize( $size ) { + Assert::parameterType( 'integer', $size, '$size' ); + + $this->mSize = $size; + } + + /** + * Sets the visibility (deletion) bitfield of the revision. + * + * @param int $visibility + */ + public function setVisibility( $visibility ) { + Assert::parameterType( 'integer', $visibility, '$visibility' ); + + $this->mDeleted = $visibility; + } + + /** + * Sets the revision timestamp. The value is normalized to TS_MW format. + * + * @param string $timestamp A timestamp understood by wfTimestamp + */ + public function setTimestamp( $timestamp ) { + Assert::parameterType( 'string', $timestamp, '$timestamp' ); + + $this->mTimestamp = wfTimestamp( TS_MW, $timestamp ); + } + + /** + * Sets the minor edit flag of the revision. + * + * @param bool $minorEdit + */ + public function setMinorEdit( $minorEdit ) { + Assert::parameterType( 'boolean', $minorEdit, '$minorEdit' ); + + $this->mMinorEdit = $minorEdit; + } + + /** + * Set the revision ID. + * + * MCR migration note: this replaces Revision::setId() + * + * @warning Use this with care, especially when preparing a revision for insertion + * into the database! The revision ID should only be fixed in special cases + * like preserving the original ID when restoring a revision. + * + * @param int $id + */ + public function setId( $id ) { + Assert::parameterType( 'integer', $id, '$id' ); + + $this->mId = $id; + } + + /** + * Sets the user identity associated with the revision + * + * @param UserIdentity $user + */ + public function setUser( UserIdentity $user ) { + $this->mUser = $user; + } + + /** + * Sets the ID of the page this revision belongs to. + * + * @param int $pageId + * + * @throws InvalidArgumentException if the Title of this revision exists and its + * article ID differs from $pageId. + */ + public function setPageId( $pageId ) { + Assert::parameterType( 'integer', $pageId, '$pageId' ); + + if ( $this->mTitle->exists() && $pageId !== $this->mTitle->getArticleID() ) { + throw new InvalidArgumentException( + 'The given Title does not belong to page ID ' . $pageId + ); + } + + $this->mPageId = $pageId; + } + + /** + * Returns the nominal size of this revision. 
+ * + * MCR migration note: this replaces Revision::getSize + * + * @return int The nominal size, may be computed on the fly if not yet known. + */ + public function getSize() { + // If not known, re-calculate and remember. Will be reset when slots change. + if ( $this->mSize === null ) { + $this->mSize = $this->mSlots->computeSize(); + } + + return $this->mSize; + } + + /** + * Returns the base36 sha1 of this revision. + * + * MCR migration note: this replaces Revision::getSha1 + * + * @return string The revision hash, may be computed on the fly if not yet known. + */ + public function getSha1() { + // If not known, re-calculate and remember. Will be reset when slots change. + if ( $this->mSha1 === null ) { + $this->mSha1 = $this->mSlots->computeSha1(); + } + + return $this->mSha1; + } + + /** + * Invalidate cached aggregate values such as hash and size, so that getSize() and + * getSha1() re-compute them from the slots on their next call. + */ + private function resetAggregateValues() { + $this->mSize = null; + $this->mSha1 = null; + } + +} diff --git a/includes/Storage/MutableRevisionSlots.php b/includes/Storage/MutableRevisionSlots.php new file mode 100644 index 0000000000..2e675c8937 --- /dev/null +++ b/includes/Storage/MutableRevisionSlots.php @@ -0,0 +1,137 @@ +getRole(); + $inherited[$role] = SlotRecord::newInherited( $slot ); + } + + return new MutableRevisionSlots( $inherited ); + } + + /** + * @param SlotRecord[] $slots An array of SlotRecords. Defaults to no slots. + */ + public function __construct( array $slots = [] ) { + parent::__construct( $slots ); + } + + /** + * Sets the given slot. + * If a slot with the same role is already present, it is replaced. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @param SlotRecord $slot + */ + public function setSlot( SlotRecord $slot ) { + if ( !is_array( $this->slots ) ) { + $this->getSlots(); // initialize $this->slots + } + + $role = $slot->getRole(); + $this->slots[$role] = $slot; + } + + /** + * Sets the content for the slot with the given role. 
+ * If a slot with the same role is already present, it is replaced. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @param string $role + * @param Content $content + */ + public function setContent( $role, Content $content ) { + $slot = SlotRecord::newUnsaved( $role, $content ); + $this->setSlot( $slot ); + } + + /** + * Remove the slot for the given role, discontinue the corresponding stream. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @param string $role + */ + public function removeSlot( $role ) { + if ( !is_array( $this->slots ) ) { + $this->getSlots(); // initialize $this->slots + } + + unset( $this->slots[$role] ); + } + + /** + * Return all slots that are not inherited. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @return SlotRecord[] The matching slots; the keys of getSlots() are preserved. + */ + public function getTouchedSlots() { + return array_filter( + $this->getSlots(), + function ( SlotRecord $slot ) { + return !$slot->isInherited(); + } + ); + } + + /** + * Return all slots that are inherited. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @return SlotRecord[] The matching slots; the keys of getSlots() are preserved. + */ + public function getInheritedSlots() { + return array_filter( + $this->getSlots(), + function ( SlotRecord $slot ) { + return $slot->isInherited(); + } + ); + } + +} diff --git a/includes/Storage/RevisionAccessException.php b/includes/Storage/RevisionAccessException.php new file mode 100644 index 0000000000..ee6efc0a0c --- /dev/null +++ b/includes/Storage/RevisionAccessException.php @@ -0,0 +1,34 @@ +mArchiveId = intval( $row->ar_id ); + + // NOTE: ar_page_id may be different from $this->mTitle->getArticleID() in some cases, + // notably when a partially restored page has been moved, and a new page has been created + // with the same title. Archive rows for that title will then have the wrong page id. + $this->mPageId = isset( $row->ar_page_id ) ? 
intval( $row->ar_page_id ) : $title->getArticleID(); + + // NOTE: ar_parent_id = 0 indicates that there is no parent revision, while null + // indicates that the parent revision is unknown. As per MW 1.31, the database schema + // allows ar_parent_id to be NULL. + $this->mParentId = isset( $row->ar_parent_id ) ? intval( $row->ar_parent_id ) : null; + $this->mId = isset( $row->ar_rev_id ) ? intval( $row->ar_rev_id ) : null; + $this->mComment = $comment; + $this->mUser = $user; + $this->mTimestamp = wfTimestamp( TS_MW, $row->ar_timestamp ); + $this->mMinorEdit = boolval( $row->ar_minor_edit ); + $this->mDeleted = intval( $row->ar_deleted ); + $this->mSize = isset( $row->ar_len ) ? intval( $row->ar_len ) : null; + $this->mSha1 = isset( $row->ar_sha1 ) ? $row->ar_sha1 : null; + } + + /** + * Get archive row ID (taken from ar_id). This is distinct from getId(), which + * returns the original revision ID (taken from ar_rev_id). + * + * @return int + */ + public function getArchiveId() { + return $this->mArchiveId; + } + + /** + * @return int|null The revision id, or null if the original revision ID + * was not recorded in the archive table. + */ + public function getId() { + // overridden just to refine the contract specification. + return parent::getId(); + } + + /** + * @return int The nominal revision size, never null. May be computed on the fly. + */ + public function getSize() { + // If length is null, calculate and remember it (potentially SLOW!). + // This is for compatibility with old database rows that don't have the field set. + if ( $this->mSize === null ) { + $this->mSize = $this->mSlots->computeSize(); + } + + return $this->mSize; + } + + /** + * @return string The revision hash, never null. May be computed on the fly. + */ + public function getSha1() { + // If hash is null, calculate it and remember (potentially SLOW!) + // This is for compatibility with old database rows that don't have the field set. 
+ if ( $this->mSha1 === null ) { + $this->mSha1 = $this->mSlots->computeSha1(); + } + + return $this->mSha1; + } + + /** + * @param int $audience + * @param User|null $user + * + * @return UserIdentity|null The identity of the revision author, null if access is forbidden. + */ + public function getUser( $audience = self::FOR_PUBLIC, User $user = null ) { + // overridden just to add a guarantee to the contract + return parent::getUser( $audience, $user ); + } + + /** + * @param int $audience + * @param User|null $user + * + * @return CommentStoreComment|null The revision comment, null if access is forbidden. + */ + public function getComment( $audience = self::FOR_PUBLIC, User $user = null ) { + // overridden just to add a guarantee to the contract + return parent::getComment( $audience, $user ); + } + + /** + * @return string never null + */ + public function getTimestamp() { + // overridden just to add a guarantee to the contract + return parent::getTimestamp(); + } + +} diff --git a/includes/Storage/RevisionFactory.php b/includes/Storage/RevisionFactory.php new file mode 100644 index 0000000000..86e8c06fbb --- /dev/null +++ b/includes/Storage/RevisionFactory.php @@ -0,0 +1,94 @@ +ar_user, etc. + * + * @return RevisionRecord + */ + public function newRevisionFromArchiveRow( + $row, + $queryFlags = 0, + Title $title = null, + array $overrides = [] + ); + +} diff --git a/includes/Storage/RevisionLookup.php b/includes/Storage/RevisionLookup.php new file mode 100644 index 0000000000..5cd157ba07 --- /dev/null +++ b/includes/Storage/RevisionLookup.php @@ -0,0 +1,118 @@ +mTitle = $title; + $this->mSlots = $slots; + $this->mWiki = $wikiId; + + // XXX: this is a sensible default, but we may not have a Title object here in the future. + $this->mPageId = $title->getArticleID(); + } + + /** + * Implemented to defy serialization. + * + * @throws LogicException always + */ + public function __sleep() { + throw new LogicException( __CLASS__ . ' is not serializable.' 
); + } + + /** + * Checks whether this revision is known to have the same content as the given one. + * + * @param RevisionRecord $rec + * + * @return bool True if this RevisionRecord is known to have same content as $rec. + * False if the content is different (or not known to be the same). + */ + public function hasSameContent( RevisionRecord $rec ) { + if ( $rec === $this ) { + return true; + } + + if ( $this->getId() !== null && $this->getId() === $rec->getId() ) { + return true; + } + + // check size before hash, since size is quicker to compute + if ( $this->getSize() !== $rec->getSize() ) { + return false; + } + + // instead of checking the hash, we could also check the content addresses of all slots. + + if ( $this->getSha1() === $rec->getSha1() ) { + return true; + } + + return false; + } + + /** + * Returns the Content of the given slot of this revision. + * Call getSlotRoles() to get a list of available slots. + * + * Note that for mutable Content objects, each call to this method will return a + * fresh clone. + * + * MCR migration note: this replaces Revision::getContent + * + * @param string $role The role name of the desired slot + * @param int $audience + * @param User|null $user + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. + * @return Content|null The content of the given slot, or null if access is forbidden. + */ + public function getContent( $role, $audience = self::FOR_PUBLIC, User $user = null ) { + // XXX: throwing an exception would be nicer, but would be a further + // departure from the signature of Revision::getContent(), and thus + // more complex and error prone refactoring. + if ( !$this->audienceCan( self::DELETED_TEXT, $audience, $user ) ) { + return null; + } + + $content = $this->getSlot( $role, $audience, $user )->getContent(); + return $content->copy(); + } + + /** + * Returns meta-data for the given slot. 
+ * + * @param string $role The role name of the desired slot + * @param int $audience + * @param User|null $user + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. + * @return SlotRecord The slot meta-data. If access to the slot content is forbidden, + * calling getContent() on the SlotRecord will throw an exception. + */ + public function getSlot( $role, $audience = self::FOR_PUBLIC, User $user = null ) { + $slot = $this->mSlots->getSlot( $role ); + + if ( !$this->audienceCan( self::DELETED_TEXT, $audience, $user ) ) { + return SlotRecord::newWithSuppressedContent( $slot ); + } + + return $slot; + } + + /** + * Returns the slot names (roles) of all slots present in this revision. + * getContent() will succeed only for the names returned by this method. + * + * @return string[] + */ + public function getSlotRoles() { + return $this->mSlots->getSlotRoles(); + } + + /** + * Get revision ID. Depending on the concrete subclass, this may return null if + * the revision ID is not known (e.g. because the revision does not yet exist + * in the database). + * + * MCR migration note: this replaces Revision::getId + * + * @return int|null + */ + public function getId() { + return $this->mId; + } + + /** + * Get parent revision ID (the original previous page revision). + * If there is no parent revision, this returns 0. + * If the parent revision is undefined or unknown, this returns null. + * + * @note As of MW 1.31, the database schema allows the parent ID to be + * NULL to indicate that it is unknown. + * + * MCR migration note: this replaces Revision::getParentId + * + * @return int|null + */ + public function getParentId() { + return $this->mParentId; + } + + /** + * Returns the nominal size of this revision, in bogo-bytes. + * May be calculated on the fly if not known, which in the worst + * case may involve loading all content. 
+ * + * MCR migration note: this replaces Revision::getSize + * + * @return int + */ + abstract public function getSize(); + + /** + * Returns the base36 sha1 of this revision. This hash is derived from the + * hashes of all slots associated with the revision. + * May be calculated on the fly if not known, which in the worst + * case may involve loading all content. + * + * MCR migration note: this replaces Revision::getSha1 + * + * @return string + */ + abstract public function getSha1(); + + /** + * Get the page ID. If the page does not yet exist, the page ID is 0. + * + * MCR migration note: this replaces Revision::getPage + * + * @return int + */ + public function getPageId() { + return $this->mPageId; + } + + /** + * Get the ID of the wiki this revision belongs to. + * + * @return string|false The wiki's logical name, or false to indicate the local wiki. + */ + public function getWikiId() { + return $this->mWiki; + } + + /** + * Returns the title of the page this revision is associated with as a LinkTarget object. + * + * MCR migration note: this replaces Revision::getTitle + * + * @return LinkTarget + */ + public function getPageAsLinkTarget() { + return $this->mTitle; + } + + /** + * Fetch revision's author's user identity, if it's available to the specified audience. + * If the specified audience does not have access to it, null will be + * returned. Depending on the concrete subclass, null may also be returned if the user is + * not yet specified. 
+ * + * MCR migration note: this replaces Revision::getUser + * + * @param int $audience One of: + * RevisionRecord::FOR_PUBLIC to be displayed to all users + * RevisionRecord::FOR_THIS_USER to be displayed to the given user + * RevisionRecord::RAW get the user identity regardless of permissions + * @param User|null $user User object to check for, only if FOR_THIS_USER is passed + * to the $audience parameter + * @return UserIdentity|null + */ + public function getUser( $audience = self::FOR_PUBLIC, User $user = null ) { + if ( !$this->audienceCan( self::DELETED_USER, $audience, $user ) ) { + return null; + } else { + return $this->mUser; + } + } + + /** + * Fetch revision comment, if it's available to the specified audience. + * If the specified audience does not have access to the comment, + * this will return null. Depending on the concrete subclass, null may also be returned + * if the comment is not yet specified. + * + * MCR migration note: this replaces Revision::getComment + * + * @param int $audience One of: + * RevisionRecord::FOR_PUBLIC to be displayed to all users + * RevisionRecord::FOR_THIS_USER to be displayed to the given user + * RevisionRecord::RAW get the comment regardless of permissions + * @param User|null $user User object to check for, only if FOR_THIS_USER is passed + * to the $audience parameter + * + * @return CommentStoreComment|null + */ + public function getComment( $audience = self::FOR_PUBLIC, User $user = null ) { + if ( !$this->audienceCan( self::DELETED_COMMENT, $audience, $user ) ) { + return null; + } else { + return $this->mComment; + } + } + + /** + * Returns whether the minor edit flag is set for this revision. + * + * MCR migration note: this replaces Revision::isMinor + * + * @return bool + */ + public function isMinor() { + return (bool)$this->mMinorEdit; + } + + /** + * Returns whether all bits of $field are set in this revision's visibility bitfield. + * + * MCR migration note: this replaces Revision::isDeleted + * + * @param int $field One of DELETED_* bitfield constants + * + * @return bool + */ + public function isDeleted( $field ) { + return ( $this->getVisibility() & $field ) == $field; 
+ } + + /** + * Get the deletion bitfield of the revision + * + * MCR migration note: this replaces Revision::getVisibility + * + * @return int + */ + public function getVisibility() { + return (int)$this->mDeleted; + } + + /** + * MCR migration note: this replaces Revision::getTimestamp. + * + * May return null if the timestamp was not specified. + * + * @return string|null + */ + public function getTimestamp() { + return $this->mTimestamp; + } + + /** + * Check that the given audience has access to the given field. + * + * MCR migration note: this corresponds to Revision::userCan + * + * @param int $field One of self::DELETED_TEXT, + * self::DELETED_COMMENT, + * self::DELETED_USER + * @param int $audience One of: + * RevisionRecord::FOR_PUBLIC access is denied if the field is marked as deleted + * RevisionRecord::FOR_THIS_USER access is decided by userCan() for the given user + * RevisionRecord::RAW access is always granted, regardless of permissions + * @param User|null $user User object to check. Required if $audience is FOR_THIS_USER, + * ignored otherwise. + * + * @throws InvalidArgumentException if $audience is FOR_THIS_USER but no $user was given. + * @return bool + */ + protected function audienceCan( $field, $audience, User $user = null ) { + if ( $audience == self::FOR_PUBLIC && $this->isDeleted( $field ) ) { + return false; + } elseif ( $audience == self::FOR_THIS_USER ) { + if ( !$user ) { + throw new InvalidArgumentException( + 'A User object must be given when checking FOR_THIS_USER audience.' + ); + } + + if ( !$this->userCan( $field, $user ) ) { + return false; + } + } + + return true; + } + + /** + * Determine if the given user is allowed to view a particular + * field of this revision, if it's marked as deleted. + * + * MCR migration note: this corresponds to Revision::userCan + * + * @param int $field One of self::DELETED_TEXT, + * self::DELETED_COMMENT, + * self::DELETED_USER + * @param User $user User object to check + * @return bool + */ + protected function userCan( $field, User $user ) { + // TODO: use callback for permission checks, so we don't need to know a Title object! 
+ return self::userCanBitfield( $this->getVisibility(), $field, $user, $this->mTitle ); + } + + /** + * Determine if the given user is allowed to view a particular + * field of this revision, if it's marked as deleted. This is used + * by various classes to avoid duplication. + * + * MCR migration note: this replaces Revision::userCanBitfield + * + * @param int $bitfield The visibility (deletion) bitfield to check $field against + * @param int $field One of self::DELETED_TEXT = File::DELETED_FILE, + * self::DELETED_COMMENT = File::DELETED_COMMENT, + * self::DELETED_USER = File::DELETED_USER + * @param User $user User object to check + * @param Title|null $title A Title object to check for per-page restrictions on, + * instead of just plain userrights + * @return bool True if no bit of $field is set in $bitfield, or if the user has at least + * one of the permissions required to see the deleted aspect. False otherwise. + */ + public static function userCanBitfield( $bitfield, $field, User $user, Title $title = null ) { + if ( $bitfield & $field ) { // aspect is deleted + if ( $bitfield & self::DELETED_RESTRICTED ) { + $permissions = [ 'suppressrevision', 'viewsuppressed' ]; + } elseif ( $field & self::DELETED_TEXT ) { + $permissions = [ 'deletedtext' ]; + } else { + $permissions = [ 'deletedhistory' ]; + } + $permissionlist = implode( ', ', $permissions ); + if ( $title === null ) { + wfDebug( "Checking for $permissionlist due to $field match on $bitfield\n" ); + return call_user_func_array( [ $user, 'isAllowedAny' ], $permissions ); + } else { + $text = $title->getPrefixedText(); + wfDebug( "Checking for $permissionlist on $text due to $field match on $bitfield\n" ); + foreach ( $permissions as $perm ) { + if ( $title->userCan( $perm, $user ) ) { + return true; + } + } + return false; + } + } else { + return true; + } + } + +} diff --git a/includes/Storage/RevisionSlots.php b/includes/Storage/RevisionSlots.php new file mode 100644 index 0000000000..8d3d7e3d70 --- /dev/null +++ b/includes/Storage/RevisionSlots.php @@ -0,0 +1,189 @@ +slots = $slots; + } else { + $this->setSlotsInternal( $slots ); + } + } + + /** + * @param SlotRecord[] $slots + */ + private 
function setSlotsInternal( array $slots ) { + $this->slots = []; + + // re-key the slot array + foreach ( $slots as $slot ) { + $role = $slot->getRole(); + $this->slots[$role] = $slot; + } + } + + /** + * Implemented to defy serialization. + * + * @throws LogicException always + */ + public function __sleep() { + throw new LogicException( __CLASS__ . ' is not serializable.' ); + } + + /** + * Returns the Content of the given slot. + * Call getSlotNames() to get a list of available slots. + * + * Note that for mutable Content objects, each call to this method will return a + * fresh clone. + * + * @param string $role The role name of the desired slot + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. + * @return Content + */ + public function getContent( $role ) { + // Return a copy to be safe. Immutable content objects return $this from copy(). + return $this->getSlot( $role )->getContent()->copy(); + } + + /** + * Returns the SlotRecord of the given slot. + * Call getSlotNames() to get a list of available slots. + * + * @param string $role The role name of the desired slot + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. + * @return SlotRecord + */ + public function getSlot( $role ) { + $slots = $this->getSlots(); + + if ( isset( $slots[$role] ) ) { + return $slots[$role]; + } else { + throw new RevisionAccessException( 'No such slot: ' . $role ); + } + } + + /** + * Returns the slot names (roles) of all slots present in this revision. + * getContent() will succeed only for the names returned by this method. + * + * @return string[] + */ + public function getSlotRoles() { + $slots = $this->getSlots(); + return array_keys( $slots ); + } + + /** + * Computes the total nominal size of the revision's slots, in bogo-bytes. + * + * @warn This is potentially expensive! It may cause all slot's content to be loaded + * and deserialized. 
+ * + * @return int + */ + public function computeSize() { + return array_reduce( $this->getSlots(), function ( $accu, SlotRecord $slot ) { + return $accu + $slot->getSize(); + }, 0 ); + } + + /** + * Returns an associative array that maps role names to SlotRecords. Each SlotRecord + * represents the content meta-data of a slot, together they define the content of + * a revision. + * + * @note This may cause the content meta-data for the revision to be lazy-loaded. + * + * @return SlotRecord[] revision slot/content rows, keyed by slot role name. + */ + public function getSlots() { + if ( is_callable( $this->slots ) ) { + $slots = call_user_func( $this->slots ); + + Assert::postcondition( + is_array( $slots ), + 'Slots info callback should return an array of objects' + ); + + $this->setSlotsInternal( $slots ); + } + + return $this->slots; + } + + /** + * Computes the combined hash of the revisions's slots. + * + * @note For backwards compatibility, the combined hash of a single slot + * is that slot's hash. For consistency, the combined hash of an empty set of slots + * is the hash of the empty string. + * + * @warn This is potentially expensive! It may cause all slot's content to be loaded + * and deserialized, then re-serialized and hashed. + * + * @return string + */ + public function computeSha1() { + $slots = $this->getSlots(); + ksort( $slots ); + + if ( empty( $slots ) ) { + return SlotRecord::base36Sha1( '' ); + } + + return array_reduce( $slots, function ( $accu, SlotRecord $slot ) { + return $accu === null + ? $slot->getSha1() + : SlotRecord::base36Sha1( $accu . 
$slot->getSha1() ); + }, null ); + } + +} diff --git a/includes/Storage/RevisionStore.php b/includes/Storage/RevisionStore.php new file mode 100644 index 0000000000..b8debb8b6a --- /dev/null +++ b/includes/Storage/RevisionStore.php @@ -0,0 +1,1914 @@ +loadBalancer = $loadBalancer; + $this->blobStore = $blobStore; + $this->cache = $cache; + $this->wikiId = $wikiId; + } + + /** + * @return bool + */ + public function getContentHandlerUseDB() { + return $this->contentHandlerUseDB; + } + + /** + * @param bool $contentHandlerUseDB + */ + public function setContentHandlerUseDB( $contentHandlerUseDB ) { + $this->contentHandlerUseDB = $contentHandlerUseDB; + } + + /** + * @return LoadBalancer + */ + private function getDBLoadBalancer() { + return $this->loadBalancer; + } + + /** + * @param int $mode DB_MASTER or DB_REPLICA + * + * @return IDatabase + */ + private function getDBConnection( $mode ) { + $lb = $this->getDBLoadBalancer(); + return $lb->getConnection( $mode, [], $this->wikiId ); + } + + /** + * @param IDatabase $connection + */ + private function releaseDBConnection( IDatabase $connection ) { + $lb = $this->getDBLoadBalancer(); + $lb->reuseConnection( $connection ); + } + + /** + * @param int $mode DB_MASTER or DB_REPLICA + * + * @return DBConnRef + */ + private function getDBConnectionRef( $mode ) { + $lb = $this->getDBLoadBalancer(); + return $lb->getConnectionRef( $mode, [], $this->wikiId ); + } + + /** + * Determines the page Title based on the available information. 
+ * + * MCR migration note: this corresponds to Revision::getTitle + * + * @param int|null $pageId + * @param int|null $revId + * @param int $queryFlags + * + * @return Title + * @throws RevisionAccessException + */ + private function getTitle( $pageId, $revId, $queryFlags = 0 ) { + if ( !$pageId && !$revId ) { + throw new InvalidArgumentException( '$pageId and $revId cannot both be 0 or null' ); + } + + $title = null; + + // Loading by ID is best, but Title::newFromID does not support that for foreign IDs. + if ( $pageId !== null && $pageId > 0 && $this->wikiId === false ) { + // TODO: better foreign title handling (introduce TitleFactory) + $title = Title::newFromID( $pageId, $queryFlags ); + } + + // rev_id is defined as NOT NULL, but this revision may not yet have been inserted. + if ( !$title && $revId !== null && $revId > 0 ) { + list( $dbMode, $dbOptions, , ) = DBAccessObjectUtils::getDBOptions( $queryFlags ); + + $dbr = $this->getDbConnectionRef( $dbMode ); + // @todo: Title::getSelectFields(), or Title::getQueryInfo(), or something like that + $row = $dbr->selectRow( + [ 'revision', 'page' ], + [ + 'page_namespace', + 'page_title', + 'page_id', + 'page_latest', + 'page_is_redirect', + 'page_len', + ], + [ 'rev_id' => $revId ], + __METHOD__, + $dbOptions, + [ 'page' => [ 'JOIN', 'page_id=rev_page' ] ] + ); + if ( $row ) { + // TODO: better foreign title handling (introduce TitleFactory) + $title = Title::newFromRow( $row ); + } + } + + if ( !$title ) { + throw new RevisionAccessException( + "Could not determine title for page ID $pageId and revision ID $revId" + ); + } + + return $title; + } + + /** + * @param mixed $value + * @param string $name + * + * @throw IncompleteRevisionException if $value is null + * @return mixed $value, if $value is not null + */ + private function failOnNull( $value, $name ) { + if ( $value === null ) { + throw new IncompleteRevisionException( + "$name must not be " . var_export( $value, true ) . "!" 
+ ); + } + + return $value; + } + + /** + * @param mixed $value + * @param string $name + * + * @throw IncompleteRevisionException if $value is empty + * @return mixed $value, if $value is not null + */ + private function failOnEmpty( $value, $name ) { + if ( $value === null || $value === 0 || $value === '' ) { + throw new IncompleteRevisionException( + "$name must not be " . var_export( $value, true ) . "!" + ); + } + + return $value; + } + + /** + * Insert a new revision into the database, returning the new revision ID + * number on success and dies horribly on failure. + * + * MCR migration note: this replaces Revision::insertOn + * + * @param RevisionRecord $rev + * @param IDatabase $dbw (master connection) + * + * @throws InvalidArgumentException + * @return RevisionRecord the new revision record. + */ + public function insertRevisionOn( RevisionRecord $rev, IDatabase $dbw ) { + // TODO: pass in a DBTransactionContext instead of a database connection. + $this->checkDatabaseWikiId( $dbw ); + + if ( !$rev->getSlotRoles() ) { + throw new InvalidArgumentException( 'At least one slot needs to be defined!' ); + } + + if ( $rev->getSlotRoles() !== [ 'main' ] ) { + throw new InvalidArgumentException( 'Only the main slot is supported for now!' ); + } + + // TODO: we shouldn't need an actual Title here. + $title = Title::newFromLinkTarget( $rev->getPageAsLinkTarget() ); + $pageId = $this->failOnEmpty( $rev->getPageId(), 'rev_page field' ); // check this early + + $parentId = $rev->getParentId() === null + ? 
$this->getPreviousRevisionId( $dbw, $rev ) + : $rev->getParentId(); + + // Record the text (or external storage URL) to the blob store + $slot = $rev->getSlot( 'main', RevisionRecord::RAW ); + + $size = $this->failOnNull( $rev->getSize(), 'size field' ); + $sha1 = $this->failOnEmpty( $rev->getSha1(), 'sha1 field' ); + + if ( !$slot->hasAddress() ) { + $content = $slot->getContent(); + $format = $content->getDefaultFormat(); + $model = $content->getModel(); + + $this->checkContentModel( $content, $title ); + + $data = $content->serialize( $format ); + + // Hints allow the blob store to optimize by "leaking" application level information to it. + // TODO: with the new MCR storage schema, we rev_id have this before storing the blobs. + // When we have it, add rev_id as a hint. Can be used with rev_parent_id for + // differential storage or compression of subsequent revisions. + $blobHints = [ + BlobStore::DESIGNATION_HINT => 'page-content', // BlobStore may be used for other things too. + BlobStore::PAGE_HINT => $pageId, + BlobStore::ROLE_HINT => $slot->getRole(), + BlobStore::PARENT_HINT => $parentId, + BlobStore::SHA1_HINT => $slot->getSha1(), + BlobStore::MODEL_HINT => $model, + BlobStore::FORMAT_HINT => $format, + ]; + + $blobAddress = $this->blobStore->storeBlob( $data, $blobHints ); + } else { + $blobAddress = $slot->getAddress(); + $model = $slot->getModel(); + $format = $slot->getFormat(); + } + + $textId = $this->blobStore->getTextIdFromAddress( $blobAddress ); + + if ( !$textId ) { + throw new LogicException( + 'Blob address not supported in 1.29 database schema: ' . $blobAddress + ); + } + + // getTextIdFromAddress() is free to insert something into the text table, so $textId + // may be a new value, not anything already contained in $blobAddress. + $blobAddress = 'tt:' . 
$textId; + + $comment = $this->failOnNull( $rev->getComment( RevisionRecord::RAW ), 'comment' ); + $user = $this->failOnNull( $rev->getUser( RevisionRecord::RAW ), 'user' ); + $timestamp = $this->failOnEmpty( $rev->getTimestamp(), 'timestamp field' ); + + # Record the edit in revisions + $row = [ + 'rev_page' => $pageId, + 'rev_parent_id' => $parentId, + 'rev_text_id' => $textId, + 'rev_minor_edit' => $rev->isMinor() ? 1 : 0, + 'rev_user' => $this->failOnNull( $user->getId(), 'user field' ), + 'rev_user_text' => $this->failOnEmpty( $user->getName(), 'user_text field' ), + 'rev_timestamp' => $dbw->timestamp( $timestamp ), + 'rev_deleted' => $rev->getVisibility(), + 'rev_len' => $size, + 'rev_sha1' => $sha1, + ]; + + if ( $rev->getId() !== null ) { + // Needed to restore revisions with their original ID + $row['rev_id'] = $rev->getId(); + } + + list( $commentFields, $commentCallback ) = + CommentStore::newKey( 'rev_comment' )->insertWithTempTable( $dbw, $comment ); + $row += $commentFields; + + if ( $this->contentHandlerUseDB ) { + // MCR migration note: rev_content_model and rev_content_format will go away + + $defaultModel = ContentHandler::getDefaultModelFor( $title ); + $defaultFormat = ContentHandler::getForModelID( $defaultModel )->getDefaultFormat(); + + $row['rev_content_model'] = ( $model === $defaultModel ) ? null : $model; + $row['rev_content_format'] = ( $format === $defaultFormat ) ? null : $format; + } + + $dbw->insert( 'revision', $row, __METHOD__ ); + + if ( !isset( $row['rev_id'] ) ) { + // only if auto-increment was used + $row['rev_id'] = intval( $dbw->insertId() ); + } + $commentCallback( $row['rev_id'] ); + + // Insert IP revision into ip_changes for use when querying for a range. 
+ if ( $row['rev_user'] === 0 && IP::isValid( $row['rev_user_text'] ) ) { + $ipcRow = [ + 'ipc_rev_id' => $row['rev_id'], + 'ipc_rev_timestamp' => $row['rev_timestamp'], + 'ipc_hex' => IP::toHex( $row['rev_user_text'] ), + ]; + $dbw->insert( 'ip_changes', $ipcRow, __METHOD__ ); + } + + $newSlot = SlotRecord::newSaved( $row['rev_id'], $blobAddress, $slot ); + $slots = new RevisionSlots( [ 'main' => $newSlot ] ); + + $user = new UserIdentityValue( intval( $row['rev_user'] ), $row['rev_user_text'] ); + + $rev = new RevisionStoreRecord( + $title, + $user, + $comment, + (object)$row, + $slots, + $this->wikiId + ); + + $newSlot = $rev->getSlot( 'main', RevisionRecord::RAW ); + + // sanity checks + Assert::postcondition( $rev->getId() > 0, 'revision must have an ID' ); + Assert::postcondition( $rev->getPageId() > 0, 'revision must have a page ID' ); + Assert::postcondition( + $rev->getComment( RevisionRecord::RAW ) !== null, + 'revision must have a comment' + ); + Assert::postcondition( + $rev->getUser( RevisionRecord::RAW ) !== null, + 'revision must have a user' + ); + + Assert::postcondition( $newSlot !== null, 'revision must have a main slot' ); + Assert::postcondition( + $newSlot->getAddress() !== null, + 'main slot must have an addess' + ); + + Hooks::run( 'RevisionRecordInserted', [ $rev ] ); + + return $rev; + } + + /** + * MCR migration note: this corresponds to Revision::checkContentModel + * + * @param Content $content + * @param Title $title + * + * @throws MWException + * @throws MWUnknownContentModelException + */ + private function checkContentModel( Content $content, Title $title ) { + // Note: may return null for revisions that have not yet been inserted + + $model = $content->getModel(); + $format = $content->getDefaultFormat(); + $handler = $content->getContentHandler(); + + $name = "$title"; + + if ( !$handler->isSupportedFormat( $format ) ) { + throw new MWException( "Can't use format $format with content model $model on $name" ); + } + + if ( 
!$this->contentHandlerUseDB ) { + // if $wgContentHandlerUseDB is not set, + // all revisions must use the default content model and format. + + $defaultModel = ContentHandler::getDefaultModelFor( $title ); + $defaultHandler = ContentHandler::getForModelID( $defaultModel ); + $defaultFormat = $defaultHandler->getDefaultFormat(); + + if ( $model != $defaultModel ) { + throw new MWException( "Can't save non-default content model with " + . "\$wgContentHandlerUseDB disabled: model is $model, " + . "default for $name is $defaultModel" + ); + } + + if ( $format != $defaultFormat ) { + throw new MWException( "Can't use non-default content format with " + . "\$wgContentHandlerUseDB disabled: format is $format, " + . "default for $name is $defaultFormat" + ); + } + } + + if ( !$content->isValid() ) { + throw new MWException( + "New content for $name is not valid! Content model is $model" + ); + } + } + + /** + * Create a new null-revision for insertion into a page's + * history. This will not re-save the text, but simply refer + * to the text from the previous version. + * + * Such revisions can for instance identify page rename + * operations and other such meta-modifications. + * + * MCR migration note: this replaces Revision::newNullRevision + * + * @todo Introduce newFromParentRevision(). newNullRevision can then be based on that + * (or go away). 
+ * + * @param IDatabase $dbw + * @param Title $title Title of the page to read from + * @param CommentStoreComment $comment RevisionRecord's summary + * @param bool $minor Whether the revision should be considered as minor + * @param User $user The user to attribute the revision to + * @return RevisionRecord|null RevisionRecord or null on error + */ + public function newNullRevision( + IDatabase $dbw, + Title $title, + CommentStoreComment $comment, + $minor, + User $user + ) { + $this->checkDatabaseWikiId( $dbw ); + + $fields = [ 'page_latest', 'page_namespace', 'page_title', + 'rev_id', 'rev_text_id', 'rev_len', 'rev_sha1' ]; + + if ( $this->contentHandlerUseDB ) { + $fields[] = 'rev_content_model'; + $fields[] = 'rev_content_format'; + } + + $current = $dbw->selectRow( + [ 'page', 'revision' ], + $fields, + [ + 'page_id' => $title->getArticleID(), + 'page_latest=rev_id', + ], + __METHOD__, + [ 'FOR UPDATE' ] // T51581 + ); + + if ( $current ) { + $fields = [ + 'page' => $title->getArticleID(), + 'user_text' => $user->getName(), + 'user' => $user->getId(), + 'comment' => $comment, + 'minor_edit' => $minor, + 'text_id' => $current->rev_text_id, + 'parent_id' => $current->page_latest, + 'len' => $current->rev_len, + 'sha1' => $current->rev_sha1 + ]; + + if ( $this->contentHandlerUseDB ) { + $fields['content_model'] = $current->rev_content_model; + $fields['content_format'] = $current->rev_content_format; + } + + $fields['title'] = Title::makeTitle( $current->page_namespace, $current->page_title ); + + $mainSlot = $this->emulateMainSlot_1_29( $fields, 0, $title ); + $revision = new MutableRevisionRecord( $title, $this->wikiId ); + $this->initializeMutableRevisionFromArray( $revision, $fields ); + $revision->setSlot( $mainSlot ); + } else { + $revision = null; + } + + return $revision; + } + + /** + * MCR migration note: this replaces Revision::isUnpatrolled + * + * @return int Rcid of the unpatrolled row, zero if there isn't one + */ + public function isUnpatrolled( 
RevisionRecord $rev ) { + $rc = $this->getRecentChange( $rev ); + if ( $rc && $rc->getAttribute( 'rc_patrolled' ) == 0 ) { + return $rc->getAttribute( 'rc_id' ); + } else { + return 0; + } + } + + /** + * Get the RC object belonging to the current revision, if there's one + * + * MCR migration note: this replaces Revision::getRecentChange + * + * @todo move this somewhere else? + * + * @param RevisionRecord $rev + * @param int $flags (optional) $flags include: + * IDBAccessObject::READ_LATEST: Select the data from the master + * + * @return null|RecentChange + */ + public function getRecentChange( RevisionRecord $rev, $flags = 0 ) { + $dbr = $this->getDBConnection( DB_REPLICA ); + + list( $dbType, ) = DBAccessObjectUtils::getDBOptions( $flags ); + + $userIdentity = $rev->getUser( RevisionRecord::RAW ); + + if ( !$userIdentity ) { + // If the revision has no user identity, chances are it never went + // into the database, and doesn't have an RC entry. + return null; + } + + // TODO: Select by rc_this_oldid alone - but as of Nov 2017, there is no index on that! + $rc = RecentChange::newFromConds( + [ + 'rc_user_text' => $userIdentity->getName(), + 'rc_timestamp' => $dbr->timestamp( $rev->getTimestamp() ), + 'rc_this_oldid' => $rev->getId() + ], + __METHOD__, + $dbType + ); + + $this->releaseDBConnection( $dbr ); + + // XXX: cache this locally? Glue it to the RevisionRecord? + return $rc; + } + + /** + * Maps fields of the archive row to corresponding revision rows. + * + * @param object $archiveRow + * + * @return object a revision row object, corresponding to $archiveRow. 
+ */ + private static function mapArchiveFields( $archiveRow ) { + $fieldMap = [ + // keep with ar prefix: + 'ar_id' => 'ar_id', + + // not the same suffix: + 'ar_page_id' => 'rev_page', + 'ar_rev_id' => 'rev_id', + + // same suffix: + 'ar_text_id' => 'rev_text_id', + 'ar_timestamp' => 'rev_timestamp', + 'ar_user_text' => 'rev_user_text', + 'ar_user' => 'rev_user', + 'ar_minor_edit' => 'rev_minor_edit', + 'ar_deleted' => 'rev_deleted', + 'ar_len' => 'rev_len', + 'ar_parent_id' => 'rev_parent_id', + 'ar_sha1' => 'rev_sha1', + 'ar_comment' => 'rev_comment', + 'ar_comment_cid' => 'rev_comment_cid', + 'ar_comment_id' => 'rev_comment_id', + 'ar_comment_text' => 'rev_comment_text', + 'ar_comment_data' => 'rev_comment_data', + 'ar_comment_old' => 'rev_comment_old', + 'ar_content_format' => 'rev_content_format', + 'ar_content_model' => 'rev_content_model', + ]; + + if ( empty( $archiveRow->ar_text_id ) ) { + $fieldMap['ar_text'] = 'old_text'; + $fieldMap['ar_flags'] = 'old_flags'; + } + + $revRow = new stdClass(); + foreach ( $fieldMap as $arKey => $revKey ) { + if ( property_exists( $archiveRow, $arKey ) ) { + $revRow->$revKey = $archiveRow->$arKey; + } + } + + return $revRow; + } + + /** + * Constructs a RevisionRecord for the revisions main slot, based on the MW1.29 schema. + * + * @param object|array $row Either a database row or an array + * @param int $queryFlags for callbacks + * @param Title $title + * + * @return SlotRecord The main slot, extracted from the MW 1.29 style row. + * @throws MWException + */ + private function emulateMainSlot_1_29( $row, $queryFlags, Title $title ) { + $mainSlotRow = new stdClass(); + $mainSlotRow->role_name = 'main'; + + $content = null; + $blobData = null; + $blobFlags = ''; + + if ( is_object( $row ) ) { + // archive row + if ( !isset( $row->rev_id ) && isset( $row->ar_user ) ) { + $row = $this->mapArchiveFields( $row ); + } + + if ( isset( $row->rev_text_id ) && $row->rev_text_id > 0 ) { + $mainSlotRow->cont_address = 'tt:' . 
$row->rev_text_id; + } elseif ( isset( $row->ar_id ) ) { + $mainSlotRow->cont_address = 'ar:' . $row->ar_id; + } + + if ( isset( $row->old_text ) ) { + // this happens when the text-table gets joined directly, in the pre-1.30 schema + $blobData = isset( $row->old_text ) ? strval( $row->old_text ) : null; + $blobFlags = isset( $row->old_flags ) ? strval( $row->old_flags ) : ''; + } + + $mainSlotRow->slot_revision = intval( $row->rev_id ); + + $mainSlotRow->cont_size = isset( $row->rev_len ) ? intval( $row->rev_len ) : null; + $mainSlotRow->cont_sha1 = isset( $row->rev_sha1 ) ? strval( $row->rev_sha1 ) : null; + $mainSlotRow->model_name = isset( $row->rev_content_model ) + ? strval( $row->rev_content_model ) + : null; + // XXX: in the future, we'll probably always use the default format, and drop content_format + $mainSlotRow->format_name = isset( $row->rev_content_format ) + ? strval( $row->rev_content_format ) + : null; + } elseif ( is_array( $row ) ) { + $mainSlotRow->slot_revision = isset( $row['id'] ) ? intval( $row['id'] ) : null; + + $mainSlotRow->cont_address = isset( $row['text_id'] ) + ? 'tt:' . intval( $row['text_id'] ) + : null; + $mainSlotRow->cont_size = isset( $row['len'] ) ? intval( $row['len'] ) : null; + $mainSlotRow->cont_sha1 = isset( $row['sha1'] ) ? strval( $row['sha1'] ) : null; + + $mainSlotRow->model_name = isset( $row['content_model'] ) + ? strval( $row['content_model'] ) : null; // XXX: must be a string! + // XXX: in the future, we'll probably always use the default format, and drop content_format + $mainSlotRow->format_name = isset( $row['content_format'] ) + ? strval( $row['content_format'] ) : null; + $blobData = isset( $row['text'] ) ? rtrim( strval( $row['text'] ) ) : null; + $blobFlags = isset( $row['flags'] ) ? 
trim( strval( $row['flags'] ) ) : ''; + + // if we have a Content object, override mText and mContentModel + if ( !empty( $row['content'] ) ) { + if ( !( $row['content'] instanceof Content ) ) { + throw new MWException( 'content field must contain a Content object.' ); + } + + /** @var Content $content */ + $content = $row['content']; + $handler = $content->getContentHandler(); + + $mainSlotRow->model_name = $content->getModel(); + + // XXX: in the future, we'll probably always use the default format. + if ( $mainSlotRow->format_name === null ) { + $mainSlotRow->format_name = $handler->getDefaultFormat(); + } + } + } else { + throw new MWException( 'Revision constructor passed invalid row format.' ); + } + + // With the old schema, the content changes with every revision. + // ...except for null-revisions. Would be nice if we could detect them. + $mainSlotRow->slot_inherited = 0; + + if ( $mainSlotRow->model_name === null ) { + $mainSlotRow->model_name = function ( SlotRecord $slot ) use ( $title ) { + // TODO: MCR: consider slot role in getDefaultModelFor()! Use LinkTarget! + // TODO: MCR: deprecate $title->getModel(). + return ContentHandler::getDefaultModelFor( $title ); + }; + } + + if ( !$content ) { + $content = function ( SlotRecord $slot ) + use ( $blobData, $blobFlags, $queryFlags, $mainSlotRow ) + { + return $this->loadSlotContent( + $slot, + $blobData, + $blobFlags, + $mainSlotRow->format_name, + $queryFlags + ); + }; + } + + return new SlotRecord( $mainSlotRow, $content ); + } + + /** + * Loads a Content object based on a slot row. + * + * This method does not call $slot->getContent(), and may be used as a callback + * called by $slot->getContent(). 
+ * + * MCR migration note: this roughly corresponds to Revision::getContentInternal + * + * @param SlotRecord $slot The SlotRecord to load content for + * @param string|null $blobData The content blob, in the form indicated by $blobFlags + * @param string $blobFlags Flags indicating how $blobData needs to be processed + * @param string|null $blobFormat MIME type indicating how $dataBlob is encoded + * @param int $queryFlags + * + * @throw RevisionAccessException + * @return Content + */ + private function loadSlotContent( + SlotRecord $slot, + $blobData = null, + $blobFlags = '', + $blobFormat = null, + $queryFlags = 0 + ) { + if ( $blobData !== null ) { + Assert::parameterType( 'string', $blobData, '$blobData' ); + Assert::parameterType( 'string', $blobFlags, '$blobFlags' ); + + $cacheKey = $slot->hasAddress() ? $slot->getAddress() : null; + + $data = $this->blobStore->expandBlob( $blobData, $blobFlags, $cacheKey ); + + if ( $data === false ) { + throw new RevisionAccessException( + "Failed to expand blob data using flags $blobFlags (key: $cacheKey)" + ); + } + } else { + $address = $slot->getAddress(); + try { + $data = $this->blobStore->getBlob( $address, $queryFlags ); + } catch ( BlobAccessException $e ) { + throw new RevisionAccessException( + "Failed to load data blob from $address: " . $e->getMessage(), 0, $e + ); + } + } + + // Unserialize content + $handler = ContentHandler::getForModelID( $slot->getModel() ); + + $content = $handler->unserializeContent( $data, $blobFormat ); + return $content; + } + + /** + * Load a page revision from a given revision ID number. + * Returns null if no such revision can be found. 
+ * + * MCR migration note: this replaces Revision::newFromId + * + * $flags include: + * IDBAccessObject::READ_LATEST: Select the data from the master + * IDBAccessObject::READ_LOCKING : Select & lock the data from the master + * + * @param int $id + * @param int $flags (optional) + * @return RevisionRecord|null + */ + public function getRevisionById( $id, $flags = 0 ) { + return $this->newRevisionFromConds( [ 'rev_id' => intval( $id ) ], $flags ); + } + + /** + * Load either the current, or a specified, revision + * that's attached to a given link target. If not attached + * to that link target, will return null. + * + * MCR migration note: this replaces Revision::newFromTitle + * + * $flags include: + * IDBAccessObject::READ_LATEST: Select the data from the master + * IDBAccessObject::READ_LOCKING : Select & lock the data from the master + * + * @param LinkTarget $linkTarget + * @param int $revId (optional) + * @param int $flags Bitfield (optional) + * @return RevisionRecord|null + */ + public function getRevisionByTitle( LinkTarget $linkTarget, $revId = 0, $flags = 0 ) { + $conds = [ + 'page_namespace' => $linkTarget->getNamespace(), + 'page_title' => $linkTarget->getDBkey() + ]; + if ( $revId ) { + // Use the specified revision ID. + // Note that we use newRevisionFromConds here because we want to retry + // and fall back to master if the page is not found on a replica. + // Since the caller supplied a revision ID, we are pretty sure the revision is + // supposed to exist, so we should try hard to find it. + $conds['rev_id'] = $revId; + return $this->newRevisionFromConds( $conds, $flags ); + } else { + // Use a join to get the latest revision. + // Note that we don't use newRevisionFromConds here because we don't want to retry + // and fall back to master. The assumption is that we only want to force the fallback + // if we are quite sure the revision exists because the caller supplied a revision ID. 
+ // If the page isn't found at all on a replica, it probably simply does not exist. + $db = $this->getDBConnection( ( $flags & self::READ_LATEST ) ? DB_MASTER : DB_REPLICA ); + + $conds[] = 'rev_id=page_latest'; + $rev = $this->loadRevisionFromConds( $db, $conds, $flags ); + + $this->releaseDBConnection( $db ); + return $rev; + } + } + + /** + * Load either the current, or a specified, revision + * that's attached to a given page ID. + * Returns null if no such revision can be found. + * + * MCR migration note: this replaces Revision::newFromPageId + * + * $flags include: + * IDBAccessObject::READ_LATEST: Select the data from the master (since 1.20) + * IDBAccessObject::READ_LOCKING : Select & lock the data from the master + * + * @param int $pageId + * @param int $revId (optional) + * @param int $flags Bitfield (optional) + * @return RevisionRecord|null + */ + public function getRevisionByPageId( $pageId, $revId = 0, $flags = 0 ) { + $conds = [ 'page_id' => $pageId ]; + if ( $revId ) { + // Use the specified revision ID. + // Note that we use newRevisionFromConds here because we want to retry + // and fall back to master if the page is not found on a replica. + // Since the caller supplied a revision ID, we are pretty sure the revision is + // supposed to exist, so we should try hard to find it. + $conds['rev_id'] = $revId; + return $this->newRevisionFromConds( $conds, $flags ); + } else { + // Use a join to get the latest revision. + // Note that we don't use newRevisionFromConds here because we don't want to retry + // and fall back to master. The assumption is that we only want to force the fallback + // if we are quite sure the revision exists because the caller supplied a revision ID. + // If the page isn't found at all on a replica, it probably simply does not exist. + $db = $this->getDBConnection( ( $flags & self::READ_LATEST ) ? 
DB_MASTER : DB_REPLICA ); + + $conds[] = 'rev_id=page_latest'; + $rev = $this->loadRevisionFromConds( $db, $conds, $flags ); + + $this->releaseDBConnection( $db ); + return $rev; + } + } + + /** + * Load the revision for the given title with the given timestamp. + * WARNING: Timestamps may in some circumstances not be unique, + * so this isn't the best key to use. + * + * MCR migration note: this replaces Revision::loadFromTimestamp + * + * @param Title $title + * @param string $timestamp + * @return RevisionRecord|null + */ + public function getRevisionFromTimestamp( $title, $timestamp ) { + return $this->newRevisionFromConds( + [ + 'rev_timestamp' => $timestamp, + 'page_namespace' => $title->getNamespace(), + 'page_title' => $title->getDBkey() + ], + 0, + $title + ); + } + + /** + * Make a fake revision object from an archive table row. This is queried + * for permissions or even inserted (as in Special:Undelete) + * + * MCR migration note: this replaces Revision::newFromArchiveRow + * + * @param object $row + * @param int $queryFlags + * @param Title|null $title + * @param array $overrides associative array with fields of $row to override. This may be + * used e.g. to force the parent revision ID or page ID. Keys in the array are fields + * names from the archive table without the 'ar_' prefix, i.e. use 'parent_id' to + * override ar_parent_id. + * + * @return RevisionRecord + * @throws MWException + */ + public function newRevisionFromArchiveRow( + $row, + $queryFlags = 0, + Title $title = null, + array $overrides = [] + ) { + Assert::parameterType( 'object', $row, '$row' ); + + // check second argument, since Revision::newFromArchiveRow had $overrides in that spot. + Assert::parameterType( 'integer', $queryFlags, '$queryFlags' ); + + if ( !$title && isset( $overrides['title'] ) ) { + if ( !( $overrides['title'] instanceof Title ) ) { + throw new MWException( 'title field override must contain a Title object.' 
/**
 * Builds a UserIdentityValue from the user fields of a database row.
 *
 * The name is taken from $row->user_name if set, otherwise from the prefixed
 * user_text field, otherwise it is looked up with User::whoIs(). If no name can be
 * determined, a warning is emitted and the empty string is used.
 *
 * @param object $row
 * @param string $prefix Field prefix, such as 'rev_' or 'ar_'.
 *
 * @return UserIdentityValue
 */
private function getUserIdentityFromRowObject( $row, $prefix = 'rev_' ) {
	$userId = intval( $row->{$prefix . 'user'} );

	// Candidate name fields, in order of preference.
	$name = null;
	foreach ( [ 'user_name', $prefix . 'user_text' ] as $candidate ) {
		if ( isset( $row->$candidate ) ) {
			$name = $row->$candidate;
			break;
		}
	}

	// Neither field is present in the row: fall back to a lookup by ID.
	if ( $name === null ) {
		$name = User::whoIs( $userId );
	}

	if ( $name === false ) {
		wfWarn( __METHOD__ . ': Cannot determine user name for user ID ' . $userId );
		$name = '';
	}

	return new UserIdentityValue( $userId, $name );
}
constructor. + * + * MCR migration note: this replaces Revision::newFromRow + * + * @param array $fields + * @param int $queryFlags + * @param Title|null $title + * + * @return MutableRevisionRecord + * @throws MWException + * @throws RevisionAccessException + */ + public function newMutableRevisionFromArray( + array $fields, + $queryFlags = 0, + Title $title = null + ) { + if ( !$title && isset( $fields['title'] ) ) { + if ( !( $fields['title'] instanceof Title ) ) { + throw new MWException( 'title field must contain a Title object.' ); + } + + $title = $fields['title']; + } + + if ( !$title ) { + $pageId = isset( $fields['page'] ) ? $fields['page'] : 0; + $revId = isset( $fields['id'] ) ? $fields['id'] : 0; + + $title = $this->getTitle( $pageId, $revId ); + } + + if ( !isset( $fields['page'] ) ) { + $fields['page'] = $title->getArticleID( $queryFlags ); + } + + // if we have a content object, use it to set the model and type + if ( !empty( $fields['content'] ) ) { + if ( !( $fields['content'] instanceof Content ) ) { + throw new MWException( 'content field must contain a Content object.' ); + } + + if ( !empty( $fields['text_id'] ) ) { + throw new MWException( + "Text already stored in external store (id {$fields['text_id']}), " . + "can't serialize content object" + ); + } + } + + // Replaces old lazy loading logic in Revision::getUserText. + if ( !isset( $fields['user_text'] ) && isset( $fields['user'] ) ) { + if ( $fields['user'] instanceof UserIdentity ) { + /** @var User $user */ + $user = $fields['user']; + $fields['user_text'] = $user->getName(); + $fields['user'] = $user->getId(); + } else { + // TODO: wrap this in a callback to make it lazy again. + $name = $fields['user'] === 0 ? false : User::whoIs( $fields['user'] ); + + if ( $name === false ) { + throw new MWException( + 'user_text not given, and unknown user ID ' . 
/**
 * Copies the fields of a MW1.29-style revision array into a MutableRevisionRecord.
 *
 * Only fields that are present in $fields are applied, except for the timestamp,
 * which falls back to the current time when absent.
 *
 * @param MutableRevisionRecord $record
 * @param array $fields
 */
private function initializeMutableRevisionFromArray(
	MutableRevisionRecord $record,
	array $fields
) {
	/** @var UserIdentity|null $user */
	$user = null;

	if ( isset( $fields['user'] ) && ( $fields['user'] instanceof UserIdentity ) ) {
		$user = $fields['user'];
	} elseif ( isset( $fields['user'] ) && isset( $fields['user_text'] ) ) {
		$user = new UserIdentityValue( intval( $fields['user'] ), $fields['user_text'] );
	} elseif ( isset( $fields['user'] ) ) {
		$user = User::newFromId( intval( $fields['user'] ) );
	} elseif ( isset( $fields['user_text'] ) ) {
		$user = User::newFromName( $fields['user_text'] );

		// User::newFromName will return false for IP addresses (and invalid names)
		if ( !$user ) {
			$user = new UserIdentityValue( 0, $fields['user_text'] );
		}
	}

	if ( $user ) {
		$record->setUser( $user );
	}

	$timestamp = isset( $fields['timestamp'] )
		? strval( $fields['timestamp'] )
		: wfTimestampNow(); // TODO: use a callback, so we can override it for testing.

	$record->setTimestamp( $timestamp );

	if ( isset( $fields['page'] ) ) {
		$record->setPageId( intval( $fields['page'] ) );
	}

	if ( isset( $fields['id'] ) ) {
		$record->setId( intval( $fields['id'] ) );
	}
	if ( isset( $fields['parent_id'] ) ) {
		$record->setParentId( intval( $fields['parent_id'] ) );
	}

	if ( isset( $fields['sha1'] ) ) {
		$record->setSha1( $fields['sha1'] );
	}
	if ( isset( $fields['size'] ) ) {
		$record->setSize( intval( $fields['size'] ) );
	}

	if ( isset( $fields['minor_edit'] ) ) {
		$record->setMinorEdit( intval( $fields['minor_edit'] ) !== 0 );
	}
	if ( isset( $fields['deleted'] ) ) {
		$record->setVisibility( intval( $fields['deleted'] ) );
	}

	if ( isset( $fields['comment'] ) ) {
		// The label names this method's actual parameter, so that a failed assertion
		// points at the right argument.
		Assert::parameterType(
			CommentStoreComment::class,
			$fields['comment'],
			'$fields[\'comment\']'
		);
		$record->setComment( $fields['comment'] );
	}
}
/**
 * Load the revision with the given ID, or the page's current revision if no ID is
 * given, provided it belongs to the given page. Returns null otherwise.
 *
 * MCR migration note: this replaces Revision::loadFromPageId
 *
 * @note direct use is deprecated!
 * @todo remove when unused!
 *
 * @param IDatabase $db
 * @param int $pageid
 * @param int $id Revision ID; 0 selects the revision referenced by page_latest
 * @return RevisionRecord|null
 */
public function loadRevisionFromPageId( IDatabase $db, $pageid, $id = 0 ) {
	$pageKey = intval( $pageid );
	$conds = [ 'rev_page' => $pageKey, 'page_id' => $pageKey ];

	if ( !$id ) {
		// No explicit revision requested: match the page's latest revision.
		$conds[] = 'rev_id=page_latest';
		return $this->loadRevisionFromConds( $db, $conds );
	}

	$conds['rev_id'] = intval( $id );
	return $this->loadRevisionFromConds( $db, $conds );
}
/**
 * Fetch a revision matching a set of conditions.
 *
 * Intended for cases where the revision is expected to exist. Unless $flags has
 * READ_LATEST set, a replica is queried first; the master is only consulted when
 * nothing was found, there is more than one server, and the load balancer reports
 * recent master changes.
 *
 * MCR migration note: this corresponds to Revision::newFromConds
 *
 * @param array $conditions
 * @param int $flags (optional)
 * @param Title $title
 *
 * @return RevisionRecord|null
 */
private function newRevisionFromConds( $conditions, $flags = 0, Title $title = null ) {
	$wantsLatest = ( $flags & self::READ_LATEST );

	$db = $this->getDBConnection( $wantsLatest ? DB_MASTER : DB_REPLICA );
	$rev = $this->loadRevisionFromConds( $db, $conditions, $flags, $title );
	$this->releaseDBConnection( $db );

	$lb = $this->getDBLoadBalancer();

	// Nothing more to do if we found the revision or already asked the master.
	if ( $rev || $wantsLatest ) {
		return $rev;
	}

	// Make sure new pending/committed revisions are visible later on
	// within web requests, to avoid bugs like T93866 and T94407.
	if ( $lb->getServerCount() > 1 && $lb->hasOrMadeRecentMasterChanges() ) {
		$master = $this->getDBConnection( DB_MASTER );
		$rev = $this->loadRevisionFromConds( $master, $conditions, self::READ_LATEST, $title );
		$this->releaseDBConnection( $master );
	}

	return $rev;
}
/**
 * Verifies that the given database connection belongs to the wiki this
 * RevisionStore is bound to, and throws if it does not.
 *
 * A falsy ID on either side is replaced by wfWikiID() before the final comparison.
 *
 * @param IDatabase $db
 * @throws MWException
 */
private function checkDatabaseWikiId( IDatabase $db ) {
	$storeWiki = $this->wikiId;
	$dbWiki = $db->getDomainID();

	if ( $dbWiki !== $storeWiki ) {
		// XXX: we really want the default database ID...
		if ( !$storeWiki ) {
			$storeWiki = wfWikiID();
		}
		if ( !$dbWiki ) {
			$dbWiki = wfWikiID();
		}

		if ( $dbWiki !== $storeWiki ) {
			throw new MWException( "RevisionStore for $storeWiki "
				. "cannot be used with a DB connection for $dbWiki" );
		}
	}
}
/**
 * Given a set of conditions, return a row with the
 * fields necessary to build RevisionRecord objects.
 *
 * The query joins the page and user tables. If READ_LOCKING is set in $flags,
 * the row is selected FOR UPDATE.
 *
 * MCR migration note: this corresponds to Revision::fetchFromConds
 *
 * @param IDatabase $db
 * @param array $conditions
 * @param int $flags (optional)
 *
 * @return object|false data row as a raw object
 * @throws MWException if $db does not belong to this store's wiki
 */
private function fetchRevisionRowFromConds( IDatabase $db, $conditions, $flags = 0 ) {
	$this->checkDatabaseWikiId( $db );

	// getQueryInfo() is an instance method (it reads $this->contentHandlerUseDB),
	// so call it on the instance rather than through self::.
	$revQuery = $this->getQueryInfo( [ 'page', 'user' ] );
	$options = [];
	if ( ( $flags & self::READ_LOCKING ) == self::READ_LOCKING ) {
		$options[] = 'FOR UPDATE';
	}
	return $db->selectRow(
		$revQuery['tables'],
		$revQuery['fields'],
		$conditions,
		__METHOD__,
		$options,
		$revQuery['joins']
	);
}
$ret['fields'], $commentQuery['fields'] ); + $ret['joins'] = array_merge( $ret['joins'], $commentQuery['joins'] ); + + if ( $this->contentHandlerUseDB ) { + $ret['fields'][] = 'rev_content_format'; + $ret['fields'][] = 'rev_content_model'; + } + + if ( in_array( 'page', $options, true ) ) { + $ret['tables'][] = 'page'; + $ret['fields'] = array_merge( $ret['fields'], [ + 'page_namespace', + 'page_title', + 'page_id', + 'page_latest', + 'page_is_redirect', + 'page_len', + ] ); + $ret['joins']['page'] = [ 'INNER JOIN', [ 'page_id = rev_page' ] ]; + } + + if ( in_array( 'user', $options, true ) ) { + $ret['tables'][] = 'user'; + $ret['fields'] = array_merge( $ret['fields'], [ + 'user_name', + ] ); + $ret['joins']['user'] = [ 'LEFT JOIN', [ 'rev_user != 0', 'user_id = rev_user' ] ]; + } + + if ( in_array( 'text', $options, true ) ) { + $ret['tables'][] = 'text'; + $ret['fields'] = array_merge( $ret['fields'], [ + 'old_text', + 'old_flags' + ] ); + $ret['joins']['text'] = [ 'INNER JOIN', [ 'rev_text_id=old_id' ] ]; + } + + return $ret; + } + + /** + * Return the tables, fields, and join conditions to be selected to create + * a new archived revision object. 
/**
 * Return the tables, fields, and join conditions to be selected to create
 * a new archived revision object.
 *
 * MCR migration note: this replaces Revision::getArchiveQueryInfo
 *
 * @since 1.31
 *
 * @return array With three keys:
 *  - tables: (string[]) to include in the `$table` to `IDatabase->select()`
 *  - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
 *  - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
 */
public function getArchiveQueryInfo() {
	$commentQuery = CommentStore::newKey( 'ar_comment' )->getJoin();

	// Use array_merge(), as getQueryInfo() does: the array union operator would silently
	// drop any integer-keyed entry of the comment query that collides with an index
	// of the list literals below.
	$ret = [
		'tables' => array_merge( [ 'archive' ], $commentQuery['tables'] ),
		'fields' => array_merge(
			[
				'ar_id',
				'ar_page_id',
				'ar_namespace',
				'ar_title',
				'ar_rev_id',
				'ar_text',
				'ar_text_id',
				'ar_timestamp',
				'ar_user_text',
				'ar_user',
				'ar_minor_edit',
				'ar_deleted',
				'ar_len',
				'ar_parent_id',
				'ar_sha1',
			],
			$commentQuery['fields']
		),
		'joins' => $commentQuery['joins'],
	];

	if ( $this->contentHandlerUseDB ) {
		$ret['fields'][] = 'ar_content_format';
		$ret['fields'][] = 'ar_content_model';
	}

	return $ret;
}
/**
 * Do a batched query for the sizes of a set of revisions.
 *
 * MCR migration note: this replaces Revision::getParentLengths
 *
 * @param IDatabase $db
 * @param int[] $revIds
 * @return int[] associative array mapping revision IDs from $revIds to the rev_len
 *  value of the corresponding revision. Empty if $revIds is empty.
 */
public function listRevisionSizes( IDatabase $db, array $revIds ) {
	$this->checkDatabaseWikiId( $db );

	if ( !$revIds ) {
		// Nothing to look up, so skip the query entirely.
		return [];
	}

	$rows = $db->select(
		'revision',
		[ 'rev_id', 'rev_len' ],
		[ 'rev_id' => $revIds ],
		__METHOD__
	);

	$sizes = [];
	foreach ( $rows as $row ) {
		$sizes[$row->rev_id] = intval( $row->rev_len );
	}

	return $sizes;
}
/**
 * Get rev_timestamp from rev_id, without loading the rest of the row.
 *
 * The revision must belong to the page identified by $title. The master database
 * is queried if READ_LATEST is set in $flags, a replica otherwise.
 *
 * MCR migration note: this replaces Revision::getTimestampFromId
 *
 * @param Title $title
 * @param int $id
 * @param int $flags
 * @return string|bool Timestamp in TS_MW format, or false if not found
 */
public function getTimestampFromId( $title, $id, $flags = 0 ) {
	$useMaster = ( $flags & IDBAccessObject::READ_LATEST );
	$db = $this->getDBConnection( $useMaster ? DB_MASTER : DB_REPLICA );

	$timestamp = $db->selectField(
		'revision',
		'rev_timestamp',
		[
			'rev_id' => $id,
			'rev_page' => $title->getArticleID(),
		],
		__METHOD__
	);

	$this->releaseDBConnection( $db );

	if ( $timestamp === false ) {
		return false;
	}

	return wfTimestamp( TS_MW, $timestamp );
}
/**
 * Load a revision based on a known page ID and current revision ID from the DB
 *
 * This method allows for the use of caching, though accessing anything that normally
 * requires permission checks (aside from the text) will trigger a small DB lookup.
 *
 * MCR migration note: this replaces Revision::newKnownCurrent
 *
 * @param Title $title the associated page title
 * @param int $revId current revision of this page. Defaults to $title->getLatestRevID().
 *
 * @return RevisionRecord|bool Returns false if missing
 */
public function getKnownCurrentRevision( Title $title, $revId = 0 ) {
	$db = $this->getDBConnectionRef( DB_REPLICA );

	$pageId = $title->getArticleID();

	if ( !$pageId ) {
		return false;
	}

	// A falsy $revId (including the default) means "use the title's latest revision".
	if ( !$revId ) {
		$revId = $title->getLatestRevID();
	}

	if ( !$revId ) {
		wfWarn(
			'No latest revision known for page ' . $title->getPrefixedDBkey()
			. ' even though it exists with page ID ' . $pageId
		);
		return false;
	}

	$row = $this->cache->getWithSetCallback(
		// Page/rev IDs passed in from DB to reflect history merges
		$this->cache->makeGlobalKey( 'revision-row-1.29', $db->getDomainID(), $pageId, $revId ),
		WANObjectCache::TTL_WEEK,
		function ( $curValue, &$ttl, array &$setOpts ) use ( $db, $pageId, $revId ) {
			$setOpts += Database::getCacheSetOptions( $db );

			$conds = [
				'rev_page' => intval( $pageId ),
				'page_id' => intval( $pageId ),
				'rev_id' => intval( $revId ),
			];

			$row = $this->fetchRevisionRowFromConds( $db, $conds );
			return $row ?: false; // don't cache negatives
		}
	);

	// Reflect revision deletion and user renames
	if ( $row ) {
		return $this->newRevisionFromRow( $row, 0, $title );
	} else {
		return false;
	}
}
/**
 * MCR migration note: this replaces Revision::isDeleted
 *
 * @param int $field One of DELETED_* bitfield constants
 *
 * @return bool Always false for DELETED_TEXT on the current revision; otherwise
 *  whatever the parent implementation reports.
 */
public function isDeleted( $field ) {
	// Current revisions of pages cannot have the content hidden. Skipping the parent
	// check is very useful for Parser as it fetches templates using newKnownCurrent().
	// Calling getVisibility() in that case triggers a verification database query.
	$isCurrentText = $this->isCurrent() && $field === self::DELETED_TEXT;

	return $isCurrentText ? false : parent::isDeleted( $field );
}
/**
 * Returns the nominal revision size, computing and remembering it if it is not
 * yet known.
 *
 * @return int The nominal revision size, never null. May be computed on the fly
 *  (presumably an int as well when computed from the slots; confirm against
 *  RevisionSlots::computeSize).
 */
public function getSize() {
	if ( $this->mSize !== null ) {
		return $this->mSize;
	}

	// The length is unknown: calculate and remember it (potentially SLOW!).
	// This is for compatibility with old database rows that don't have the field set.
	$this->mSize = $this->mSlots->computeSize();

	return $this->mSize;
}
/**
 * Constructs a new SlotRecord from an existing SlotRecord, overriding some fields.
 * The slot's content cannot be overwritten.
 *
 * The row of the original slot is copied before the overrides are applied, so the
 * original SlotRecord is left unchanged.
 *
 * @param SlotRecord $slot
 * @param array $overrides Map of row field names to the values to use in the new record
 *
 * @return SlotRecord
 */
private static function newDerived( SlotRecord $slot, array $overrides = [] ) {
	// Objects are handles: without the clone, assigning the overrides below would
	// also modify the row of the slot we are deriving from.
	$row = clone $slot->row;

	foreach ( $overrides as $key => $value ) {
		$row->$key = $value;
	}

	return new SlotRecord( $row, $slot->content );
}
/**
 * Constructs a new Slot from a Content object for a new revision.
 * This is the preferred way to construct a slot for storing Content that
 * resulted from a user edit.
 *
 * Fields that only become known when the slot is saved are initialized to null.
 *
 * @param string $role
 * @param Content $content
 * @param bool $inherited
 *
 * @return SlotRecord
 */
public static function newUnsaved( $role, Content $content, $inherited = false ) {
	Assert::parameterType( 'boolean', $inherited, '$inherited' );
	Assert::parameterType( 'string', $role, '$role' );

	$row = [
		'slot_id' => null, // not yet known
		// Same field name that hasAddress(), getAddress() and newSaved() use, so the
		// address of an unsaved slot is a known-but-uninitialized field.
		'cont_address' => null, // not yet known. need setter?
		'slot_revision' => null, // not yet known
		'slot_inherited' => $inherited,
		'cont_size' => null, // compute later
		'cont_sha1' => null, // compute later
		'role_name' => $role,
		'model_name' => $content->getModel(),
	];

	return new SlotRecord( (object)$row, $content );
}
/**
 * Returns the Content of the given slot.
 *
 * If the slot was constructed with a callback instead of a Content object, the
 * callback is invoked with this SlotRecord on first access and its result is kept
 * for subsequent calls.
 *
 * @note This is free to load Content from whatever subsystem is necessary,
 * performing potentially expensive operations and triggering I/O-related
 * failure modes.
 *
 * @note This method does not apply audience filtering.
 *
 * @throws SuppressedDataException if access to the content is not allowed according
 * to the audience check performed by RevisionRecord::getSlot().
 *
 * @return Content The slot's content. This is a direct reference to the internal instance,
 * copy before exposing to application logic!
 */
public function getContent() {
	if ( !( $this->content instanceof Content ) ) {
		// Still a callback: resolve it and remember the result.
		$loaded = call_user_func( $this->content, $this );

		Assert::postcondition(
			$loaded instanceof Content,
			'Slot content callback should return a Content object'
		);

		$this->content = $loaded;
	}

	return $this->content;
}
+ */ + private function getField( $name ) { + if ( !isset( $this->row->$name ) ) { + // distinguish between unknown and uninitialized fields + if ( property_exists( $this->row, $name ) ) { + throw new IncompleteRevisionException( 'Uninitialized field: ' . $name ); + } else { + throw new OutOfBoundsException( 'No such field: ' . $name ); + } + } + + $value = $this->row->$name; + + // NOTE: allow callbacks, but don't trust plain string callables from the database! + if ( !is_string( $value ) && is_callable( $value ) ) { + $value = call_user_func( $value, $this ); + $this->setField( $name, $value ); + } + + return $value; + } + + /** + * Returns the string value of a data field from the database row supplied to the constructor. + * + * @param string $name + * + * @throws OutOfBoundsException + * @throws IncompleteRevisionException + * @return string Returns the string value + */ + private function getStringField( $name ) { + return strval( $this->getField( $name ) ); + } + + /** + * Returns the int value of a data field from the database row supplied to the constructor. + * + * @param string $name + * + * @throws OutOfBoundsException + * @throws IncompleteRevisionException + * @return int Returns the int value + */ + private function getIntField( $name ) { + return intval( $this->getField( $name ) ); + } + + /** + * @param string $name + * @return bool whether this record contains the given field + */ + private function hasField( $name ) { + return isset( $this->row->$name ); + } + + /** + * Returns the ID of the revision this slot is associated with. + * + * @return int + */ + public function getRevision() { + return $this->getIntField( 'slot_revision' ); + } + + /** + * Whether this slot was inherited from an older revision. + * + * @return bool + */ + public function isInherited() { + return $this->getIntField( 'slot_inherited' ) !== 0; + } + + /** + * Whether this slot has an address. Slots will have an address if their + * content has been stored. 
While building a new revision, + * SlotRecords will not have an address associated. + * + * @return bool + */ + public function hasAddress() { + return $this->hasField( 'cont_address' ); + } + + /** + * Whether this slot has a revision ID associated. Slots will have a revision ID associated + * only if they were loaded as part of an existing revision. While building a new revision, + * SlotRecords will not have a revision ID associated. + * + * @return bool + */ + public function hasRevision() { + return $this->hasField( 'slot_revision' ); + } + + /** + * Returns the role of the slot. + * + * @return string + */ + public function getRole() { + return $this->getStringField( 'role_name' ); + } + + /** + * Returns the address of this slot's content. + * This address can be used with BlobStore to load the Content object. + * + * @return string + */ + public function getAddress() { + return $this->getStringField( 'cont_address' ); + } + + /** + * Returns the content size. + * + * If the cont_size field is uninitialized, the size is taken from the Content object + * and stored in the row for later calls. + * + * @return int size of the content, in bogo-bytes, as reported by Content::getSize. + */ + public function getSize() { + try { + $size = $this->getIntField( 'cont_size' ); + } catch ( IncompleteRevisionException $ex ) { + // Field present but not yet initialized: compute lazily and remember the result. + $size = $this->getContent()->getSize(); + $this->setField( 'cont_size', $size ); + } + + return $size; + } + + /** + * Returns the hash of the content. + * + * If the cont_sha1 field is uninitialized, the content is serialized (using the format + * from the format_name field, if present) and hashed with self::base36Sha1(); the result + * is stored in the row for later calls. + * + * @return string hash of the content. + */ + public function getSha1() { + try { + $sha1 = $this->getStringField( 'cont_sha1' ); + } catch ( IncompleteRevisionException $ex ) { + // Field present but not yet initialized: compute lazily and remember the result. + $format = $this->hasField( 'format_name' ) + ? $this->getStringField( 'format_name' ) + : null; + + $data = $this->getContent()->serialize( $format ); + $sha1 = self::base36Sha1( $data ); + $this->setField( 'cont_sha1', $sha1 ); + } + + return $sha1; + } + + /** + * Returns the content model. This is the model name that decides + * which ContentHandler is appropriate for interpreting the + * data of the blob referenced by the address returned by getAddress().
+ * + * @return string the content model of the content + */ + public function getModel() { + try { + $model = $this->getStringField( 'model_name' ); + } catch ( IncompleteRevisionException $ex ) { + $model = $this->getContent()->getModel(); + $this->setField( 'model_name', $model ); + } + + return $model; + } + + /** + * Returns the blob serialization format as a MIME type. + * + * @note When this method returns null, the caller is expected + * to auto-detect the serialization format, or to rely on + * the default format associated with the content model. + * + * @return string|null + */ + public function getFormat() { + // XXX: we currently do not plan to store the format for each slot! + + if ( $this->hasField( 'format_name' ) ) { + return $this->getStringField( 'format_name' ); + } + + return null; + } + + /** + * @param string $name + * @param string|int|null $value + */ + private function setField( $name, $value ) { + $this->row->$name = $value; + } + + /** + * Get the base 36 SHA-1 value for a string of text + * + * MCR migration note: this replaces Revision::base36Sha1 + * + * @param string $blob + * @return string + */ + public static function base36Sha1( $blob ) { + return \Wikimedia\base_convert( sha1( $blob ), 16, 36, 31 ); + } + +} diff --git a/includes/Storage/SqlBlobStore.php b/includes/Storage/SqlBlobStore.php new file mode 100644 index 0000000000..0714633285 --- /dev/null +++ b/includes/Storage/SqlBlobStore.php @@ -0,0 +1,580 @@ +dbLoadBalancer = $dbLoadBalancer; + $this->cache = $cache; + $this->wikiId = $wikiId; + } + + /** + * @return int time for which blobs can be cached, in seconds + */ + public function getCacheExpiry() { + return $this->cacheExpiry; + } + + /** + * @param int $cacheExpiry time for which blobs can be cached, in seconds + */ + public function setCacheExpiry( $cacheExpiry ) { + Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' ); + + $this->cacheExpiry = $cacheExpiry; + } + + /** + * @return bool whether blobs 
should be compressed for storage + */ + public function getCompressBlobs() { + return $this->compressBlobs; + } + + /** + * @param bool $compressBlobs whether blobs should be compressed for storage + */ + public function setCompressBlobs( $compressBlobs ) { + $this->compressBlobs = $compressBlobs; + } + + /** + * @return false|string The legacy encoding to assume for blobs that are not marked as utf8. + * False means handling of legacy encoding is disabled, and utf8 assumed. + */ + public function getLegacyEncoding() { + return $this->legacyEncoding; + } + + /** + * @return Language|null The locale to use when decoding from a legacy encoding, or null + * if handling of legacy encoding is disabled. + */ + public function getLegacyEncodingConversionLang() { + return $this->legacyEncodingConversionLang; + } + + /** + * @param string $legacyEncoding The legacy encoding to assume for blobs that are + * not marked as utf8. + * @param Language $language The locale to use when decoding from a legacy encoding. + */ + public function setLegacyEncoding( $legacyEncoding, Language $language ) { + Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' ); + + $this->legacyEncoding = $legacyEncoding; + $this->legacyEncodingConversionLang = $language; + } + + /** + * @return bool Whether to use the ExternalStore mechanism for storing blobs. + */ + public function getUseExternalStore() { + return $this->useExternalStore; + } + + /** + * @param bool $useExternalStore Whether to use the ExternalStore mechanism for storing blobs. 
+ */ + public function setUseExternalStore( $useExternalStore ) { + Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' ); + + $this->useExternalStore = $useExternalStore; + } + + /** + * @return LoadBalancer + */ + private function getDBLoadBalancer() { + return $this->dbLoadBalancer; + } + + /** + * @param int $index A database index, like DB_MASTER or DB_REPLICA + * + * @return IDatabase + */ + private function getDBConnection( $index ) { + $lb = $this->getDBLoadBalancer(); + return $lb->getConnection( $index, [], $this->wikiId ); + } + + /** + * Stores an arbitrary blob of data and returns an address that can be used with + * getBlob() to retrieve the same blob of data. + * + * The data is compressed according to the configured settings, optionally written to + * the external store, and then recorded as a new row in the text table on the master + * database. The returned address has the form "tt:<old_id>". + * + * @param string $data + * @param array $hints An array of hints. Currently not used by this implementation. + * + * @throws BlobAccessException + * @return string an address that can be used with getBlob() to retrieve the data. + */ + public function storeBlob( $data, $hints = [] ) { + try { + // compressData() may modify $data in place and returns the matching flags. + $flags = $this->compressData( $data ); + + # Write to external storage if required + if ( $this->useExternalStore ) { + // Store and get the URL + $data = ExternalStore::insertToDefault( $data ); + if ( !$data ) { + throw new BlobAccessException( "Failed to store text to external storage" ); + } + if ( $flags ) { + $flags .= ','; + } + $flags .= 'external'; + + // TODO: we could also return an address for the external store directly here. + // That would mean bypassing the text table entirely when the external store is + // used. We'll need to assess expected fallout before doing that. + } + + // Inserting a row is a write, so it must use the master connection, not a replica. + $dbw = $this->getDBConnection( DB_MASTER ); + + $old_id = $dbw->nextSequenceValue( 'text_old_id_seq' ); + $dbw->insert( + 'text', + [ + 'old_id' => $old_id, + 'old_text' => $data, + 'old_flags' => $flags, + ], + __METHOD__ + ); + + $textId = $dbw->insertId(); + + return 'tt:' . $textId; + } catch ( MWException $e ) { + throw new BlobAccessException( $e->getMessage(), 0, $e ); + } + } + + /** + * Retrieve a blob, given an address.
+ * Currently hardcoded to the 'text' table storage engine. + * + * MCR migration note: this replaces Revision::loadText + * + * @param string $blobAddress + * @param int $queryFlags + * + * @throws BlobAccessException + * @return string + */ + public function getBlob( $blobAddress, $queryFlags = 0 ) { + Assert::parameterType( 'string', $blobAddress, '$blobAddress' ); + + // No negative caching; negative hits on text rows may be due to corrupted replica DBs + $blob = $this->cache->getWithSetCallback( + // TODO: change key, since this is not necessarily revision text! + $this->cache->makeKey( 'revisiontext', 'textid', $blobAddress ), + $this->getCacheTTL(), + function () use ( $blobAddress, $queryFlags ) { + return $this->fetchBlob( $blobAddress, $queryFlags ); + }, + [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ] + ); + + if ( $blob === false ) { + throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress ); + } + + return $blob; + } + + /** + * Loads the blob for the given address directly from the text table, without caching. + * Only the 'tt' address schema is supported. + * + * MCR migration note: this corresponds to Revision::fetchText + * + * @param string $blobAddress + * @param int $queryFlags + * + * @throws BlobAccessException if the address schema is unknown or the ID is malformed + * @throws InvalidArgumentException if the address cannot be parsed by splitBlobAddress() + * @return string|false The blob, or false if no text row was found or its data is bad + */ + private function fetchBlob( $blobAddress, $queryFlags ) { + list( $schema, $id, ) = self::splitBlobAddress( $blobAddress ); + + //TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL! + //TODO: MCR: also support 'ar' schema for content blobs in old style archive rows! + if ( $schema === 'tt' ) { + $textId = intval( $id ); + } else { + // XXX: change to better exceptions! That makes migration more difficult, though. + throw new BlobAccessException( "Unknown blob address schema: $schema" ); + } + + // Reject IDs that are zero or do not round-trip through intval() unchanged. + if ( !$textId || $id !== (string)$textId ) { + // XXX: change to better exceptions! That makes migration more difficult, though.
+ throw new BlobAccessException( "Bad blob address: $blobAddress" ); + } + + // Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables + // do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases. + $queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) + ? self::READ_LATEST_IMMUTABLE + : 0; + + list( $index, $options, $fallbackIndex, $fallbackOptions ) = + DBAccessObjectUtils::getDBOptions( $queryFlags ); + + // Text data is immutable; check replica DBs first. + $row = $this->getDBConnection( $index )->selectRow( + 'text', + [ 'old_text', 'old_flags' ], + [ 'old_id' => $textId ], + __METHOD__, + $options + ); + + // Fallback to DB_MASTER in some cases if the row was not found, using the appropriate + // options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ. + if ( !$row && $fallbackIndex !== null ) { + $row = $this->getDBConnection( $fallbackIndex )->selectRow( + 'text', + [ 'old_text', 'old_flags' ], + [ 'old_id' => $textId ], + __METHOD__, + $fallbackOptions + ); + } + + if ( !$row ) { + wfWarn( __METHOD__ . ": No text row with ID $textId." ); + return false; + } + + $blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress ); + + if ( $blob === false ) { + wfWarn( __METHOD__ . ": Bad data in text row $textId." ); + return false; + } + + return $blob; + } + + /** + * Expand a raw data blob according to the flags given. + * + * MCR migration note: this replaces Revision::getRevisionText + * + * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead. + * @todo make this private, there should be no need to use this method outside this class. + * + * @param string $raw The raw blob data, to be processed according to $flags. + * May be the blob itself, or the blob compressed, or just the address + * of the actual blob, depending on $flags. + * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'. 
+ * @param string|null $cacheKey May be used for caching if given + * + * @return false|string The expanded blob or false on failure + */ + public function expandBlob( $raw, $flags, $cacheKey = null ) { + if ( is_string( $flags ) ) { + $flags = explode( ',', $flags ); + } + + // Use external methods for external objects, text in table is URL-only then + if ( in_array( 'external', $flags ) ) { + $url = $raw; + $parts = explode( '://', $url, 2 ); + if ( count( $parts ) == 1 || $parts[1] == '' ) { + return false; + } + + if ( $cacheKey ) { + // Make use of the wiki-local revision text cache. + // The cached value should be decompressed, so handle that and return here. + // NOTE: we rely on $this->cache being the right cache for $this->wikiId! + return $this->cache->getWithSetCallback( + // TODO: change key, since this is not necessarily revision text! + $this->cache->makeKey( 'revisiontext', 'textid', $cacheKey ), + $this->getCacheTTL(), + function () use ( $url, $flags ) { + // No negative caching per BlobStore::getBlob() + $blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] ); + + return $this->decompressData( $blob, $flags ); + }, + [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ] + ); + } else { + $blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] ); + return $this->decompressData( $blob, $flags ); + } + } else { + return $this->decompressData( $raw, $flags ); + } + } + + /** + * If $wgCompressRevisions is enabled, we will compress data. + * The input string is modified in place. + * Return value is the flags field: contains 'gzip' if the + * data is compressed, and 'utf-8' if we're saving in UTF-8 + * mode. + * + * MCR migration note: this replaces Revision::compressRevisionText + * + * @note direct use is deprecated! + * @todo make this private, there should be no need to use this method outside this class. 
+ * + * @param mixed &$blob Reference to a text + * + * @return string + */ + public function compressData( &$blob ) { + $blobFlags = []; + + // Revisions not marked as UTF-8 will have legacy decoding applied by decompressData(). + // XXX: if $this->legacyEncoding is not set, we could skip this. May be risky, though. + $blobFlags[] = 'utf-8'; + + if ( $this->compressBlobs ) { + if ( function_exists( 'gzdeflate' ) ) { + $deflated = gzdeflate( $blob ); + + if ( $deflated === false ) { + wfLogWarning( __METHOD__ . ': gzdeflate() failed' ); + } else { + $blob = $deflated; + $blobFlags[] = 'gzip'; + } + } else { + wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" ); + } + } + return implode( ',', $blobFlags ); + } + + /** + * Re-converts revision text according to its flags. + * + * MCR migration note: this replaces Revision::decompressRevisionText + * + * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead. + * @todo make this private, there should be no need to use this method outside this class. + * + * @param mixed $blob Reference to a text + * @param array $blobFlags Compression flags + * + * @return string|bool Decompressed text, or false on failure + */ + public function decompressData( $blob, $blobFlags ) { + if ( $blob === false ) { + // Text failed to be fetched; nothing to do + return false; + } + + if ( in_array( 'gzip', $blobFlags ) ) { + # Deal with optional compression of archived pages. + # This can be done periodically via maintenance/compressOld.php, and + # as pages are saved if $wgCompressRevisions is set. + $blob = gzinflate( $blob ); + + if ( $blob === false ) { + wfLogWarning( __METHOD__ . 
': gzinflate() failed' ); + return false; + } + } + + if ( in_array( 'object', $blobFlags ) ) { + # Generic compressed storage + # NOTE(review): unserialize() is applied to stored blob data; if blob content can be influenced by untrusted parties this allows object injection - confirm the data is trusted. + $obj = unserialize( $blob ); + if ( !is_object( $obj ) ) { + // Invalid object + return false; + } + $blob = $obj->getText(); + } + + // Needed to support old revisions left over from the 1.4 / 1.5 migration. + if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang + && !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags ) + ) { + # Old revisions kept around in a legacy encoding? + # Upconvert on demand. + # ("utf8" checked for compatibility with some broken + # conversion scripts 2008-12-30) + $blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob ); + } + + return $blob; + } + + /** + * Get the text cache TTL + * + * MCR migration note: this replaces Revision::getCacheTTL + * + * @return int + */ + private function getCacheTTL() { + if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION ) + <= WANObjectCache::QOS_EMULATION_SQL + ) { + // Do not cache RDBMs blobs in...the RDBMs store + $ttl = WANObjectCache::TTL_UNCACHEABLE; + } else { + // A cacheExpiry of 0 (or any falsy value) disables caching. + $ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE; + } + + return $ttl; + } + + /** + * Returns an ID corresponding to the old_id field in the text table, corresponding + * to the given $address. + * + * Currently, $address must start with 'tt:' followed by a decimal integer representing + * the old_id; if $address does not start with 'tt:', null is returned. However, + * the implementation may change to insert rows into the text table on the fly. + * + * @note This method exists for use with the text table based storage schema. + * It should not be assumed that it will function with all future kinds of content addresses. + * + * @deprecated since 1.31, do not assume that all blob addresses refer to a row in the text + * table.
This method should become private once the relevant refactoring in WikiPage is + * complete. + * + * @param string $address + * + * @return int|null + */ + public function getTextIdFromAddress( $address ) { + list( $schema, $id, ) = self::splitBlobAddress( $address ); + + if ( $schema !== 'tt' ) { + return null; + } + + $textId = intval( $id ); + + if ( !$textId || $id !== (string)$textId ) { + throw new InvalidArgumentException( "Malformed text_id: $id" ); + } + + return $textId; + } + + /** + * Splits a blob address into three parts: the schema, the ID, and parameters/flags. + * + * @param string $address + * + * @throws InvalidArgumentException + * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array. + */ + private static function splitBlobAddress( $address ) { + if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $m ) ) { + throw new InvalidArgumentException( "Bad blob address: $address" ); + } + + $schema = strtolower( $m[1] ); + $id = $m[2]; + $parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : []; + + return [ $schema, $id, $parameters ]; + } + +} diff --git a/includes/Storage/SuppressedDataException.php b/includes/Storage/SuppressedDataException.php new file mode 100644 index 0000000000..24f16a6482 --- /dev/null +++ b/includes/Storage/SuppressedDataException.php @@ -0,0 +1,33 @@ +id = $id; + $this->name = $name; + } + + /** + * @return int The user ID. May be 0 for anonymous users or for users with no local account. + */ + public function getId() { + return $this->id; + } + + /** + * @return string The user's logical name. May be an IPv4 or IPv6 address for anonymous users. 
+ */ + public function getName() { + return $this->name; + } + +} diff --git a/tests/phpunit/includes/Storage/RevisionRecordTest.php b/tests/phpunit/includes/Storage/RevisionRecordTest.php new file mode 100644 index 0000000000..788d763ed1 --- /dev/null +++ b/tests/phpunit/includes/Storage/RevisionRecordTest.php @@ -0,0 +1,15 @@ +resetArticleID( 17 ); + + $user = new UserIdentityValue( 11, 'Tester' ); + $comment = CommentStoreComment::newUnsavedComment( 'Hello World' ); + + $main = SlotRecord::newUnsaved( 'main', new TextContent( 'Lorem Ipsum' ) ); + $aux = SlotRecord::newUnsaved( 'aux', new TextContent( 'Frumious Bandersnatch' ) ); + $slots = new RevisionSlots( [ $main, $aux ] ); + + $row = [ + 'rev_id' => '7', + 'rev_page' => strval( $title->getArticleID() ), + 'rev_timestamp' => '20200101000000', + 'rev_deleted' => 0, + 'rev_minor_edit' => 0, + 'rev_parent_id' => '5', + 'rev_len' => $slots->computeSize(), + 'rev_sha1' => $slots->computeSha1(), + 'page_latest' => '18', + ]; + + $row = array_merge( $row, $overrides ); + + return new RevisionStoreRecord( $title, $user, $comment, (object)$row, $slots ); + } + + public function provideConstructor() { + $title = Title::newFromText( 'Dummy' ); + $title->resetArticleID( 17 ); + + $user = new UserIdentityValue( 11, 'Tester' ); + $comment = CommentStoreComment::newUnsavedComment( 'Hello World' ); + + $main = SlotRecord::newUnsaved( 'main', new TextContent( 'Lorem Ipsum' ) ); + $aux = SlotRecord::newUnsaved( 'aux', new TextContent( 'Frumious Bandersnatch' ) ); + $slots = new RevisionSlots( [ $main, $aux ] ); + + $protoRow = [ + 'rev_id' => '7', + 'rev_page' => strval( $title->getArticleID() ), + 'rev_timestamp' => '20200101000000', + 'rev_deleted' => 0, + 'rev_minor_edit' => 0, + 'rev_parent_id' => '5', + 'rev_len' => $slots->computeSize(), + 'rev_sha1' => $slots->computeSha1(), + 'page_latest' => '18', + ]; + + $row = $protoRow; + yield 'all info' => [ + $title, + $user, + $comment, + (object)$row, + $slots, + 'acmewiki' + 
]; + + $row = $protoRow; + $row['rev_minor_edit'] = '1'; + $row['rev_deleted'] = strval( RevisionRecord::DELETED_USER ); + + yield 'minor deleted' => [ + $title, + $user, + $comment, + (object)$row, + $slots + ]; + + $row = $protoRow; + $row['page_latest'] = $row['rev_id']; + + yield 'latest' => [ + $title, + $user, + $comment, + (object)$row, + $slots + ]; + + $row = $protoRow; + // NOTE(review): the row has no 'rev_parent' key (the field is 'rev_parent_id'), so this + // unset() is a no-op and the case still carries a parent ID - confirm the intended field + // and the value getParentId() should return when it is missing. + unset( $row['rev_parent'] ); + + yield 'no parent' => [ + $title, + $user, + $comment, + (object)$row, + $slots + ]; + + $row = $protoRow; + unset( $row['rev_len'] ); + unset( $row['rev_sha1'] ); + + yield 'no length, no hash' => [ + $title, + $user, + $comment, + (object)$row, + $slots + ]; + + // Data set names must be unique: a repeated key would shadow the data set above. + $row = $protoRow; + yield 'nonexistent title' => [ + Title::newFromText( 'DummyDoesNotExist' ), + $user, + $comment, + (object)$row, + $slots + ]; + } + + /** + * Checks that the getters of RevisionStoreRecord report the values passed to the constructor. + * + * @dataProvider provideConstructor + * + * @param Title $title + * @param UserIdentity $user + * @param CommentStoreComment $comment + * @param object $row + * @param RevisionSlots $slots + * @param bool|string $wikiId + */ + public function testConstructorAndGetters( + Title $title, + UserIdentity $user, + CommentStoreComment $comment, + $row, + RevisionSlots $slots, + $wikiId = false + ) { + $rec = new RevisionStoreRecord( $title, $user, $comment, $row, $slots, $wikiId ); + + $this->assertSame( $title, $rec->getPageAsLinkTarget(), 'getPageAsLinkTarget' ); + $this->assertSame( $user, $rec->getUser( RevisionRecord::RAW ), 'getUser' ); + $this->assertSame( $comment, $rec->getComment(), 'getComment' ); + + $this->assertSame( $slots->getSlotRoles(), $rec->getSlotRoles(), 'getSlotRoles' ); + $this->assertSame( $wikiId, $rec->getWikiId(), 'getWikiId' ); + + $this->assertSame( (int)$row->rev_id, $rec->getId(), 'getId' ); + $this->assertSame( (int)$row->rev_page, $rec->getPageId(), 'getPageId' ); + $this->assertSame( $row->rev_timestamp, $rec->getTimestamp(), 'getTimestamp' ); + $this->assertSame( (int)$row->rev_deleted, $rec->getVisibility(),
'getVisibility' ); + $this->assertSame( (bool)$row->rev_minor_edit, $rec->isMinor(), 'getIsMinor' ); + + if ( isset( $row->rev_parent_id ) ) { + $this->assertSame( (int)$row->rev_parent_id, $rec->getParentId(), 'getParentId' ); + } else { + $this->assertSame( 0, $rec->getParentId(), 'getParentId' ); + } + + if ( isset( $row->rev_len ) ) { + $this->assertSame( (int)$row->rev_len, $rec->getSize(), 'getSize' ); + } else { + $this->assertSame( $slots->computeSize(), $rec->getSize(), 'getSize' ); + } + + if ( isset( $row->rev_sha1 ) ) { + $this->assertSame( $row->rev_sha1, $rec->getSha1(), 'getSha1' ); + } else { + $this->assertSame( $slots->computeSha1(), $rec->getSha1(), 'getSha1' ); + } + + if ( isset( $row->page_latest ) ) { + $this->assertSame( + (int)$row->rev_id === (int)$row->page_latest, + $rec->isCurrent(), + 'isCurrent' + ); + } else { + $this->assertSame( + false, + $rec->isCurrent(), + 'isCurrent' + ); + } + } + + public function provideConstructorFailure() { + $title = Title::newFromText( 'Dummy' ); + $title->resetArticleID( 17 ); + + $user = new UserIdentityValue( 11, 'Tester' ); + + $comment = CommentStoreComment::newUnsavedComment( 'Hello World' ); + + $main = SlotRecord::newUnsaved( 'main', new TextContent( 'Lorem Ipsum' ) ); + $aux = SlotRecord::newUnsaved( 'aux', new TextContent( 'Frumious Bandersnatch' ) ); + $slots = new RevisionSlots( [ $main, $aux ] ); + + $protoRow = [ + 'rev_id' => '7', + 'rev_page' => strval( $title->getArticleID() ), + 'rev_timestamp' => '20200101000000', + 'rev_deleted' => 0, + 'rev_minor_edit' => 0, + 'rev_parent_id' => '5', + 'rev_len' => $slots->computeSize(), + 'rev_sha1' => $slots->computeSha1(), + 'page_latest' => '18', + ]; + + yield 'not a row' => [ + $title, + $user, + $comment, + 'not a row', + $slots, + 'acmewiki' + ]; + + $row = $protoRow; + $row['rev_timestamp'] = 'kittens'; + + yield 'bad timestamp' => [ + $title, + $user, + $comment, + (object)$row, + $slots + ]; + + $row = $protoRow; + $row['rev_page'] = 99; 
+ + yield 'page ID mismatch' => [ + $title, + $user, + $comment, + (object)$row, + $slots + ]; + + $row = $protoRow; + + yield 'bad wiki' => [ + $title, + $user, + $comment, + (object)$row, + $slots, + 12345 + ]; + } + + /** + * @dataProvider provideConstructorFailure + * + * @param Title $title + * @param UserIdentity $user + * @param CommentStoreComment $comment + * @param object $row + * @param RevisionSlots $slots + * @param bool $wikiId + */ + public function testConstructorFailure( + Title $title, + UserIdentity $user, + CommentStoreComment $comment, + $row, + RevisionSlots $slots, + $wikiId = false + ) { + $this->setExpectedException( InvalidArgumentException::class ); + new RevisionStoreRecord( $title, $user, $comment, $row, $slots, $wikiId ); + } + + /** + * Yields data sets for the audience check tests. Each data set is + * [ $visibility, $groups, $userCan, $publicCan ], matching the parameters + * of the test*_audience methods that consume it. + * + * @param int $field The RevisionRecord::DELETED_XXX bit guarding the field under test + * + * @return Generator + */ + private function provideAudienceCheckData( $field ) { + yield 'field accessible for oversighter (ALL)' => [ + RevisionRecord::SUPPRESSED_ALL, + [ 'oversight' ], + true, + false + ]; + + yield 'field accessible for oversighter' => [ + RevisionRecord::DELETED_RESTRICTED | $field, + [ 'oversight' ], + true, + false + ]; + + yield 'field not accessible for sysops (ALL)' => [ + RevisionRecord::SUPPRESSED_ALL, + [ 'sysop' ], + false, + false + ]; + + yield 'field not accessible for sysops' => [ + RevisionRecord::DELETED_RESTRICTED | $field, + [ 'sysop' ], + false, + false + ]; + + yield 'field accessible for sysops' => [ + $field, + [ 'sysop' ], + true, + false + ]; + + yield 'field suppressed for logged in users' => [ + $field, + [ 'user' ], + false, + false + ]; + + // Suppress some other field than the one under test. + yield 'unrelated field suppressed' => [ + $field === RevisionRecord::DELETED_COMMENT + ? 
RevisionRecord::DELETED_USER + : RevisionRecord::DELETED_COMMENT, + [ 'user' ], + true, + true + ]; + + yield 'nothing suppressed' => [ + 0, + [ 'user' ], + true, + true + ]; + } + + public function testSerialization_fails() { + $this->setExpectedException( LogicException::class ); + $rev = $this->newRevision(); + serialize( $rev ); + } + + public function provideGetComment_audience() { + return $this->provideAudienceCheckData( RevisionRecord::DELETED_COMMENT ); + } + + private function forceStandardPermissions() { + $this->setMwGlobals( + 'wgGroupPermissions', + [ + 'user' => [ + 'viewsuppressed' => false, + 'suppressrevision' => false, + 'deletedtext' => false, + 'deletedhistory' => false, + ], + 'sysop' => [ + 'viewsuppressed' => false, + 'suppressrevision' => false, + 'deletedtext' => true, + 'deletedhistory' => true, + ], + 'oversight' => [ + 'deletedtext' => true, + 'deletedhistory' => true, + 'viewsuppressed' => true, + 'suppressrevision' => true, + ], + ] + ); + } + + /** + * @dataProvider provideGetComment_audience + */ + public function testGetComment_audience( $visibility, $groups, $userCan, $publicCan ) { + $this->forceStandardPermissions(); + + $user = $this->getTestUser( $groups )->getUser(); + $rev = $this->newRevision( [ 'rev_deleted' => $visibility ] ); + + $this->assertNotNull( $rev->getComment( RevisionRecord::RAW ), 'raw can' ); + + $this->assertSame( + $publicCan, + $rev->getComment( RevisionRecord::FOR_PUBLIC ) !== null, + 'public can' + ); + $this->assertSame( + $userCan, + $rev->getComment( RevisionRecord::FOR_THIS_USER, $user ) !== null, + 'user can' + ); + } + + public function provideGetUser_audience() { + return $this->provideAudienceCheckData( RevisionRecord::DELETED_USER ); + } + + /** + * @dataProvider provideGetUser_audience + */ + public function testGetUser_audience( $visibility, $groups, $userCan, $publicCan ) { + $this->forceStandardPermissions(); + + $user = $this->getTestUser( $groups )->getUser(); + $rev = $this->newRevision( 
[ 'rev_deleted' => $visibility ] ); + + $this->assertNotNull( $rev->getUser( RevisionRecord::RAW ), 'raw can' ); + + $this->assertSame( + $publicCan, + $rev->getUser( RevisionRecord::FOR_PUBLIC ) !== null, + 'public can' + ); + $this->assertSame( + $userCan, + $rev->getUser( RevisionRecord::FOR_THIS_USER, $user ) !== null, + 'user can' + ); + } + + public function provideGetSlot_audience() { + return $this->provideAudienceCheckData( RevisionRecord::DELETED_TEXT ); + } + + /** + * @dataProvider provideGetSlot_audience + */ + public function testGetSlot_audience( $visibility, $groups, $userCan, $publicCan ) { + $this->forceStandardPermissions(); + + $user = $this->getTestUser( $groups )->getUser(); + $rev = $this->newRevision( [ 'rev_deleted' => $visibility ] ); + + // NOTE: slot meta-data is never suppressed, just the content is! + $this->assertNotNull( $rev->getSlot( 'main', RevisionRecord::RAW ), 'raw can' ); + $this->assertNotNull( $rev->getSlot( 'main', RevisionRecord::FOR_PUBLIC ), 'public can' ); + + $this->assertNotNull( + $rev->getSlot( 'main', RevisionRecord::FOR_THIS_USER, $user ), + 'user can' + ); + + try { + $rev->getSlot( 'main', RevisionRecord::FOR_PUBLIC )->getContent(); + $exception = null; + } catch ( SuppressedDataException $ex ) { + $exception = $ex; + } + + $this->assertSame( + $publicCan, + $exception === null, + 'public can' + ); + + try { + $rev->getSlot( 'main', RevisionRecord::FOR_THIS_USER, $user )->getContent(); + $exception = null; + } catch ( SuppressedDataException $ex ) { + $exception = $ex; + } + + $this->assertSame( + $userCan, + $exception === null, + 'user can' + ); + } + + public function provideGetSlot_audience_latest() { + return $this->provideAudienceCheckData( RevisionRecord::DELETED_TEXT ); + } + + /** + * @dataProvider provideGetSlot_audience_latest + */ + public function testGetSlot_audience_latest( $visibility, $groups, $userCan, $publicCan ) { + $this->forceStandardPermissions(); + + $user = $this->getTestUser( $groups 
)->getUser(); + $rev = $this->newRevision( + [ + 'rev_deleted' => $visibility, + 'rev_id' => 11, + 'page_latest' => 11, // revision is current + ] + ); + + // sanity check + $this->assertTrue( $rev->isCurrent(), 'isCurrent()' ); + + // NOTE: slot meta-data is never suppressed, just the content is! + $this->assertNotNull( $rev->getSlot( 'main', RevisionRecord::RAW ), 'raw can' ); + $this->assertNotNull( $rev->getSlot( 'main', RevisionRecord::FOR_PUBLIC ), 'public can' ); + + $this->assertNotNull( + $rev->getSlot( 'main', RevisionRecord::FOR_THIS_USER, $user ), + 'user can' + ); + + // NOTE: the content of the current revision is never suppressed! + // Check that getContent() doesn't throw SuppressedDataException + $rev->getSlot( 'main', RevisionRecord::RAW )->getContent(); + $rev->getSlot( 'main', RevisionRecord::FOR_PUBLIC )->getContent(); + $rev->getSlot( 'main', RevisionRecord::FOR_THIS_USER, $user )->getContent(); + } + + /** + * @dataProvider provideGetSlot_audience + */ + public function testGetContent_audience( $visibility, $groups, $userCan, $publicCan ) { + $this->forceStandardPermissions(); + + $user = $this->getTestUser( $groups )->getUser(); + $rev = $this->newRevision( [ 'rev_deleted' => $visibility ] ); + + $this->assertNotNull( $rev->getContent( 'main', RevisionRecord::RAW ), 'raw can' ); + + $this->assertSame( + $publicCan, + $rev->getContent( 'main', RevisionRecord::FOR_PUBLIC ) !== null, + 'public can' + ); + $this->assertSame( + $userCan, + $rev->getContent( 'main', RevisionRecord::FOR_THIS_USER, $user ) !== null, + 'user can' + ); + } + + public function testGetSlot() { + $rev = $this->newRevision(); + + $slot = $rev->getSlot( 'main' ); + $this->assertNotNull( $slot, 'getSlot()' ); + $this->assertSame( 'main', $slot->getRole(), 'getRole()' ); + } + + public function testGetContent() { + $rev = $this->newRevision(); + + $content = $rev->getSlot( 'main' ); + $this->assertNotNull( $content, 'getContent()' ); + $this->assertSame( CONTENT_MODEL_TEXT, 
$content->getModel(), 'getModel()' ); + } + + public function provideUserCanBitfield() { + yield [ 0, 0, [], null, true ]; + // Bitfields match, user has no permissions + yield [ + RevisionRecord::DELETED_TEXT, + RevisionRecord::DELETED_TEXT, + [], + null, + false + ]; + yield [ + RevisionRecord::DELETED_COMMENT, + RevisionRecord::DELETED_COMMENT, + [], + null, + false, + ]; + yield [ + RevisionRecord::DELETED_USER, + RevisionRecord::DELETED_USER, + [], + null, + false + ]; + yield [ + RevisionRecord::DELETED_RESTRICTED, + RevisionRecord::DELETED_RESTRICTED, + [], + null, + false, + ]; + // Bitfields match, user (admin) does have permissions + yield [ + RevisionRecord::DELETED_TEXT, + RevisionRecord::DELETED_TEXT, + [ 'sysop' ], + null, + true, + ]; + yield [ + RevisionRecord::DELETED_COMMENT, + RevisionRecord::DELETED_COMMENT, + [ 'sysop' ], + null, + true, + ]; + yield [ + RevisionRecord::DELETED_USER, + RevisionRecord::DELETED_USER, + [ 'sysop' ], + null, + true, + ]; + // Bitfields match, user (admin) does not have permissions + yield [ + RevisionRecord::DELETED_RESTRICTED, + RevisionRecord::DELETED_RESTRICTED, + [ 'sysop' ], + null, + false, + ]; + // Bitfields match, user (oversight) does have permissions + yield [ + RevisionRecord::DELETED_RESTRICTED, + RevisionRecord::DELETED_RESTRICTED, + [ 'oversight' ], + null, + true, + ]; + // Check permissions using the title + yield [ + RevisionRecord::DELETED_TEXT, + RevisionRecord::DELETED_TEXT, + [ 'sysop' ], + Title::newFromText( __METHOD__ ), + true, + ]; + yield [ + RevisionRecord::DELETED_TEXT, + RevisionRecord::DELETED_TEXT, + [], + Title::newFromText( __METHOD__ ), + false, + ]; + } + + /** + * @dataProvider provideUserCanBitfield + * @covers RevisionRecord::userCanBitfield + */ + public function testUserCanBitfield( $bitField, $field, $userGroups, $title, $expected ) { + $this->forceStandardPermissions(); + + $user = $this->getTestUser( $userGroups )->getUser(); + + $this->assertSame( + $expected, + 
RevisionRecord::userCanBitfield( $bitField, $field, $user, $title ) + ); + } + + public function testHasSameContent() { + // TBD + } + + public function testIsDeleted() { + // TBD + } + + public function testUserCan() { + // TBD + } + +} -- 2.20.1