3 * A handle for managing updates for derived page data on edit, import, purge, etc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 namespace MediaWiki\Storage
;
26 use CategoryMembershipChangeJob
;
33 use InvalidArgumentException
;
38 use MediaWiki\Edit\PreparedEdit
;
39 use MediaWiki\MediaWikiServices
;
40 use MediaWiki\User\UserIdentity
;
45 use Psr\Log\LoggerInterface
;
46 use Psr\Log\NullLogger
;
47 use RecentChangesUpdateJob
;
48 use ResourceLoaderWikiModule
;
54 use Wikimedia\Assert\Assert
;
58 * A handle for managing updates for derived page data on edit, import, purge, etc.
60 * @note Avoid direct usage of DerivedPageDataUpdater.
62 * @todo Define interfaces for the different use cases of DerivedPageDataUpdater, particularly
63 * providing access to post-PST content and ParserOutput to callbacks during revision creation,
64 * which currently use WikiPage::prepareContentForEdit, and allowing updates to be triggered on
65 * purge, import, and undeletion, which currently use WikiPage::doEditUpdates() and
66 * Content::getSecondaryDataUpdates().
68 * DerivedPageDataUpdater instances are designed to be cached inside a WikiPage instance,
69 * and re-used by callback code over the course of an update operation. It's a stepping stone
70 * on the way to a more complete refactoring of WikiPage.
72 * When using a DerivedPageDataUpdater, the following life cycle must be observed:
73 * grabCurrentRevision (optional), prepareContent (optional), prepareUpdate (required
74 * for doUpdates). getCanonicalParserOutput, getSlots, and getSecondaryDataUpdates
75 * require prepareContent or prepareUpdate to have been called first, to initialize the
76 * DerivedPageDataUpdater.
78 * @see docs/pageupdater.txt for more information.
80 * MCR migration note: this replaces the relevant methods in WikiPage, and covers the use cases
88 class DerivedPageDataUpdater
implements IDBAccessObject
{
91 * @var UserIdentity|null
103 private $parserCache;
108 private $revisionStore;
113 private $contentLanguage;
116 * @var LoggerInterface
118 private $saveParseLogger;
123 private $jobQueueGroup;
128 private $messageCache;
131 * @var string see $wgArticleCountMethod
133 private $articleCountMethod;
136 * @var boolean see $wgRCWatchCategoryMembership
138 private $rcWatchCategoryMembership = false;
141 * See $options on prepareUpdate.
148 'oldcountable' => null,
149 'oldredirect' => null,
153 * The state of the relevant row in page table before the edit.
154 * This is determined by the first call to grabCurrentRevision, prepareContent,
156 * If pageState was not initialized when prepareUpdate() is called, prepareUpdate() will
157 * attempt to emulate the state of the page table before the edit.
161 private $pageState = null;
164 * @var RevisionSlotsUpdate|null
166 private $slotsUpdate = null;
169 * @var MutableRevisionSlots|null
171 private $pstContentSlots = null;
174 * @var object[] anonymous objects with two fields, using slot roles as keys:
175 * - hasHtml: whether the output contains HTML
176 * - output: the slot's ParserOutput
178 private $slotsOutput = [];
181 * @var ParserOutput|null
183 private $canonicalParserOutput = null;
186 * @var ParserOptions|null
188 private $canonicalParserOptions = null;
191 * @var RevisionRecord
193 private $revision = null;
196 * A stage identifier for managing the life cycle of this instance.
197 * Possible stages are 'new', 'knows-current', 'has-content', 'has-revision', and 'done'.
199 * @see docs/pageupdater.txt for documentation of the life cycle.
203 private $stage = 'new';
206 * Transition table for managing the life cycle of DerivedPageDataUpdater instances.
208 * XXX: Overkill. This is a linear order, we could just count. Names are nice though,
209 * and constants are also overkill...
211 * @see docs/pageupdater.txt for documentation of the life cycle.
215 private static $transitions = [
218 'knows-current' => true,
219 'has-content' => true,
220 'has-revision' => true,
223 'knows-current' => true,
224 'has-content' => true,
225 'has-revision' => true,
228 'has-content' => true,
229 'has-revision' => true,
232 'has-revision' => true,
238 * @param WikiPage $wikiPage
239 * @param RevisionStore $revisionStore
240 * @param ParserCache $parserCache
241 * @param JobQueueGroup $jobQueueGroup
242 * @param MessageCache $messageCache
243 * @param Language $contentLanguage
244 * @param LoggerInterface $saveParseLogger
246 public function __construct(
248 RevisionStore
$revisionStore,
249 ParserCache
$parserCache,
250 JobQueueGroup
$jobQueueGroup,
251 MessageCache
$messageCache,
252 Language
$contentLanguage,
253 LoggerInterface
$saveParseLogger = null
255 $this->wikiPage
= $wikiPage;
257 $this->parserCache
= $parserCache;
258 $this->revisionStore
= $revisionStore;
259 $this->jobQueueGroup
= $jobQueueGroup;
260 $this->messageCache
= $messageCache;
261 $this->contentLanguage
= $contentLanguage;
263 // XXX: replace all wfDebug calls with a Logger. Do we need more than one logger here?
264 $this->saveParseLogger
= $saveParseLogger ?
: new NullLogger();
268 * Transition function for managing the life cycle of this instance.
270 * @see docs/pageupdater.txt for documentation of the life cycle.
272 * @param string $newStage the new stage
273 * @return string the previous stage
275 * @throws LogicException If a transition to the given stage is not possible in the current
278 private function doTransition( $newStage ) {
279 $this->assertTransition( $newStage );
281 $oldStage = $this->stage
;
282 $this->stage
= $newStage;
288 * Asserts that a transition to the given stage is possible, without performing it.
290 * @see docs/pageupdater.txt for documentation of the life cycle.
292 * @param string $newStage the new stage
294 * @throws LogicException If this instance is not in the expected stage
296 private function assertTransition( $newStage ) {
297 if ( empty( self
::$transitions[$this->stage
][$newStage] ) ) {
298 throw new LogicException( "Cannot transition from {$this->stage} to $newStage" );
303 * @return bool|string
305 private function getWikiId() {
306 // TODO: get from RevisionStore
311 * Checks whether this DerivedPageDataUpdater can be re-used for running updates targeting
312 * the given revision.
314 * @param UserIdentity|null $user The user creating the revision in question
315 * @param RevisionRecord|null $revision New revision (after save, if already saved)
316 * @param RevisionSlotsUpdate|null $slotsUpdate New content (before PST)
317 * @param null|int $parentId Parent revision of the edit (use 0 for page creation)
321 public function isReusableFor(
322 UserIdentity
$user = null,
323 RevisionRecord
$revision = null,
324 RevisionSlotsUpdate
$slotsUpdate = null,
329 && $revision->getParentId() !== $parentId
331 throw new InvalidArgumentException( '$parentId should match the parent of $revision' );
336 && $revision->getUser( RevisionRecord
::RAW
)->getName() !== $user->getName()
338 throw new InvalidArgumentException( '$user should match the author of $revision' );
341 if ( $user && $this->user
&& $user->getName() !== $this->user
->getName() ) {
345 if ( $revision && $this->revision
&& $this->revision
->getId() !== $revision->getId() ) {
349 if ( $revision && !$user ) {
350 $user = $revision->getUser( RevisionRecord
::RAW
);
353 if ( $this->pageState
355 && $revision->getParentId() !== null
356 && $this->pageState
['oldId'] !== $revision->getParentId()
361 if ( $this->pageState
362 && $parentId !== null
363 && $this->pageState
['oldId'] !== $parentId
370 && $this->revision
->getUser( RevisionRecord
::RAW
)->getName() !== $user->getName()
377 && $revision->getUser( RevisionRecord
::RAW
)->getName() !== $this->user
->getName()
382 // NOTE: this check is the primary reason for having the $this->slotsUpdate field!
383 if ( $this->slotsUpdate
385 && !$this->slotsUpdate
->hasSameUpdates( $slotsUpdate )
390 if ( $this->pstContentSlots
392 && !$this->pstContentSlots
->hasSameContent( $revision->getSlots() )
401 * @param string $articleCountMethod "any" or "link".
402 * @see $wgArticleCountMethod
404 public function setArticleCountMethod( $articleCountMethod ) {
405 $this->articleCountMethod
= $articleCountMethod;
409 * @param bool $rcWatchCategoryMembership
410 * @see $wgRCWatchCategoryMembership
412 public function setRcWatchCategoryMembership( $rcWatchCategoryMembership ) {
413 $this->rcWatchCategoryMembership
= $rcWatchCategoryMembership;
419 private function getTitle() {
420 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
421 return $this->wikiPage
->getTitle();
427 private function getWikiPage() {
428 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
429 return $this->wikiPage
;
433 * Determines whether the page being edited already existed.
434 * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
437 * @throws LogicException if called before grabCurrentRevision
439 public function pageExisted() {
440 $this->assertHasPageState( __METHOD__
);
442 return $this->pageState
['oldId'] > 0;
446 * Returns the revision that was current before the edit. This would be null if the edit
447 * created the page, or the revision's parent for a regular edit, or the revision itself
449 * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
451 * @return RevisionRecord|null the revision that was current before the edit, or null if
452 * the edit created the page.
454 private function getOldRevision() {
455 $this->assertHasPageState( __METHOD__
);
457 // If 'oldRevision' is not set, load it!
458 // Useful if $this->pageState is initialized by prepareUpdate.
459 if ( !array_key_exists( 'oldRevision', $this->pageState
) ) {
460 /** @var int $oldId */
461 $oldId = $this->pageState
['oldId'];
462 $flags = $this->useMaster() ? RevisionStore
::READ_LATEST
: 0;
463 $this->pageState
['oldRevision'] = $oldId
464 ?
$this->revisionStore
->getRevisionById( $oldId, $flags )
468 return $this->pageState
['oldRevision'];
472 * Returns the revision that was the page's current revision when grabCurrentRevision()
475 * During an edit, that revision will act as the logical parent of the new revision.
477 * Some updates are performed based on the difference between the database state at the
478 * moment this method is first called, and the state after the edit.
480 * @see docs/pageupdater.txt for more information on when this method can and should be called.
482 * @note After prepareUpdate() was called, grabCurrentRevision() will throw an exception
483 * to avoid confusion, since the page's current revision is then the new revision after
484 * the edit, which was presumably passed to prepareUpdate() as the $revision parameter.
485 * Use getOldRevision() instead to access the revision that used to be current before the
488 * @return RevisionRecord|null the page's current revision, or null if the page does not
491 public function grabCurrentRevision() {
492 if ( $this->pageState
) {
493 return $this->pageState
['oldRevision'];
496 $this->assertTransition( 'knows-current' );
498 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
499 $wikiPage = $this->getWikiPage();
501 // Do not call WikiPage::clear(), since the caller may already have caused page data
502 // to be loaded with SELECT FOR UPDATE. Just assert it's loaded now.
503 $wikiPage->loadPageData( self
::READ_LATEST
);
504 $rev = $wikiPage->getRevision();
505 $current = $rev ?
$rev->getRevisionRecord() : null;
508 'oldRevision' => $current,
509 'oldId' => $rev ?
$rev->getId() : 0,
510 'oldIsRedirect' => $wikiPage->isRedirect(), // NOTE: uses page table
511 'oldCountable' => $wikiPage->isCountable(), // NOTE: uses pagelinks table
514 $this->doTransition( 'knows-current' );
516 return $this->pageState
['oldRevision'];
520 * Whether prepareUpdate() or prepareContent() has been called on this instance.
524 public function isContentPrepared() {
525 return $this->pstContentSlots
!== null;
529 * Whether prepareUpdate() has been called on this instance.
533 public function isUpdatePrepared() {
534 return $this->revision
!== null;
540 private function getPageId() {
541 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
542 return $this->wikiPage
->getId();
548 private function getTimestampNow() {
549 // TODO: allow an override to be injected for testing
550 return wfTimestampNow();
554 * Whether the content of the target revision is publicly visible.
558 public function isContentPublic() {
559 if ( $this->revision
) {
560 // XXX: if that revision is the current revision, this can be skipped
561 return !$this->revision
->isDeleted( RevisionRecord
::DELETED_TEXT
);
563 // If the content has not been saved yet, it cannot have been suppressed yet.
569 * Returns the slot, modified or inherited, after PST, with no audience checks applied.
571 * @param string $role slot role name
573 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
577 public function getRawSlot( $role ) {
578 return $this->getSlots()->getSlot( $role );
582 * Returns the content of the given slot, with no audience checks.
584 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
586 * @param string $role slot role name
589 public function getRawContent( $role ) {
590 return $this->getRawSlot( $role )->getContent();
594 * Returns the content model of the given slot
596 * @param string $role slot role name
599 private function getContentModel( $role ) {
600 return $this->getRawSlot( $role )->getModel();
604 * @param string $role slot role name
605 * @return ContentHandler
607 private function getContentHandler( $role ) {
608 // TODO: inject something like a ContentHandlerRegistry
609 return ContentHandler
::getForModelID( $this->getContentModel( $role ) );
612 private function useMaster() {
613 // TODO: can we just set a flag to true in prepareContent()?
614 return $this->wikiPage
->wasLoadedFrom( self
::READ_LATEST
);
620 public function isCountable() {
621 // NOTE: Keep in sync with WikiPage::isCountable.
623 if ( !$this->getTitle()->isContentPage() ) {
627 if ( !$this->isContentPublic() ) {
628 // This should be irrelevant: countability only applies to the current revision,
629 // and the current revision is never suppressed.
633 if ( $this->isRedirect() ) {
639 if ( $this->articleCountMethod
=== 'link' ) {
640 $hasLinks = (bool)count( $this->getCanonicalParserOutput()->getLinks() );
643 // TODO: MCR: ask all slots if they have links [SlotHandler/PageTypeHandler]
644 $mainContent = $this->getRawContent( 'main' );
645 return $mainContent->isCountable( $hasLinks );
651 public function isRedirect() {
652 // NOTE: main slot determines redirect status
653 $mainContent = $this->getRawContent( 'main' );
655 return $mainContent->isRedirect();
659 * @param RevisionRecord $rev
663 private function revisionIsRedirect( RevisionRecord
$rev ) {
664 // NOTE: main slot determines redirect status
665 $mainContent = $rev->getContent( 'main', RevisionRecord
::RAW
);
667 return $mainContent->isRedirect();
671 * Prepare updates based on an update which has not yet been saved.
673 * This may be used to create derived data that is needed when creating a new revision;
674 * particularly, this makes available the slots of the new revision via the getSlots()
675 * method, after applying PST and slot inheritance.
677 * The derived data prepared for revision creation may then later be re-used by doUpdates(),
678 * without the need to re-calculate.
680 * @see docs/pageupdater.txt for more information on when this method can and should be called.
682 * @note: Calling this method more than once with the same $slotsUpdate
683 * has no effect. Calling this method multiple times with different content will cause
686 * @note: Calling this method after prepareUpdate() has been called will cause an exception.
688 * @param User $user The user to act as context for pre-save transformation (PST).
689 * Type hint should be reduced to UserIdentity at some point.
690 * @param RevisionSlotsUpdate $slotsUpdate The new content of the slots to be updated
691 * by this edit, before PST.
692 * @param bool $useStash Whether to use stashed ParserOutput
694 public function prepareContent(
696 RevisionSlotsUpdate
$slotsUpdate,
699 if ( $this->slotsUpdate
) {
700 if ( !$this->user
) {
701 throw new LogicException(
702 'Unexpected state: $this->slotsUpdate was initialized, '
703 . 'but $this->user was not.'
707 if ( $this->user
->getName() !== $user->getName() ) {
708 throw new LogicException( 'Can\'t call prepareContent() again for different user! '
709 . 'Expected ' . $this->user
->getName() . ', got ' . $user->getName()
713 if ( !$this->slotsUpdate
->hasSameUpdates( $slotsUpdate ) ) {
714 throw new LogicException(
715 'Can\'t call prepareContent() again with different slot content!'
719 return; // prepareContent() already done, nothing to do
722 $this->assertTransition( 'has-content' );
724 $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
725 $title = $this->getTitle();
727 $parentRevision = $this->grabCurrentRevision();
729 $this->slotsOutput
= [];
730 $this->canonicalParserOutput
= null;
731 $this->canonicalParserOptions
= null;
733 // The edit may have already been prepared via api.php?action=stashedit
734 $stashedEdit = false;
736 // TODO: MCR: allow output for all slots to be stashed.
737 if ( $useStash && $slotsUpdate->isModifiedSlot( 'main' ) ) {
738 $mainContent = $slotsUpdate->getModifiedSlot( 'main' )->getContent();
739 $legacyUser = User
::newFromIdentity( $user );
740 $stashedEdit = ApiStashEdit
::checkCache( $title, $mainContent, $legacyUser );
743 if ( $stashedEdit ) {
744 /** @var ParserOutput $output */
745 $output = $stashedEdit->output
;
747 // TODO: this should happen when stashing the ParserOutput, not now!
748 $output->setCacheTime( $stashedEdit->timestamp
);
750 // TODO: MCR: allow output for all slots to be stashed.
751 $this->canonicalParserOutput
= $output;
754 $userPopts = ParserOptions
::newFromUserAndLang( $user, $this->contentLanguage
);
755 Hooks
::run( 'ArticlePrepareTextForEdit', [ $wikiPage, $userPopts ] );
758 $this->slotsUpdate
= $slotsUpdate;
760 if ( $parentRevision ) {
761 // start out by inheriting all parent slots
762 $this->pstContentSlots
= MutableRevisionSlots
::newFromParentRevisionSlots(
763 $parentRevision->getSlots()->getSlots()
766 $this->pstContentSlots
= new MutableRevisionSlots();
769 foreach ( $slotsUpdate->getModifiedRoles() as $role ) {
770 $slot = $slotsUpdate->getModifiedSlot( $role );
772 if ( $slot->isInherited() ) {
773 // No PST for inherited slots! Note that "modified" slots may still be inherited
774 // from an earlier version, e.g. for rollbacks.
776 } elseif ( $role === 'main' && $stashedEdit ) {
777 // TODO: MCR: allow PST content for all slots to be stashed.
778 $pstSlot = SlotRecord
::newUnsaved( $role, $stashedEdit->pstContent
);
780 $content = $slot->getContent();
781 $pstContent = $content->preSaveTransform( $title, $this->user
, $userPopts );
782 $pstSlot = SlotRecord
::newUnsaved( $role, $pstContent );
785 $this->pstContentSlots
->setSlot( $pstSlot );
788 foreach ( $slotsUpdate->getRemovedRoles() as $role ) {
789 $this->pstContentSlots
->removeSlot( $role );
792 $this->options
['created'] = ( $parentRevision === null );
793 $this->options
['changed'] = ( $parentRevision === null
794 ||
!$this->pstContentSlots
->hasSameContent( $parentRevision->getSlots() ) );
796 $this->doTransition( 'has-content' );
799 private function assertHasPageState( $method ) {
800 if ( !$this->pageState
) {
801 throw new LogicException(
802 'Must call grabCurrentRevision() or prepareContent() '
803 . 'or prepareUpdate() before calling ' . $method
808 private function assertPrepared( $method ) {
809 if ( !$this->pstContentSlots
) {
810 throw new LogicException(
811 'Must call prepareContent() or prepareUpdate() before calling ' . $method
817 * Whether the edit creates the page.
821 public function isCreation() {
822 $this->assertPrepared( __METHOD__
);
823 return $this->options
['created'];
827 * Whether the edit created, or should create, a new revision (that is, it's not a null-edit).
829 * @warning: at present, "null-revisions" that do not change content but do have a revision
830 * record would return false after prepareContent(), but true after prepareUpdate()!
831 * This should probably be fixed.
835 public function isChange() {
836 $this->assertPrepared( __METHOD__
);
837 return $this->options
['changed'];
841 * Whether the page was a redirect before the edit.
845 public function wasRedirect() {
846 $this->assertHasPageState( __METHOD__
);
848 if ( $this->pageState
['oldIsRedirect'] === null ) {
849 /** @var RevisionRecord $rev */
850 $rev = $this->pageState
['oldRevision'];
852 $this->pageState
['oldIsRedirect'] = $this->revisionIsRedirect( $rev );
854 $this->pageState
['oldIsRedirect'] = false;
858 return $this->pageState
['oldIsRedirect'];
862 * Returns the slots of the target revision, after PST.
864 * @return RevisionSlots
866 public function getSlots() {
867 $this->assertPrepared( __METHOD__
);
868 return $this->pstContentSlots
;
872 * Returns the RevisionSlotsUpdate for this updater.
874 * @return RevisionSlotsUpdate
876 private function getRevisionSlotsUpdate() {
877 $this->assertPrepared( __METHOD__
);
879 if ( !$this->slotsUpdate
) {
880 if ( !$this->revision
) {
881 // This should not be possible: if assertPrepared() returns true,
882 // at least one of $this->slotsUpdate or $this->revision should be set.
883 throw new LogicException( 'No revision nor a slots update is known!' );
886 $old = $this->getOldRevision();
887 $this->slotsUpdate
= RevisionSlotsUpdate
::newFromRevisionSlots(
888 $this->revision
->getSlots(),
889 $old ?
$old->getSlots() : null
892 return $this->slotsUpdate
;
896 * Returns the role names of the slots touched by the new revision,
897 * including removed roles.
901 public function getTouchedSlotRoles() {
902 return $this->getRevisionSlotsUpdate()->getTouchedRoles();
906 * Returns the role names of the slots modified by the new revision,
907 * not including removed roles.
911 public function getModifiedSlotRoles() {
912 return $this->getRevisionSlotsUpdate()->getModifiedRoles();
916 * Returns the role names of the slots removed by the new revision.
920 public function getRemovedSlotRoles() {
921 return $this->getRevisionSlotsUpdate()->getRemovedRoles();
925 * Prepare derived data updates targeting the given Revision.
927 * Calling this method requires the given revision to be present in the database.
928 * This may be right after a new revision has been created, or when re-generating
929 * derived data e.g. in ApiPurge, RefreshLinksJob, and the refreshLinks
932 * @see docs/pageupdater.txt for more information on when this method can and should be called.
934 * @note: Calling this method more than once with the same revision has no effect.
935 * $options are only used for the first call. Calling this method multiple times with
936 * different revisions will cause an exception.
938 * @note: If grabCurrentRevision() (or prepareContent()) has been called before
939 * calling this method, $revision->getParentRevision() has to refer to the revision that
940 * was the current revision at the time grabCurrentRevision() was called.
942 * @param RevisionRecord $revision
943 * @param array $options Array of options, following indexes are used:
944 * - changed: bool, whether the revision changed the content (default true)
945 * - created: bool, whether the revision created the page (default false)
946 * - moved: bool, whether the page was moved (default false)
947 * - restored: bool, whether the page was undeleted (default false)
948 * - oldrevision: Revision or RevisionRecord object for the pre-update revision (default null)
949 * - parseroutput: The canonical ParserOutput of $revision (default null)
950 * - triggeringuser: The user triggering the update (UserIdentity, default null)
951 * - oldredirect: bool, null, or string 'no-change' (default null):
952 * - bool: whether the page was counted as a redirect before that
953 * revision, only used if changed is true and created is false
954 * - null or 'no-change': don't update the redirect status.
955 * - oldcountable: bool, null, or string 'no-change' (default null):
956 * - bool: whether the page was counted as an article before that
957 * revision, only used if changed is true and created is false
958 * - null: if created is false, don't update the article count; if created
959 * is true, do update the article count
960 * - 'no-change': don't update the article count, ever
963 public function prepareUpdate( RevisionRecord
$revision, array $options = [] ) {
965 !isset( $options['oldrevision'] )
966 ||
$options['oldrevision'] instanceof Revision
967 ||
$options['oldrevision'] instanceof RevisionRecord
,
968 '$options["oldrevision"]',
969 'must be a RevisionRecord (or Revision)'
972 !isset( $options['parseroutput'] )
973 ||
$options['parseroutput'] instanceof ParserOutput
,
974 '$options["parseroutput"]',
975 'must be a ParserOutput'
978 !isset( $options['triggeringuser'] )
979 ||
$options['triggeringuser'] instanceof UserIdentity
,
980 '$options["triggeringuser"]',
981 'must be a UserIdentity'
984 if ( !$revision->getId() ) {
985 throw new InvalidArgumentException(
986 'Revision must have an ID set for it to be used with prepareUpdate()!'
990 if ( $this->revision
) {
991 if ( $this->revision
->getId() === $revision->getId() ) {
992 return; // nothing to do!
994 throw new LogicException(
995 'Trying to re-use DerivedPageDataUpdater with revision '
997 . ', but it\'s already bound to revision '
998 . $this->revision
->getId()
1003 if ( $this->pstContentSlots
1004 && !$this->pstContentSlots
->hasSameContent( $revision->getSlots() )
1006 throw new LogicException(
1007 'The Revision provided has mismatching content!'
1011 // Override fields defined in $this->options with values from $options.
1012 $this->options
= array_intersect_key( $options, $this->options
) +
$this->options
;
1014 if ( isset( $this->pageState
['oldId'] ) ) {
1015 $oldId = $this->pageState
['oldId'];
1016 } elseif ( isset( $this->options
['oldrevision'] ) ) {
1017 /** @var Revision|RevisionRecord $oldRev */
1018 $oldRev = $this->options
['oldrevision'];
1019 $oldId = $oldRev->getId();
1021 $oldId = $revision->getParentId();
1024 if ( $oldId !== null ) {
1025 // XXX: what if $options['changed'] disagrees?
1026 // MovePage creates a dummy revision with changed = false!
1027 // We may want to explicitly distinguish between "no new revision" (null-edit)
1028 // and "new revision without new content" (dummy revision).
1030 if ( $oldId === $revision->getParentId() ) {
1031 // NOTE: this may still be a NullRevision!
1033 $this->options
['changed'] = true;
1034 } elseif ( $oldId === $revision->getId() ) {
1036 $this->options
['changed'] = false;
1038 // This indicates that calling code has given us the wrong Revision object
1039 throw new LogicException(
1040 'The Revision mismatches old revision ID: '
1041 . 'Old ID is ' . $oldId
1042 . ', parent ID is ' . $revision->getParentId()
1043 . ', revision ID is ' . $revision->getId()
1048 // If prepareContent() was used to generate the PST content (which is indicated by
1049 // $this->slotsUpdate being set), and this is not a null-edit, then the given
1050 // revision must have the acting user as the revision author. Otherwise, user
1051 // signatures generated by PST would mismatch the user in the revision record.
1052 if ( $this->user
!== null && $this->options
['changed'] && $this->slotsUpdate
) {
1053 $user = $revision->getUser();
1054 if ( !$this->user
->equals( $user ) ) {
1055 throw new LogicException(
1056 'The Revision provided has a mismatching actor: expected '
1057 .$this->user
->getName()
1064 // If $this->pageState was not yet initialized by grabCurrentRevision or prepareContent,
1065 // emulate the state of the page table before the edit, as well as we can.
1066 if ( !$this->pageState
) {
1067 $this->pageState
= [
1068 'oldIsRedirect' => isset( $this->options
['oldredirect'] )
1069 && is_bool( $this->options
['oldredirect'] )
1070 ?
$this->options
['oldredirect']
1072 'oldCountable' => isset( $this->options
['oldcountable'] )
1073 && is_bool( $this->options
['oldcountable'] )
1074 ?
$this->options
['oldcountable']
1078 if ( $this->options
['changed'] ) {
1079 // The edit created a new revision
1080 $this->pageState
['oldId'] = $revision->getParentId();
1082 if ( isset( $this->options
['oldrevision'] ) ) {
1083 $rev = $this->options
['oldrevision'];
1084 $this->pageState
['oldRevision'] = $rev instanceof Revision
1085 ?
$rev->getRevisionRecord()
1089 // This is a null-edit, so the old revision IS the new revision!
1090 $this->pageState
['oldId'] = $revision->getId();
1091 $this->pageState
['oldRevision'] = $revision;
1095 // "created" is forced here
1096 $this->options
['created'] = ( $this->pageState
['oldId'] === 0 );
1098 $this->revision
= $revision;
1099 $this->pstContentSlots
= $revision->getSlots();
1101 $this->doTransition( 'has-revision' );
1103 // NOTE: in case we have a User object, don't override with a UserIdentity.
1104 // We already checked that $revision->getUser() matches $this->user.
1105 if ( !$this->user
) {
1106 $this->user
= $revision->getUser( RevisionRecord
::RAW
);
1109 // Prune any output that depends on the revision ID.
1110 if ( $this->canonicalParserOutput
) {
1111 if ( $this->outputVariesOnRevisionMetaData( $this->canonicalParserOutput
, __METHOD__
) ) {
1112 $this->canonicalParserOutput
= null;
1115 $this->saveParseLogger
->debug( __METHOD__
. ": No prepared canonical output...\n" );
1118 if ( $this->slotsOutput
) {
1119 foreach ( $this->slotsOutput
as $role => $prep ) {
1120 if ( $this->outputVariesOnRevisionMetaData( $prep->output
, __METHOD__
) ) {
1121 unset( $this->slotsOutput
[$role] );
1125 $this->saveParseLogger
->debug( __METHOD__
. ": No prepared output...\n" );
1128 // reset ParserOptions, so the actual revision ID is used in future ParserOutput generation
1129 $this->canonicalParserOptions
= null;
1131 // Avoid re-generating the canonical ParserOutput if it's known.
1132 // We just trust that the caller is passing the correct ParserOutput!
1133 if ( isset( $options['parseroutput'] ) ) {
1134 $this->canonicalParserOutput
= $options['parseroutput'];
1137 // TODO: optionally get ParserOutput from the ParserCache here.
1138 // Move the logic used by RefreshLinksJob here!
1142 * @param ParserOutput $out
1143 * @param string $method
1146 private function outputVariesOnRevisionMetaData( ParserOutput
$out, $method = __METHOD__
) {
// Inspects the flags of a prepared ParserOutput and logs, prefixed with $method, which
// revision-dependent flag (if any) was found on it.
// $out is the output to inspect; $method is only used as the log-message prefix.
// NOTE(review): the return statements of the individual branches are not visible in this
// excerpt. The visible callers use the result as an `if` condition and drop the prepared
// output when it is truthy, so the logged branches presumably return true - confirm.

// Case 1: the output carries the 'vary-revision' flag.
1147 if ( $out->getFlag( 'vary-revision' ) ) {
1148 // XXX: Just keep the output if the speculative revision ID was correct, like below?
1149 $this->saveParseLogger
->info(
1150 "$method: Prepared output has vary-revision...\n"
// Case 2: the output carries 'vary-revision-id' and the speculative revision ID recorded
// in it is not identical (strict !==) to the ID of $this->revision.
1153 } elseif ( $out->getFlag( 'vary-revision-id' )
1154 && $out->getSpeculativeRevIdUsed() !== $this->revision
->getId()
1156 $this->saveParseLogger
->info(
1157 "$method: Prepared output has vary-revision-id with wrong ID...\n"
// Case 3: the output carries 'vary-user' and $this->options['changed'] is falsy.
1160 } elseif ( $out->getFlag( 'vary-user' )
1161 && !$this->options
['changed']
1163 // When Alice makes a null-edit on top of Bob's edit,
1164 // {{REVISIONUSER}} must resolve to "Bob", not "Alice", see T135261.
1165 // TODO: to avoid this, we should check for null-edits in getCanonicalParserOptions,
1166 // and set setCurrentRevisionCallback to return the existing revision when appropriate.
1167 // See also the comment there [dk 2018-05]
1168 $this->saveParseLogger
->info(
1169 "$method: Prepared output has vary-user and is null-edit...\n"
// Presumably the fall-through branch (its keyword is not visible in this excerpt): the
// message is sent through wfDebug() rather than $this->saveParseLogger.
1173 wfDebug( "$method: Keeping prepared output...\n" );
1179 * @deprecated This only exists for B/C, use the getters on DerivedPageDataUpdater directly!
1180 * @return PreparedEdit
1182 public function getPreparedEdit() {
// Builds a PreparedEdit object and fills its public fields from this updater's state.
// Calls assertPrepared() first, passing the method name.
1183 $this->assertPrepared( __METHOD__
);
1185 $slotsUpdate = $this->getRevisionSlotsUpdate();
1186 $preparedEdit = new PreparedEdit();
// Canonical parser options and canonical parser output of this updater.
1188 $preparedEdit->popts
= $this->getCanonicalParserOptions();
1189 $preparedEdit->output
= $this->getCanonicalParserOutput();
// Content of the 'main' slot as held in $this->pstContentSlots.
1190 $preparedEdit->pstContent
= $this->pstContentSlots
->getContent( 'main' );
// newContent: taken from the slots update when it reports 'main' as modified, otherwise
// the same 'main' content from $this->pstContentSlots that was used for pstContent.
1191 $preparedEdit->newContent
=
1192 $slotsUpdate->isModifiedSlot( 'main' )
1193 ?
$slotsUpdate->getModifiedSlot( 'main' )->getContent()
1194 : $this->pstContentSlots
->getContent( 'main' ); // XXX: can we just remove this?
1195 $preparedEdit->oldContent
= null; // unused. // XXX: could get this from the parent revision
// revid is null while $this->revision is not set.
1196 $preparedEdit->revid
= $this->revision ?
$this->revision
->getId() : null;
// timestamp and format are derived from the fields assigned above: the output's cache
// time and the default format of the pstContent object.
1197 $preparedEdit->timestamp
= $preparedEdit->output
->getCacheTime();
1198 $preparedEdit->format
= $preparedEdit->pstContent
->getDefaultFormat();
1200 return $preparedEdit;
1206 private function isContentAccessible() {
// Currently a plain delegate: returns whatever isContentPublic() returns.
1207 // XXX: when we move this to a RevisionHtmlProvider, the audience may be configurable!
1208 return $this->isContentPublic();
1212 * @param string $role
1213 * @param bool $generateHtml
1214 * @return ParserOutput
1216 public function getSlotParserOutput( $role, $generateHtml = true ) {
// Produces the ParserOutput for the slot named $role, using $this->slotsOutput[$role] as
// a per-role cache. $generateHtml is recorded in the cache entry as 'hasHtml'.
// NOTE(review): some argument lines of the getParserOutput() call and the final return
// are not visible in this excerpt.
1217 // TODO: factor this out into a RevisionHtmlProvider that can also be used for viewing.
1219 $this->assertPrepared( __METHOD__
);
// Reuse a cached entry if it already has HTML, or if the caller did not ask for HTML.
1221 if ( isset( $this->slotsOutput
[$role] ) ) {
1222 $entry = $this->slotsOutput
[$role];
1224 if ( $entry->hasHtml ||
!$generateHtml ) {
1225 return $entry->output
;
// When isContentAccessible() is false, a freshly constructed ParserOutput is used and the
// slot content is not read.
1229 if ( !$this->isContentAccessible() ) {
1231 $output = new ParserOutput();
// Otherwise the raw content of the slot is asked for its parser output; the revision ID
// argument is null while $this->revision is not set.
1233 $content = $this->getRawContent( $role );
1235 $output = $content->getParserOutput(
1237 $this->revision ?
$this->revision
->getId() : null,
1238 $this->getCanonicalParserOptions(),
// Store the result (replacing any previous entry for this role) together with the
// information whether HTML was requested.
1243 $this->slotsOutput
[$role] = (object)[
1244 'output' => $output,
1245 'hasHtml' => $generateHtml,
// The cache time of the output is set from getTimestampNow().
1248 $output->setCacheTime( $this->getTimestampNow() );
1254 * @return ParserOutput
1256 public function getCanonicalParserOutput() {
// Returns the memoized $this->canonicalParserOutput when it is set; otherwise computes it
// from the 'main' slot, stores it in that field, and returns it.
1257 if ( $this->canonicalParserOutput
) {
1258 return $this->canonicalParserOutput
;
1261 // TODO: MCR: logic for combining the output of multiple slot goes here!
1262 // TODO: factor this out into a RevisionHtmlProvider that can also be used for viewing.
// getSlotParserOutput() is called with its default $generateHtml (true).
1263 $this->canonicalParserOutput
= $this->getSlotParserOutput( 'main' );
1265 return $this->canonicalParserOutput
;
1269 * @return ParserOptions
1271 public function getCanonicalParserOptions() {
// Returns the memoized $this->canonicalParserOptions when it is set; otherwise builds the
// options via $this->wikiPage->makeParserOptions( 'canonical' ), installs one of two
// callbacks on them (see below), stores them in the field and returns them.
// NOTE(review): closing lines of the closures and the selectField() arguments are not
// visible in this excerpt.
1272 if ( $this->canonicalParserOptions
) {
1273 return $this->canonicalParserOptions
;
1276 // TODO: ParserOptions should *not* be controlled by the ContentHandler!
1277 // See T190712 for how to fix this for Wikibase.
1278 $this->canonicalParserOptions
= $this->wikiPage
->makeParserOptions( 'canonical' );
1280 //TODO: if $this->revision is not set but we already know that the pending update is a
1281 // null-edit, we should probably use the page's current revision here.
1282 // That would avoid the need for the !$this->options['changed'] branch in
1283 // outputVariesOnRevisionMetaData [dk 2018-05]
// A revision is known: wrap the existing current-revision callback so that a lookup for
// this page's own title yields a legacy Revision built from $this->revision, while any
// other title is forwarded to the previously installed callback.
1285 if ( $this->revision
) {
1286 // Make sure we use the appropriate revision ID when generating output
1287 $title = $this->getTitle();
1288 $oldCallback = $this->canonicalParserOptions
->getCurrentRevisionCallback();
1289 $this->canonicalParserOptions
->setCurrentRevisionCallback(
1290 function ( Title
$parserTitle, $parser = false ) use ( $title, &$oldCallback ) {
1291 if ( $parserTitle->equals( $title ) ) {
1292 $legacyRevision = new Revision( $this->revision
);
1293 return $legacyRevision;
1295 return call_user_func( $oldCallback, $parserTitle, $parser );
// Presumably the branch taken when no revision is set (the branch keyword is not visible
// in this excerpt): install a callback that supplies a speculative revision ID.
// The DB index is decided here, outside the closure, from useMaster().
1300 // NOTE: we only get here without READ_LATEST if called directly by application logic
1301 $dbIndex = $this->useMaster()
1302 ? DB_MASTER
// use the best possible guess
1303 : DB_REPLICA
; // T154554
1305 $this->canonicalParserOptions
->setSpeculativeRevIdCallback(
1306 function () use ( $dbIndex ) {
1307 // TODO: inject LoadBalancer!
1308 $lb = MediaWikiServices
::getInstance()->getDBLoadBalancer();
1309 // Use a fresh connection in order to see the latest data, by avoiding
1310 // stale data from REPEATABLE-READ snapshots.
1311 // HACK: But don't use a fresh connection in unit tests, since it would not have
1312 // the fake tables. This should be handled by the LoadBalancer!
1313 $flags = defined( 'MW_PHPUNIT_TEST' ) ?
0 : $lb::CONN_TRX_AUTO
;
1314 $db = $lb->getConnectionRef( $dbIndex, [], $this->getWikiId(), $flags );
// The callback's result is one more than the integer value selected from the database.
1316 return 1 +
(int)$db->selectField(
1326 return $this->canonicalParserOptions
;
1330 * @param bool $recursive
1332 * @return DataUpdate[]
1334 public function getSecondaryDataUpdates( $recursive = false ) {
// Obtains the secondary data updates from the content of the 'main' slot, handing it the
// canonical ParserOutput. $recursive is forwarded unchanged to the content object.
// NOTE(review): what happens to $updates after the call (and the return) is not visible
// in this excerpt.
1335 // TODO: MCR: getSecondaryDataUpdates() needs a complete overhaul to avoid DataUpdates
1336 // from different slots overwriting each other in the database. Plan:
1337 // * replace direct calls to Content::getSecondaryDataUpdates() with calls to this method
1338 // * Construct LinksUpdate here, on the combined ParserOutput, instead of in AbstractContent
1340 // * Pass $slot into getSecondaryDataUpdates() - probably by introducing a new duplicate
1341 // version of this function in ContentHandler.
1342 // * The new method gets the PreparedEdit, but no $recursive flag (that's for LinksUpdate)
1343 // * Hack: call both the old and the new getSecondaryDataUpdates method here; Pass
1344 // the per-slot ParserOutput to the old method, for B/C.
1345 // * Hack: If there is more than one slot, filter LinksUpdate from the DataUpdates
1346 // returned by getSecondaryDataUpdates, and use a LinksUpdated for the combined output
1348 // * Call the SecondaryDataUpdates hook here (or kill it - its signature doesn't make sense)
1350 $content = $this->getSlots()->getContent( 'main' );
1352 // NOTE: $output is the combined output, to be shown in the default view.
1353 $output = $this->getCanonicalParserOutput();
// Arguments: this page's title, null as the second argument, the caller's $recursive
// flag, and the canonical output obtained above.
1355 $updates = $content->getSecondaryDataUpdates(
1356 $this->getTitle(), null, $recursive, $output
1363 * Do standard updates after page edit, purge, or import.
1364 * Update links tables, site stats, search index, title cache, message cache, etc.
1365 * Purges pages that depend on this page when appropriate.
1366 * With a 10% chance, triggers pruning the recent changes table.
1368 * @note prepareUpdate() must be called before calling this method!
1370 * MCR migration note: this replaces WikiPage::doEditUpdates.
1372 public function doUpdates() {
1373 $this->assertTransition( 'done' );
1375 // TODO: move logic into a PageEventEmitter service
1377 $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
1379 // NOTE: this may trigger the first parsing of the new content after an edit (when not
1380 // using pre-generated stashed output).
1381 // XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse
1382 // to be performed post-send. The client could already follow an HTTP redirect to the
1383 // page view, but would then have to wait for a response until rendering is complete.
1384 $output = $this->getCanonicalParserOutput();
1386 // Save it to the parser cache.
1387 // Make sure the cache time matches page_touched to avoid double parsing.
1388 $this->parserCache
->save(
1389 $output, $wikiPage, $this->getCanonicalParserOptions(),
1390 $this->revision
->getTimestamp(), $this->revision
->getId()
1393 $legacyUser = User
::newFromIdentity( $this->user
);
1394 $legacyRevision = new Revision( $this->revision
);
1396 // Update the links tables and other secondary data
1397 $recursive = $this->options
['changed']; // T52785
1398 $updates = $this->getSecondaryDataUpdates( $recursive );
1400 foreach ( $updates as $update ) {
1401 // TODO: make an $option field for the cause
1402 $update->setCause( 'edit-page', $this->user
->getName() );
1403 if ( $update instanceof LinksUpdate
) {
1404 $update->setRevision( $legacyRevision );
1406 if ( !empty( $this->options
['triggeringuser'] ) ) {
1407 /** @var UserIdentity|User $triggeringUser */
1408 $triggeringUser = $this->options
['triggeringuser'];
1409 if ( !$triggeringUser instanceof User
) {
1410 $triggeringUser = User
::newFromIdentity( $triggeringUser );
1413 $update->setTriggeringUser( $triggeringUser );
1416 DeferredUpdates
::addUpdate( $update );
1419 // TODO: MCR: check if *any* changed slot supports categories!
1420 if ( $this->rcWatchCategoryMembership
1421 && $this->getContentHandler( 'main' )->supportsCategories() === true
1422 && ( $this->options
['changed'] ||
$this->options
['created'] )
1423 && !$this->options
['restored']
1425 // Note: jobs are pushed after deferred updates, so the job should be able to see
1426 // the recent change entry (also done via deferred updates) and carry over any
1427 // bot/deletion/IP flags, etc.
1428 $this->jobQueueGroup
->lazyPush(
1429 new CategoryMembershipChangeJob(
1432 'pageId' => $this->getPageId(),
1433 'revTimestamp' => $this->revision
->getTimestamp(),
1439 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1440 $editInfo = $this->getPreparedEdit();
1441 Hooks
::run( 'ArticleEditUpdates', [ &$wikiPage, &$editInfo, $this->options
['changed'] ] );
1443 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1444 if ( Hooks
::run( 'ArticleEditUpdatesDeleteFromRecentchanges', [ &$wikiPage ] ) ) {
1445 // Flush old entries from the `recentchanges` table
1446 if ( mt_rand( 0, 9 ) == 0 ) {
1447 $this->jobQueueGroup
->lazyPush( RecentChangesUpdateJob
::newPurgeJob() );
1451 $id = $this->getPageId();
1452 $title = $this->getTitle();
1453 $dbKey = $title->getPrefixedDBkey();
1454 $shortTitle = $title->getDBkey();
1456 if ( !$title->exists() ) {
1457 wfDebug( __METHOD__
. ": Page doesn't exist any more, bailing out\n" );
1459 $this->doTransition( 'done' );
1463 if ( $this->options
['oldcountable'] === 'no-change' ||
1464 ( !$this->options
['changed'] && !$this->options
['moved'] )
1467 } elseif ( $this->options
['created'] ) {
1468 $good = (int)$this->isCountable();
1469 } elseif ( $this->options
['oldcountable'] !== null ) {
1470 $good = (int)$this->isCountable()
1471 - (int)$this->options
['oldcountable'];
1475 $edits = $this->options
['changed'] ?
1 : 0;
1476 $pages = $this->options
['created'] ?
1 : 0;
1478 DeferredUpdates
::addUpdate( SiteStatsUpdate
::factory(
1479 [ 'edits' => $edits, 'articles' => $good, 'pages' => $pages ]
1482 // TODO: make search infrastructure aware of slots!
1483 $mainSlot = $this->revision
->getSlot( 'main' );
1484 if ( !$mainSlot->isInherited() && $this->isContentPublic() ) {
1485 DeferredUpdates
::addUpdate( new SearchUpdate( $id, $dbKey, $mainSlot->getContent() ) );
1488 // If this is another user's talk page, update newtalk.
1489 // Don't do this if $options['changed'] = false (null-edits) nor if
1490 // it's a minor edit and the user making the edit doesn't generate notifications for those.
1491 if ( $this->options
['changed']
1492 && $title->getNamespace() == NS_USER_TALK
1493 && $shortTitle != $legacyUser->getTitleKey()
1494 && !( $this->revision
->isMinor() && $legacyUser->isAllowed( 'nominornewtalk' ) )
1496 $recipient = User
::newFromName( $shortTitle, false );
1497 if ( !$recipient ) {
1498 wfDebug( __METHOD__
. ": invalid username\n" );
1500 // Allow extensions to prevent user notification
1501 // when a new message is added to their talk page
1502 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1503 if ( Hooks
::run( 'ArticleEditUpdateNewTalk', [ &$wikiPage, $recipient ] ) ) {
1504 if ( User
::isIP( $shortTitle ) ) {
1505 // An anonymous user
1506 $recipient->setNewtalk( true, $legacyRevision );
1507 } elseif ( $recipient->isLoggedIn() ) {
1508 $recipient->setNewtalk( true, $legacyRevision );
1510 wfDebug( __METHOD__
. ": don't need to notify a nonexistent user\n" );
1516 if ( $title->getNamespace() == NS_MEDIAWIKI
1517 && $this->getRevisionSlotsUpdate()->isModifiedSlot( 'main' )
1519 $mainContent = $this->isContentPublic() ?
$this->getRawContent( 'main' ) : null;
1521 $this->messageCache
->updateMessageOverride( $title, $mainContent );
1524 // TODO: move onArticleCreate and onArticle into a PageEventEmitter service
1525 if ( $this->options
['created'] ) {
1526 WikiPage
::onArticleCreate( $title );
1527 } elseif ( $this->options
['changed'] ) { // T52785
1528 WikiPage
::onArticleEdit( $title, $legacyRevision, $this->getTouchedSlotRoles() );
1531 $oldRevision = $this->getOldRevision();
1532 $oldLegacyRevision = $oldRevision ?
new Revision( $oldRevision ) : null;
1534 // TODO: In the wiring, register a listener for this on the new PageEventEmitter
1535 ResourceLoaderWikiModule
::invalidateModuleCache(
1536 $title, $oldLegacyRevision, $legacyRevision, $this->getWikiId()
1539 $this->doTransition( 'done' );