3 * A handle for managing updates for derived page data on edit, import, purge, etc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 namespace MediaWiki\Storage
;
26 use CategoryMembershipChangeJob
;
33 use InvalidArgumentException
;
38 use MediaWiki\Edit\PreparedEdit
;
39 use MediaWiki\MediaWikiServices
;
40 use MediaWiki\User\UserIdentity
;
45 use Psr\Log\LoggerInterface
;
46 use Psr\Log\NullLogger
;
47 use RecentChangesUpdateJob
;
48 use ResourceLoaderWikiModule
;
54 use Wikimedia\Assert\Assert
;
58 * A handle for managing updates for derived page data on edit, import, purge, etc.
60 * @note Avoid direct usage of DerivedPageDataUpdater.
62 * @todo Define interfaces for the different use cases of DerivedPageDataUpdater, particularly
63 * providing access to post-PST content and ParserOutput to callbacks during revision creation,
64 * which currently use WikiPage::prepareContentForEdit, and allowing updates to be triggered on
65 * purge, import, and undeletion, which currently use WikiPage::doEditUpdates() and
66 * Content::getSecondaryDataUpdates().
68 * DerivedPageDataUpdater instances are designed to be cached inside a WikiPage instance,
69 * and re-used by callback code over the course of an update operation. It's a stepping stone
70 * on the way to a more complete refactoring of WikiPage.
72 * When using a DerivedPageDataUpdater, the following life cycle must be observed:
73 * grabCurrentRevision (optional), prepareContent (optional), prepareUpdate (required
74 * for doUpdates). getCanonicalParserOutput, getSlots, and getSecondaryDataUpdates
75 * require prepareContent or prepareUpdate to have been called first, to initialize the
76 * DerivedPageDataUpdater.
78 * @see docs/pageupdater.txt for more information.
80 * MCR migration note: this replaces the relevant methods in WikiPage, and covers the use cases
88 class DerivedPageDataUpdater
implements IDBAccessObject
{
91 * @var UserIdentity|null
103 private $parserCache;
108 private $revisionStore;
113 private $contentLanguage;
116 * @var LoggerInterface
118 private $saveParseLogger;
123 private $jobQueueGroup;
128 private $messageCache;
131 * @var string see $wgArticleCountMethod
133 private $articleCountMethod;
136 * @var boolean see $wgRCWatchCategoryMembership
138 private $rcWatchCategoryMembership = false;
141 * See $options on prepareUpdate.
148 'oldcountable' => null,
149 'oldredirect' => null,
153 * The state of the relevant row in page table before the edit.
154 * This is determined by the first call to grabCurrentRevision, prepareContent,
155 * or prepareUpdate (so it is only accessible in 'knows-current' or a later stage).
156 * If pageState was not initialized when prepareUpdate() is called, prepareUpdate() will
157 * attempt to emulate the state of the page table before the edit.
159 * Contains the following fields:
160 * - oldRevision (RevisionRecord|null): the revision that was current before the change
161 * associated with this update. Might not be set, use getOldRevision() instead of direct
163 * - oldId (int|null): the id of the above revision. 0 if there is no such revision (the change
164 * was about creating a new page); null if not known (that should not happen).
165 * - oldIsRedirect (bool|null): whether the page was a redirect before the change. Lazy-loaded,
166 * can be null; use wasRedirect() instead of direct access.
167 * - oldCountable (bool|null): whether the page was countable before the change (or null
168 * if we don't have that information)
172 private $pageState = null;
175 * @var RevisionSlotsUpdate|null
177 private $slotsUpdate = null;
180 * @var MutableRevisionSlots|null
182 private $pstContentSlots = null;
185 * @var object[] anonymous objects with two fields, using slot roles as keys:
186 * - hasHtml: whether the output contains HTML
187 * - ParserOutput: the slot's parser output
189 private $slotsOutput = [];
192 * @var ParserOutput|null
194 private $canonicalParserOutput = null;
197 * @var ParserOptions|null
199 private $canonicalParserOptions = null;
202 * @var RevisionRecord
204 private $revision = null;
207 * A stage identifier for managing the life cycle of this instance.
208 * Possible stages are 'new', 'knows-current', 'has-content', 'has-revision', and 'done'.
210 * @see docs/pageupdater.txt for documentation of the life cycle.
214 private $stage = 'new';
217 * Transition table for managing the life cycle of DerivedPageDataUpdater instances.
219 * XXX: Overkill. This is a linear order, we could just count. Names are nice though,
220 * and constants are also overkill...
222 * @see docs/pageupdater.txt for documentation of the life cycle.
226 private static $transitions = [
229 'knows-current' => true,
230 'has-content' => true,
231 'has-revision' => true,
234 'knows-current' => true,
235 'has-content' => true,
236 'has-revision' => true,
239 'has-content' => true,
240 'has-revision' => true,
243 'has-revision' => true,
249 * @param WikiPage $wikiPage
250 * @param RevisionStore $revisionStore
251 * @param ParserCache $parserCache
252 * @param JobQueueGroup $jobQueueGroup
253 * @param MessageCache $messageCache
254 * @param Language $contentLanguage
255 * @param LoggerInterface|null $saveParseLogger
257 public function __construct(
259 RevisionStore
$revisionStore,
260 ParserCache
$parserCache,
261 JobQueueGroup
$jobQueueGroup,
262 MessageCache
$messageCache,
263 Language
$contentLanguage,
264 LoggerInterface
$saveParseLogger = null
266 $this->wikiPage
= $wikiPage;
268 $this->parserCache
= $parserCache;
269 $this->revisionStore
= $revisionStore;
270 $this->jobQueueGroup
= $jobQueueGroup;
271 $this->messageCache
= $messageCache;
272 $this->contentLanguage
= $contentLanguage;
274 // XXX: replace all wfDebug calls with a Logger. Do we need more than one logger here?
275 $this->saveParseLogger
= $saveParseLogger ?
: new NullLogger();
/**
 * Moves this instance into the given life cycle stage and reports the stage it left.
 *
 * @see docs/pageupdater.txt for documentation of the life cycle.
 *
 * @param string $newStage the stage to enter
 * @return string the stage this instance was in before the call
 *
 * @throws LogicException If assertTransition() rejects the move from the current stage
 */
private function doTransition( $newStage ) {
	// Validate first, so that a rejected transition leaves $this->stage untouched.
	$this->assertTransition( $newStage );

	$previousStage = $this->stage;
	$this->stage = $newStage;

	return $previousStage;
}
/**
 * Checks that the given stage may be entered from the current stage, without entering it.
 *
 * @see docs/pageupdater.txt for documentation of the life cycle.
 *
 * @param string $newStage the stage the caller wants to enter
 *
 * @throws LogicException If self::$transitions has no truthy entry for the
 *         current stage / new stage pair
 */
private function assertTransition( $newStage ) {
	// empty() also covers stage names that are missing from the table entirely.
	$isAllowed = !empty( self::$transitions[$this->stage][$newStage] );

	if ( $isAllowed ) {
		return;
	}

	throw new LogicException( "Cannot transition from {$this->stage} to $newStage" );
}
314 * @return bool|string
316 private function getWikiId() {
317 // TODO: get from RevisionStore
322 * Checks whether this DerivedPageDataUpdater can be re-used for running updates targeting
323 * the given revision.
325 * @param UserIdentity|null $user The user creating the revision in question
326 * @param RevisionRecord|null $revision New revision (after save, if already saved)
327 * @param RevisionSlotsUpdate|null $slotsUpdate New content (before PST)
328 * @param null|int $parentId Parent revision of the edit (use 0 for page creation)
332 public function isReusableFor(
333 UserIdentity
$user = null,
334 RevisionRecord
$revision = null,
335 RevisionSlotsUpdate
$slotsUpdate = null,
340 && $revision->getParentId() !== $parentId
342 throw new InvalidArgumentException( '$parentId should match the parent of $revision' );
347 && $revision->getUser( RevisionRecord
::RAW
)->getName() !== $user->getName()
349 throw new InvalidArgumentException( '$user should match the author of $revision' );
352 if ( $user && $this->user
&& $user->getName() !== $this->user
->getName() ) {
356 if ( $revision && $this->revision
&& $this->revision
->getId() !== $revision->getId() ) {
360 if ( $revision && !$user ) {
361 $user = $revision->getUser( RevisionRecord
::RAW
);
364 if ( $this->pageState
366 && $revision->getParentId() !== null
367 && $this->pageState
['oldId'] !== $revision->getParentId()
372 if ( $this->pageState
373 && $parentId !== null
374 && $this->pageState
['oldId'] !== $parentId
381 && $this->revision
->getUser( RevisionRecord
::RAW
)->getName() !== $user->getName()
388 && $revision->getUser( RevisionRecord
::RAW
)->getName() !== $this->user
->getName()
393 // NOTE: this check is the primary reason for having the $this->slotsUpdate field!
394 if ( $this->slotsUpdate
396 && !$this->slotsUpdate
->hasSameUpdates( $slotsUpdate )
401 if ( $this->pstContentSlots
403 && !$this->pstContentSlots
->hasSameContent( $revision->getSlots() )
/**
 * Sets the article counting method; isCountable() checks it against 'link'.
 *
 * @param string $articleCountMethod "any" or "link".
 * @see $wgArticleCountMethod
 */
public function setArticleCountMethod( $articleCountMethod ) {
	$this->articleCountMethod = $articleCountMethod;
}
/**
 * Stores the category membership watching flag on this instance.
 *
 * @param bool $rcWatchCategoryMembership
 * @see $wgRCWatchCategoryMembership
 */
public function setRcWatchCategoryMembership( $rcWatchCategoryMembership ) {
	$this->rcWatchCategoryMembership = $rcWatchCategoryMembership;
}
/**
 * Returns the title of the page this updater operates on, as reported by the WikiPage.
 */
private function getTitle() {
	// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
	$page = $this->wikiPage;

	return $page->getTitle();
}
/**
 * Returns the WikiPage object this updater was constructed with.
 */
private function getWikiPage() {
	// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
	$page = $this->wikiPage;

	return $page;
}
/**
 * Tells whether the page being edited already existed before the edit.
 * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
 *
 * @return bool true if the recorded old revision ID is greater than zero
 *
 * @throws LogicException if the page state has not been initialized yet
 */
public function pageExisted() {
	$this->assertHasPageState( __METHOD__ );

	$oldId = $this->pageState['oldId'];

	return $oldId > 0;
}
/**
 * Gives access to the revision that was current before the edit, loading it on demand.
 *
 * The result is null if the edit created the page; for a regular edit it is the
 * parent of the new revision.
 * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
 *
 * @return RevisionRecord|null the revision that was current before the edit, or null if
 *         the edit created the page.
 *
 * @throws LogicException if the page state has not been initialized yet
 */
private function getOldRevision() {
	$this->assertHasPageState( __METHOD__ );

	// The 'oldRevision' key may be absent, e.g. when $this->pageState was set up
	// by prepareUpdate(). In that case, resolve it from 'oldId' and keep the result.
	if ( !array_key_exists( 'oldRevision', $this->pageState ) ) {
		/** @var int $oldId */
		$oldId = $this->pageState['oldId'];
		$flags = $this->useMaster() ? RevisionStore::READ_LATEST : 0;

		if ( $oldId ) {
			$oldRevision = $this->revisionStore->getRevisionById( $oldId, $flags );
		} else {
			// No old revision ID: there is nothing to load.
			$oldRevision = null;
		}

		$this->pageState['oldRevision'] = $oldRevision;
	}

	return $this->pageState['oldRevision'];
}
/**
 * Returns the revision that was the page's current revision when this method
 * was first called, and records the page state found at that moment.
 *
 * During an edit, that revision will act as the logical parent of the new revision.
 *
 * Some updates are performed based on the difference between the database state at the
 * moment this method is first called, and the state after the edit.
 *
 * @see docs/pageupdater.txt for more information on when this method can and should be called.
 *
 * @note Use getOldRevision() to access the revision that used to be current before the
 * edit once prepareUpdate() has been called.
 * NOTE(review): earlier documentation stated that this method throws after
 * prepareUpdate(); as written, an already initialized page state is returned before the
 * transition check is reached. Confirm which behavior is intended.
 *
 * @return RevisionRecord|null the page's current revision, or null if the page does not
 *         have one.
 */
public function grabCurrentRevision() {
	// Page state is captured only once; later calls reuse the first snapshot.
	if ( $this->pageState ) {
		return $this->pageState['oldRevision'];
	}

	$this->assertTransition( 'knows-current' );

	// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
	$page = $this->getWikiPage();

	// Do not call WikiPage::clear(), since the caller may already have caused page data
	// to be loaded with SELECT FOR UPDATE. Just assert it's loaded now.
	$page->loadPageData( self::READ_LATEST );
	$legacyRevision = $page->getRevision();

	if ( $legacyRevision ) {
		$currentRecord = $legacyRevision->getRevisionRecord();
		$currentId = $legacyRevision->getId();
	} else {
		$currentRecord = null;
		$currentId = 0;
	}

	$this->pageState = [
		'oldRevision' => $currentRecord,
		'oldId' => $currentId,
		'oldIsRedirect' => $page->isRedirect(), // NOTE: uses page table
		'oldCountable' => $page->isCountable(), // NOTE: uses pagelinks table
	];

	$this->doTransition( 'knows-current' );

	return $this->pageState['oldRevision'];
}
/**
 * Whether prepareUpdate() or prepareContent() have been called on this instance.
 *
 * @return bool true once the post-PST content slots have been set
 */
public function isContentPrepared() {
	$hasSlots = ( $this->pstContentSlots !== null );

	return $hasSlots;
}
/**
 * Whether prepareUpdate() has been called on this instance.
 *
 * @return bool true once a revision has been bound to this instance
 */
public function isUpdatePrepared() {
	$hasRevision = ( $this->revision !== null );

	return $hasRevision;
}
/**
 * Returns the ID of the page this updater operates on, as reported by the WikiPage.
 */
private function getPageId() {
	// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
	$page = $this->wikiPage;

	return $page->getId();
}
/**
 * Returns the current timestamp as produced by wfTimestampNow().
 */
private function getTimestampNow() {
	// TODO: allow an override to be injected for testing
	$now = wfTimestampNow();

	return $now;
}
/**
 * Whether the content of the target revision is publicly visible.
 *
 * @return bool false if a revision is bound and its text is flagged as deleted;
 *         true otherwise
 */
public function isContentPublic() {
	if ( !$this->revision ) {
		// If the content has not been saved yet, it cannot have been suppressed yet.
		return true;
	}

	// XXX: if that revision is the current revision, this can be skipped
	$textIsDeleted = $this->revision->isDeleted( RevisionRecord::DELETED_TEXT );

	return !$textIsDeleted;
}
/**
 * Returns the slot, modified or inherited, after PST, with no audience checks applied.
 *
 * @param string $role slot role name
 *
 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
 *         parent revision.
 * @throws LogicException via getSlots() if neither prepareContent() nor prepareUpdate()
 *         has been called
 */
public function getRawSlot( $role ) {
	$slots = $this->getSlots();

	return $slots->getSlot( $role );
}
/**
 * Returns the content of the given slot, with no audience checks.
 *
 * @param string $role slot role name
 *
 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
 *         parent revision.
 */
public function getRawContent( $role ) {
	$slot = $this->getRawSlot( $role );

	return $slot->getContent();
}
/**
 * Returns the content model of the given slot, as reported by the slot itself.
 *
 * @param string $role slot role name
 */
private function getContentModel( $role ) {
	$slot = $this->getRawSlot( $role );

	return $slot->getModel();
}
/**
 * Looks up the content handler for the content model used by the given slot.
 *
 * @param string $role slot role name
 * @return ContentHandler
 */
private function getContentHandler( $role ) {
	// TODO: inject something like a ContentHandlerRegistry
	$model = $this->getContentModel( $role );

	return ContentHandler::getForModelID( $model );
}
/**
 * Reports whether the WikiPage says it was loaded using READ_LATEST.
 * getOldRevision() uses this to decide which read flags to pass to the RevisionStore.
 */
private function useMaster() {
	// TODO: can we just set a flag to true in prepareContent()?
	$page = $this->wikiPage;

	return $page->wasLoadedFrom( self::READ_LATEST );
}
631 public function isCountable() {
632 // NOTE: Keep in sync with WikiPage::isCountable.
634 if ( !$this->getTitle()->isContentPage() ) {
638 if ( !$this->isContentPublic() ) {
639 // This should be irrelevant: countability only applies to the current revision,
640 // and the current revision is never suppressed.
644 if ( $this->isRedirect() ) {
650 if ( $this->articleCountMethod
=== 'link' ) {
651 $hasLinks = (bool)count( $this->getCanonicalParserOutput()->getLinks() );
654 // TODO: MCR: ask all slots if they have links [SlotHandler/PageTypeHandler]
655 $mainContent = $this->getRawContent( 'main' );
656 return $mainContent->isCountable( $hasLinks );
/**
 * Whether the main slot content of the target revision reports itself as a redirect.
 */
public function isRedirect() {
	// NOTE: main slot determines redirect status
	return $this->getRawContent( 'main' )->isRedirect();
}
/**
 * Whether the main slot content of the given revision reports itself as a redirect.
 * The content is fetched with RevisionRecord::RAW audience.
 *
 * @param RevisionRecord $rev
 */
private function revisionIsRedirect( RevisionRecord $rev ) {
	// NOTE: main slot determines redirect status
	return $rev->getContent( 'main', RevisionRecord::RAW )->isRedirect();
}
682 * Prepare updates based on an update which has not yet been saved.
684 * This may be used to create derived data that is needed when creating a new revision;
685 * particularly, this makes available the slots of the new revision via the getSlots()
686 * method, after applying PST and slot inheritance.
688 * The derived data prepared for revision creation may then later be re-used by doUpdates(),
689 * without the need to re-calculate.
691 * @see docs/pageupdater.txt for more information on when this method can and should be called.
693 * @note Calling this method more than once with the same $slotsUpdate
694 * has no effect. Calling this method multiple times with different content will cause
697 * @note Calling this method after prepareUpdate() has been called will cause an exception.
699 * @param User $user The user to act as context for pre-save transformation (PST).
700 * Type hint should be reduced to UserIdentity at some point.
701 * @param RevisionSlotsUpdate $slotsUpdate The new content of the slots to be updated
702 * by this edit, before PST.
703 * @param bool $useStash Whether to use stashed ParserOutput
705 public function prepareContent(
707 RevisionSlotsUpdate
$slotsUpdate,
710 if ( $this->slotsUpdate
) {
711 if ( !$this->user
) {
712 throw new LogicException(
713 'Unexpected state: $this->slotsUpdate was initialized, '
714 . 'but $this->user was not.'
718 if ( $this->user
->getName() !== $user->getName() ) {
719 throw new LogicException( 'Can\'t call prepareContent() again for different user! '
720 . 'Expected ' . $this->user
->getName() . ', got ' . $user->getName()
724 if ( !$this->slotsUpdate
->hasSameUpdates( $slotsUpdate ) ) {
725 throw new LogicException(
726 'Can\'t call prepareContent() again with different slot content!'
730 return; // prepareContent() already done, nothing to do
733 $this->assertTransition( 'has-content' );
735 $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
736 $title = $this->getTitle();
738 $parentRevision = $this->grabCurrentRevision();
740 $this->slotsOutput
= [];
741 $this->canonicalParserOutput
= null;
742 $this->canonicalParserOptions
= null;
744 // The edit may have already been prepared via api.php?action=stashedit
745 $stashedEdit = false;
747 // TODO: MCR: allow output for all slots to be stashed.
748 if ( $useStash && $slotsUpdate->isModifiedSlot( 'main' ) ) {
749 $mainContent = $slotsUpdate->getModifiedSlot( 'main' )->getContent();
750 $legacyUser = User
::newFromIdentity( $user );
751 $stashedEdit = ApiStashEdit
::checkCache( $title, $mainContent, $legacyUser );
754 if ( $stashedEdit ) {
755 /** @var ParserOutput $output */
756 $output = $stashedEdit->output
;
758 // TODO: this should happen when stashing the ParserOutput, not now!
759 $output->setCacheTime( $stashedEdit->timestamp
);
761 // TODO: MCR: allow output for all slots to be stashed.
762 $this->canonicalParserOutput
= $output;
765 $userPopts = ParserOptions
::newFromUserAndLang( $user, $this->contentLanguage
);
766 Hooks
::run( 'ArticlePrepareTextForEdit', [ $wikiPage, $userPopts ] );
769 $this->slotsUpdate
= $slotsUpdate;
771 if ( $parentRevision ) {
772 // start out by inheriting all parent slots
773 $this->pstContentSlots
= MutableRevisionSlots
::newFromParentRevisionSlots(
774 $parentRevision->getSlots()->getSlots()
777 $this->pstContentSlots
= new MutableRevisionSlots();
780 foreach ( $slotsUpdate->getModifiedRoles() as $role ) {
781 $slot = $slotsUpdate->getModifiedSlot( $role );
783 if ( $slot->isInherited() ) {
784 // No PST for inherited slots! Note that "modified" slots may still be inherited
785 // from an earlier version, e.g. for rollbacks.
787 } elseif ( $role === 'main' && $stashedEdit ) {
788 // TODO: MCR: allow PST content for all slots to be stashed.
789 $pstSlot = SlotRecord
::newUnsaved( $role, $stashedEdit->pstContent
);
791 $content = $slot->getContent();
792 $pstContent = $content->preSaveTransform( $title, $this->user
, $userPopts );
793 $pstSlot = SlotRecord
::newUnsaved( $role, $pstContent );
796 $this->pstContentSlots
->setSlot( $pstSlot );
799 foreach ( $slotsUpdate->getRemovedRoles() as $role ) {
800 $this->pstContentSlots
->removeSlot( $role );
803 $this->options
['created'] = ( $parentRevision === null );
804 $this->options
['changed'] = ( $parentRevision === null
805 ||
!$this->pstContentSlots
->hasSameContent( $parentRevision->getSlots() ) );
807 $this->doTransition( 'has-content' );
/**
 * Guards methods that need $this->pageState to be initialized.
 *
 * @param string $method name of the calling method, used in the exception message
 *
 * @throws LogicException if $this->pageState is not set
 */
private function assertHasPageState( $method ) {
	if ( $this->pageState ) {
		return;
	}

	throw new LogicException(
		'Must call grabCurrentRevision() or prepareContent() '
		. 'or prepareUpdate() before calling ' . $method
	);
}
/**
 * Guards methods that need the post-PST content slots to be available.
 *
 * @param string $method name of the calling method, used in the exception message
 *
 * @throws LogicException if $this->pstContentSlots is not set
 */
private function assertPrepared( $method ) {
	if ( $this->pstContentSlots ) {
		return;
	}

	throw new LogicException(
		'Must call prepareContent() or prepareUpdate() before calling ' . $method
	);
}
/**
 * Whether the edit creates the page.
 *
 * @return bool the value of the 'created' option
 *
 * @throws LogicException if neither prepareContent() nor prepareUpdate() has been called
 */
public function isCreation() {
	$this->assertPrepared( __METHOD__ );

	$created = $this->options['created'];

	return $created;
}
/**
 * Whether the edit created, or should create, a new revision (that is, it's not a null-edit).
 *
 * @warning at present, "null-revisions" that do not change content but do have a revision
 * record would return false after prepareContent(), but true after prepareUpdate()!
 * This should probably be fixed.
 *
 * @return bool the value of the 'changed' option
 *
 * @throws LogicException if neither prepareContent() nor prepareUpdate() has been called
 */
public function isChange() {
	$this->assertPrepared( __METHOD__ );

	$changed = $this->options['changed'];

	return $changed;
}
/**
 * Whether the page was a redirect before the edit.
 *
 * The answer is computed lazily and cached in $this->pageState['oldIsRedirect'].
 *
 * @return bool
 *
 * @throws LogicException if the page state has not been initialized yet
 */
public function wasRedirect() {
	$this->assertHasPageState( __METHOD__ );

	if ( $this->pageState['oldIsRedirect'] === null ) {
		// Go through getOldRevision() rather than reading $this->pageState['oldRevision']
		// directly: that key may be absent when the page state was emulated by
		// prepareUpdate(), and getOldRevision() resolves it from 'oldId' on demand.
		/** @var RevisionRecord|null $rev */
		$rev = $this->getOldRevision();

		if ( $rev ) {
			$this->pageState['oldIsRedirect'] = $this->revisionIsRedirect( $rev );
		} else {
			// No previous revision, so there was nothing that could have been a redirect.
			$this->pageState['oldIsRedirect'] = false;
		}
	}

	return $this->pageState['oldIsRedirect'];
}
/**
 * Returns the slots of the target revision, after PST.
 *
 * @return RevisionSlots
 *
 * @throws LogicException if neither prepareContent() nor prepareUpdate() has been called
 */
public function getSlots() {
	$this->assertPrepared( __METHOD__ );

	$slots = $this->pstContentSlots;

	return $slots;
}
/**
 * Returns the RevisionSlotsUpdate for this updater.
 *
 * If no slots update is known yet, one is derived from the slots of the bound revision
 * and the slots of the old revision (if any), and kept for later calls.
 *
 * @return RevisionSlotsUpdate
 *
 * @throws LogicException if this instance is not prepared, or if neither a slots update
 *         nor a revision is known
 */
private function getRevisionSlotsUpdate() {
	$this->assertPrepared( __METHOD__ );

	if ( $this->slotsUpdate ) {
		return $this->slotsUpdate;
	}

	if ( !$this->revision ) {
		// This should not be possible: once assertPrepared() has passed,
		// at least one of $this->slotsUpdate or $this->revision should be set.
		throw new LogicException( 'No revision nor a slots update is known!' );
	}

	$oldRevision = $this->getOldRevision();
	$newSlots = $this->revision->getSlots();
	$parentSlots = $oldRevision ? $oldRevision->getSlots() : null;

	$this->slotsUpdate = RevisionSlotsUpdate::newFromRevisionSlots( $newSlots, $parentSlots );

	return $this->slotsUpdate;
}
/**
 * Returns the role names of the slots touched by the new revision,
 * including removed roles.
 */
public function getTouchedSlotRoles() {
	$update = $this->getRevisionSlotsUpdate();

	return $update->getTouchedRoles();
}
/**
 * Returns the role names of the slots modified by the new revision,
 * not including removed roles.
 */
public function getModifiedSlotRoles() {
	$update = $this->getRevisionSlotsUpdate();

	return $update->getModifiedRoles();
}
/**
 * Returns the role names of the slots removed by the new revision.
 */
public function getRemovedSlotRoles() {
	$update = $this->getRevisionSlotsUpdate();

	return $update->getRemovedRoles();
}
936 * Prepare derived data updates targeting the given Revision.
938 * Calling this method requires the given revision to be present in the database.
939 * This may be right after a new revision has been created, or when re-generating
940 * derived data e.g. in ApiPurge, RefreshLinksJob, and the refreshLinks
943 * @see docs/pageupdater.txt for more information on when this method can and should be called.
945 * @note Calling this method more than once with the same revision has no effect.
946 * $options are only used for the first call. Calling this method multiple times with
947 * different revisions will cause an exception.
949 * @note If grabCurrentRevision() (or prepareContent()) has been called before
950 * calling this method, $revision->getParentRevision() has to refer to the revision that
951 * was the current revision at the time grabCurrentRevision() was called.
953 * @param RevisionRecord $revision
954 * @param array $options Array of options, following indexes are used:
955 * - changed: bool, whether the revision changed the content (default true)
956 * - created: bool, whether the revision created the page (default false)
957 * - moved: bool, whether the page was moved (default false)
958 * - restored: bool, whether the page was undeleted (default false)
959 * - oldrevision: Revision object for the pre-update revision (default null)
960 * - parseroutput: The canonical ParserOutput of $revision (default null)
961 * - triggeringuser: The user triggering the update (UserIdentity, default null)
962 * - oldredirect: bool, null, or string 'no-change' (default null):
963 * - bool: whether the page was counted as a redirect before that
964 * revision, only used if changed is true and created is false
965 * - null or 'no-change': don't update the redirect status.
966 * - oldcountable: bool, null, or string 'no-change' (default null):
967 * - bool: whether the page was counted as an article before that
968 * revision, only used if changed is true and created is false
969 * - null: if created is false, don't update the article count; if created
970 * is true, do update the article count
971 * - 'no-change': don't update the article count, ever
972 * When set to null, pageState['oldCountable'] will be used instead if available.
974 public function prepareUpdate( RevisionRecord
$revision, array $options = [] ) {
976 !isset( $options['oldrevision'] )
977 ||
$options['oldrevision'] instanceof Revision
978 ||
$options['oldrevision'] instanceof RevisionRecord
,
979 '$options["oldrevision"]',
980 'must be a RevisionRecord (or Revision)'
983 !isset( $options['parseroutput'] )
984 ||
$options['parseroutput'] instanceof ParserOutput
,
985 '$options["parseroutput"]',
986 'must be a ParserOutput'
989 !isset( $options['triggeringuser'] )
990 ||
$options['triggeringuser'] instanceof UserIdentity
,
991 '$options["triggeringuser"]',
992 'must be a UserIdentity'
995 if ( !$revision->getId() ) {
996 throw new InvalidArgumentException(
997 'Revision must have an ID set for it to be used with prepareUpdate()!'
1001 if ( $this->revision
) {
1002 if ( $this->revision
->getId() === $revision->getId() ) {
1003 return; // nothing to do!
1005 throw new LogicException(
1006 'Trying to re-use DerivedPageDataUpdater with revision '
1008 . ', but it\'s already bound to revision '
1009 . $this->revision
->getId()
1014 if ( $this->pstContentSlots
1015 && !$this->pstContentSlots
->hasSameContent( $revision->getSlots() )
1017 throw new LogicException(
1018 'The Revision provided has mismatching content!'
1022 // Override fields defined in $this->options with values from $options.
1023 $this->options
= array_intersect_key( $options, $this->options
) +
$this->options
;
1025 if ( isset( $this->pageState
['oldId'] ) ) {
1026 $oldId = $this->pageState
['oldId'];
1027 } elseif ( isset( $this->options
['oldrevision'] ) ) {
1028 /** @var Revision|RevisionRecord $oldRev */
1029 $oldRev = $this->options
['oldrevision'];
1030 $oldId = $oldRev->getId();
1032 $oldId = $revision->getParentId();
1035 if ( $oldId !== null ) {
1036 // XXX: what if $options['changed'] disagrees?
1037 // MovePage creates a dummy revision with changed = false!
1038 // We may want to explicitly distinguish between "no new revision" (null-edit)
1039 // and "new revision without new content" (dummy revision).
1041 if ( $oldId === $revision->getParentId() ) {
1042 // NOTE: this may still be a NullRevision!
1044 $this->options
['changed'] = true;
1045 } elseif ( $oldId === $revision->getId() ) {
1047 $this->options
['changed'] = false;
1049 // This indicates that calling code has given us the wrong Revision object
1050 throw new LogicException(
1051 'The Revision mismatches old revision ID: '
1052 . 'Old ID is ' . $oldId
1053 . ', parent ID is ' . $revision->getParentId()
1054 . ', revision ID is ' . $revision->getId()
1059 // If prepareContent() was used to generate the PST content (which is indicated by
1060 // $this->slotsUpdate being set), and this is not a null-edit, then the given
1061 // revision must have the acting user as the revision author. Otherwise, user
1062 // signatures generated by PST would mismatch the user in the revision record.
1063 if ( $this->user
!== null && $this->options
['changed'] && $this->slotsUpdate
) {
1064 $user = $revision->getUser();
1065 if ( !$this->user
->equals( $user ) ) {
1066 throw new LogicException(
1067 'The Revision provided has a mismatching actor: expected '
1068 .$this->user
->getName()
1075 // If $this->pageState was not yet initialized by grabCurrentRevision or prepareContent,
1076 // emulate the state of the page table before the edit, as good as we can.
1077 if ( !$this->pageState
) {
1078 $this->pageState
= [
1079 'oldIsRedirect' => isset( $this->options
['oldredirect'] )
1080 && is_bool( $this->options
['oldredirect'] )
1081 ?
$this->options
['oldredirect']
1083 'oldCountable' => isset( $this->options
['oldcountable'] )
1084 && is_bool( $this->options
['oldcountable'] )
1085 ?
$this->options
['oldcountable']
1089 if ( $this->options
['changed'] ) {
1090 // The edit created a new revision
1091 $this->pageState
['oldId'] = $revision->getParentId();
1093 if ( isset( $this->options
['oldrevision'] ) ) {
1094 $rev = $this->options
['oldrevision'];
1095 $this->pageState
['oldRevision'] = $rev instanceof Revision
1096 ?
$rev->getRevisionRecord()
1100 // This is a null-edit, so the old revision IS the new revision!
1101 $this->pageState
['oldId'] = $revision->getId();
1102 $this->pageState
['oldRevision'] = $revision;
1106 // "created" is forced here
1107 $this->options
['created'] = ( $this->pageState
['oldId'] === 0 );
1109 $this->revision
= $revision;
1110 $this->pstContentSlots
= $revision->getSlots();
1112 $this->doTransition( 'has-revision' );
1114 // NOTE: in case we have a User object, don't override with a UserIdentity.
1115 // We already checked that $revision->getUser() matches $this->user;
1116 if ( !$this->user
) {
1117 $this->user
= $revision->getUser( RevisionRecord
::RAW
);
1120 // Prune any output that depends on the revision ID.
1121 if ( $this->canonicalParserOutput
) {
1122 if ( $this->outputVariesOnRevisionMetaData( $this->canonicalParserOutput
, __METHOD__
) ) {
1123 $this->canonicalParserOutput
= null;
1126 $this->saveParseLogger
->debug( __METHOD__
. ": No prepared canonical output...\n" );
1129 if ( $this->slotsOutput
) {
1130 foreach ( $this->slotsOutput
as $role => $prep ) {
1131 if ( $this->outputVariesOnRevisionMetaData( $prep->output
, __METHOD__
) ) {
1132 unset( $this->slotsOutput
[$role] );
1136 $this->saveParseLogger
->debug( __METHOD__
. ": No prepared output...\n" );
1139 // reset ParserOptions, so the actual revision ID is used in future ParserOutput generation
1140 $this->canonicalParserOptions
= null;
1142 // Avoid re-generating the canonical ParserOutput if it's known.
1143 // We just trust that the caller is passing the correct ParserOutput!
1144 if ( isset( $options['parseroutput'] ) ) {
1145 $this->canonicalParserOutput
= $options['parseroutput'];
1148 // TODO: optionally get ParserOutput from the ParserCache here.
1149 // Move the logic used by RefreshLinksJob here!
1153 * @param ParserOutput $out
1154 * @param string $method
// Inspects the flags set on a prepared ParserOutput ('vary-revision', 'vary-revision-id',
// 'vary-user') to decide whether the output depends on revision meta-data.
// Every case is logged with $method as the message prefix. Callers in this class
// discard the prepared output when this check yields a truthy result.
1157 private function outputVariesOnRevisionMetaData( ParserOutput
$out, $method = __METHOD__
) {
1158 if ( $out->getFlag( 'vary-revision' ) ) {
1159 // XXX: Just keep the output if the speculative revision ID was correct, like below?
1160 $this->saveParseLogger
->info(
1161 "$method: Prepared output has vary-revision...\n"
// The output was generated with a speculative revision ID that is not
// identical to the ID of the revision this updater now holds.
1164 } elseif ( $out->getFlag( 'vary-revision-id' )
1165 && $out->getSpeculativeRevIdUsed() !== $this->revision
->getId()
1167 $this->saveParseLogger
->info(
1168 "$method: Prepared output has vary-revision-id with wrong ID...\n"
// The output varies on the user and the 'changed' option is false (a null-edit).
1171 } elseif ( $out->getFlag( 'vary-user' )
1172 && !$this->options
['changed']
1174 // When Alice makes a null-edit on top of Bob's edit,
1175 // {{REVISIONUSER}} must resolve to "Bob", not "Alice", see T135261.
1176 // TODO: to avoid this, we should check for null-edits in makeCanonicalparserOptions,
1177 // and set setCurrentRevisionCallback to return the existing revision when appropriate.
1178 // See also the comment there [dk 2018-05]
1179 $this->saveParseLogger
->info(
1180 "$method: Prepared output has vary-user and is null-edit...\n"
// NOTE(review): this message goes through wfDebug() while the cases above log via
// $this->saveParseLogger - confirm whether that difference is intended.
1184 wfDebug( "$method: Keeping prepared output...\n" );
1190 * @deprecated This only exists for B/C, use the getters on DerivedPageDataUpdater directly!
1191 * @return PreparedEdit
// Builds a legacy PreparedEdit object from this updater's state. Requires the updater
// to be prepared (see assertPrepared()). The object is filled with the canonical
// ParserOptions and ParserOutput, the main slot's content, the revision ID (null when
// no revision is set), the output's cache time and the content's default format.
1193 public function getPreparedEdit() {
1194 $this->assertPrepared( __METHOD__
);
1196 $slotsUpdate = $this->getRevisionSlotsUpdate();
1197 $preparedEdit = new PreparedEdit();
1199 $preparedEdit->popts
= $this->getCanonicalParserOptions();
1200 $preparedEdit->output
= $this->getCanonicalParserOutput();
1201 $preparedEdit->pstContent
= $this->pstContentSlots
->getContent( 'main' );
// newContent: take the main slot content from the slots update if that slot was
// modified, otherwise fall back to the same content used for pstContent.
1202 $preparedEdit->newContent
=
1203 $slotsUpdate->isModifiedSlot( 'main' )
1204 ?
$slotsUpdate->getModifiedSlot( 'main' )->getContent()
1205 : $this->pstContentSlots
->getContent( 'main' ); // XXX: can we just remove this?
1206 $preparedEdit->oldContent
= null; // unused. // XXX: could get this from the parent revision
1207 $preparedEdit->revid
= $this->revision ?
$this->revision
->getId() : null;
// timestamp and format are derived from the fields assigned above.
1208 $preparedEdit->timestamp
= $preparedEdit->output
->getCacheTime();
1209 $preparedEdit->format
= $preparedEdit->pstContent
->getDefaultFormat();
1211 return $preparedEdit;
// Accessibility check consulted by getSlotParserOutput() before it requests
// ParserOutput from the content. Currently this simply delegates to isContentPublic().
1217 private function isContentAccessible() {
1218 // XXX: when we move this to a RevisionHtmlProvider, the audience may be configurable!
1219 return $this->isContentPublic();
1223 * @param string $role
1224 * @param bool $generateHtml
1225 * @return ParserOutput
// Provides the ParserOutput for the slot with the given role. Requires the updater to
// be prepared. Entries are cached per role in $this->slotsOutput together with a flag
// recording whether HTML was generated for them.
1227 public function getSlotParserOutput( $role, $generateHtml = true ) {
1228 // TODO: factor this out into a RevisionHtmlProvider that can also be used for viewing.
1230 $this->assertPrepared( __METHOD__
);
// Re-use the cached entry if it already has HTML, or if no HTML was asked for.
1232 if ( isset( $this->slotsOutput
[$role] ) ) {
1233 $entry = $this->slotsOutput
[$role];
1235 if ( $entry->hasHtml ||
!$generateHtml ) {
1236 return $entry->output
;
// Content that is not accessible gets a blank ParserOutput object.
1240 if ( !$this->isContentAccessible() ) {
1242 $output = new ParserOutput();
// Ask the slot's raw content for its ParserOutput, passing the revision ID
// (null when no revision is set) and the canonical ParserOptions.
1244 $content = $this->getRawContent( $role );
1246 $output = $content->getParserOutput(
1248 $this->revision ?
$this->revision
->getId() : null,
1249 $this->getCanonicalParserOptions(),
// Remember the result for this role, along with whether HTML was requested.
1254 $this->slotsOutput
[$role] = (object)[
1255 'output' => $output,
1256 'hasHtml' => $generateHtml,
// Set the output's cache time from getTimestampNow().
1259 $output->setCacheTime( $this->getTimestampNow() );
1265 * @return ParserOutput
// Returns the canonical ParserOutput. The value is computed on first use from the
// 'main' slot via getSlotParserOutput() and then kept in $this->canonicalParserOutput.
1267 public function getCanonicalParserOutput() {
1268 if ( $this->canonicalParserOutput
) {
1269 return $this->canonicalParserOutput
;
1272 // TODO: MCR: logic for combining the output of multiple slots goes here!
1273 // TODO: factor this out into a RevisionHtmlProvider that can also be used for viewing.
1274 $this->canonicalParserOutput
= $this->getSlotParserOutput( 'main' );
1276 return $this->canonicalParserOutput
;
1280 * @return ParserOptions
// Returns the canonical ParserOptions, creating them on first use via
// $this->wikiPage->makeParserOptions( 'canonical' ) and keeping them in
// $this->canonicalParserOptions. When a revision is set, the current-revision callback
// is replaced so that lookups for this page's own title yield that revision.
1282 public function getCanonicalParserOptions() {
1283 if ( $this->canonicalParserOptions
) {
1284 return $this->canonicalParserOptions
;
1287 // TODO: ParserOptions should *not* be controlled by the ContentHandler!
1288 // See T190712 for how to fix this for Wikibase.
1289 $this->canonicalParserOptions
= $this->wikiPage
->makeParserOptions( 'canonical' );
1291 //TODO: if $this->revision is not set but we already know that the pending update is a
1292 // null-edit, we should probably use the page's current revision here.
1293 // That would avoid the need for the !$this->options['changed'] branch in
1294 // outputVariesOnRevisionMetaData [dk 2018-05]
1296 if ( $this->revision
) {
1297 // Make sure we use the appropriate revision ID when generating output
1298 $title = $this->getTitle();
1299 $oldCallback = $this->canonicalParserOptions
->getCurrentRevisionCallback();
// The new callback wraps $this->revision in a legacy Revision object for this
// page's title and defers to the previously installed callback for other titles.
1300 $this->canonicalParserOptions
->setCurrentRevisionCallback(
1301 function ( Title
$parserTitle, $parser = false ) use ( $title, &$oldCallback ) {
1302 if ( $parserTitle->equals( $title ) ) {
1303 $legacyRevision = new Revision( $this->revision
);
1304 return $legacyRevision;
1306 return call_user_func( $oldCallback, $parserTitle, $parser );
1311 // NOTE: we only get here without READ_LATEST if called directly by application logic
// Pick the database index the speculative revision ID lookup will use.
1312 $dbIndex = $this->useMaster()
1313 ? DB_MASTER
// use the best possible guess
1314 : DB_REPLICA
; // T154554
// The callback computes its result as 1 plus an integer read with selectField().
1316 $this->canonicalParserOptions
->setSpeculativeRevIdCallback(
1317 function () use ( $dbIndex ) {
1318 // TODO: inject LoadBalancer!
1319 $lb = MediaWikiServices
::getInstance()->getDBLoadBalancer();
1320 // Use a fresh connection in order to see the latest data, by avoiding
1321 // stale data from REPEATABLE-READ snapshots.
1322 // HACK: But don't use a fresh connection in unit tests, since it would not have
1323 // the fake tables. This should be handled by the LoadBalancer!
1324 $flags = defined( 'MW_PHPUNIT_TEST' ) ?
0 : $lb::CONN_TRX_AUTOCOMMIT
;
1325 $db = $lb->getConnectionRef( $dbIndex, [], $this->getWikiId(), $flags );
1327 return 1 +
(int)$db->selectField(
1337 return $this->canonicalParserOptions
;
1341 * @param bool $recursive
1343 * @return DataUpdate[]
// Collects the secondary data updates for the page by asking the content of the 'main'
// slot for them, handing it the page title, the $recursive flag and the canonical
// ParserOutput.
1345 public function getSecondaryDataUpdates( $recursive = false ) {
1346 // TODO: MCR: getSecondaryDataUpdates() needs a complete overhaul to avoid DataUpdates
1347 // from different slots overwriting each other in the database. Plan:
1348 // * replace direct calls to Content::getSecondaryDataUpdates() with calls to this method
1349 // * Construct LinksUpdate here, on the combined ParserOutput, instead of in AbstractContent
1351 // * Pass $slot into getSecondaryDataUpdates() - probably by introducing a new duplicate
1352 // version of this function in ContentHandler.
1353 // * The new method gets the PreparedEdit, but no $recursive flag (that's for LinksUpdate)
1354 // * Hack: call both the old and the new getSecondaryDataUpdates method here; Pass
1355 // the per-slot ParserOutput to the old method, for B/C.
1356 // * Hack: If there is more than one slot, filter LinksUpdate from the DataUpdates
1357 // returned by getSecondaryDataUpdates, and use a LinksUpdated for the combined output
1359 // * Call the SecondaryDataUpdates hook here (or kill it - its signature doesn't make sense)
// Only the 'main' slot is consulted for now; see the MCR plan above.
1361 $content = $this->getSlots()->getContent( 'main' );
1363 // NOTE: $output is the combined output, to be shown in the default view.
1364 $output = $this->getCanonicalParserOutput();
1366 $updates = $content->getSecondaryDataUpdates(
1367 $this->getTitle(), null, $recursive, $output
1374 * Do standard updates after page edit, purge, or import.
1375 * Update links tables, site stats, search index, title cache, message cache, etc.
1376 * Purges pages that depend on this page when appropriate.
1377 * With a 10% chance, triggers pruning the recent changes table.
1379 * @note prepareUpdate() must be called before calling this method!
1381 * MCR migration note: this replaces WikiPage::doEditUpdates.
// Runs the post-save updates for the revision held by this updater. In order, it:
// saves the canonical ParserOutput to the parser cache, schedules the secondary data
// updates as deferred updates, optionally queues a CategoryMembershipChangeJob, runs
// the legacy ArticleEditUpdates hooks, schedules site stats and search updates, handles
// user talk page notifications and message cache overrides, notifies WikiPage about the
// creation or edit, and invalidates the ResourceLoader module cache.
// The updater must be able to transition to the 'done' stage, which it enters at the end
// (or early, when the page no longer exists).
1383 public function doUpdates() {
1384 $this->assertTransition( 'done' );
1386 // TODO: move logic into a PageEventEmitter service
1388 $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
1390 // NOTE: this may trigger the first parsing of the new content after an edit (when not
1391 // using pre-generated stashed output).
1392 // XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse
1393 // to be performed post-send. The client could already follow an HTTP redirect to the
1394 // page view, but would then have to wait for a response until rendering is complete.
1395 $output = $this->getCanonicalParserOutput();
1397 // Save it to the parser cache.
1398 // Make sure the cache time matches page_touched to avoid double parsing.
1399 $this->parserCache
->save(
1400 $output, $wikiPage, $this->getCanonicalParserOptions(),
1401 $this->revision
->getTimestamp(), $this->revision
->getId()
// Legacy User and Revision objects, passed to the older APIs and hooks used below.
1404 $legacyUser = User
::newFromIdentity( $this->user
);
1405 $legacyRevision = new Revision( $this->revision
);
1407 // Update the links tables and other secondary data
1408 $recursive = $this->options
['changed']; // T52785
1409 $updates = $this->getSecondaryDataUpdates( $recursive );
// Tag each update with its cause and schedule it as a deferred update. LinksUpdate
// instances also receive the legacy revision; a 'triggeringuser' option, when set,
// is converted to a User object if necessary and attached to the update.
1411 foreach ( $updates as $update ) {
1412 // TODO: make an $option field for the cause
1413 $update->setCause( 'edit-page', $this->user
->getName() );
1414 if ( $update instanceof LinksUpdate
) {
1415 $update->setRevision( $legacyRevision );
1417 if ( !empty( $this->options
['triggeringuser'] ) ) {
1418 /** @var UserIdentity|User $triggeringUser */
1419 $triggeringUser = $this->options
['triggeringuser'];
1420 if ( !$triggeringUser instanceof User
) {
1421 $triggeringUser = User
::newFromIdentity( $triggeringUser );
1424 $update->setTriggeringUser( $triggeringUser );
1427 DeferredUpdates
::addUpdate( $update );
1430 // TODO: MCR: check if *any* changed slot supports categories!
1431 if ( $this->rcWatchCategoryMembership
1432 && $this->getContentHandler( 'main' )->supportsCategories() === true
1433 && ( $this->options
['changed'] ||
$this->options
['created'] )
1434 && !$this->options
['restored']
1436 // Note: jobs are pushed after deferred updates, so the job should be able to see
1437 // the recent change entry (also done via deferred updates) and carry over any
1438 // bot/deletion/IP flags, etc.
1439 $this->jobQueueGroup
->lazyPush(
1440 new CategoryMembershipChangeJob(
1443 'pageId' => $this->getPageId(),
1444 'revTimestamp' => $this->revision
->getTimestamp(),
1450 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1451 $editInfo = $this->getPreparedEdit();
1452 Hooks
::run( 'ArticleEditUpdates', [ &$wikiPage, &$editInfo, $this->options
['changed'] ] );
1454 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1455 if ( Hooks
::run( 'ArticleEditUpdatesDeleteFromRecentchanges', [ &$wikiPage ] ) ) {
1456 // Flush old entries from the `recentchanges` table
1457 if ( mt_rand( 0, 9 ) == 0 ) {
1458 $this->jobQueueGroup
->lazyPush( RecentChangesUpdateJob
::newPurgeJob() );
// Page identifiers used by the remaining updates.
1462 $id = $this->getPageId();
1463 $title = $this->getTitle();
1464 $dbKey = $title->getPrefixedDBkey();
1465 $shortTitle = $title->getDBkey();
1467 if ( !$title->exists() ) {
1468 wfDebug( __METHOD__
. ": Page doesn't exist any more, bailing out\n" );
1470 $this->doTransition( 'done' );
// Work out $good, the value reported as 'articles' in the site stats update below,
// from the page's countable state and the 'oldcountable' / 'oldCountable' values.
1474 if ( $this->options
['oldcountable'] === 'no-change' ||
1475 ( !$this->options
['changed'] && !$this->options
['moved'] )
1478 } elseif ( $this->options
['created'] ) {
1479 $good = (int)$this->isCountable();
1480 } elseif ( $this->options
['oldcountable'] !== null ) {
1481 $good = (int)$this->isCountable()
1482 - (int)$this->options
['oldcountable'];
1483 } elseif ( isset( $this->pageState
['oldCountable'] ) ) {
1484 $good = (int)$this->isCountable()
1485 - (int)$this->pageState
['oldCountable'];
// One edit is counted if the revision changed something, one page if it was created.
1489 $edits = $this->options
['changed'] ?
1 : 0;
1490 $pages = $this->options
['created'] ?
1 : 0;
1492 DeferredUpdates
::addUpdate( SiteStatsUpdate
::factory(
1493 [ 'edits' => $edits, 'articles' => $good, 'pages' => $pages ]
1496 // TODO: make search infrastructure aware of slots!
1497 $mainSlot = $this->revision
->getSlot( 'main' );
1498 if ( !$mainSlot->isInherited() && $this->isContentPublic() ) {
1499 DeferredUpdates
::addUpdate( new SearchUpdate( $id, $dbKey, $mainSlot->getContent() ) );
1502 // If this is another user's talk page, update newtalk.
1503 // Don't do this if $options['changed'] = false (null-edits) nor if
1504 // it's a minor edit and the user making the edit doesn't generate notifications for those.
1505 if ( $this->options
['changed']
1506 && $title->getNamespace() == NS_USER_TALK
1507 && $shortTitle != $legacyUser->getTitleKey()
1508 && !( $this->revision
->isMinor() && $legacyUser->isAllowed( 'nominornewtalk' ) )
1510 $recipient = User
::newFromName( $shortTitle, false );
1511 if ( !$recipient ) {
1512 wfDebug( __METHOD__
. ": invalid username\n" );
1514 // Allow extensions to prevent user notification
1515 // when a new message is added to their talk page
1516 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1517 if ( Hooks
::run( 'ArticleEditUpdateNewTalk', [ &$wikiPage, $recipient ] ) ) {
1518 if ( User
::isIP( $shortTitle ) ) {
1519 // An anonymous user
1520 $recipient->setNewtalk( true, $legacyRevision );
1521 } elseif ( $recipient->isLoggedIn() ) {
1522 $recipient->setNewtalk( true, $legacyRevision );
1524 wfDebug( __METHOD__
. ": don't need to notify a nonexistent user\n" );
// A modified main slot on a page in the MediaWiki namespace updates the message
// cache override; null is passed instead of the content when it is not public.
1530 if ( $title->getNamespace() == NS_MEDIAWIKI
1531 && $this->getRevisionSlotsUpdate()->isModifiedSlot( 'main' )
1533 $mainContent = $this->isContentPublic() ?
$this->getRawContent( 'main' ) : null;
1535 $this->messageCache
->updateMessageOverride( $title, $mainContent );
1538 // TODO: move onArticleCreate and onArticle into a PageEventEmitter service
1539 if ( $this->options
['created'] ) {
1540 WikiPage
::onArticleCreate( $title );
1541 } elseif ( $this->options
['changed'] ) { // T52785
1542 WikiPage
::onArticleEdit( $title, $legacyRevision, $this->getTouchedSlotRoles() );
// Wrap the old revision, if there is one, in a legacy Revision object for the
// ResourceLoader cache invalidation below.
1545 $oldRevision = $this->getOldRevision();
1546 $oldLegacyRevision = $oldRevision ?
new Revision( $oldRevision ) : null;
1548 // TODO: In the wiring, register a listener for this on the new PageEventEmitter
1549 ResourceLoaderWikiModule
::invalidateModuleCache(
1550 $title, $oldLegacyRevision, $legacyRevision, $this->getWikiId() ?
: wfWikiID()
1553 $this->doTransition( 'done' );