From e6c0dcc02e20ea946a1a4ea9c46323a1ae1cb2e6 Mon Sep 17 00:00:00 2001 From: addshore Date: Tue, 4 Jul 2017 22:46:46 +0100 Subject: [PATCH] Factor OldRevisionImporter & ImportableOldRevision out of WikiRevision This is to be used within the FileImporter extension to allow adding custom loggers to this import process. Change-Id: Ib094d4829764ccc8e5bd2619fb827d701ae06d43 --- autoload.php | 3 + includes/MediaWikiServices.php | 16 ++ includes/ServiceWiring.php | 16 ++ includes/import/ImportableOldRevision.php | 68 +++++++++ .../import/ImportableOldRevisionImporter.php | 143 ++++++++++++++++++ includes/import/OldRevisionImporter.php | 17 +++ includes/import/WikiRevision.php | 113 +++----------- 7 files changed, 280 insertions(+), 96 deletions(-) create mode 100644 includes/import/ImportableOldRevision.php create mode 100644 includes/import/ImportableOldRevisionImporter.php create mode 100644 includes/import/OldRevisionImporter.php diff --git a/autoload.php b/autoload.php index 88a6425dcb..7f90d4753e 100644 --- a/autoload.php +++ b/autoload.php @@ -647,6 +647,8 @@ $wgAutoloadLocalClasses = [ 'ImportStringSource' => __DIR__ . '/includes/import/ImportStringSource.php', 'ImportTextFiles' => __DIR__ . '/maintenance/importTextFiles.php', 'ImportTitleFactory' => __DIR__ . '/includes/title/ImportTitleFactory.php', + 'ImportableOldRevision' => __DIR__ . '/includes/import/ImportableOldRevision.php', + 'ImportableOldRevisionImporter' => __DIR__ . '/includes/import/ImportableOldRevisionImporter.php', 'ImportableUploadRevision' => __DIR__ . '/includes/import/ImportableUploadRevision.php', 'ImportableUploadRevisionImporter' => __DIR__ . '/includes/import/ImportableUploadRevisionImporter.php', 'IncludableSpecialPage' => __DIR__ . '/includes/specialpage/IncludableSpecialPage.php', @@ -1077,6 +1079,7 @@ $wgAutoloadLocalClasses = [ 'ObjectFactory' => __DIR__ . '/includes/compat/ObjectFactory.php', 'OldChangesList' => __DIR__ . '/includes/changes/OldChangesList.php', 'OldLocalFile' => __DIR__ . '/includes/filerepo/file/OldLocalFile.php', + 'OldRevisionImporter' => __DIR__ . '/includes/import/OldRevisionImporter.php', 'OracleInstaller' => __DIR__ . '/includes/installer/OracleInstaller.php', 'OracleUpdater' => __DIR__ . '/includes/installer/OracleUpdater.php', 'OrderedStreamingForkController' => __DIR__ . '/includes/OrderedStreamingForkController.php', diff --git a/includes/MediaWikiServices.php b/includes/MediaWikiServices.php index 6f2d72cbe3..59f194d793 100644 --- a/includes/MediaWikiServices.php +++ b/includes/MediaWikiServices.php @@ -698,6 +698,22 @@ class MediaWikiServices extends ServiceContainer { return $this->getService( 'UploadRevisionImporter' ); } + /** + * @since 1.31 + * @return \OldRevisionImporter + */ + public function getWikiRevisionOldRevisionImporter() { + return $this->getService( 'OldRevisionImporter' ); + } + + /** + * @since 1.31 + * @return \OldRevisionImporter + */ + public function getWikiRevisionOldRevisionImporterNoUpdates() { + return $this->getService( 'WikiRevisionOldRevisionImporterNoUpdates' ); + } + /** * @since 1.30 * @return CommandFactory diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php index 672734dd38..dab9fb9955 100644 --- a/includes/ServiceWiring.php +++ b/includes/ServiceWiring.php @@ -449,6 +449,22 @@ return [ ); }, + 'OldRevisionImporter' => function ( MediaWikiServices $services ) { + return new ImportableOldRevisionImporter( + true, + LoggerFactory::getInstance( 'OldRevisionImporter' ), + $services->getDBLoadBalancer() + ); + }, + + 'WikiRevisionOldRevisionImporterNoUpdates' => function ( MediaWikiServices $services ) { + return new ImportableOldRevisionImporter( + false, + LoggerFactory::getInstance( 'OldRevisionImporter' ), + $services->getDBLoadBalancer() + ); + }, + 'ShellCommandFactory' => function ( MediaWikiServices $services ) { $config = $services->getMainConfig(); diff --git a/includes/import/ImportableOldRevision.php b/includes/import/ImportableOldRevision.php new file mode 100644 index 0000000000..6d1e24264c --- /dev/null +++ b/includes/import/ImportableOldRevision.php @@ -0,0 +1,68 @@ +doUpdates = $doUpdates; + $this->logger = $logger; + $this->loadBalancer = $loadBalancer; + } + + public function import( ImportableOldRevision $importableRevision, $doUpdates = true ) { + $dbw = $this->loadBalancer->getConnectionRef( DB_MASTER ); + + # Sneak a single revision into place + $user = $importableRevision->getUserObj() ?: User::newFromName( $importableRevision->getUser() ); + if ( $user ) { + $userId = intval( $user->getId() ); + $userText = $user->getName(); + } else { + $userId = 0; + $userText = $importableRevision->getUser(); + $user = new User; + } + + // avoid memory leak...? + Title::clearCaches(); + + $page = WikiPage::factory( $importableRevision->getTitle() ); + $page->loadPageData( 'fromdbmaster' ); + if ( !$page->exists() ) { + // must create the page... + $pageId = $page->insertOn( $dbw ); + $created = true; + $oldcountable = null; + } else { + $pageId = $page->getId(); + $created = false; + + // Note: sha1 has been in XML dumps since 2012. If you have an + // older dump, the duplicate detection here won't work. + $prior = $dbw->selectField( 'revision', '1', + [ 'rev_page' => $pageId, + 'rev_timestamp' => $dbw->timestamp( $importableRevision->getTimestamp() ), + 'rev_sha1' => $importableRevision->getSha1Base36() ], + __METHOD__ + ); + if ( $prior ) { + // @todo FIXME: This could fail slightly for multiple matches :P + $this->logger->debug( __METHOD__ . ": skipping existing revision for [[" . + $importableRevision->getTitle()->getPrefixedText() . "]], timestamp " . + $importableRevision->getTimestamp() . "\n" ); + return false; + } + } + + if ( !$pageId ) { + // This seems to happen if two clients simultaneously try to import the + // same page + $this->logger->debug( __METHOD__ . ': got invalid $pageId when importing revision of [[' . + $importableRevision->getTitle()->getPrefixedText() . ']], timestamp ' . + $importableRevision->getTimestamp() . "\n" ); + return false; + } + + // Select previous version to make size diffs correct + // @todo This assumes that multiple revisions of the same page are imported + // in order from oldest to newest. + $prevId = $dbw->selectField( 'revision', 'rev_id', + [ + 'rev_page' => $pageId, + 'rev_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $importableRevision->getTimestamp() ) ), + ], + __METHOD__, + [ 'ORDER BY' => [ + 'rev_timestamp DESC', + 'rev_id DESC', // timestamp is not unique per page + ] + ] + ); + + # @todo FIXME: Use original rev_id optionally (better for backups) + # Insert the row + $revision = new Revision( [ + 'title' => $importableRevision->getTitle(), + 'page' => $pageId, + 'content_model' => $importableRevision->getModel(), + 'content_format' => $importableRevision->getFormat(), + // XXX: just set 'content' => $wikiRevision->getContent()? + 'text' => $importableRevision->getContent()->serialize( $importableRevision->getFormat() ), + 'comment' => $importableRevision->getComment(), + 'user' => $userId, + 'user_text' => $userText, + 'timestamp' => $importableRevision->getTimestamp(), + 'minor_edit' => $importableRevision->getMinor(), + 'parent_id' => $prevId, + ] ); + $revision->insertOn( $dbw ); + $changed = $page->updateIfNewerOn( $dbw, $revision ); + + if ( $changed !== false && $this->doUpdates ) { + $this->logger->debug( __METHOD__ . ": running updates\n" ); + // countable/oldcountable stuff is handled in WikiImporter::finishImportPage + $page->doEditUpdates( + $revision, + $user, + [ 'created' => $created, 'oldcountable' => 'no-change' ] + ); + } + + return true; + } + +} diff --git a/includes/import/OldRevisionImporter.php b/includes/import/OldRevisionImporter.php new file mode 100644 index 0000000000..72af43b918 --- /dev/null +++ b/includes/import/OldRevisionImporter.php @@ -0,0 +1,17 @@ +sha1base36 ) { + return $this->sha1base36; + } + return false; + } + /** * @since 1.17 * @return string @@ -579,106 +590,16 @@ class WikiRevision implements ImportableUploadRevision { /** * @since 1.4.1 + * @deprecated in 1.31. Use OldRevisionImporter::import * @return bool */ public function importOldRevision() { - $dbw = wfGetDB( DB_MASTER ); - - # Sneak a single revision into place - $user = $this->getUserObj() ?: User::newFromName( $this->getUser() ); - if ( $user ) { - $userId = intval( $user->getId() ); - $userText = $user->getName(); + if ( $this->mNoUpdates ) { + $importer = MediaWikiServices::getInstance()->getWikiRevisionOldRevisionImporterNoUpdates(); } else { - $userId = 0; - $userText = $this->getUser(); - $user = new User; - } - - // avoid memory leak...? - Title::clearCaches(); - - $page = WikiPage::factory( $this->title ); - $page->loadPageData( 'fromdbmaster' ); - if ( !$page->exists() ) { - // must create the page... - $pageId = $page->insertOn( $dbw ); - $created = true; - $oldcountable = null; - } else { - $pageId = $page->getId(); - $created = false; - - // Note: sha1 has been in XML dumps since 2012. If you have an - // older dump, the duplicate detection here won't work. - $prior = $dbw->selectField( 'revision', '1', - [ 'rev_page' => $pageId, - 'rev_timestamp' => $dbw->timestamp( $this->timestamp ), - 'rev_sha1' => $this->sha1base36 ], - __METHOD__ - ); - if ( $prior ) { - // @todo FIXME: This could fail slightly for multiple matches :P - wfDebug( __METHOD__ . ": skipping existing revision for [[" . - $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" ); - return false; - } - } - - if ( !$pageId ) { - // This seems to happen if two clients simultaneously try to import the - // same page - wfDebug( __METHOD__ . ': got invalid $pageId when importing revision of [[' . - $this->title->getPrefixedText() . ']], timestamp ' . $this->timestamp . "\n" ); - return false; + $importer = MediaWikiServices::getInstance()->getWikiRevisionOldRevisionImporter(); } - - // Select previous version to make size diffs correct - // @todo This assumes that multiple revisions of the same page are imported - // in order from oldest to newest. - $prevId = $dbw->selectField( 'revision', 'rev_id', - [ - 'rev_page' => $pageId, - 'rev_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $this->timestamp ) ), - ], - __METHOD__, - [ 'ORDER BY' => [ - 'rev_timestamp DESC', - 'rev_id DESC', // timestamp is not unique per page - ] - ] - ); - - # @todo FIXME: Use original rev_id optionally (better for backups) - # Insert the row - $revision = new Revision( [ - 'title' => $this->title, - 'page' => $pageId, - 'content_model' => $this->getModel(), - 'content_format' => $this->getFormat(), - // XXX: just set 'content' => $this->getContent()? - 'text' => $this->getContent()->serialize( $this->getFormat() ), - 'comment' => $this->getComment(), - 'user' => $userId, - 'user_text' => $userText, - 'timestamp' => $this->timestamp, - 'minor_edit' => $this->minor, - 'parent_id' => $prevId, - ] ); - $revision->insertOn( $dbw ); - $changed = $page->updateIfNewerOn( $dbw, $revision ); - - if ( $changed !== false && !$this->mNoUpdates ) { - wfDebug( __METHOD__ . ": running updates\n" ); - // countable/oldcountable stuff is handled in WikiImporter::finishImportPage - $page->doEditUpdates( - $revision, - $user, - [ 'created' => $created, 'oldcountable' => 'no-change' ] - ); - } - - return true; + return $importer->import( $this ); } /** -- 2.20.1