From b00a32ba3bf9dab19089d19f2b16ad0b07fde42f Mon Sep 17 00:00:00 2001
From: Roan Kattouw
Date: Tue, 2 Aug 2011 14:05:01 +0000
Subject: [PATCH] Add --no-updates flag to importDump.php that allows the user
 to skip updating the links table. On my test dataset (an old (~2 years)
 MediaWiki namespace dump of enwiki) this speeds up the import from 9m35s to
 10s.

---
Reviewer note: the leftover debugging line
`throw new MWException("BROKEN: calling doEditUpdates()");` that this patch
added directly before the doEditUpdates() call has been dropped. It sat
inside `if ( $changed !== false && !$this->mNoUpdates )`, so it threw on
every changed revision whenever --no-updates was not given and made
doEditUpdates() unreachable. The affected hunk header and the diffstat are
adjusted accordingly; the post-image blob hash on the Import.php index line
was not recomputed.

 includes/Import.php        | 18 +++++++++++++++++-
 maintenance/importDump.php |  4 ++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/includes/Import.php b/includes/Import.php
index e1fde33c24..7fcdf0564f 100644
--- a/includes/Import.php
+++ b/includes/Import.php
@@ -36,6 +36,7 @@ class WikiImporter {
 	private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
 	private $mDebug;
 	private $mImportUploads, $mImageBasePath;
+	private $mNoUpdates = false;
 
 	/**
 	 * Creates an ImportXMLReader drawing from the source provided
@@ -90,6 +91,13 @@ class WikiImporter {
 	function setDebug( $debug ) {
 		$this->mDebug = $debug;
 	}
+
+	/**
+	 * Set 'no updates' mode. In this mode, the link tables will not be updated by the importer
+	 */
+	function setNoUpdates( $noupdates ) {
+		$this->mNoUpdates = $noupdates;
+	}
 
 	/**
 	 * Sets the action to perform as each new page in the stream is reached.
@@ -453,6 +461,7 @@ class WikiImporter {
 		$revision->setTimestamp( $logInfo['timestamp'] );
 		$revision->setParams( $logInfo['params'] );
 		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
+		$revision->setNoUpdates( $this->mNoUpdates );
 
 		if ( isset( $logInfo['comment'] ) ) {
 			$revision->setComment( $logInfo['comment'] );
@@ -587,6 +596,7 @@ class WikiImporter {
 		if ( isset( $revisionInfo['contributor']['username'] ) ) {
 			$revision->setUserName( $revisionInfo['contributor']['username'] );
 		}
+		$revision->setNoUpdates( $this->mNoUpdates );
 
 		return $this->revisionCallback( $revision );
 	}
@@ -677,6 +687,7 @@ class WikiImporter {
 		if ( isset( $uploadInfo['contributor']['username'] ) ) {
 			$revision->setUserName( $uploadInfo['contributor']['username'] );
 		}
+		$revision->setNoUpdates( $this->mNoUpdates );
 
 		return call_user_func( $this->mUploadCallback, $revision );
 	}
@@ -853,6 +864,7 @@ class WikiRevision {
 	var $sha1base36 = false;
 	var $isTemp = false;
 	var $archiveName = '';
+	private $mNoUpdates = false;
 
 	function setTitle( $title ) {
 		if( is_object( $title ) ) {
@@ -926,6 +938,10 @@ class WikiRevision {
 	function setParams( $params ) {
 		$this->params = $params;
 	}
+
+	public function setNoUpdates( $noupdates ) {
+		$this->mNoUpdates = $noupdates;
+	}
 
 	/**
 	 * @return Title
@@ -1056,7 +1072,7 @@ class WikiRevision {
 		$revision->insertOn( $dbw );
 		$changed = $article->updateIfNewerOn( $dbw, $revision );
 
-		if ( $changed !== false ) {
+		if ( $changed !== false && !$this->mNoUpdates ) {
 			wfDebug( __METHOD__ . ": running updates\n" );
 			$article->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
 		}
diff --git a/maintenance/importDump.php b/maintenance/importDump.php
index c160b0368a..2390ba5418 100644
--- a/maintenance/importDump.php
+++ b/maintenance/importDump.php
@@ -63,6 +63,7 @@ TEXT;
 		$this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
 		$this->addOption( 'debug', 'Output extra verbose debug information' );
 		$this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
+		$this->addOption( 'no-updates', 'Disable link table updates. Is faster but leaves the wiki in an inconsistent state' );
 		$this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
 		$this->addArg( 'file', 'Dump file to import [else use stdin]', false );
 	}
@@ -243,6 +244,9 @@ TEXT;
 		if( $this->hasOption( 'debug' ) ) {
 			$importer->setDebug( true );
 		}
+		if ( $this->hasOption( 'no-updates' ) ) {
+			$importer->setNoUpdates( true );
+		}
 		$importer->setPageCallback( array( &$this, 'reportPage' ) );
 		$this->importCallback = $importer->setRevisionCallback(
 			array( &$this, 'handleRevision' ) );
-- 
2.20.1