Merge "Add skipping to nth page option/ability for dump importing process"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Thu, 8 Jun 2017 23:29:38 +0000 (23:29 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Thu, 8 Jun 2017 23:29:38 +0000 (23:29 +0000)
includes/import/WikiImporter.php
maintenance/importDump.php

index 06b579a..2fc9f5e 100644 (file)
@@ -39,6 +39,7 @@ class WikiImporter {
        private $mNoticeCallback, $mDebug;
        private $mImportUploads, $mImageBasePath;
        private $mNoUpdates = false;
+       private $pageOffset = 0;
        /** @var Config */
        private $config;
        /** @var ImportTitleFactory */
@@ -146,6 +147,16 @@ class WikiImporter {
                $this->mNoUpdates = $noupdates;
        }
 
+       /**
+        * Sets the page offset: the import skips the first n-1 pages and
+        * starts from the nth page (1-based). The default 0 disables skipping.
+        * @param int $nthPage 1-based index of the first page to import
+        * @since 1.29
+        */
+       function setPageOffset( $nthPage ) {
+               $this->pageOffset = $nthPage;
+       }
+
        /**
         * Set a callback that displays notice messages
         *
@@ -562,9 +573,19 @@ class WikiImporter {
                $keepReading = $this->reader->read();
                $skip = false;
                $rethrow = null;
+               $pageCount = 0;
                try {
                        while ( $keepReading ) {
                                $tag = $this->reader->localName;
+                               if ( $this->pageOffset ) {
+                                       if ( $tag === 'page' ) {
+                                               $pageCount++;
+                                       }
+                                       if ( $pageCount < $this->pageOffset ) {
+                                               $keepReading = $this->reader->next();
+                                               continue;
+                                       }
+                               }
                                $type = $this->reader->nodeType;
 
                                if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
index 6717a8e..802619e 100644 (file)
@@ -80,6 +80,7 @@ TEXT
                        'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
                );
                $this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
+               $this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
                $this->addArg( 'file', 'Dump file to import [else use stdin]', false );
        }
 
@@ -301,6 +302,11 @@ TEXT
                                return false;
                        }
                }
+               if ( $this->hasOption( 'skip-to' ) ) {
+                       $nthPage = (int)$this->getOption( 'skip-to' );
+                       $importer->setPageOffset( $nthPage );
+                       $this->pageCount = $nthPage - 1;
+               }
                $importer->setPageCallback( [ $this, 'reportPage' ] );
                $this->importCallback = $importer->setRevisionCallback(
                        [ $this, 'handleRevision' ] );