Merge "allow xml page content or metadata dumps to target specific namespaces"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Mon, 13 May 2019 06:51:45 +0000 (06:51 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Mon, 13 May 2019 06:51:45 +0000 (06:51 +0000)
includes/export/WikiExporter.php
maintenance/dumpBackup.php
maintenance/includes/BackupDumper.php

index e02cd83..ca63dfe 100644 (file)
@@ -83,13 +83,18 @@ class WikiExporter {
         *   - limit: maximum number of rows to return
         *   - dir: "asc" or "desc" timestamp order
         * @param int $text One of WikiExporter::TEXT or WikiExporter::STUB
+        * @param null|array $limitNamespaces List of namespace numbers
+        *   to limit results to; null means no namespace limit
         */
-       function __construct( $db, $history = self::CURRENT, $text = self::TEXT ) {
+       function __construct( $db, $history = self::CURRENT, $text = self::TEXT,
+                       $limitNamespaces = null
+       ) {
                $this->db = $db;
                $this->history = $history;
                $this->writer = new XmlDumpWriter( $text, self::schemaVersion() );
                $this->sink = new DumpOutput();
                $this->text = $text;
+               $this->limitNamespaces = $limitNamespaces;
        }
 
        /**
@@ -468,6 +473,11 @@ class WikiExporter {
         */
        protected function outputPageStreamBatch( $results, $lastRow ) {
                foreach ( $results as $row ) {
+                       if ( $this->limitNamespaces &&
+                               !in_array( $row->page_namespace, $this->limitNamespaces ) ) {
+                               $lastRow = $row;
+                               continue;
+                       }
                        if ( $lastRow === null ||
                                $lastRow->page_namespace !== $row->page_namespace ||
                                $lastRow->page_title !== $row->page_title ) {
index b942302..938a6d1 100644 (file)
@@ -65,6 +65,7 @@ TEXT
                $this->addOption( 'stub', 'Don\'t perform old_text lookups; for 2-pass dump' );
                $this->addOption( 'uploads', 'Include upload records without files' );
                $this->addOption( 'include-files', 'Include files within the XML stream' );
+               $this->addOption( 'namespaces', 'Limit to this comma-separated list of namespace numbers' );
 
                if ( $args ) {
                        $this->loadWithArgv( $args );
@@ -131,6 +132,11 @@ TEXT
                $this->dumpUploads = $this->hasOption( 'uploads' );
                $this->dumpUploadFileContents = $this->hasOption( 'include-files' );
                $this->orderRevs = $this->hasOption( 'orderrevs' );
+               if ( $this->hasOption( 'namespaces' ) ) {
+                       $this->limitNamespaces = explode( ',', $this->getOption( 'namespaces' ) );
+               } else {
+                       $this->limitNamespaces = null;
+               }
        }
 }
 
index 0b450a6..0118c94 100644 (file)
@@ -48,6 +48,7 @@ abstract class BackupDumper extends Maintenance {
        public $dumpUploads = false;
        public $dumpUploadFileContents = false;
        public $orderRevs = false;
+       public $limitNamespaces = [];
 
        protected $reportingInterval = 100;
        protected $pageCount = 0;
@@ -264,7 +265,7 @@ abstract class BackupDumper extends Maintenance {
                $this->initProgress( $history );
 
                $db = $this->backupDb();
-               $exporter = new WikiExporter( $db, $history, $text );
+               $exporter = new WikiExporter( $db, $history, $text, $this->limitNamespaces );
                $exporter->setSchemaVersion( $this->schemaVersion );
                $exporter->dumpUploads = $this->dumpUploads;
                $exporter->dumpUploadFileContents = $this->dumpUploadFileContents;