From: Ariel T. Glenn Date: Wed, 17 Apr 2019 10:49:22 +0000 (+0300) Subject: allow xml page content or metadata dumps to target specific namespaces X-Git-Tag: 1.34.0-rc.0~1709^2 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/pie.php?a=commitdiff_plain;h=7f51b9e04067c0969daad2489f39e2d4b21a1535;p=lhc%2Fweb%2Fwiklou.git allow xml page content or metadata dumps to target specific namespaces We don't alter the db query for this, but throw away the extraneous rows before doing any processing on them whatsoever. Use of the DumpNamespaceFilter comes too late to avoid processing for each revision done in XmlDumpWriter::writeRevision. Bug: T220940 Change-Id: I9cb30ce612d862d97d96720ac68ff2327409f485 --- diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php index e02cd8347c..ca63dfebf3 100644 --- a/includes/export/WikiExporter.php +++ b/includes/export/WikiExporter.php @@ -83,13 +83,18 @@ class WikiExporter { * - limit: maximum number of rows to return * - dir: "asc" or "desc" timestamp order * @param int $text One of WikiExporter::TEXT or WikiExporter::STUB + * @param null|array $limitNamespaces Comma-separated list of namespace numbers + * to limit results */ - function __construct( $db, $history = self::CURRENT, $text = self::TEXT ) { + function __construct( $db, $history = self::CURRENT, $text = self::TEXT, + $limitNamespaces = null + ) { $this->db = $db; $this->history = $history; $this->writer = new XmlDumpWriter( $text, self::schemaVersion() ); $this->sink = new DumpOutput(); $this->text = $text; + $this->limitNamespaces = $limitNamespaces; } /** @@ -468,6 +473,11 @@ class WikiExporter { */ protected function outputPageStreamBatch( $results, $lastRow ) { foreach ( $results as $row ) { + if ( $this->limitNamespaces && + !in_array( $row->page_namespace, $this->limitNamespaces ) ) { + $lastRow = $row; + continue; + } if ( $lastRow === null || $lastRow->page_namespace !== $row->page_namespace || $lastRow->page_title !== $row->page_title ) { diff --git a/maintenance/dumpBackup.php b/maintenance/dumpBackup.php index b942302e46..938a6d1a01 100644 --- a/maintenance/dumpBackup.php +++ b/maintenance/dumpBackup.php @@ -65,6 +65,7 @@ TEXT $this->addOption( 'stub', 'Don\'t perform old_text lookups; for 2-pass dump' ); $this->addOption( 'uploads', 'Include upload records without files' ); $this->addOption( 'include-files', 'Include files within the XML stream' ); + $this->addOption( 'namespaces', 'Limit to this comma-separated list of namespace numbers' ); if ( $args ) { $this->loadWithArgv( $args ); @@ -131,6 +132,11 @@ TEXT $this->dumpUploads = $this->hasOption( 'uploads' ); $this->dumpUploadFileContents = $this->hasOption( 'include-files' ); $this->orderRevs = $this->hasOption( 'orderrevs' ); + if ( $this->hasOption( 'namespaces' ) ) { + $this->limitNamespaces = explode( ',', $this->getOption( 'namespaces' ) ); + } else { + $this->limitNamespaces = null; + } } } diff --git a/maintenance/includes/BackupDumper.php b/maintenance/includes/BackupDumper.php index 0b450a64b0..0118c9438b 100644 --- a/maintenance/includes/BackupDumper.php +++ b/maintenance/includes/BackupDumper.php @@ -48,6 +48,7 @@ abstract class BackupDumper extends Maintenance { public $dumpUploads = false; public $dumpUploadFileContents = false; public $orderRevs = false; + public $limitNamespaces = []; protected $reportingInterval = 100; protected $pageCount = 0; @@ -264,7 +265,7 @@ abstract class BackupDumper extends Maintenance { $this->initProgress( $history ); $db = $this->backupDb(); - $exporter = new WikiExporter( $db, $history, $text ); + $exporter = new WikiExporter( $db, $history, $text, $this->limitNamespaces ); $exporter->setSchemaVersion( $this->schemaVersion ); $exporter->dumpUploads = $this->dumpUploads; $exporter->dumpUploadFileContents = $this->dumpUploadFileContents;