From 45f3912bf1e74cc29c23b151a45c446f79addabf Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 14 Dec 2018 12:24:44 +0100 Subject: [PATCH] Make the XML dump schema version configurable. Bug: T174031 Change-Id: I979b6c8f0a72bc1f5ecce1d499d3fdfa0f671588 --- includes/DefaultSettings.php | 6 ++++ includes/Defines.php | 10 ++++++ includes/export/WikiExporter.php | 19 ++++++++-- includes/export/XmlDumpWriter.php | 7 ++++ includes/import/WikiImporter.php | 1 + maintenance/dumpTextPass.php | 36 ++++++++----------- maintenance/includes/BackupDumper.php | 15 ++++++++ tests/phpunit/maintenance/DumpTestCase.php | 4 +-- tests/phpunit/maintenance/backup_PageTest.php | 5 ++- 9 files changed, 76 insertions(+), 27 deletions(-) diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 5f93abe0a1..91170c4778 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -8977,6 +8977,12 @@ $wgInterwikiPrefixDisplayTypes = []; */ $wgMultiContentRevisionSchemaMigrationStage = SCHEMA_COMPAT_WRITE_BOTH | SCHEMA_COMPAT_READ_NEW; +/** + * The schema to use by default when generating XML dumps. This allows sites to control + * explicitly when to make breaking changes to their export and dump format. + */ +$wgXmlDumpSchemaVersion = XML_DUMP_SCHEMA_VERSION_10; + /** * Actor table schema migration stage. * diff --git a/includes/Defines.php b/includes/Defines.php index 720e8d018d..5f98b443c8 100644 --- a/includes/Defines.php +++ b/includes/Defines.php @@ -317,3 +317,13 @@ define( 'MIGRATION_WRITE_BOTH', 0x10000000 | SCHEMA_COMPAT_READ_BOTH | SCHEMA_CO define( 'MIGRATION_WRITE_NEW', 0x20000000 | SCHEMA_COMPAT_READ_BOTH | SCHEMA_COMPAT_WRITE_NEW ); define( 'MIGRATION_NEW', 0x30000000 | SCHEMA_COMPAT_NEW ); /**@}*/ + +/**@{ + * XML dump schema versions, for use with XmlDumpWriter. + * See also the corresponding export-nnnn.xsd files in the docs directory, + * which are also listed at https://www.mediawiki.org/xml/.
+ * Note that not all old schema versions are represented here, as several + * were already unsupported at the time these constants were introduced. + */ +define( 'XML_DUMP_SCHEMA_VERSION_10', '0.10' ); +/**@}*/ diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php index fbcf832070..120632c365 100644 --- a/includes/export/WikiExporter.php +++ b/includes/export/WikiExporter.php @@ -63,12 +63,16 @@ class WikiExporter { /** @var DumpOutput */ public $sink; + /** @var XmlDumpWriter */ + private $writer; + /** - * Returns the export schema version. + * Returns the default export schema version, as defined by $wgXmlDumpSchemaVersion. * @return string */ public static function schemaVersion() { - return "0.10"; + global $wgXmlDumpSchemaVersion; + return $wgXmlDumpSchemaVersion; } /** @@ -83,11 +87,20 @@ class WikiExporter { function __construct( $db, $history = self::CURRENT, $text = self::TEXT ) { $this->db = $db; $this->history = $history; - $this->writer = new XmlDumpWriter(); + $this->writer = new XmlDumpWriter( $text, self::schemaVersion() ); $this->sink = new DumpOutput(); $this->text = $text; } + /** + * @param string $schemaVersion which schema version the generated XML should comply with. + * One of the values from XmlDumpWriter::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX + * constants. + */ + public function setSchemaVersion( $schemaVersion ) { + $this->writer = new XmlDumpWriter( $this->text, $schemaVersion ); + } + /** * Set the DumpOutput or DumpFilter object which will receive * various row objects and XML output for filtering. 
Filters diff --git a/includes/export/XmlDumpWriter.php b/includes/export/XmlDumpWriter.php index fbc4b0d643..3c0b569743 100644 --- a/includes/export/XmlDumpWriter.php +++ b/includes/export/XmlDumpWriter.php @@ -30,6 +30,13 @@ use MediaWiki\Storage\SqlBlobStore; * @ingroup Dump */ class XmlDumpWriter { + /** + * @var string[] the schema versions supported for output + * @final + */ + public static $supportedSchemas = [ + XML_DUMP_SCHEMA_VERSION_10, + ]; /** * Title of the currently processed page diff --git a/includes/import/WikiImporter.php b/includes/import/WikiImporter.php index 4d7210245f..bd19aa7513 100644 --- a/includes/import/WikiImporter.php +++ b/includes/import/WikiImporter.php @@ -893,6 +893,7 @@ class WikiImporter { ) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" ); } + // FIXME: process schema version 11! $revision = new WikiRevision( $this->config ); if ( isset( $revisionInfo['id'] ) ) { diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php index 0479a91dd2..f515df70f4 100644 --- a/maintenance/dumpTextPass.php +++ b/maintenance/dumpTextPass.php @@ -30,7 +30,6 @@ require_once __DIR__ . '/../includes/export/WikiExporter.php'; use MediaWiki\MediaWikiServices; use MediaWiki\Storage\BlobAccessException; -use MediaWiki\Storage\BlobStore; use MediaWiki\Storage\SqlBlobStore; use Wikimedia\Rdbms\IMaintainableDatabase; @@ -143,7 +142,7 @@ TEXT } /** - * @return BlobStore + * @return SqlBlobStore */ private function getBlobStore() { return MediaWikiServices::getInstance()->getBlobStore(); @@ -737,16 +736,16 @@ TEXT } /** - * @param int|string $id Content address, or text row ID. + * @param int|string $address Content address, or text row ID. * @return bool|string */ - private function getTextSpawned( $id ) { + private function getTextSpawned( $address ) { Wikimedia\suppressWarnings(); if ( !$this->spawnProc ) { // First time? 
$this->openSpawn(); } - $text = $this->getTextSpawnedOnce( $id ); + $text = $this->getTextSpawnedOnce( $address ); Wikimedia\restoreWarnings(); return $text; @@ -814,11 +813,15 @@ TEXT } /** - * @param int|string $id Content address, or text row ID. + * @param int|string $address Content address, or text row ID. * @return bool|string */ - private function getTextSpawnedOnce( $id ) { - $ok = fwrite( $this->spawnWrite, "$id\n" ); + private function getTextSpawnedOnce( $address ) { + if ( is_int( $address ) || intval( $address ) ) { + $address = SqlBlobStore::makeAddressFromTextId( (int)$address ); + } + + $ok = fwrite( $this->spawnWrite, "$address\n" ); // $this->progress( ">> $id" ); if ( !$ok ) { return false; @@ -830,26 +833,17 @@ TEXT return false; } - // check that the text id they are sending is the one we asked for + // check that the text address they are sending is the one we asked for // this avoids out of sync revision text errors we have encountered in the past $newAddress = fgets( $this->spawnRead ); if ( $newAddress === false ) { return false; } if ( strpos( $newAddress, ':' ) === false ) { - $newId = intval( $newAddress ); - if ( $newId === false ) { - return false; - } - } else { - try { - $newAddressFields = SqlBlobStore::splitBlobAddress( $newAddress ); - $newId = $newAddressFields[ 1 ]; - } catch ( InvalidArgumentException $ex ) { - return false; - } + $newAddress = SqlBlobStore::makeAddressFromTextId( intval( $newAddress ) ); } - if ( $id != intval( $newId ) ) { + + if ( trim( $newAddress ) !== $address ) { return false; } diff --git a/maintenance/includes/BackupDumper.php b/maintenance/includes/BackupDumper.php index 45786d8da0..a9e757e7b4 100644 --- a/maintenance/includes/BackupDumper.php +++ b/maintenance/includes/BackupDumper.php @@ -51,6 +51,7 @@ abstract class BackupDumper extends Maintenance { protected $reportingInterval = 100; protected $pageCount = 0; protected $revCount = 0; + protected $schemaVersion = null; // use default protected $server = 
null; // use default protected $sink = null; // Output filters protected $lastTime = 0; @@ -101,6 +102,8 @@ abstract class BackupDumper extends Maintenance { '[:]. s: latest, notalk, namespace', false, true, false, true ); $this->addOption( 'report', 'Report position and speed after every n pages processed. ' . 'Default: 100.', false, true ); + $this->addOption( 'schema-version', 'Schema version to use for output. ' . + 'Default: ' . WikiExporter::schemaVersion(), false, true ); $this->addOption( 'server', 'Force reading from MySQL server', false, true ); $this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' . '-mx option to 7za command.', false, true ); @@ -155,6 +158,8 @@ abstract class BackupDumper extends Maintenance { $sink = null; $sinks = []; + $this->schemaVersion = WikiExporter::schemaVersion(); + $options = $this->orderedOptions; foreach ( $options as $arg ) { $opt = $arg[0]; @@ -215,6 +220,15 @@ abstract class BackupDumper extends Maintenance { unset( $sink ); $sink = $filter; + break; + case 'schema-version': + if ( !in_array( $param, XmlDumpWriter::$supportedSchemas, true ) ) { + $this->fatalError( + "Unsupported schema version $param. Supported versions: " . 
+ implode( ', ', XmlDumpWriter::$supportedSchemas ) + ); + } + $this->schemaVersion = $param; break; } } @@ -250,6 +264,7 @@ abstract class BackupDumper extends Maintenance { $db = $this->backupDb(); $exporter = new WikiExporter( $db, $history, $text ); + $exporter->setSchemaVersion( $this->schemaVersion ); $exporter->dumpUploads = $this->dumpUploads; $exporter->dumpUploadFileContents = $this->dumpUploadFileContents; diff --git a/tests/phpunit/maintenance/DumpTestCase.php b/tests/phpunit/maintenance/DumpTestCase.php index eebc201ae2..26c9b92dbc 100644 --- a/tests/phpunit/maintenance/DumpTestCase.php +++ b/tests/phpunit/maintenance/DumpTestCase.php @@ -162,9 +162,9 @@ abstract class DumpTestCase extends MediaWikiLangTestCase { * @return string */ protected function getXmlSchemaPath( $schemaVersion = null ) { - global $IP; + global $IP, $wgXmlDumpSchemaVersion; - $schemaVersion = $schemaVersion ?: '0.10'; + $schemaVersion = $schemaVersion ?: $wgXmlDumpSchemaVersion; return "$IP/docs/export-$schemaVersion.xsd"; } diff --git a/tests/phpunit/maintenance/backup_PageTest.php b/tests/phpunit/maintenance/backup_PageTest.php index afe8c4b11c..17c8757b3c 100644 --- a/tests/phpunit/maintenance/backup_PageTest.php +++ b/tests/phpunit/maintenance/backup_PageTest.php @@ -12,6 +12,7 @@ use WikiExporter; use Wikimedia\Rdbms\IDatabase; use Wikimedia\Rdbms\LoadBalancer; use WikiPage; +use XmlDumpWriter; /** * Tests for page dumps of BackupDumper @@ -171,7 +172,9 @@ class BackupDumperPageTest extends DumpTestCase { } public function schemaVersionProvider() { - yield [ '0.10' ]; + foreach ( XmlDumpWriter::$supportedSchemas as $schemaVersion ) { + yield [ $schemaVersion ]; + } } /** -- 2.20.1