--- /dev/null
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ This is an XML Schema description of the format
+ output by MediaWiki's Special:Export system.
+
+ Version 0.2 adds optional basic file upload info support,
+ which is used by our OAI export/import submodule.
+
+ Version 0.3 adds some site configuration information such
+ as a list of defined namespaces.
+
+ Version 0.4 adds per-revision delete flags, log exports,
+ discussion threading data, a per-page redirect flag, and
+ per-namespace capitalization.
+
+ Version 0.5 adds byte count per revision.
+
+ Version 0.6 adds a separate namespace tag, and resolves the
+ redirect target and adds a separate sha1 tag for each revision.
+
+ Version 0.7 adds a unique identity constraint for both page and
+ revision identifiers. See also bug 4220.
+ Fix type for <ns> from "positiveInteger" to "nonNegativeInteger" to allow 0
+ Moves <logitem> to its right location.
+ Add parentid to revision.
+ Fix type for <id> within <contributor> to "nonNegativeInteger"
+
+ Version 0.8 adds support for a <model> and a <format> tag for
+ each revision. See contenthandler.txt.
+
+ Version 0.9 adds the database name to the site information.
+
+ Version 0.10 moved the <model> and <format> tags before the <text> tag.
+
+ Version 0.11 introduced <content> tag.
+
+ The canonical URL to the schema document is:
+ http://www.mediawiki.org/xml/export-0.11.xsd
+
+ Use the namespace:
+ http://www.mediawiki.org/xml/export-0.11/
+-->
+<schema xmlns="http://www.w3.org/2001/XMLSchema"
+ xmlns:mw="http://www.mediawiki.org/xml/export-0.11/"
+ targetNamespace="http://www.mediawiki.org/xml/export-0.11/"
+ elementFormDefault="qualified">
+
+ <annotation>
+ <documentation xml:lang="en">
+ MediaWiki's page export format
+ </documentation>
+ </annotation>
+
+ <!-- Need this to reference xml:lang -->
+ <import namespace="http://www.w3.org/XML/1998/namespace"
+ schemaLocation="http://www.w3.org/2001/xml.xsd" />
+
+ <!-- Our root element -->
+ <element name="mediawiki" type="mw:MediaWikiType">
+ <!-- Page ID constraint, see bug 4220 -->
+ <unique name="PageIDConstraint">
+ <selector xpath="mw:page" />
+ <field xpath="mw:id" />
+ </unique>
+ <!-- Revision ID constraint, see bug 4220 -->
+ <unique name="RevIDConstraint">
+ <selector xpath="mw:page/mw:revision" />
+ <field xpath="mw:id" />
+ </unique>
+ </element>
+
+ <complexType name="MediaWikiType">
+ <sequence>
+ <element name="siteinfo" type="mw:SiteInfoType"
+ minOccurs="0" maxOccurs="1" />
+ <element name="page" type="mw:PageType"
+ minOccurs="0" maxOccurs="unbounded" />
+ <element name="logitem" type="mw:LogItemType"
+ minOccurs="0" maxOccurs="unbounded" />
+ </sequence>
+ <attribute name="version" type="string" use="required" />
+ <attribute ref="xml:lang" use="required" />
+ </complexType>
+
+ <complexType name="SiteInfoType">
+ <sequence>
+ <element name="sitename" type="string" minOccurs="0" />
+ <element name="dbname" type="string" minOccurs="0" />
+ <element name="base" type="anyURI" minOccurs="0" />
+ <element name="generator" type="string" minOccurs="0" />
+ <element name="case" type="mw:CaseType" minOccurs="0" />
+ <element name="namespaces" type="mw:NamespacesType" minOccurs="0" />
+ </sequence>
+ </complexType>
+
+ <simpleType name="CaseType">
+ <restriction base="NMTOKEN">
+ <!-- Cannot have two titles differing only by case of first letter. -->
+ <!-- Default behavior through 1.5, $wgCapitalLinks = true -->
+ <enumeration value="first-letter" />
+
+ <!-- Complete title is case-sensitive -->
+ <!-- Behavior when $wgCapitalLinks = false -->
+ <enumeration value="case-sensitive" />
+
+ <!-- Cannot have two titles differing only by case, e.g. [[FOO]] == [[Foo]] -->
+ <!-- Not yet implemented as of MediaWiki 1.18 -->
+ <enumeration value="case-insensitive" />
+ </restriction>
+ </simpleType>
+
+ <simpleType name="DeletedFlagType">
+ <restriction base="NMTOKEN">
+ <enumeration value="deleted" />
+ </restriction>
+ </simpleType>
+
+ <complexType name="NamespacesType">
+ <sequence>
+ <element name="namespace" type="mw:NamespaceType"
+ minOccurs="0" maxOccurs="unbounded" />
+ </sequence>
+ </complexType>
+
+ <complexType name="NamespaceType">
+ <simpleContent>
+ <extension base="string">
+ <attribute name="key" type="integer" />
+ <attribute name="case" type="mw:CaseType" />
+ </extension>
+ </simpleContent>
+ </complexType>
+
+ <complexType name="RedirectType">
+ <simpleContent>
+ <extension base="string">
+ <attribute name="title" type="string" />
+ </extension>
+ </simpleContent>
+ </complexType>
+
+ <simpleType name="ContentModelType">
+ <restriction base="string">
+ <pattern value="[a-zA-Z][-+./a-zA-Z0-9]*" />
+ </restriction>
+ </simpleType>
+
+ <simpleType name="ContentFormatType">
+ <restriction base="string">
+ <pattern value="[a-zA-Z][-+.a-zA-Z0-9]*/[a-zA-Z][-+.a-zA-Z0-9]*" />
+ </restriction>
+ </simpleType>
+
+ <complexType name="PageType">
+ <sequence>
+ <!-- Title in text form. (Using spaces, not underscores; with namespace ) -->
+ <element name="title" type="string" />
+
+ <!-- Namespace in canonical form -->
+ <element name="ns" type="nonNegativeInteger" />
+
+ <!-- optional page ID number -->
+ <element name="id" type="positiveInteger" />
+
+ <!-- flag if the current revision is a redirect -->
+ <element name="redirect" type="mw:RedirectType" minOccurs="0" maxOccurs="1" />
+
+ <!-- comma-separated list of string tokens, if present -->
+ <element name="restrictions" type="string" minOccurs="0" />
+
+ <!-- Zero or more sets of revision or upload data -->
+ <choice minOccurs="0" maxOccurs="unbounded">
+ <element name="revision" type="mw:RevisionType" />
+ <element name="upload" type="mw:UploadType" />
+ </choice>
+
+ <!-- Zero or One sets of discussion threading data -->
+ <element name="discussionthreadinginfo" minOccurs="0" maxOccurs="1" type="mw:DiscussionThreadingInfo" />
+ </sequence>
+ </complexType>
+
+ <complexType name="RevisionType">
+ <sequence>
+ <element name="id" type="positiveInteger" />
+ <element name="parentid" type="positiveInteger" minOccurs="0" maxOccurs="1"/>
+ <element name="timestamp" type="dateTime" />
+ <element name="contributor" type="mw:ContributorType" />
+ <element name="minor" minOccurs="0" maxOccurs="1"/>
+ <element name="comment" type="mw:CommentType"/>
+ <!-- corresponds to slot origin for the main slot -->
+ <element name="origin" type="positiveInteger" />
+ <!-- the main slot's content model -->
+ <element name="model" type="mw:ContentModelType" />
+ <!-- the main slot's serialization format -->
+ <element name="format" type="mw:ContentFormatType" />
+ <!-- the main slot's serialized content -->
+ <element name="text" type="mw:TextType"/>
+ <element name="content" type="mw:ContentType" minOccurs="0" maxOccurs="unbounded"/>
+ <!-- sha1 of the revision, a combined sha1 of content in all slots -->
+ <element name="sha1" type="string" />
+ </sequence>
+ </complexType>
+
+ <complexType name="ContentType">
+ <sequence>
+ <!-- corresponds to slot role_name -->
+ <element name="role" type="mw:SlotRoleType" />
+ <!-- corresponds to slot origin -->
+ <element name="origin" type="positiveInteger" />
+ <element name="model" type="mw:ContentModelType" />
+ <element name="format" type="mw:ContentFormatType" />
+ <element name="text" type="mw:ContentTextType" />
+ </sequence>
+ </complexType>
+
+ <simpleType name="SlotRoleType">
+ <restriction base="string">
+ <pattern value="[a-zA-Z][-+./a-zA-Z0-9]*" />
+ </restriction>
+ </simpleType>
+
+ <complexType name="ContentTextType">
+ <simpleContent>
+ <extension base="string">
+ <attribute ref="xml:space" default="preserve" />
+ <!-- This allows deleted=deleted on non-empty elements, but XSD is not omnipotent -->
+ <attribute name="deleted" type="mw:DeletedFlagType" />
+ <attribute name="location" type="anyURI" />
+ <attribute name="bytes" type="nonNegativeInteger" />
+ </extension>
+ </simpleContent>
+ </complexType>
+
+ <complexType name="LogItemType">
+ <sequence>
+ <element name="id" type="positiveInteger" />
+ <element name="timestamp" type="dateTime" />
+ <element name="contributor" type="mw:ContributorType" />
+ <element name="comment" type="mw:CommentType" minOccurs="0" />
+ <element name="type" type="string" />
+ <element name="action" type="string" />
+ <element name="text" type="mw:LogTextType" minOccurs="0" maxOccurs="1" />
+ <element name="logtitle" type="string" minOccurs="0" maxOccurs="1" />
+ <element name="params" type="mw:LogParamsType" minOccurs="0" maxOccurs="1" />
+ </sequence>
+ </complexType>
+
+ <complexType name="CommentType">
+ <simpleContent>
+ <extension base="string">
+ <!-- This allows deleted=deleted on non-empty elements, but XSD is not omnipotent -->
+ <attribute name="deleted" type="mw:DeletedFlagType" />
+ </extension>
+ </simpleContent>
+ </complexType>
+
+ <complexType name="TextType">
+ <simpleContent>
+ <extension base="string">
+ <attribute ref="xml:space" default="preserve" />
+ <!-- This allows deleted=deleted on non-empty elements, but XSD is not omnipotent -->
+ <attribute name="deleted" type="mw:DeletedFlagType" />
+ <!-- This isn't a good idea; we should be using "ID" instead of "NMTOKEN" -->
+ <!-- However, "NMTOKEN" is strictest definition that is both compatible with existing -->
+ <!-- usage ([0-9]+) and with the "ID" type. -->
+ <attribute name="id" type="NMTOKEN" />
+ <attribute name="location" type="anyURI" />
+ <attribute name="sha1" type="string"/>
+ <attribute name="bytes" type="nonNegativeInteger" />
+ </extension>
+ </simpleContent>
+ </complexType>
+
+ <complexType name="LogTextType">
+ <simpleContent>
+ <extension base="string">
+ <!-- This allows deleted=deleted on non-empty elements, but XSD is not omnipotent -->
+ <attribute name="deleted" type="mw:DeletedFlagType" />
+ </extension>
+ </simpleContent>
+ </complexType>
+
+ <complexType name="LogParamsType">
+ <simpleContent>
+ <extension base="string">
+ <attribute ref="xml:space" default="preserve" />
+ </extension>
+ </simpleContent>
+ </complexType>
+
+ <complexType name="ContributorType">
+ <sequence>
+ <element name="username" type="string" minOccurs="0" />
+ <element name="id" type="nonNegativeInteger" minOccurs="0" />
+
+ <element name="ip" type="string" minOccurs="0" />
+ </sequence>
+ <!-- This allows deleted=deleted on non-empty elements, but XSD is not omnipotent -->
+ <attribute name="deleted" type="mw:DeletedFlagType" />
+ </complexType>
+
+ <complexType name="UploadType">
+ <sequence>
+ <!-- Revision-style data... -->
+ <element name="timestamp" type="dateTime" />
+ <element name="contributor" type="mw:ContributorType" />
+ <element name="comment" type="string" minOccurs="0" />
+
+ <!-- Filename. (Using underscores, not spaces. No 'File:' namespace marker.) -->
+ <element name="filename" type="string" />
+
+ <!-- URI at which this resource can be obtained -->
+ <element name="src" type="anyURI" />
+
+ <element name="size" type="positiveInteger" />
+
+ <!-- TODO: add other metadata fields -->
+ </sequence>
+ </complexType>
+
+ <!-- Discussion threading data for LiquidThreads -->
+ <complexType name="DiscussionThreadingInfo">
+ <sequence>
+ <element name="ThreadSubject" type="string" />
+ <element name="ThreadParent" type="positiveInteger" />
+ <element name="ThreadAncestor" type="positiveInteger" />
+ <element name="ThreadPage" type="string" />
+ <element name="ThreadID" type="positiveInteger" />
+ <element name="ThreadAuthor" type="string" />
+ <element name="ThreadEditStatus" type="string" />
+ <element name="ThreadType" type="string" />
+ </sequence>
+ </complexType>
+
+</schema>
add extra metadata.
&$obj: The XmlDumpWriter object.
&$out: The text being output.
-$row: The database row for the revision.
-$text: The revision text.
+$row: The database row for the revision being dumped. DEPRECATED, use $rev instead.
+$text: The revision text to be dumped. DEPRECATED, use $rev instead.
+$rev: The RevisionRecord that is being dumped to XML.
More hooks might be available but undocumented, you can execute
"php maintenance/findHooks.php" to find hidden ones.
* were already unsupported at the time these constants were introduced.
*/
define( 'XML_DUMP_SCHEMA_VERSION_10', '0.10' );
+define( 'XML_DUMP_SCHEMA_VERSION_11', '0.11' );
/**@}*/
const LOGS = 8;
const RANGE = 16;
- const TEXT = 0;
- const STUB = 1;
+ const TEXT = XmlDumpWriter::WRITE_CONTENT;
+ const STUB = XmlDumpWriter::WRITE_STUB;
const BATCH_SIZE = 50000;
* @file
*/
use MediaWiki\MediaWikiServices;
+use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionStore;
+use MediaWiki\Revision\SlotRecord;
+use MediaWiki\Revision\SuppressedDataException;
use MediaWiki\Storage\SqlBlobStore;
+use Wikimedia\Assert\Assert;
/**
* @ingroup Dump
*/
class XmlDumpWriter {
+
+ /** Output serialized revision content. */
+ const WRITE_CONTENT = 0;
+
+ /** Only output stubs for revision content. */
+ const WRITE_STUB = 1;
+
+ /**
+ * Only output stubs for revision content, indicating that the content has been
+ * deleted/suppressed. For internal use only.
+ */
+ const WRITE_STUB_DELETED = 2;
+
/**
* @var string[] the schema versions supported for output
* @final
*/
public static $supportedSchemas = [
XML_DUMP_SCHEMA_VERSION_10,
+ XML_DUMP_SCHEMA_VERSION_11
];
+ /**
+ * @var string which schema version the generated XML should comply with.
+ * One of the values from self::$supportedSchemas, using the
+ * XML_DUMP_SCHEMA_VERSION_XX constants.
+ */
+ private $schemaVersion;
+
/**
* Title of the currently processed page
*
*/
private $currentTitle = null;
+ /**
+ * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB.
+ */
+ private $contentMode;
+
+	/**
+	 * Creates a dump writer for the given content mode and schema version.
+	 *
+	 * Both arguments are validated with strict comparison. A loose check would let
+	 * values such as null, false or '0' pass as WRITE_CONTENT (which is 0), even though
+	 * writeSlot() compares the mode with === and would then silently emit stubs.
+	 * Likewise the numeric string '0.1' is loosely equal to '0.10' and would slip
+	 * through as a "supported" schema version.
+	 *
+	 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
+	 * @param string $schemaVersion which schema version the generated XML should comply with.
+	 *        One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
+	 *        constants.
+	 *
+	 * NOTE(review): Assert::parameter() is expected to raise on a failed check; the exact
+	 * exception type is defined by Wikimedia\Assert and is not visible here.
+	 */
+	public function __construct(
+		$contentMode = self::WRITE_CONTENT,
+		$schemaVersion = XML_DUMP_SCHEMA_VERSION_11
+	) {
+		Assert::parameter(
+			in_array( $contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ], true ),
+			'$contentMode',
+			'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
+		);
+
+		Assert::parameter(
+			in_array( $schemaVersion, self::$supportedSchemas, true ),
+			'$schemaVersion',
+			'must be one of the following schema versions: '
+				. implode( ',', self::$supportedSchemas )
+		);
+
+		$this->contentMode = $contentMode;
+		$this->schemaVersion = $schemaVersion;
+	}
+
/**
* Opens the XML output stream's root "<mediawiki>" element.
* This does not include an xml directive, so is safe to include
* @return string
*/
function openStream() {
- $ver = WikiExporter::schemaVersion();
+ $ver = $this->schemaVersion;
return Xml::element( 'mediawiki', [
'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
);
$out = " <revision>\n";
- $out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
- if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
- $out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
+ $out .= " " . Xml::element( 'id', null, strval( $rev->getId() ) ) . "\n";
+
+ if ( $rev->getParentId() ) {
+ $out .= " " . Xml::element( 'parentid', null, strval( $rev->getParentId() ) ) . "\n";
}
- $out .= $this->writeTimestamp( $row->rev_timestamp );
+ $out .= $this->writeTimestamp( $rev->getTimestamp() );
- if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
+ if ( $rev->isDeleted( Revision::DELETED_USER ) ) {
$out .= " " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
} else {
// empty values get written out as uid 0, see T224221
- $out .= $this->writeContributor( $row->rev_user ?: 0, $row->rev_user_text );
+ $user = $rev->getUser();
+ $out .= $this->writeContributor(
+ $user ? $user->getId() : 0,
+ $user ? $user->getName() : ''
+ );
}
- if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
+ if ( $rev->isMinor() ) {
$out .= " <minor/>\n";
}
- if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
+ if ( $rev->isDeleted( Revision::DELETED_COMMENT ) ) {
$out .= " " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
} else {
- $comment = CommentStore::getStore()->getComment( 'rev_comment', $row )->text;
- if ( $comment != '' ) {
- $out .= " " . Xml::elementClean( 'comment', [], strval( $comment ) ) . "\n";
- }
+ $out .= " "
+ . Xml::elementClean( 'comment', [], strval( $rev->getComment()->text ) )
+ . "\n";
+ }
+
+ $contentMode = $rev->isDeleted( Revision::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
+ : $this->contentMode;
+
+ foreach ( $rev->getSlots()->getSlots() as $slot ) {
+ $out .= $this->writeSlot( $slot, $contentMode );
}
- // TODO: rev_content_model no longer exists with MCR, see T174031
- if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
- $content_model = strval( $row->rev_content_model );
+ if ( $rev->isDeleted( Revision::DELETED_TEXT ) ) {
+ $out .= " <sha1/>\n";
} else {
- // probably using $wgContentHandlerUseDB = false;
- $content_model = ContentHandler::getDefaultModelFor( $this->currentTitle );
+ $out .= " " . Xml::element( 'sha1', null, strval( $rev->getSha1() ) ) . "\n";
}
- $content_handler = ContentHandler::getForModelID( $content_model );
+ // Avoid PHP 7.1 warning from passing $this by reference
+ $writer = $this;
+ $text = $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
+ Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text, $rev ] );
- // TODO: rev_content_format no longer exists with MCR, see T174031
- if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
- $content_format = strval( $row->rev_content_format );
- } else {
- // probably using $wgContentHandlerUseDB = false;
- $content_format = $content_handler->getDefaultFormat();
+ $out .= " </revision>\n";
+
+ return $out;
+ }
+
+	/**
+	 * Renders one slot of a revision as an XML fragment.
+	 *
+	 * The main slot is written as bare <model>, <format> and <text> elements, preceded
+	 * by <origin> for schema version 0.11 and later. Any other slot is wrapped in a
+	 * <content> element that starts with its <role>; such slots are skipped entirely
+	 * for schema versions older than 0.11.
+	 *
+	 * @param SlotRecord $slot
+	 * @param int $contentMode see the WRITE_XXX constants
+	 *
+	 * @return string the XML fragment, or an empty string if the slot is skipped
+	 *
+	 * @throws InvalidArgumentException when a stub is requested for a blob without a
+	 *         text ID and the schema version is older than 0.11
+	 */
+	private function writeSlot( SlotRecord $slot, $contentMode ) {
+		$isMain = $slot->getRole() === SlotRecord::MAIN;
+		// Schema versions are dotted identifiers, not decimals: compared as numbers,
+		// '0.9' would rank above '0.11'. version_compare() orders them correctly.
+		$isV11 = version_compare( $this->schemaVersion, XML_DUMP_SCHEMA_VERSION_11, '>=' );
+
+		if ( !$isV11 && !$isMain ) {
+			// ignore extra slots
+			return '';
}
- $out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
- $out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";
+		$out = '';
+		$indent = ' ';
- $text = '';
- if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
- $out .= " " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
- } elseif ( isset( $row->old_text ) ) {
- // Raw text from the database may have invalid chars
- $text = strval( Revision::getRevisionText( $row ) );
- try {
- $text = $content_handler->exportTransform( $text, $content_format );
- }
- catch ( Exception $ex ) {
- if ( $ex instanceof MWException || $ex instanceof RuntimeException ) {
- // leave text as is; that's the way it goes
- wfLogWarning( 'exportTransform failed on text for revid ' . $row->rev_id . "\n" );
- } else {
- throw $ex;
- }
- }
- $out .= " " . Xml::elementClean( 'text',
- [ 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ],
- strval( $text ) ) . "\n";
- } elseif ( isset( $row->_load_content ) ) {
- // TODO: make this fully MCR aware, see T174031
- $slot = $rev->getSlot( 'main' );
- try {
- $content = $slot->getContent();
+		if ( !$isMain ) {
+			// non-main slots are wrapped into an additional element.
+			$out .= ' ' . Xml::openElement( 'content' ) . "\n";
+			$indent .= ' ';
+			$out .= $indent . Xml::element( 'role', null, strval( $slot->getRole() ) ) . "\n";
+		}
- if ( $content instanceof TextContent ) {
- // HACK: For text based models, bypass the serialization step.
- // This allows extensions (like Flow)that use incompatible combinations
- // of serialization format and content model.
- $text = $content->getNativeData();
- } else {
- $text = $content->serialize( $content_format );
- }
- $text = $content_handler->exportTransform( $text, $content_format );
- $out .= " " . Xml::elementClean( 'text',
- [ 'xml:space' => 'preserve', 'bytes' => intval( $slot->getSize() ) ],
- strval( $text ) ) . "\n";
+		if ( $isV11 ) {
+			$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
+		}
+
+		$contentModel = $slot->getModel();
+		$contentHandler = ContentHandler::getForModelID( $contentModel );
+		$contentFormat = $contentHandler->getDefaultFormat();
+
+		// XXX: The content format is only relevant when actually outputting serialized content.
+		// It should probably be an attribute on the text tag.
+		$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
+		$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";
+
+		$textAttributes = [
+			'xml:space' => 'preserve',
+			'bytes' => $slot->getSize(),
+		];
+
+		if ( $isV11 ) {
+			$textAttributes['sha1'] = $slot->getSha1();
+		}
+
+		if ( $contentMode === self::WRITE_CONTENT ) {
+			try {
+				// write <text> tag
+				$out .= $this->writeText( $slot->getContent(), $textAttributes, $indent );
+			} catch ( SuppressedDataException $ex ) {
+				// NOTE: this shouldn't happen, since the caller is supposed to have checked
+				// for suppressed content!
+				// write <text> placeholder tag
+				$textAttributes['deleted'] = 'deleted';
+				$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
}
catch ( Exception $ex ) {
if ( $ex instanceof MWException || $ex instanceof RuntimeException ) {
- // there's no provsion in the schema for an attribute that will let
+					// there's no provision in the schema for an attribute that will let
// the user know this element was unavailable due to error; an empty
// tag is the best we can do
- $out .= " " . Xml::element( 'text' ) . "\n";
- wfLogWarning( 'failed to load content for revid ' . $row->rev_id . "\n" );
+					$out .= $indent . Xml::element( 'text' ) . "\n";
+					wfLogWarning(
+						'failed to load content slot ' . $slot->getRole() . ' for revision '
+						. $slot->getRevision() . "\n"
+					);
} else {
throw $ex;
}
}
- } elseif ( isset( $row->rev_text_id ) ) {
- // Stub output for pre-MCR schema
- // TODO: MCR: rev_text_id only exists in the pre-MCR schema. Remove this when
- // we drop support for the old schema.
- $out .= " " . Xml::element( 'text',
- [ 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ],
- "" ) . "\n";
+		} elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
+			// write <text> placeholder tag
+			$textAttributes['deleted'] = 'deleted';
+			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
} else {
- // Backwards-compatible stub output for MCR aware schema
- // TODO: MCR: emit content addresses instead of text ids, see T174031, T199121
- $slot = $rev->getSlot( 'main' );
+			// write <text> stub tag
+			if ( $isV11 ) {
+				$textAttributes['location'] = $slot->getAddress();
+			}
+			// Output the numerical text ID if possible, for backwards compatibility.
// Note that this is currently the ONLY reason we have a BlobStore here at all.
// When removing this line, check whether the BlobStore has become unused.
$textId = $this->getBlobStore()->getTextIdFromAddress( $slot->getAddress() );
- $out .= " " . Xml::element( 'text',
- [ 'id' => $textId, 'bytes' => intval( $slot->getSize() ) ],
- "" ) . "\n";
+			if ( $textId ) {
+				$textAttributes['id'] = $textId;
+			} elseif ( !$isV11 ) {
+				// Pre-0.11 stubs can only reference content through a text ID.
+				throw new InvalidArgumentException(
+					'Cannot produce stubs for non-text-table content blobs with schema version '
+					. $this->schemaVersion
+				);
+			}
+
+			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
}
- if ( isset( $row->rev_sha1 )
- && $row->rev_sha1
- && !( $row->rev_deleted & Revision::DELETED_TEXT )
- ) {
- $out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
- } else {
- $out .= " <sha1/>\n";
+		if ( !$isMain ) {
+			$out .= ' ' . Xml::closeElement( 'content' ) . "\n";
}
- // Avoid PHP 7.1 warning from passing $this by reference
- $writer = $this;
- Hooks::run( 'XmlDumpWriterWriteRevision', [ &$writer, &$out, $row, $text ] );
+		return $out;
+	}
- $out .= " </revision>\n";
+	/**
+	 * Serializes a Content object into a <text> element.
+	 *
+	 * The data is passed through the content handler's exportTransform() before it is
+	 * written. Any 'bytes' attribute supplied by the caller is replaced with the byte
+	 * length of the data that is actually emitted.
+	 *
+	 * @param Content $content
+	 * @param array $textAttributes attributes for the <text> tag; 'bytes' is overwritten
+	 * @param string $indent prefix placed in front of the generated tag
+	 *
+	 * @return string
+	 */
+	private function writeText( Content $content, $textAttributes, $indent ) {
+		$out = '';
+
+		$contentHandler = $content->getContentHandler();
+		$contentFormat = $contentHandler->getDefaultFormat();
+
+		if ( $content instanceof TextContent ) {
+			// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
+			// that use incompatible combinations of serialization format and content model.
+			$data = $content->getNativeData();
+		} else {
+			$data = $content->serialize( $contentFormat );
+		}
+
+		$data = $contentHandler->exportTransform( $data, $contentFormat );
+		// make sure to use the actual size of what is written, not the stored slot size
+		$textAttributes['bytes'] = strlen( $data );
+		$out .= $indent . Xml::elementClean( 'text', $textAttributes, strval( $data ) ) . "\n";
return $out;
}
$this->finalOptionCheck();
// we only want this so we know how to close a stream :-P
- $this->xmlwriterobj = new XmlDumpWriter();
+ $this->xmlwriterobj = new XmlDumpWriter( XmlDumpWriter::WRITE_CONTENT, $this->schemaVersion );
$input = fopen( $this->input, "rt" );
$this->readDump( $input );
}
}
+	/**
+	 * Verifies that the reader sits on an element of the given name and that the
+	 * element carries no value, then optionally moves past it.
+	 *
+	 * When skipping, the reader is advanced once; if that lands on the matching
+	 * closing tag (the element was written as an open/close pair), the closing tag
+	 * is consumed as well.
+	 *
+	 * @param string $name The name of the element to check for
+	 *           (e.g.: "text" for <text/>)
+	 * @param bool $skip (optional) if true, skip past the found element
+	 * @param bool $skip_ws (optional) if true, also skip past white spaces that trail the
+	 *           closing element.
+	 */
+	public function assertEmptyNode( $name, $skip = true, $skip_ws = true ) {
+		$this->assertNodeStart( $name, false );
+		Assert::assertFalse( $this->xml->hasValue, "$name tag has content" );
+
+		if ( !$skip ) {
+			// Caller wants the reader left on the element itself.
+			return;
+		}
+
+		Assert::assertTrue( $this->xml->read(), "Skipping $name tag" );
+
+		$reader = $this->xml;
+		$onClosingTag = $reader->nodeType == XMLReader::END_ELEMENT && $reader->name == $name;
+		if ( $onClosingTag ) {
+			$reader->read();
+		}
+
+		if ( $skip_ws ) {
+			$this->skipWhitespace();
+		}
+	}
+
/**
* Asserts that the xml reader is at a closing element of given name, and optionally
* skips past it.
$this->assertTextNode( "comment", $summary );
$this->skipWhitespace();
+ if ( $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11 ) {
+ $this->assertTextNode( "origin", false );
+ $this->skipWhitespace();
+ }
+
$this->assertTextNode( "model", $model );
$this->skipWhitespace();
$this->assertText( $id, $text_id, $text_bytes, $text );
} else {
$text_found = false;
+ if ( $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11 ) {
+ Assert::fail( 'Missing text node' );
+ }
}
- $this->assertTextNode( "sha1", $text_sha1 );
+ if ( $text_sha1 ) {
+ $this->assertTextNode( "sha1", $text_sha1 );
+ } else {
+ $this->assertEmptyNode( "sha1" );
+ }
if ( !$text_found ) {
$this->assertText( $id, $text_id, $text_bytes, $text );
}
if ( $text === false ) {
- // Testing for a stub
Assert::assertEquals( $this->xml->getAttribute( "id" ), $text_id,
"Text id of revision " . $id );
- Assert::assertFalse( $this->xml->hasValue, "Revision has text" );
- Assert::assertTrue( $this->xml->read(), "Skipping text start tag" );
- if ( ( $this->xml->nodeType == XMLReader::END_ELEMENT )
- && ( $this->xml->name == "text" )
- ) {
- $this->xml->read();
- }
- $this->skipWhitespace();
+ $this->assertEmptyNode( "text" );
} else {
// Testing for a real dump
Assert::assertTrue( $this->xml->read(), "Skipping text start tag" );
use DumpBackup;
use Exception;
use MediaWiki\MediaWikiServices;
+use MediaWiki\Revision\RevisionRecord;
use MediaWikiTestCase;
use MWException;
+use RequestContext;
+use RevisionDeleter;
use Title;
use WikiExporter;
use Wikimedia\Rdbms\IDatabase;
"BackupDumperTestP2Summary4 extra " );
$this->pageId2 = $page->getId();
+ $revDel = RevisionDeleter::createList(
+ 'revision',
+ RequestContext::getMain(),
+ $this->pageTitle2,
+ [ $this->revId2_2 ]
+ );
+ $revDel->setVisibility( [
+ 'value' => [ RevisionRecord::DELETED_TEXT => 1 ],
+ 'comment' => 'testing!'
+ ] );
+
$this->pageTitle3 = Title::newFromText( 'BackupDumperTestP3', $this->namespace );
$page = WikiPage::factory( $this->pageTitle3 );
list( $this->revId3_1, $this->textId3_1 ) = $this->addRevision( $page,
$asserter->assertRevision(
$this->revId2_2,
"BackupDumperTestP2Summary2",
- $this->textId2_2,
- 23,
- "b7vj5ks32po5m1z1t1br4o7scdwwy95",
- "BackupDumperTestP2Text2",
+ null, // deleted!
+ false, // deleted!
+ null, // deleted!
+ false, // deleted!
$this->revId2_1
);
$asserter->assertRevision(
$asserter->assertRevision(
$this->revId2_2,
"BackupDumperTestP2Summary2",
- $this->textId2_2,
- 23,
- "b7vj5ks32po5m1z1t1br4o7scdwwy95",
- false,
+ null, // deleted!
+ false, // deleted!
+ null, // deleted!
+ false, // deleted!
$this->revId2_1
);
$asserter->assertRevision(
$asserter->assertRevision(
$this->revId2_2,
"BackupDumperTestP2Summary2",
- $this->textId2_2,
- 23,
- "b7vj5ks32po5m1z1t1br4o7scdwwy95",
- false,
+ null, // deleted!
+ false, // deleted!
+ null, // deleted!
+ false, // deleted!
$this->revId2_1
);
$asserter->assertRevision(