Merge "Throw exception in importDump instead of dumping a random backtrace and erroring"
[lhc/web/wiklou.git] / includes / Export.php
index 7295a76..43dfd17 100644 (file)
  * @ingroup SpecialPage Dump
  */
 class WikiExporter {
-       var $list_authors = false; # Return distinct author list (when not returning full history)
-       var $author_list = "";
+       /** @var bool Return distinct author list (when not returning full history) */
+       public $list_authors = false;
 
-       var $dumpUploads = false;
-       var $dumpUploadFileContents = false;
+       /** @var bool */
+       public $dumpUploads = false;
+
+       /** @var bool */
+       public $dumpUploadFileContents = false;
+
+       /** @var string */
+       protected $author_list = "";
 
        const FULL = 1;
        const CURRENT = 2;
@@ -49,21 +55,21 @@ class WikiExporter {
        const TEXT = 0;
        const STUB = 1;
 
-       var $buffer;
+       /** @var int */
+       protected $buffer;
 
-       var $text;
+       /** @var int */
+       protected $text;
 
-       /**
-        * @var DumpOutput
-        */
-       var $sink;
+       /** @var DumpOutput */
+       protected $sink;
 
        /**
         * Returns the export schema version.
         * @return string
         */
        public static function schemaVersion() {
-               return "0.8";
+               return "0.9";
        }
 
        /**
@@ -501,8 +507,17 @@ class XmlDumpWriter {
                return Xml::element( 'mediawiki', array(
                        'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
                        'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
+                       /*
+                        * When a new version of the schema is created, it needs staging on mediawiki.org.
+                        * This requires a change in the operations/mediawiki-config git repo.
+                        *
+                        * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
+                        * you copy in the new xsd file.
+                        *
+                        * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging, e.g.:
+                        * echo "http://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
+                        */
                        'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
-                               #TODO: how do we get a new version up there?
                                "http://www.mediawiki.org/xml/export-$ver.xsd",
                        'version'            => $ver,
                        'xml:lang'           => $wgLanguageCode ),
@@ -517,6 +532,7 @@ class XmlDumpWriter {
        function siteInfo() {
                $info = array(
                        $this->sitename(),
+                       $this->dbname(),
                        $this->homelink(),
                        $this->generator(),
                        $this->caseSetting(),
@@ -534,6 +550,14 @@ class XmlDumpWriter {
                return Xml::element( 'sitename', array(), $wgSitename );
        }
 
+       /**
+        * Builds the 'dbname' element that siteInfo() places right after the
+        * sitename entry, using the value of the global $wgDBname as its content.
+        *
+        * @return string Whatever Xml::element() produces for the 'dbname' tag
+        */
+       function dbname() {
+               global $wgDBname;
+               return Xml::element( 'dbname', array(), $wgDBname );
+       }
+
        /**
         * @return string
         */
@@ -664,12 +688,30 @@ class XmlDumpWriter {
                        $out .= "      " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
                }
 
+               if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
+                       $content_model = strval( $row->rev_content_model );
+               } else {
+                       // probably using $wgContentHandlerUseDB = false;
+                       $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+                       $content_model = ContentHandler::getDefaultModelFor( $title );
+               }
+
+               $content_handler = ContentHandler::getForModelID( $content_model );
+
+               if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
+                       $content_format = strval( $row->rev_content_format );
+               } else {
+                       // probably using $wgContentHandlerUseDB = false;
+                       $content_format = $content_handler->getDefaultFormat();
+               }
+
                $text = '';
                if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
                        $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
                } elseif ( isset( $row->old_text ) ) {
                        // Raw text from the database may have invalid chars
                        $text = strval( Revision::getRevisionText( $row ) );
+                       $text = $content_handler->exportTransform( $text, $content_format );
                        $out .= "      " . Xml::elementClean( 'text',
                                array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
                                strval( $text ) ) . "\n";
@@ -689,26 +731,7 @@ class XmlDumpWriter {
                        $out .= "      <sha1/>\n";
                }
 
-               if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
-                       $content_model = strval( $row->rev_content_model );
-               } else {
-                       // probably using $wgContentHandlerUseDB = false;
-                       // @todo test!
-                       $title = Title::makeTitle( $row->page_namespace, $row->page_title );
-                       $content_model = ContentHandler::getDefaultModelFor( $title );
-               }
-
                $out .= "      " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
-
-               if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
-                       $content_format = strval( $row->rev_content_format );
-               } else {
-                       // probably using $wgContentHandlerUseDB = false;
-                       // @todo test!
-                       $content_handler = ContentHandler::getForModelID( $content_model );
-                       $content_format = $content_handler->getDefaultFormat();
-               }
-
                $out .= "      " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";
 
                wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$this, &$out, $row, $text ) );
@@ -1144,7 +1167,6 @@ class DumpPipeOutput extends DumpFileOutput {
                        }
                }
        }
-
 }
 
 /**
@@ -1152,7 +1174,6 @@ class DumpPipeOutput extends DumpFileOutput {
  * @ingroup Dump
  */
 class DumpGZipOutput extends DumpPipeOutput {
-
        /**
         * @param string $file
         */
@@ -1166,7 +1187,6 @@ class DumpGZipOutput extends DumpPipeOutput {
  * @ingroup Dump
  */
 class DumpBZip2Output extends DumpPipeOutput {
-
        /**
         * @param string $file
         */
@@ -1180,7 +1200,6 @@ class DumpBZip2Output extends DumpPipeOutput {
  * @ingroup Dump
  */
 class Dump7ZipOutput extends DumpPipeOutput {
-
        /**
         * @param string $file
         */
@@ -1227,7 +1246,6 @@ class Dump7ZipOutput extends DumpPipeOutput {
  * @ingroup Dump
  */
 class DumpFilter {
-
        /**
         * @var DumpOutput
         * FIXME will need to be made protected whenever legacy code
@@ -1337,7 +1355,6 @@ class DumpFilter {
  * @ingroup Dump
  */
 class DumpNotalkFilter extends DumpFilter {
-
        /**
         * @param object $page
         * @return bool
@@ -1352,8 +1369,11 @@ class DumpNotalkFilter extends DumpFilter {
  * @ingroup Dump
  */
 class DumpNamespaceFilter extends DumpFilter {
-       var $invert = false;
-       var $namespaces = array();
+       /** @var bool */
+       protected $invert = false;
+
+       /** @var array */
+       protected $namespaces = array();
 
        /**
         * @param DumpOutput $sink
@@ -1417,7 +1437,13 @@ class DumpNamespaceFilter extends DumpFilter {
  * @ingroup Dump
  */
 class DumpLatestFilter extends DumpFilter {
-       var $page, $pageString, $rev, $revString;
+       protected $page;
+
+       protected $pageString;
+
+       protected $rev;
+
+       protected $revString;
 
        /**
         * @param object $page
@@ -1543,24 +1569,4 @@ class DumpMultiWriter {
                }
                return $filenames;
        }
-
-}
-
-/**
- * @param string $string
- * @return string
- */
-function xmlsafe( $string ) {
-       wfProfileIn( __FUNCTION__ );
-
-       /**
-        * The page may contain old data which has not been properly normalized.
-        * Invalid UTF-8 sequences or forbidden control characters will make our
-        * XML output invalid, so be sure to strip them out.
-        */
-       $string = UtfNormal::cleanUp( $string );
-
-       $string = htmlspecialchars( $string );
-       wfProfileOut( __FUNCTION__ );
-       return $string;
 }