Follow-up r87176: Make importDump.php import files
author Bryan Tong Minh <btongminh@users.mediawiki.org>
Sun, 15 May 2011 10:39:15 +0000 (10:39 +0000)
committer Bryan Tong Minh <btongminh@users.mediawiki.org>
Sun, 15 May 2011 10:39:15 +0000 (10:39 +0000)
* Fixes for Import.php: Check sha1 of the file; only delete source files if they are temporary
* importDump.php now imports embedded files if --uploads is set; if they are not present it will try to get them from --image-base-path

RELEASE-NOTES-1.19
includes/Import.php
maintenance/importDump.php

index 436ccea..ab134fa 100644 (file)
@@ -25,6 +25,9 @@ production.
 * (bug 28503) Support for ircs:// URL protocols
 * (bug 26033) It is now possible to count all non-redirect pages in content
   namespaces as articles
+* Images can now be embedded in an XML dump stream using dumpBackup.php
+  --include-files and imported using importDump.php --uploads; importDump.php
+  can also import files from the filesystem using --image-base-path
 
 === Bug fixes in 1.19 ===
 * (bug 10154) Don't allow user to specify days beyond $wgRCMaxAge.
index 757b3d2..8bd65a6 100644 (file)
@@ -177,6 +177,9 @@ class WikiImporter {
        public function setImageBasePath( $dir ) {
                $this->mImageBasePath = $dir;
        }
+       public function setImportUploads( $import ) {
+               $this->mImportUploads = $import;
+       }
 
        /**
         * Default per-revision callback, performs the import.
@@ -612,6 +615,7 @@ class WikiImporter {
                                $encoding = $this->reader->getAttribute( 'encoding' );
                                if ( $encoding === 'base64' ) {
                                        $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
+                                       $uploadInfo['isTempSrc'] = true;
                                }
                        } elseif ( $tag != '#text' ) {
                                $this->warn( "Unhandled upload XML tag $tag" );
@@ -623,6 +627,7 @@ class WikiImporter {
                        $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
                        if ( file_exists( $path ) ) {
                                $uploadInfo['fileSrc'] = $path;
+                               $uploadInfo['isTempSrc'] = false;
                        }
                }
 
@@ -652,7 +657,11 @@ class WikiImporter {
                }
                $revision->setSrc( $uploadInfo['src'] );
                if ( isset( $uploadInfo['fileSrc'] ) ) {
-                       $revision->setFileSrc( $uploadInfo['fileSrc'] );
+                       $revision->setFileSrc( $uploadInfo['fileSrc'],
+                               !empty( $uploadInfo['isTempSrc'] ) );
+               }
+               if ( isset( $uploadInfo['sha1base36'] ) ) {
+                       $revision->setSha1Base36( $uploadInfo['sha1base36'] );
                }
                $revision->setSize( intval( $uploadInfo['size'] ) );
                $revision->setComment( $uploadInfo['comment'] );
@@ -836,6 +845,8 @@ class WikiRevision {
        var $action = "";
        var $params = "";
        var $fileSrc = '';
+       var $sha1base36 = false;
+       var $isTemp = false;
        var $archiveName = '';
 
        function setTitle( $title ) {
@@ -880,8 +891,12 @@ class WikiRevision {
        function setSrc( $src ) {
                $this->src = $src;
        }
-       function setFileSrc( $src ) {
+       function setFileSrc( $src, $isTemp = false ) { // $isTemp: true when $src is a temporary file that may be deleted after import
                $this->fileSrc = $src;
+               $this->isTemp = $isTemp; // store in the declared $isTemp property, which isTempSrc() reads
+       }
+       function setSha1Base36( $sha1base36 ) { 
+               $this->sha1base36 = $sha1base36;
        }
 
        function setFilename( $filename ) {
@@ -941,9 +956,18 @@ class WikiRevision {
        function getSrc() {
                return $this->src;
        }
+       function getSha1() { // hex SHA-1 of the file, or false when no base-36 hash was set
+               if ( $this->sha1base36 ) {
+                       return wfBaseConvert( $this->sha1base36, 36, 16, 40 ); // pad to 40 digits so it can strictly equal sha1_file() output
+               }
+               return false;
+       }
        function getFileSrc() {
                return $this->fileSrc;
        }
+       function isTempSrc() {
+               return $this->isTemp;
+       }
 
        function getFilename() {
                return $this->filename;
@@ -1118,23 +1142,30 @@ class WikiRevision {
                
                # Get the file source or download if necessary
                $source = $this->getFileSrc();
+               $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
                if ( !$source ) {
                        $source = $this->downloadSource();
+                       $flags |= File::DELETE_SOURCE;
                }
                if( !$source ) {
                        wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
                        return false;
                }
+               $sha1 = $this->getSha1();
+               if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
+                       wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
+                       return false;
+               }
 
                $user = User::newFromName( $this->user_text );
                
                # Do the actual upload
                if ( $archiveName ) {
                        $status = $file->uploadOld( $source, $archiveName, 
-                               $this->getTimestamp(), $this->getComment(), $user, File::DELETE_SOURCE );
+                               $this->getTimestamp(), $this->getComment(), $user, $flags );
                } else {
                        $status = $file->upload( $source, $this->getComment(), $this->getComment(), 
-                               File::DELETE_SOURCE, false, $this->getTimestamp(), $user );
+                               $flags, false, $this->getTimestamp(), $user );
                }
                
                if ( $status->isGood() ) {
index 5397959..a3dc745 100644 (file)
@@ -38,6 +38,7 @@ class BackupReader {
        var $dryRun    = false;
        var $debug     = false;
        var $uploads   = false;
+       var $imageBasePath = false;
        var $nsFilter  = false;
 
        function __construct() {
@@ -201,6 +202,12 @@ class BackupReader {
                        array( &$this, 'handleUpload' ) );
                $this->logItemCallback = $importer->setLogItemCallback(
                        array( &$this, 'handleLogItem' ) );
+               if ( $this->uploads ) {
+                       $importer->setImportUploads( true );
+               }
+               if ( $this->imageBasePath ) {
+                       $importer->setImageBasePath( $this->imageBasePath );
+               }
 
                if ( $this->dryRun ) {
                        $importer->setPageOutCallback( null );
@@ -230,6 +237,7 @@ class BackupReader {
                echo "  --dry-run  Parse dump without actually importing pages.\n";
                echo "  --debug    Output extra verbose debug information\n";
                echo "  --uploads  Process file upload data if included (experimental)\n";
+               echo "  --image-base-path=path  Import files from a specified path\n";
                echo "\n";
                echo "Compressed XML files may be read directly:\n";
                echo "  .gz $gz\n";
@@ -259,6 +267,9 @@ if ( isset( $options['debug'] ) ) {
 if ( isset( $options['uploads'] ) ) {
        $reader->uploads = true; // experimental!
 }
+if ( isset( $options['image-base-path'] ) ) {
+       $reader->imageBasePath = $options['image-base-path'];
+}
 if ( isset( $options['namespaces'] ) ) {
        $reader->setNsfilter( explode( '|', $options['namespaces'] ) );
 }