From 43d5d3b682cc1733ad01a837d11af4a402d57e6a Mon Sep 17 00:00:00 2001 From: Bryan Tong Minh Date: Sun, 15 May 2011 10:39:15 +0000 Subject: [PATCH] Follow-up r87176: Make importDump.php import files * Fixes for Import.php: Check sha1 of the file; only delete source files if they are temporary * importDump.php now imports embedded files if --uploads is set; if they are not present it will try to get them from --image-base-path --- RELEASE-NOTES-1.19 | 3 +++ includes/Import.php | 39 ++++++++++++++++++++++++++++++++++---- maintenance/importDump.php | 11 +++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index 436cceab0d..ab134faa2f 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -25,6 +25,9 @@ production. * (bug 28503) Support for ircs:// URL protocols * (bug 26033) It is now possible to count all non-redirect pages in content namespaces as articles +* Images can now be embedded in an XML dump stream using dumpBackup.php + --include-files and can be imported using importDump.php --uploads; + furthermore, it can import files from the filesystem using --image-base-path === Bug fixes in 1.19 === * (bug 10154) Don't allow user to specify days beyond $wgRCMaxAge. diff --git a/includes/Import.php b/includes/Import.php index 757b3d2f8b..8bd65a6e5e 100644 --- a/includes/Import.php +++ b/includes/Import.php @@ -177,6 +177,9 @@ class WikiImporter { public function setImageBasePath( $dir ) { $this->mImageBasePath = $dir; } + public function setImportUploads( $import ) { + $this->mImportUploads = $import; + } /** * Default per-revision callback, performs the import. 
@@ -612,6 +615,7 @@ class WikiImporter { $encoding = $this->reader->getAttribute( 'encoding' ); if ( $encoding === 'base64' ) { $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) ); + $uploadInfo['isTempSrc'] = true; } } elseif ( $tag != '#text' ) { $this->warn( "Unhandled upload XML tag $tag" ); @@ -623,6 +627,7 @@ class WikiImporter { $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}"; if ( file_exists( $path ) ) { $uploadInfo['fileSrc'] = $path; + $uploadInfo['isTempSrc'] = false; } } @@ -652,7 +657,11 @@ class WikiImporter { } $revision->setSrc( $uploadInfo['src'] ); if ( isset( $uploadInfo['fileSrc'] ) ) { - $revision->setFileSrc( $uploadInfo['fileSrc'] ); + $revision->setFileSrc( $uploadInfo['fileSrc'], + !empty( $uploadInfo['isTempSrc'] ) ); + } + if ( isset( $uploadInfo['sha1base36'] ) ) { + $revision->setSha1Base36( $uploadInfo['sha1base36'] ); } $revision->setSize( intval( $uploadInfo['size'] ) ); $revision->setComment( $uploadInfo['comment'] ); @@ -836,6 +845,8 @@ class WikiRevision { var $action = ""; var $params = ""; var $fileSrc = ''; + var $sha1base36 = false; + var $isTemp = false; var $archiveName = ''; function setTitle( $title ) { @@ -880,8 +891,12 @@ class WikiRevision { function setSrc( $src ) { $this->src = $src; } - function setFileSrc( $src ) { + function setFileSrc( $src, $isTemp ) { $this->fileSrc = $src; + $this->isTemp = $isTemp; + } + function setSha1Base36( $sha1base36 ) { + $this->sha1base36 = $sha1base36; } function setFilename( $filename ) { @@ -941,9 +956,18 @@ class WikiRevision { function getSrc() { return $this->src; } + function getSha1() { + if ( $this->sha1base36 ) { + return wfBaseConvert( $this->sha1base36, 36, 16, 40 ); /* pad to the 40 hex digits sha1_file() yields */ + } + return false; + } function getFileSrc() { return $this->fileSrc; } + function isTempSrc() { + return $this->isTemp; + } function getFilename() { return $this->filename; @@ -1118,23 +1142,30 @@ class WikiRevision { # Get the file source or download if necessary $source = 
$this->getFileSrc(); + $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0; if ( !$source ) { $source = $this->downloadSource(); + $flags |= File::DELETE_SOURCE; } if( !$source ) { wfDebug( __METHOD__ . ": Could not fetch remote file.\n" ); return false; } + $sha1 = $this->getSha1(); + if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) { + wfDebug( __METHOD__ . ": Corrupt file $source.\n" ); + return false; + } $user = User::newFromName( $this->user_text ); # Do the actual upload if ( $archiveName ) { $status = $file->uploadOld( $source, $archiveName, - $this->getTimestamp(), $this->getComment(), $user, File::DELETE_SOURCE ); + $this->getTimestamp(), $this->getComment(), $user, $flags ); } else { $status = $file->upload( $source, $this->getComment(), $this->getComment(), - File::DELETE_SOURCE, false, $this->getTimestamp(), $user ); + $flags, false, $this->getTimestamp(), $user ); } if ( $status->isGood() ) { diff --git a/maintenance/importDump.php b/maintenance/importDump.php index 5397959e74..a3dc74561a 100644 --- a/maintenance/importDump.php +++ b/maintenance/importDump.php @@ -38,6 +38,7 @@ class BackupReader { var $dryRun = false; var $debug = false; var $uploads = false; + var $imageBasePath = false; var $nsFilter = false; function __construct() { @@ -201,6 +202,12 @@ class BackupReader { array( &$this, 'handleUpload' ) ); $this->logItemCallback = $importer->setLogItemCallback( array( &$this, 'handleLogItem' ) ); + if ( $this->uploads ) { + $importer->setImportUploads( true ); + } + if ( $this->imageBasePath ) { + $importer->setImageBasePath( $this->imageBasePath ); + } if ( $this->dryRun ) { $importer->setPageOutCallback( null ); @@ -230,6 +237,7 @@ class BackupReader { echo " --dry-run Parse dump without actually importing pages.\n"; echo " --debug Output extra verbose debug information\n"; echo " --uploads Process file upload data if included (experimental)\n"; + echo " --image-base-path=path Import files from a specified path\n"; echo "\n"; echo 
"Compressed XML files may be read directly:\n"; echo " .gz $gz\n"; @@ -259,6 +267,9 @@ if ( isset( $options['debug'] ) ) { if ( isset( $options['uploads'] ) ) { $reader->uploads = true; // experimental! } +if ( isset( $options['image-base-path'] ) ) { + $reader->imageBasePath = $options['image-base-path']; +} if ( isset( $options['namespaces'] ) ) { $reader->setNsfilter( explode( '|', $options['namespaces'] ) ); } -- 2.20.1