/**
* Maintenance script to import one or more images from the local file system into
- * the wiki without using the web-based interface
+ * the wiki without using the web-based interface.
+ *
+ * "Smart import" additions:
+ * - aim: preserve the essential metadata (user, description) when importing media from an existing wiki
+ * - process:
+ * - interface with the source wiki, don't use bare files only (see --source-wiki-url).
+ * - fetch metadata from source wiki for each file to import.
+ * - commit the fetched metadata to the destination wiki while submitting.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
* @author Rob Church <robchur@gmail.com>
+ * @author Mij <mij@bitchx.it>
*/
-$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from' );
-require_once( dirname(__FILE__) . '/commandLine.inc' );
-require_once( 'importImages.inc' );
+$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' );
+require_once( dirname( __FILE__ ) . '/commandLine.inc' );
+require_once( dirname( __FILE__ ) . '/importImages.inc' );
$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;
echo( "Import Images\n\n" );
# Need a path
-if( count( $args ) > 0 ) {
+if ( count( $args ) > 0 ) {
$dir = $args[0];
# Check Protection
- if (isset($options['protect']) && isset($options['unprotect']))
- die("Cannot specify both protect and unprotect. Only 1 is allowed.\n");
+ if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) )
+ die( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
-if (isset($options['protect']) && $options['protect'] == 1)
- die("You must specify a protection option.\n");
+ if ( isset( $options['protect'] ) && $options['protect'] == 1 )
+ die( "You must specify a protection option.\n" );
# Prepare the list of allowed extensions
global $wgFileExtensions;
$user = isset( $options['user'] )
? User::newFromName( $options['user'] )
: User::newFromName( 'Maintenance script' );
- if( !$user instanceof User )
+ if ( !$user instanceof User )
$user = User::newFromName( 'Maintenance script' );
$wgUser = $user;
# Get block check. If a value is given, this specified how often the check is performed
if ( isset( $options['check-userblock'] ) ) {
if ( !$options['check-userblock'] ) $checkUserBlock = 1;
- else $checkUserBlock = (int)$options['check-userblock'];
+ else $checkUserBlock = (int)$options['check-userblock'];
} else {
$checkUserBlock = false;
}
- # Get --from
+ # Get --from
$from = @$options['from'];
- # Get sleep time.
+ # Get sleep time.
$sleep = @$options['sleep'];
- if ( $sleep ) $sleep = (int)$sleep;
+ if ( $sleep ) $sleep = (int)$sleep;
# Get limit number
$limit = @$options['limit'];
- if ( $limit ) $limit = (int)$limit;
+ if ( $limit ) $limit = (int)$limit;
- # Get the upload comment
+ # Get the upload comment. Provide a default one in case there's no comment given.
$comment = 'Importing image file';
if ( isset( $options['comment-file'] ) ) {
$license = isset( $options['license'] ) ? $options['license'] : '';
# Batch "upload" operation
- if( ( $count = count( $files ) ) > 0 ) {
-
- foreach( $files as $file ) {
+ if ( ( $count = count( $files ) ) > 0 ) {
+
+ foreach ( $files as $file ) {
$base = wfBaseName( $file );
-
+
# Validate a title
$title = Title::makeTitleSafe( NS_FILE, $base );
- if( !is_object( $title ) ) {
+ if ( !is_object( $title ) ) {
echo( "{$base} could not be imported; a valid title cannot be produced\n" );
continue;
}
-
+
if ( $from ) {
if ( $from == $title->getDBkey() ) {
$from = NULL;
}
if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
- $user->clearInstanceCache( 'name' ); //reload from DB!
+ $user->clearInstanceCache( 'name' ); // reload from DB!
if ( $user->isBlocked() ) {
- echo( $user->getName() . " was blocked! Aborting." );
+ echo( $user->getName() . " was blocked! Aborting.\n" );
break;
}
}
# Check existence
$image = wfLocalFile( $title );
- if( $image->exists() ) {
- if( isset( $options['overwrite'] ) ) {
+ if ( $image->exists() ) {
+ if ( isset( $options['overwrite'] ) ) {
echo( "{$base} exists, overwriting..." );
$svar = 'overwritten';
} else {
} else {
if ( isset( $options['skip-dupes'] ) ) {
$repo = $image->getRepo();
- $sha1 = File::sha1Base36( $file ); #XXX: we end up calculating this again when actually uploading. that sucks.
+ $sha1 = File::sha1Base36( $file ); # XXX: we end up calculating this again when actually uploading. that sucks.
$dupes = $repo->findBySha1( $sha1 );
$svar = 'added';
}
- # Find comment text
- $commentText = false;
-
- if ( $commentExt ) {
- $f = findAuxFile( $file, $commentExt );
- if ( !$f ) {
- echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
+ if ( isset( $options['source-wiki-url'] ) ) {
+ /* find comment text directly from source wiki, through MW's API */
+ $real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
+ if ( $real_comment === false )
+ $commentText = $comment;
+ else
+ $commentText = $real_comment;
+
+ /* find user directly from source wiki, through MW's API */
+ $real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
+ if ( $real_user === false ) {
+ $wgUser = $user;
} else {
- $commentText = file_get_contents( $f );
+ $wgUser = User::newFromName( $real_user );
+ if ( $wgUser === false ) {
+ # user does not exist in target wiki
+ # Count the file as failed (as the publish-failure path does) so the
+ # final statistics include it, and end the line so the next file's
+ # output starts on a fresh one.
+ echo ( "failed: user '$real_user' does not exist in target wiki.\n" );
+ $failed++;
+ continue;
+ }
+ }
+ } else {
+ # Find comment text
+ $commentText = false;
+
+ if ( $commentExt ) {
+ $f = findAuxFile( $file, $commentExt );
if ( !$f ) {
- echo( " Failed to load comment file {$f}, using default comment. " );
+ echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
+ } else {
+ $commentText = file_get_contents( $f );
+ # $f is known to be truthy in this branch, so test the read result:
+ # file_get_contents() returns false when the file cannot be read.
+ if ( $commentText === false ) {
+ echo( " Failed to load comment file {$f}, using default comment. " );
+ }
}
}
- if ( $commentText && $comment ) {
- $commentText = trim( $commentText ) . "\n\n" . trim( $comment );
+ if ( !$commentText ) {
+ $commentText = $comment;
}
}
- if ( !$commentText ) {
- $commentText = $comment;
- }
- # Import the file
+ # Import the file
if ( isset( $options['dry'] ) ) {
- echo( " publishing {$file}... " );
+ echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " );
} else {
$archive = $image->publish( $file );
- if( WikiError::isError( $archive ) || !$archive->isGood() ) {
- echo( "failed.\n" );
+ if ( !$archive->isGood() ) {
+ echo( "failed. (" .
+ $archive->getWikiText() .
+ ")\n" );
$failed++;
continue;
}
}
-
+
$doProtect = false;
$restrictions = array();
-
+
global $wgRestrictionLevels;
-
- $protectLevel = isset($options['protect']) ? $options['protect'] : null;
-
+
+ $protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;
+
if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
$restrictions['move'] = $protectLevel;
$restrictions['edit'] = $protectLevel;
$doProtect = true;
}
- if (isset($options['unprotect'])) {
+ if ( isset( $options['unprotect'] ) ) {
$restrictions['move'] = '';
$restrictions['edit'] = '';
$doProtect = true;
} else if ( $image->recordUpload( $archive->value, $commentText, $license ) ) {
# We're done!
echo( "done.\n" );
- if ($doProtect) {
+ if ( $doProtect ) {
# Protect the file
$article = new Article( $title );
echo "\nWaiting for slaves...\n";
// Wait for slaves.
- sleep(2.0);
- wfWaitForSlaves( 1.0 );
-
+ sleep( 2.0 ); # Why this sleep?
+ wfWaitForSlaves();
+
echo( "\nSetting image restrictions ... " );
- if ( $article->updateRestrictions($restrictions) )
- echo( "done.\n" );
+ if ( $article->updateRestrictions( $restrictions ) )
+ echo( "done.\n" );
else
- echo( "failed.\n" );
+ echo( "failed.\n" );
}
} else {
- echo( "failed.\n" );
+ echo( "failed. (at recordUpload stage)\n" );
$svar = 'failed';
}
-
+
$$svar++;
$processed++;
if ( $sleep )
sleep( $sleep );
}
-
+
# Print out some statistics
echo( "\n" );
- foreach( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored',
- 'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten',
+ foreach ( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored',
+ 'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten',
'failed' => 'Failed' ) as $var => $desc ) {
- if( $$var > 0 )
+ if ( $$var > 0 )
echo( "{$desc}: {$$var}\n" );
}
-
+
} else {
echo( "No suitable files could be found for import.\n" );
}
showUsage();
}
-exit(0);
+exit( 0 );
function showUsage( $reason = false ) {
- if( $reason ) {
+ if ( $reason ) {
echo( $reason . "\n" );
}
- echo <<<END
+ echo <<<TEXT
Imports images and other media files into the wiki
USAGE: php importImages.php [options] <dir>
--overwrite Overwrite existing images with the same name (default is to skip them)
--limit=<num> Limit the number of images to process. Ignored or skipped images are not counted.
--from=<name> Ignore all files until the one with the given name. Useful for resuming
- aborted imports. <name> should be the file's canonical database form.
+ aborted imports. <name> should be the file's canonical database form.
--skip-dupes Skip images that were already uploaded under a different name (check SHA1)
--sleep=<sec> Sleep between files. Useful mostly for debugging.
--user=<username> Set username of uploader, default 'Maintenance script'
--dry Dry run, don't import anything
--protect=<protect> Specify the protect value (autoconfirmed,sysop)
--unprotect Unprotects all uploaded images
+--source-wiki-url if specified, take User and Comment data for each imported file from this URL.
+ For example, --source-wiki-url="http://en.wikipedia.org/"
-END;
- exit(1);
-}
\ No newline at end of file
+TEXT;
+ exit( 1 );
+}