Some HipHop fixes:
[lhc/web/wiklou.git] / maintenance / importImages.php
index a85b8bd..47dc3cc 100644 (file)
@@ -2,31 +2,54 @@
 
 /**
  * Maintenance script to import one or more images from the local file system into
- * the wiki without using the web-based interface
+ * the wiki without using the web-based interface.
+ *
+ * "Smart import" additions:
+ * - aim: preserve the essential metadata (user, description) when importing media files from an existing wiki
+ * - process:
+ *      - interface with the source wiki instead of relying on bare files only (see --source-wiki-url).
+ *      - fetch metadata from source wiki for each file to import.
+ *      - commit the fetched metadata to the destination wiki while submitting.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
  *
  * @file
  * @ingroup Maintenance
  * @author Rob Church <robchur@gmail.com>
+ * @author Mij <mij@bitchx.it>
  */
 
-$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from' );
-require_once( dirname(__FILE__) . '/commandLine.inc' );
-require_once( 'importImages.inc' );
+$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' );
+require_once( dirname( __FILE__ ) . '/commandLine.inc' );
+require_once( dirname( __FILE__ ) . '/importImages.inc' );
 $processed = $added = $ignored = $skipped = $overwritten = $failed = 0;
 
 echo( "Import Images\n\n" );
 
 # Need a path
-if( count( $args ) > 0 ) {
+if ( count( $args ) > 0 ) {
 
        $dir = $args[0];
 
        # Check Protection
-       if (isset($options['protect']) && isset($options['unprotect']))
-                       die("Cannot specify both protect and unprotect.  Only 1 is allowed.\n");
+       if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) )
+                       die( "Cannot specify both protect and unprotect.  Only 1 is allowed.\n" );
 
-if (isset($options['protect']) && $options['protect'] == 1)
-                       die("You must specify a protection option.\n");
+       if ( isset( $options['protect'] ) && $options['protect'] == 1 )
+                       die( "You must specify a protection option.\n" );
 
        # Prepare the list of allowed extensions
        global $wgFileExtensions;
@@ -41,30 +64,30 @@ if (isset($options['protect']) && $options['protect'] == 1)
        $user = isset( $options['user'] )
                ? User::newFromName( $options['user'] )
                : User::newFromName( 'Maintenance script' );
-       if( !$user instanceof User )
+       if ( !$user instanceof User )
                $user = User::newFromName( 'Maintenance script' );
        $wgUser = $user;
 
        # Get block check. If a value is given, this specified how often the check is performed
        if ( isset( $options['check-userblock'] ) ) {
                if ( !$options['check-userblock'] ) $checkUserBlock = 1;
-               else $checkUserBlock = (int)$options['check-userblock']; 
+               else $checkUserBlock = (int)$options['check-userblock'];
        } else {
                $checkUserBlock = false;
        }
 
-       # Get --from 
+       # Get --from
        $from = @$options['from'];
 
-       # Get sleep time. 
+       # Get sleep time.
        $sleep = @$options['sleep'];
-       if ( $sleep ) $sleep = (int)$sleep; 
+       if ( $sleep ) $sleep = (int)$sleep;
 
        # Get limit number
        $limit = @$options['limit'];
-       if ( $limit ) $limit = (int)$limit; 
+       if ( $limit ) $limit = (int)$limit;
 
-       # Get the upload comment
+       # Get the upload comment. Provide a default one in case there's no comment given.
        $comment = 'Importing image file';
 
        if ( isset( $options['comment-file'] ) ) {
@@ -83,18 +106,18 @@ if (isset($options['protect']) && $options['protect'] == 1)
        $license = isset( $options['license'] ) ? $options['license'] : '';
 
        # Batch "upload" operation
-       if( ( $count = count( $files ) ) > 0 ) {
-       
-               foreach( $files as $file ) {
+       if ( ( $count = count( $files ) ) > 0 ) {
+
+               foreach ( $files as $file ) {
                        $base = wfBaseName( $file );
-       
+
                        # Validate a title
                        $title = Title::makeTitleSafe( NS_FILE, $base );
-                       if( !is_object( $title ) ) {
+                       if ( !is_object( $title ) ) {
                                echo( "{$base} could not be imported; a valid title cannot be produced\n" );
                                continue;
                        }
-       
+
                        if ( $from ) {
                                if ( $from == $title->getDBkey() ) {
                                        $from = NULL;
@@ -105,17 +128,17 @@ if (isset($options['protect']) && $options['protect'] == 1)
                        }
 
                        if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
-                               $user->clearInstanceCache( 'name' ); //reload from DB!
+                               $user->clearInstanceCache( 'name' ); // reload from DB!
                                if ( $user->isBlocked() ) {
-                                       echo( $user->getName() . " was blocked! Aborting." );
+                                       echo( $user->getName() . " was blocked! Aborting.\n" );
                                        break;
                                }
                        }
 
                        # Check existence
                        $image = wfLocalFile( $title );
-                       if( $image->exists() ) {
-                               if( isset( $options['overwrite'] ) ) {
+                       if ( $image->exists() ) {
+                               if ( isset( $options['overwrite'] ) ) {
                                        echo( "{$base} exists, overwriting..." );
                                        $svar = 'overwritten';
                                } else {
@@ -126,7 +149,7 @@ if (isset($options['protect']) && $options['protect'] == 1)
                        } else {
                                if ( isset( $options['skip-dupes'] ) ) {
                                        $repo = $image->getRepo();
-                                       $sha1 = File::sha1Base36( $file ); #XXX: we end up calculating this again when actually uploading. that sucks.
+                                       $sha1 = File::sha1Base36( $file ); # XXX: we end up calculating this again when actually uploading. that sucks.
 
                                        $dupes = $repo->findBySha1( $sha1 );
 
@@ -141,54 +164,75 @@ if (isset($options['protect']) && $options['protect'] == 1)
                                $svar = 'added';
                        }
 
-                       # Find comment text
-                       $commentText = false;
-
-                       if ( $commentExt ) {
-                               $f = findAuxFile( $file, $commentExt );
-                               if ( !$f ) {
-                                       echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
+                       if ( isset( $options['source-wiki-url'] ) ) {
+                               /* find comment text directly from source wiki, through MW's API */
+                               $real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
+                               if ( $real_comment === false )
+                                       $commentText = $comment;
+                               else
+                                       $commentText = $real_comment;
+
+                               /* find user directly from source wiki, through MW's API */
+                               $real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
+                               if ( $real_user === false ) {
+                                       $wgUser = $user;
                                } else {
-                                       $commentText = file_get_contents( $f );
+                                       $wgUser = User::newFromName( $real_user );
+                                       if ( $wgUser === false ) {
+                                               # user does not exist in target wiki
+                                               echo ( "failed: user '$real_user' does not exist in target wiki." );
+                                               continue;
+                                       }
+                               }
+                       } else {
+                               # Find comment text
+                               $commentText = false;
+
+                               if ( $commentExt ) {
+                                       $f = findAuxFile( $file, $commentExt );
                                        if ( !$f ) {
-                                               echo( " Failed to load comment file {$f}, using default comment. " );
+                                               echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
+                                       } else {
+                                               $commentText = file_get_contents( $f );
+                                               if ( !$f ) {
+                                                       echo( " Failed to load comment file {$f}, using default comment. " );
+                                               }
                                        }
                                }
 
-                               if ( $commentText && $comment ) {
-                                       $commentText = trim( $commentText ) . "\n\n" . trim( $comment );
+                               if ( !$commentText ) {
+                                       $commentText = $comment;
                                }
                        }
 
-                       if ( !$commentText ) {
-                               $commentText = $comment;
-                       }
 
-                       # Import the file       
+                       # Import the file
                        if ( isset( $options['dry'] ) ) {
-                               echo( " publishing {$file}... " );
+                               echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " );
                        } else {
                                $archive = $image->publish( $file );
-                               if( WikiError::isError( $archive ) || !$archive->isGood() ) {
-                                       echo( "failed.\n" );
+                               if ( !$archive->isGood() ) {
+                                       echo( "failed. (" .
+                                               $archive->getWikiText() .
+                                               ")\n" );
                                        $failed++;
                                        continue;
                                }
                        }
-                       
+
                        $doProtect = false;
                        $restrictions = array();
-                       
+
                        global $wgRestrictionLevels;
-                       
-                       $protectLevel = isset($options['protect']) ? $options['protect'] : null;
-                       
+
+                       $protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;
+
                        if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
                                        $restrictions['move'] = $protectLevel;
                                        $restrictions['edit'] = $protectLevel;
                                        $doProtect = true;
                        }
-                       if (isset($options['unprotect'])) {
+                       if ( isset( $options['unprotect'] ) ) {
                                        $restrictions['move'] = '';
                                        $restrictions['edit'] = '';
                                        $doProtect = true;
@@ -200,26 +244,26 @@ if (isset($options['protect']) && $options['protect'] == 1)
                        } else if ( $image->recordUpload( $archive->value, $commentText, $license ) ) {
                                # We're done!
                                echo( "done.\n" );
-                               if ($doProtect) {
+                               if ( $doProtect ) {
                                                # Protect the file
                                                $article = new Article( $title );
                                                echo "\nWaiting for slaves...\n";
                                                // Wait for slaves.
-                                               sleep(2.0);
-                                               wfWaitForSlaves( 1.0 );
-                                               
+                                               sleep( 2.0 ); # Why this sleep?
+                                               wfWaitForSlaves();
+
                                                echo( "\nSetting image restrictions ... " );
-                                               if ( $article->updateRestrictions($restrictions) )
-                                                               echo( "done.\n" );
+                                               if ( $article->updateRestrictions( $restrictions ) )
+                                                       echo( "done.\n" );
                                                else
-                                                               echo( "failed.\n" );
+                                                       echo( "failed.\n" );
                                }
 
                        } else {
-                               echo( "failed.\n" );
+                               echo( "failed. (at recordUpload stage)\n" );
                                $svar = 'failed';
                        }
-                       
+
                        $$svar++;
                        $processed++;
 
@@ -229,16 +273,16 @@ if (isset($options['protect']) && $options['protect'] == 1)
                        if ( $sleep )
                                sleep( $sleep );
                }
-               
+
                # Print out some statistics
                echo( "\n" );
-               foreach( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored', 
-                       'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten', 
+               foreach ( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored',
+                       'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten',
                        'failed' => 'Failed' ) as $var => $desc ) {
-                       if( $$var > 0 )
+                       if ( $$var > 0 )
                                echo( "{$desc}: {$$var}\n" );
                }
-               
+
        } else {
                echo( "No suitable files could be found for import.\n" );
        }
@@ -247,14 +291,14 @@ if (isset($options['protect']) && $options['protect'] == 1)
        showUsage();
 }
 
-exit(0);
+exit( 0 );
 
 function showUsage( $reason = false ) {
-       if( $reason ) {
+       if ( $reason ) {
                echo( $reason . "\n" );
        }
 
-       echo <<<END
+       echo <<<TEXT
 Imports images and other media files into the wiki
 USAGE: php importImages.php [options] <dir>
 
@@ -265,7 +309,7 @@ Options:
 --overwrite            Overwrite existing images with the same name (default is to skip them)
 --limit=<num>          Limit the number of images to process. Ignored or skipped images are not counted.
 --from=<name>          Ignore all files until the one with the given name. Useful for resuming
-                        aborted imports. <name> should be the file's canonical database form.
+                                               aborted imports. <name> should be the file's canonical database form.
 --skip-dupes           Skip images that were already uploaded under a different name (check SHA1)
 --sleep=<sec>          Sleep between files. Useful mostly for debugging.
 --user=<username>      Set username of uploader, default 'Maintenance script'
@@ -278,7 +322,9 @@ Options:
 --dry                  Dry run, don't import anything
 --protect=<protect>     Specify the protect value (autoconfirmed,sysop)
 --unprotect             Unprotects all uploaded images
+--source-wiki-url   if specified, take User and Comment data for each imported file from this URL.
+                                       For example, --source-wiki-url="http://en.wikipedia.org/"
 
-END;
-       exit(1);
-}
\ No newline at end of file
+TEXT;
+       exit( 1 );
+}