[FileBackend] Changed copy script to use batches for concurrency.
author Aaron Schulz <aschulz@wikimedia.org>
Sun, 6 May 2012 18:49:59 +0000 (11:49 -0700)
committer Aaron Schulz <aschulz@wikimedia.org>
Mon, 7 May 2012 01:07:29 +0000 (18:07 -0700)
* Also added a 'subdir' option so multiple shards can be done at once.

Change-Id: Id8c3a89a4cb30978f66db3bf95ebfebc0a9c01b4

maintenance/copyFileBackend.php

index 314318f..8d20533 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * Copy all files in one container of one backend to another.
+ * Copy all files in some containers of one backend to another.
  *
  * This can also be used to re-shard the files for one backend using the
  * config of second backend. The second backend should have the same config
@@ -31,61 +31,107 @@ require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 class CopyFileBackend extends Maintenance {
        public function __construct() {
                parent::__construct();
-               $this->mDescription = "Copy all the files in one backend to another.";
+               $this->mDescription = "Copy files in one backend to another."; // one-line summary of the script (presumably shown in the Maintenance help output; confirm)
                $this->addOption( 'src', 'Backend containing the source files', true, true );
                $this->addOption( 'dst', 'Backend where files should be copied to', true, true );
                $this->addOption( 'containers', 'Pipe separated list of containers', true, true );
-               $this->addOption( 'fast', 'Skip SHA-1 checks on pre-existing files' );
+               $this->addOption( 'subdir', 'Only do items in this child directory', false, true ); // NOTE(review): trailing args look like (required = false, takes a value = true); confirm against Maintenance::addOption()
+               $this->setBatchSize( 50 ); // presumably sets $this->mBatchSize, which execute() uses as the number of files per copyFileBatch() call
        }
 
+       /**
+        * Copy the listed files of each requested container (optionally limited
+        * to one sub-directory) from the 'src' backend to the 'dst' backend.
+        *
+        * Paths are collected into groups of $this->mBatchSize and each group is
+        * handed to copyFileBatch(). A container that cannot be listed aborts the
+        * script via error() with a die code.
+        */
        public function execute() {
                $src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );
                $dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );
-
                $containers = explode( '|', $this->getOption( 'containers' ) );
+               // Trim trailing slashes from the option *value* (not from its name), so
+               // that "a/b/" does not produce "container/a/b//file" storage paths
+               $subDir = rtrim( $this->getOption( 'subdir', '' ), '/' );
+
+               $count = 0;
                foreach ( $containers as $container ) {
-                       $this->output( "Doing container $container...\n" );
+                       if ( $subDir != '' ) {
+                               $backendRel = "$container/$subDir";
+                               $this->output( "Doing container '$container', directory '$subDir'...\n" );
+                       } else {
+                               $backendRel = $container;
+                               $this->output( "Doing container '$container'...\n" );
+                       }
 
-                       $srcPathsRel = $src->getFileList(
-                               array( 'dir' => $src->getRootStoragePath() . "/$container" ) );
+                       $dir = $src->getRootStoragePath() . "/$backendRel";
+                       $srcPathsRel = $src->getFileList( array( 'dir' => $dir ) );
                        if ( $srcPathsRel === null ) {
                                $this->error( "Could not list files in $container.", 1 ); // die
                        }
-                       foreach ( $srcPathsRel as $srcPathRel ) {
-                               $srcPath = $src->getRootStoragePath() . "/$container/$srcPathRel";
-                               $dstPath = $dst->getRootStoragePath() . "/$container/$srcPathRel";
 
-                               if ( $dst->fileExists( array( 'src' => $dstPath, 'latest' => 1 ) ) ) {
-                                       if ( $this->hasOption( 'fast' ) ) {
-                                               $this->output( "Already have $dstPath.\n" );
-                                               continue; // assume already copied...
-                                       }
-                                       $srcSha1 = $src->getFileSha1Base36( array( 'src' => $srcPath ) );
-                                       $dstSha1 = $dst->getFileSha1Base36( array( 'src' => $dstPath ) );
-                                       if ( $srcSha1 && $srcSha1 === $dstSha1 ) {
-                                               $this->output( "Already have $dstPath.\n" );
-                                               continue; // already copied...
-                                       }
+                       $batchPaths = array();
+                       foreach ( $srcPathsRel as $srcPathRel ) {
+                               $batchPaths[$srcPathRel] = 1; // remove duplicates
+                               if ( count( $batchPaths ) >= $this->mBatchSize ) {
+                                       $this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
+                                       $batchPaths = array(); // done
                                }
+                               ++$count;
+                       }
+                       if ( count( $batchPaths ) ) { // left-overs
+                               $this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
+                               $batchPaths = array(); // done
+                       }
 
-                               $fsFile = $src->getLocalReference( array( 'src' => $srcPath, 'latest' => 1 ) );
-                               if ( !$fsFile ) {
-                                       $this->error( "Could not get local copy of $srcPath.", 1 ); // die
-                               }
+                       if ( $subDir != '' ) {
+                               $this->output( "Finished container '$container', directory '$subDir'.\n" );
+                       } else {
+                               $this->output( "Finished container '$container'.\n" );
+                       }
+               }
 
-                               $status = $dst->prepare( array( 'dir' => dirname( $dstPath ) ) );
-                               $status->merge( $dst->store(
-                                       array( 'src' => $fsFile->getPath(), 'dst' => $dstPath ),
-                                       array( 'nonLocking' => 1, 'nonJournaled' => 1 )
-                               ) );
-                               if ( !$status->isOK() ) {
-                                       print_r( $status->getErrorsArray() );
-                                       $this->error( "Could not copy $srcPath to $dstPath.", 1 ); // die
-                               }
+               // $count is the number of listed paths seen, including ones later skipped
+               $this->output( "Done [$count file(s)].\n" );
+       }
 
-                               $this->output( "Copied $srcPath to $dstPath.\n" );
+       /**
+        * Copy one batch of files from $src to $dst, skipping every file that
+        * filesAreSame() reports as already identical at the destination.
+        *
+        * Any failure (no local copy, prepare() error, failed batch operation)
+        * aborts the script via error() with a die code.
+        *
+        * @param $srcPathsRel Array Paths relative to "$backendRel" in both backends
+        * @param $backendRel string Container name, optionally followed by "/<sub-directory>"
+        * @param $src FileBackend Backend to read from
+        * @param $dst FileBackend Backend to write to
+        * @return void
+        */
+       protected function copyFileBatch(
+               array $srcPathsRel, $backendRel, FileBackend $src, FileBackend $dst
+       ) {
+               $ops = array();
+               $fsFiles = array();
+               $copiedRel = array(); // relative paths actually queued, for the final report
+               foreach ( $srcPathsRel as $srcPathRel ) {
+                       $srcPath = $src->getRootStoragePath() . "/$backendRel/$srcPathRel";
+                       $dstPath = $dst->getRootStoragePath() . "/$backendRel/$srcPathRel";
+                       if ( $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) ) {
+                               $this->output( "Already have $srcPathRel.\n" );
+                               continue; // assume already copied...
+                       }
+                       // Note: getLocalReference() is fast for FS backends
+                       $fsFile = $src->getLocalReference( array( 'src' => $srcPath, 'latest' => 1 ) );
+                       if ( !$fsFile ) {
+                               $this->error( "Could not get local copy of $srcPath.", 1 ); // die
+                       }
+                       $fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
+                       // Note: prepare() is usually fast for key/value backends
+                       $status = $dst->prepare( array( 'dir' => dirname( $dstPath ) ) );
+                       if ( !$status->isOK() ) {
+                               $this->error( print_r( $status->getErrorsArray(), true ) );
+                               $this->error( "Could not copy $srcPath to $dstPath.", 1 ); // die
                        }
+                       $ops[] = array( 'op' => 'store',
+                               'src' => $fsFile->getPath(), 'dst' => $dstPath, 'overwrite' => 1 );
+                       $copiedRel[] = $srcPathRel;
                }
+
+               if ( !count( $ops ) ) {
+                       return; // every file in the batch was skipped; nothing to store or report
+               }
+
+               $status = $dst->doOperations( $ops, array( 'nonJournaled' => 1 ) );
+               if ( !$status->isOK() ) {
+                       $this->error( print_r( $status->getErrorsArray(), true ) );
+                       $this->error( "Could not copy file batch.", 1 ); // die
+               } else {
+                       // Report only the files that were stored, not the skipped ones
+                       $this->output( "Copied these file(s):\n" . implode( "\n", $copiedRel ) . "\n\n" );
+               }
+       }
+
+       /**
+        * Check whether the destination already holds an identical copy of a
+        * source file: both must exist, and their sizes and SHA-1 hashes must match.
+        *
+        * A check that fails or is indeterminate on both sides is NOT treated as a
+        * match, so a backend error can never cause a file to be silently skipped.
+        *
+        * @param $src FileBackend Backend holding the source file
+        * @param $dst FileBackend Backend holding the (possible) copy
+        * @param $sPath string Storage path of the file in $src
+        * @param $dPath string Storage path of the file in $dst
+        * @return bool True only if the copy is positively known to be identical
+        */
+       protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
+               $sParams = array( 'src' => $sPath, 'latest' => 1 );
+               $dParams = array( 'src' => $dPath, 'latest' => 1 );
+               // Both files must positively exist; "unknown === unknown" is not a match
+               if ( $src->fileExists( $sParams ) !== true || $dst->fileExists( $dParams ) !== true ) {
+                       return false;
+               }
+               // Cheap size check first, so differing files never need to be hashed
+               if ( $src->getFileSize( $sParams ) !== $dst->getFileSize( $dParams ) ) {
+                       return false;
+               }
+               // Require a usable source hash so two failed lookups do not compare equal
+               $sha1 = $src->getFileSha1Base36( $sParams );
+               return ( $sha1 && $sha1 === $dst->getFileSha1Base36( $dParams ) );
        }
 }