* Used this parameter to speed up copyFileBackend.php.
* Also added mtime checks to copyFileBackend.php and a few cleanups.
* Also fixed some incorrect getDirListPageInternal/getFileListPageInternal docs.
Change-Id: I424ef238f7adf4cf1f33b74e3a4e187dcb328a99
*
* @param $params array
* $params include:
- * - dir : storage directory
- * - topOnly : only return direct child files of the directory (since 1.20)
+ * - dir : storage directory
+ * - topOnly : only return direct child files of the directory (since 1.20)
+ * - adviseStat : set to true if stat requests will be made on the files (since 1.22)
* @return Traversable|Array|null Returns null on failure
*/
abstract public function getFileList( array $params );
*
* @param $params array
* $params include:
- * - dir : storage directory
+ * - dir : storage directory
+ * - adviseStat : set to true if stat requests will be made on the files (since 1.22)
* @return Traversable|Array|null Returns null on failure
* @since 1.20
*/
protected $maxFileSize = 4294967296; // integer bytes (4GiB)
const CACHE_TTL = 10; // integer; TTL in seconds for process cache entries
+ const CACHE_CHEAP_SIZE = 300; // integer; max entries in "cheap cache"
+ const CACHE_EXPENSIVE_SIZE = 5; // integer; max entries in "expensive cache"
/**
* @see FileBackend::__construct()
public function __construct( array $config ) {
parent::__construct( $config );
$this->memCache = new EmptyBagOStuff(); // disabled by default
- $this->cheapCache = new ProcessCacheLRU( 300 );
- $this->expensiveCache = new ProcessCacheLRU( 5 );
+ $this->cheapCache = new ProcessCacheLRU( self::CACHE_CHEAP_SIZE );
+ $this->expensiveCache = new ProcessCacheLRU( self::CACHE_EXPENSIVE_SIZE );
}
/**
* @param string $dir Resolved storage directory with no trailing slash
* @param string|null $after Storage path of file to list items after
* @param $limit integer Max number of items to list
- * @param array $params Includes flag for 'topOnly'
- * @return Array List of relative paths of dirs directly under $dir
+ * @param array $params Parameters for getDirectoryList()
+ * @return Array List of resolved paths of directories directly under $dir
*/
public function getDirListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) {
$dirs = array();
if ( $after === INF ) {
return $dirs; // nothing more
}
- wfProfileIn( __METHOD__ . '-' . $this->name );
+ wfProfileIn( __METHOD__ . '-' . $this->name );
try {
$container = $this->getContainer( $fullCont );
$prefix = ( $dir == '' ) ? null : "{$dir}/";
// Non-recursive: only list dirs right under $dir
if ( !empty( $params['topOnly'] ) ) {
$objects = $container->list_objects( $limit, $after, $prefix, null, '/' );
- foreach ( $objects as $object ) { // files and dirs
+ foreach ( $objects as $object ) { // files and directories
if ( substr( $object, -1 ) === '/' ) {
$dirs[] = $object; // directories end in '/'
}
}
}
}
+ // Page on the unfiltered directory listing (what is returned may be filtered)
if ( count( $objects ) < $limit ) {
$after = INF; // avoid a second RTT
} else {
$this->handleException( $e, null, __METHOD__,
array( 'cont' => $fullCont, 'dir' => $dir ) );
}
-
wfProfileOut( __METHOD__ . '-' . $this->name );
+
return $dirs;
}
* @param string $dir Resolved storage directory with no trailing slash
* @param string|null $after Storage path of file to list items after
* @param $limit integer Max number of items to list
- * @param array $params Includes flag for 'topOnly'
- * @return Array List of relative paths of files under $dir
+ * @param array $params Parameters for getDirectoryList()
+ * @return Array List of resolved paths of files under $dir
*/
public function getFileListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) {
$files = array();
if ( $after === INF ) {
return $files; // nothing more
}
- wfProfileIn( __METHOD__ . '-' . $this->name );
+ wfProfileIn( __METHOD__ . '-' . $this->name );
try {
$container = $this->getContainer( $fullCont );
$prefix = ( $dir == '' ) ? null : "{$dir}/";
// Non-recursive: only list files right under $dir
if ( !empty( $params['topOnly'] ) ) { // files and dirs
- $objects = $container->list_objects( $limit, $after, $prefix, null, '/' );
- foreach ( $objects as $object ) {
- if ( substr( $object, -1 ) !== '/' ) {
- $files[] = $object; // directories end in '/'
+ if ( !empty( $params['adviseStat'] ) ) {
+ $limit = min( $limit, self::CACHE_CHEAP_SIZE );
+ // Note: get_objects() does not include directories
+ $objects = $this->loadObjectListing( $params, $dir,
+ $container->get_objects( $limit, $after, $prefix, null, '/' ) );
+ $files = $objects;
+ } else {
+ $objects = $container->list_objects( $limit, $after, $prefix, null, '/' );
+ foreach ( $objects as $object ) { // files and directories
+ if ( substr( $object, -1 ) !== '/' ) {
+ $files[] = $object; // directories end in '/'
+ }
}
}
// Recursive: list all files under $dir and its subdirs
} else { // files
- $objects = $container->list_objects( $limit, $after, $prefix );
+ if ( !empty( $params['adviseStat'] ) ) {
+ $limit = min( $limit, self::CACHE_CHEAP_SIZE );
+ $objects = $this->loadObjectListing( $params, $dir,
+ $container->get_objects( $limit, $after, $prefix ) );
+ } else {
+ $objects = $container->list_objects( $limit, $after, $prefix );
+ }
$files = $objects;
}
+ // Page on the unfiltered object listing (what is returned may be filtered)
if ( count( $objects ) < $limit ) {
$after = INF; // avoid a second RTT
} else {
$this->handleException( $e, null, __METHOD__,
array( 'cont' => $fullCont, 'dir' => $dir ) );
}
-
wfProfileOut( __METHOD__ . '-' . $this->name );
+
return $files;
}
+ /**
+ * Load a list of objects that belong under $dir into stat cache
+ * and return a list of the names of the objects in the same order.
+ *
+ * @param array $params Parameters for getDirectoryList()
+ * @param string $dir Resolved container directory path
+ * @param array $cfObjects List of CF_Object items
+ * @return array List of object names
+ */
+ private function loadObjectListing( array $params, $dir, array $cfObjects ) {
+ $names = array();
+ $storageDir = rtrim( $params['dir'], '/' );
+ $suffixStart = ( $dir === '' ) ? 0 : strlen( $dir ) + 1; // size of "path/to/dir/"
+ foreach ( $cfObjects as $object ) {
+ $path = "{$storageDir}/" . substr( $object->name, $suffixStart );
+ $val = array(
+ // Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW
+ 'mtime' => wfTimestamp( TS_MW, $object->last_modified ),
+ 'size' => (int)$object->content_length,
+ 'latest' => false // eventually consistent
+ );
+ $this->cheapCache->set( $path, 'stat', $val );
+ $names[] = $object->name;
+ }
+ return $names;
+ }
+
/**
* @see FileBackendStore::doGetFileSha1base36()
* @return bool
protected function doGetFileSha1base36( array $params ) {
$stat = $this->getFileStat( $params );
if ( $stat ) {
+ if ( !isset( $stat['sha1'] ) ) {
+ // Stat entries filled by file listings don't include SHA1
+ $this->clearCache( array( $params['src'] ) );
+ $stat = $this->getFileStat( $params );
+ }
return $stat['sha1'];
} else {
return false;
* @ingroup Maintenance
*/
class CopyFileBackend extends Maintenance {
+ protected $statCache = array();
+
public function __construct() {
parent::__construct();
$this->mDescription = "Copy files in one backend to another.";
$this->addOption( 'containers', 'Pipe separated list of containers', true, true );
$this->addOption( 'subdir', 'Only do items in this child directory', false, true );
$this->addOption( 'ratefile', 'File to check periodically for batch size', false, true );
+ $this->addOption( 'prestat', 'Stat the destination files first (try to use listings)' );
$this->addOption( 'skiphash', 'Skip SHA-1 sync checks for files' );
$this->addOption( 'missingonly', 'Only copy files missing from destination listing' );
$this->addOption( 'utf8only', 'Skip source files that do not have valid UTF-8 names' );
}
$srcPathsRel = $src->getFileList( array(
- 'dir' => $src->getRootStoragePath() . "/$backendRel" ) );
+ 'dir' => $src->getRootStoragePath() . "/$backendRel",
+ 'adviseStat' => !$this->hasOption( 'missingonly' ) // avoid HEADs
+ ) );
if ( $srcPathsRel === null ) {
$this->error( "Could not list files in $container.", 1 ); // die
}
- // Do a listing comparison if specified
if ( $this->hasOption( 'missingonly' ) ) {
$dstPathsRel = $dst->getFileList( array(
'dir' => $dst->getRootStoragePath() . "/$backendRel" ) );
// Only copy the missing files over in the next loop
$srcPathsRel = $missingPathsRel;
$this->output( count( $srcPathsRel ) . " file(s) need to be copied.\n" );
+ } elseif ( $this->getOption( 'prestat' ) ) {
+ // Build the stat cache for the destination files
+ $this->output( "Building destination stat cache..." );
+ $dstPathsRel = $dst->getFileList( array(
+ 'dir' => $dst->getRootStoragePath() . "/$backendRel",
+ 'adviseStat' => true // avoid HEADs
+ ) );
+ if ( $dstPathsRel === null ) {
+ $this->error( "Could not list files in $container.", 1 ); // die
+ }
+ $this->statCache = array(); // clear
+ foreach ( $dstPathsRel as $dstPathRel ) {
+ $path = $dst->getRootStoragePath() . "/$backendRel/$dstPathRel";
+ $this->statCache[sha1($path)] = $dst->getFileStat( array( 'src' => $path ) );
+ }
+ $this->output( "done [" . count( $this->statCache ) . " file(s)]\n" );
}
$batchPaths = array();
if ( $this->hasOption( 'utf8only' ) && !mb_check_encoding( $srcPath, 'UTF-8' ) ) {
$this->error( "Detected illegal (non-UTF8) path for $srcPath." );
continue;
- } elseif ( $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) ) {
+ } elseif ( !$this->hasOption( 'missingonly' )
+ && $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) )
+ {
$this->output( "Already have $srcPathRel.\n" );
continue; // assume already copied...
}
? $fsFiles[$srcPath]
: $src->getLocalReference( array( 'src' => $srcPath, 'latest' => 1 ) );
if ( !$fsFile ) {
- $this->error( "Could not get local copy of $srcPath.", 1 ); // die
+ if ( $src->fileExists( array( 'src' => $srcPath ) ) === false ) {
+ $this->error( "File '$srcPath' was listed be must have been deleted." );
+ } else {
+ $this->error( "Could not get local copy of $srcPath.", 1 ); // die
+ }
} elseif ( !$fsFile->exists() ) {
// FSFileBackends just return the path for getLocalReference() and paths with
// illegal slashes may get normalized to a different path. This can cause the
protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
$skipHash = $this->hasOption( 'skiphash' );
+ $srcStat = $src->getFileStat( array( 'src' => $sPath ) );
+ $dPathSha1 = sha1( $dPath );
+ $dstStat = isset( $this->statCache[$dPathSha1] )
+ ? $this->statCache[$dPathSha1]
+ : $dst->getFileStat( array( 'src' => $dPath ) );
return (
- ( $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) )
- === $dst->fileExists( array( 'src' => $dPath, 'latest' => 1 ) ) // short-circuit
- ) && ( $src->getFileSize( array( 'src' => $sPath, 'latest' => 1 ) )
- === $dst->getFileSize( array( 'src' => $dPath, 'latest' => 1 ) ) // short-circuit
- ) && ( $skipHash || ( $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) )
+ is_array( $srcStat ) // sanity check that source exists
+ && is_array( $dstStat ) // dest exists
+ && $srcStat['size'] === $dstStat['size']
+ && ( !$skipHash || $srcStat['mtime'] <= $dstStat['mtime'] )
+ && ( $skipHash || $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) )
=== $dst->getFileSha1Base36( array( 'src' => $dPath, 'latest' => 1 ) )
- ) )
+ )
);
}
}
$this->assertEquals( true, $status->isOK(),
"Creation of files succeeded with OK status ($backendName)." );
- // Expected listing
+ // Expected listing at root
$expected = array(
"e/test1.txt",
"e/test2.txt",
);
sort( $expected );
- // Actual listing (no trailing slash)
- $list = array();
+ // Actual listing (no trailing slash) at root
$iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1" ) );
- foreach ( $iter as $file ) {
- $list[] = $file;
- }
+ $list = $this->listToArray( $iter );
sort( $list );
+ $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." );
+ // Actual listing (no trailing slash) at root with advise
+ $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1", 'adviseStat' => 1 ) );
+ $list = $this->listToArray( $iter );
+ sort( $list );
$this->assertEquals( $expected, $list, "Correct file listing ($backendName)." );
- // Actual listing (with trailing slash)
+ // Actual listing (with trailing slash) at root
$list = array();
$iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/" ) );
foreach ( $iter as $file ) {
$list[] = $file;
}
sort( $list );
-
$this->assertEquals( $expected, $list, "Correct file listing ($backendName)." );
- // Expected listing
+ // Expected listing at subdir
$expected = array(
"test1.txt",
"test2.txt",
);
sort( $expected );
- // Actual listing (no trailing slash)
- $list = array();
+ // Actual listing (no trailing slash) at subdir
$iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir" ) );
- foreach ( $iter as $file ) {
- $list[] = $file;
- }
+ $list = $this->listToArray( $iter );
sort( $list );
+ $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." );
+ // Actual listing (no trailing slash) at subdir with advise
+ $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir", 'adviseStat' => 1 ) );
+ $list = $this->listToArray( $iter );
+ sort( $list );
$this->assertEquals( $expected, $list, "Correct file listing ($backendName)." );
- // Actual listing (with trailing slash)
+ // Actual listing (with trailing slash) at subdir
$list = array();
$iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir/" ) );
foreach ( $iter as $file ) {
$list[] = $file;
}
sort( $list );
-
$this->assertEquals( $expected, $list, "Correct file listing ($backendName)." );
// Actual listing (using iterator second time)
- $list = array();
- foreach ( $iter as $file ) {
- $list[] = $file;
- }
+ $list = $this->listToArray( $iter );
sort( $list );
-
$this->assertEquals( $expected, $list, "Correct file listing ($backendName), second iteration." );
- // Expected listing (top files only)
+ // Actual listing (top files only) at root
+ $iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1" ) );
+ $list = $this->listToArray( $iter );
+ sort( $list );
+ $this->assertEquals( array(), $list, "Correct top file listing ($backendName)." );
+
+ // Expected listing (top files only) at subdir
$expected = array(
"test1.txt",
"test2.txt",
);
sort( $expected );
- // Actual listing (top files only)
- $list = array();
+ // Actual listing (top files only) at subdir
$iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir" ) );
- foreach ( $iter as $file ) {
- $list[] = $file;
- }
+ $list = $this->listToArray( $iter );
sort( $list );
+ $this->assertEquals( $expected, $list, "Correct top file listing ($backendName)." );
+ // Actual listing (top files only) at subdir with advise
+ $iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir", 'adviseStat' => 1 ) );
+ $list = $this->listToArray( $iter );
+ sort( $list );
$this->assertEquals( $expected, $list, "Correct top file listing ($backendName)." );
foreach ( $files as $file ) { // clean up
$this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." );
$iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/e/subdir1" ) );
- $items = is_array( $iter ) ? $iter : iterator_to_array( $iter );
+ $items = $this->listToArray( $iter );
$this->assertEquals( array(), $items, "Directory listing is empty." );
foreach ( $files as $file ) { // clean up
// no errors
}
- $items = is_array( $iter ) ? $iter : iterator_to_array( $iter );
+ $items = $this->listToArray( $iter );
$this->assertEquals( array(), $items, "Directory listing is empty." );
$iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/e/not/exists" ) );
- $items = is_array( $iter ) ? $iter : iterator_to_array( $iter );
+ $items = $this->listToArray( $iter );
$this->assertEquals( array(), $items, "Directory listing is empty." );
}
"Scoped unlocking of files succeeded with OK status ($backendName)." );
}
+ // helper function
+ private function listToArray( $iter ) {
+ return is_array( $iter ) ? $iter : iterator_to_array( $iter );
+ }
+
// test helper wrapper for backend prepare() function
private function prepare( array $params ) {
return $this->backend->prepare( $params );