From d1940caa1f4a11e7c7d8d2ff5b178da7de8e2a92 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Tue, 30 Apr 2013 11:39:40 -0700 Subject: [PATCH] [FileBackend] Added "adviseStat" option for the "listing followed by stat" case. * Used this parameter to speed up copyFileBackend.php. * Also added mtime checks to copyFileBackend.php and a few cleanups. * Also fixed some incorrect getDirListPageInternal/getFileListPageInternal docs. Change-Id: I424ef238f7adf4cf1f33b74e3a4e187dcb328a99 --- includes/filebackend/FileBackend.php | 8 +- includes/filebackend/FileBackendStore.php | 6 +- includes/filebackend/SwiftFileBackend.php | 76 +++++++++++++++---- maintenance/copyFileBackend.php | 51 ++++++++++--- .../includes/filebackend/FileBackendTest.php | 71 +++++++++-------- 5 files changed, 153 insertions(+), 59 deletions(-) diff --git a/includes/filebackend/FileBackend.php b/includes/filebackend/FileBackend.php index 57fd4d7775..0c9f7a17b2 100644 --- a/includes/filebackend/FileBackend.php +++ b/includes/filebackend/FileBackend.php @@ -1092,8 +1092,9 @@ abstract class FileBackend { * * @param $params array * $params include: - * - dir : storage directory - * - topOnly : only return direct child files of the directory (since 1.20) + * - dir : storage directory + * - topOnly : only return direct child files of the directory (since 1.20) + * - adviseStat : set to true if stat requests will be made on the files (since 1.22) * @return Traversable|Array|null Returns null on failure */ abstract public function getFileList( array $params ); @@ -1106,7 +1107,8 @@ abstract class FileBackend { * * @param $params array * $params include: - * - dir : storage directory + * - dir : storage directory + * - adviseStat : set to true if stat requests will be made on the files (since 1.22) * @return Traversable|Array|null Returns null on failure * @since 1.20 */ diff --git a/includes/filebackend/FileBackendStore.php b/includes/filebackend/FileBackendStore.php index e50355c597..e20c6fcc5c 100644 --- a/includes/filebackend/FileBackendStore.php +++ b/includes/filebackend/FileBackendStore.php @@ -49,6 +49,8 @@ abstract class FileBackendStore extends FileBackend { protected $maxFileSize = 4294967296; // integer bytes (4GiB) const CACHE_TTL = 10; // integer; TTL in seconds for process cache entries + const CACHE_CHEAP_SIZE = 300; // integer; max entries in "cheap cache" + const CACHE_EXPENSIVE_SIZE = 5; // integer; max entries in "expensive cache" /** * @see FileBackend::__construct() @@ -58,8 +60,8 @@ abstract class FileBackendStore extends FileBackend { public function __construct( array $config ) { parent::__construct( $config ); $this->memCache = new EmptyBagOStuff(); // disabled by default - $this->cheapCache = new ProcessCacheLRU( 300 ); - $this->expensiveCache = new ProcessCacheLRU( 5 ); + $this->cheapCache = new ProcessCacheLRU( self::CACHE_CHEAP_SIZE ); + $this->expensiveCache = new ProcessCacheLRU( self::CACHE_EXPENSIVE_SIZE ); } /** diff --git a/includes/filebackend/SwiftFileBackend.php b/includes/filebackend/SwiftFileBackend.php index 1527cfedf8..f9e2ce47d9 100644 --- a/includes/filebackend/SwiftFileBackend.php +++ b/includes/filebackend/SwiftFileBackend.php @@ -968,23 +968,23 @@ class SwiftFileBackend extends FileBackendStore { * @param string $dir Resolved storage directory with no trailing slash * @param string|null $after Storage path of file to list items after * @param $limit integer Max number of items to list - * @param array $params Includes flag for 'topOnly' - * @return Array List of relative paths of dirs directly under $dir + * @param array $params Parameters for getDirectoryList() + * @return Array List of resolved paths of directories directly under $dir */ public function getDirListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { $dirs = array(); if ( $after === INF ) { return $dirs; // nothing more } - wfProfileIn( __METHOD__ . '-' . $this->name ); + wfProfileIn( __METHOD__ . '-' . $this->name ); try { $container = $this->getContainer( $fullCont ); $prefix = ( $dir == '' ) ? null : "{$dir}/"; // Non-recursive: only list dirs right under $dir if ( !empty( $params['topOnly'] ) ) { $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); - foreach ( $objects as $object ) { // files and dirs + foreach ( $objects as $object ) { // files and directories if ( substr( $object, -1 ) === '/' ) { $dirs[] = $object; // directories end in '/' } @@ -1015,6 +1015,7 @@ class SwiftFileBackend extends FileBackendStore { } } } + // Page on the unfiltered directory listing (what is returned may be filtered) if ( count( $objects ) < $limit ) { $after = INF; // avoid a second RTT } else { @@ -1025,8 +1026,8 @@ class SwiftFileBackend extends FileBackendStore { $this->handleException( $e, null, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); } - wfProfileOut( __METHOD__ . '-' . $this->name ); + return $dirs; } @@ -1041,32 +1042,47 @@ class SwiftFileBackend extends FileBackendStore { * @param string $dir Resolved storage directory with no trailing slash * @param string|null $after Storage path of file to list items after * @param $limit integer Max number of items to list - * @param array $params Includes flag for 'topOnly' - * @return Array List of relative paths of files under $dir + * @param array $params Parameters for getDirectoryList() + * @return Array List of resolved paths of files under $dir */ public function getFileListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { $files = array(); if ( $after === INF ) { return $files; // nothing more } - wfProfileIn( __METHOD__ . '-' . $this->name ); + wfProfileIn( __METHOD__ . '-' . $this->name ); try { $container = $this->getContainer( $fullCont ); $prefix = ( $dir == '' ) ? null : "{$dir}/"; // Non-recursive: only list files right under $dir if ( !empty( $params['topOnly'] ) ) { // files and dirs - $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); - foreach ( $objects as $object ) { - if ( substr( $object, -1 ) !== '/' ) { - $files[] = $object; // directories end in '/' + if ( !empty( $params['adviseStat'] ) ) { + $limit = min( $limit, self::CACHE_CHEAP_SIZE ); + // Note: get_objects() does not include directories + $objects = $this->loadObjectListing( $params, $dir, + $container->get_objects( $limit, $after, $prefix, null, '/' ) ); + $files = $objects; + } else { + $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); + foreach ( $objects as $object ) { // files and directories + if ( substr( $object, -1 ) !== '/' ) { + $files[] = $object; // directories end in '/' + } } } // Recursive: list all files under $dir and its subdirs } else { // files - $objects = $container->list_objects( $limit, $after, $prefix ); + if ( !empty( $params['adviseStat'] ) ) { + $limit = min( $limit, self::CACHE_CHEAP_SIZE ); + $objects = $this->loadObjectListing( $params, $dir, + $container->get_objects( $limit, $after, $prefix ) ); + } else { + $objects = $container->list_objects( $limit, $after, $prefix ); + } $files = $objects; } + // Page on the unfiltered object listing (what is returned may be filtered) if ( count( $objects ) < $limit ) { $after = INF; // avoid a second RTT } else { @@ -1077,11 +1093,38 @@ class SwiftFileBackend extends FileBackendStore { $this->handleException( $e, null, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); } - wfProfileOut( __METHOD__ . '-' . $this->name ); + return $files; } + /** + * Load a list of objects that belong under $dir into stat cache + * and return a list of the names of the objects in the same order. + * + * @param array $params Parameters for getDirectoryList() + * @param string $dir Resolved container directory path + * @param array $cfObjects List of CF_Object items + * @return array List of object names + */ + private function loadObjectListing( array $params, $dir, array $cfObjects ) { + $names = array(); + $storageDir = rtrim( $params['dir'], '/' ); + $suffixStart = ( $dir === '' ) ? 0 : strlen( $dir ) + 1; // size of "path/to/dir/" + foreach ( $cfObjects as $object ) { + $path = "{$storageDir}/" . substr( $object->name, $suffixStart ); + $val = array( + // Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW + 'mtime' => wfTimestamp( TS_MW, $object->last_modified ), + 'size' => (int)$object->content_length, + 'latest' => false // eventually consistent + ); + $this->cheapCache->set( $path, 'stat', $val ); + $names[] = $object->name; + } + return $names; + } + /** * @see FileBackendStore::doGetFileSha1base36() * @return bool @@ -1089,6 +1132,11 @@ class SwiftFileBackend extends FileBackendStore { protected function doGetFileSha1base36( array $params ) { $stat = $this->getFileStat( $params ); if ( $stat ) { + if ( !isset( $stat['sha1'] ) ) { + // Stat entries filled by file listings don't include SHA1 + $this->clearCache( array( $params['src'] ) ); + $stat = $this->getFileStat( $params ); + } return $stat['sha1']; } else { return false; diff --git a/maintenance/copyFileBackend.php b/maintenance/copyFileBackend.php index 81bc915912..6ffd72bc06 100644 --- a/maintenance/copyFileBackend.php +++ b/maintenance/copyFileBackend.php @@ -35,6 +35,8 @@ require_once( __DIR__ . '/Maintenance.php' ); * @ingroup Maintenance */ class CopyFileBackend extends Maintenance { + protected $statCache = array(); + public function __construct() { parent::__construct(); $this->mDescription = "Copy files in one backend to another."; @@ -43,6 +45,7 @@ class CopyFileBackend extends Maintenance { $this->addOption( 'containers', 'Pipe separated list of containers', true, true ); $this->addOption( 'subdir', 'Only do items in this child directory', false, true ); $this->addOption( 'ratefile', 'File to check periodically for batch size', false, true ); + $this->addOption( 'prestat', 'Stat the destination files first (try to use listings)' ); $this->addOption( 'skiphash', 'Skip SHA-1 sync checks for files' ); $this->addOption( 'missingonly', 'Only copy files missing from destination listing' ); $this->addOption( 'utf8only', 'Skip source files that do not have valid UTF-8 names' ); @@ -72,12 +75,13 @@ class CopyFileBackend extends Maintenance { } $srcPathsRel = $src->getFileList( array( - 'dir' => $src->getRootStoragePath() . "/$backendRel" ) ); + 'dir' => $src->getRootStoragePath() . "/$backendRel", + 'adviseStat' => !$this->hasOption( 'missingonly' ) // avoid HEADs + ) ); if ( $srcPathsRel === null ) { $this->error( "Could not list files in $container.", 1 ); // die } - // Do a listing comparison if specified if ( $this->hasOption( 'missingonly' ) ) { $dstPathsRel = $dst->getFileList( array( 'dir' => $dst->getRootStoragePath() . "/$backendRel" ) ); @@ -101,6 +105,22 @@ class CopyFileBackend extends Maintenance { // Only copy the missing files over in the next loop $srcPathsRel = $missingPathsRel; $this->output( count( $srcPathsRel ) . " file(s) need to be copied.\n" ); + } elseif ( $this->getOption( 'prestat' ) ) { + // Build the stat cache for the destination files + $this->output( "Building destination stat cache..." ); + $dstPathsRel = $dst->getFileList( array( + 'dir' => $dst->getRootStoragePath() . "/$backendRel", + 'adviseStat' => true // avoid HEADs + ) ); + if ( $dstPathsRel === null ) { + $this->error( "Could not list files in $container.", 1 ); // die + } + $this->statCache = array(); // clear + foreach ( $dstPathsRel as $dstPathRel ) { + $path = $dst->getRootStoragePath() . "/$backendRel/$dstPathRel"; + $this->statCache[sha1($path)] = $dst->getFileStat( array( 'src' => $path ) ); + } + $this->output( "done [" . count( $this->statCache ) . " file(s)]\n" ); } $batchPaths = array(); @@ -159,7 +179,9 @@ class CopyFileBackend extends Maintenance { if ( $this->hasOption( 'utf8only' ) && !mb_check_encoding( $srcPath, 'UTF-8' ) ) { $this->error( "Detected illegal (non-UTF8) path for $srcPath." ); continue; - } elseif ( $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) ) { + } elseif ( !$this->hasOption( 'missingonly' ) + && $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) ) + { $this->output( "Already have $srcPathRel.\n" ); continue; // assume already copied... } @@ -167,7 +189,11 @@ class CopyFileBackend extends Maintenance { ? $fsFiles[$srcPath] : $src->getLocalReference( array( 'src' => $srcPath, 'latest' => 1 ) ); if ( !$fsFile ) { - $this->error( "Could not get local copy of $srcPath.", 1 ); // die + if ( $src->fileExists( array( 'src' => $srcPath ) ) === false ) { + $this->error( "File '$srcPath' was listed be must have been deleted." ); + } else { + $this->error( "Could not get local copy of $srcPath.", 1 ); // die + } } elseif ( !$fsFile->exists() ) { // FSFileBackends just return the path for getLocalReference() and paths with // illegal slashes may get normalized to a different path. This can cause the @@ -206,14 +232,19 @@ class CopyFileBackend extends Maintenance { protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) { $skipHash = $this->hasOption( 'skiphash' ); + $srcStat = $src->getFileStat( array( 'src' => $sPath ) ); + $dPathSha1 = sha1( $dPath ); + $dstStat = isset( $this->statCache[$dPathSha1] ) + ? $this->statCache[$dPathSha1] + : $dst->getFileStat( array( 'src' => $dPath ) ); return ( - ( $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) ) - === $dst->fileExists( array( 'src' => $dPath, 'latest' => 1 ) ) // short-circuit - ) && ( $src->getFileSize( array( 'src' => $sPath, 'latest' => 1 ) ) - === $dst->getFileSize( array( 'src' => $dPath, 'latest' => 1 ) ) // short-circuit - ) && ( $skipHash || ( $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) ) + is_array( $srcStat ) // sanity check that source exists + && is_array( $dstStat ) // dest exists + && $srcStat['size'] === $dstStat['size'] + && ( !$skipHash || $srcStat['mtime'] <= $dstStat['mtime'] ) + && ( $skipHash || $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) ) === $dst->getFileSha1Base36( array( 'src' => $dPath, 'latest' => 1 ) ) - ) ) + ) ); } } diff --git a/tests/phpunit/includes/filebackend/FileBackendTest.php b/tests/phpunit/includes/filebackend/FileBackendTest.php index 0d6e4d5921..529d8cb379 100644 --- a/tests/phpunit/includes/filebackend/FileBackendTest.php +++ b/tests/phpunit/includes/filebackend/FileBackendTest.php @@ -1770,7 +1770,7 @@ class FileBackendTest extends MediaWikiTestCase { $this->assertEquals( true, $status->isOK(), "Creation of files succeeded with OK status ($backendName)." ); - // Expected listing + // Expected listing at root $expected = array( "e/test1.txt", "e/test2.txt", @@ -1789,27 +1789,28 @@ class FileBackendTest extends MediaWikiTestCase { ); sort( $expected ); - // Actual listing (no trailing slash) - $list = array(); + // Actual listing (no trailing slash) at root $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1" ) ); - foreach ( $iter as $file ) { - $list[] = $file; - } + $list = $this->listToArray( $iter ); sort( $list ); + $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." ); + // Actual listing (no trailing slash) at root with advise + $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1", 'adviseStat' => 1 ) ); + $list = $this->listToArray( $iter ); + sort( $list ); $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." ); - // Actual listing (with trailing slash) + // Actual listing (with trailing slash) at root $list = array(); $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/" ) ); foreach ( $iter as $file ) { $list[] = $file; } sort( $list ); - $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." ); - // Expected listing + // Expected listing at subdir $expected = array( "test1.txt", "test2.txt", @@ -1821,36 +1822,39 @@ class FileBackendTest extends MediaWikiTestCase { ); sort( $expected ); - // Actual listing (no trailing slash) - $list = array(); + // Actual listing (no trailing slash) at subdir $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir" ) ); - foreach ( $iter as $file ) { - $list[] = $file; - } + $list = $this->listToArray( $iter ); sort( $list ); + $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." ); + // Actual listing (no trailing slash) at subdir with advise + $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir", 'adviseStat' => 1 ) ); + $list = $this->listToArray( $iter ); + sort( $list ); $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." ); - // Actual listing (with trailing slash) + // Actual listing (with trailing slash) at subdir $list = array(); $iter = $this->backend->getFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir/" ) ); foreach ( $iter as $file ) { $list[] = $file; } sort( $list ); - $this->assertEquals( $expected, $list, "Correct file listing ($backendName)." ); // Actual listing (using iterator second time) - $list = array(); - foreach ( $iter as $file ) { - $list[] = $file; - } + $list = $this->listToArray( $iter ); sort( $list ); - $this->assertEquals( $expected, $list, "Correct file listing ($backendName), second iteration." ); - // Expected listing (top files only) + // Actual listing (top files only) at root + $iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1" ) ); + $list = $this->listToArray( $iter ); + sort( $list ); + $this->assertEquals( array(), $list, "Correct top file listing ($backendName)." ); + + // Expected listing (top files only) at subdir $expected = array( "test1.txt", "test2.txt", @@ -1860,14 +1864,16 @@ class FileBackendTest extends MediaWikiTestCase { ); sort( $expected ); - // Actual listing (top files only) - $list = array(); + // Actual listing (top files only) at subdir $iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir" ) ); - foreach ( $iter as $file ) { - $list[] = $file; - } + $list = $this->listToArray( $iter ); sort( $list ); + $this->assertEquals( $expected, $list, "Correct top file listing ($backendName)." ); + // Actual listing (top files only) at subdir with advise + $iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1/e/subdir2/subdir", 'adviseStat' => 1 ) ); + $list = $this->listToArray( $iter ); + sort( $list ); $this->assertEquals( $expected, $list, "Correct top file listing ($backendName)." ); foreach ( $files as $file ) { // clean up @@ -2066,7 +2072,7 @@ class FileBackendTest extends MediaWikiTestCase { $this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." ); $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/e/subdir1" ) ); - $items = is_array( $iter ) ? $iter : iterator_to_array( $iter ); + $items = $this->listToArray( $iter ); $this->assertEquals( array(), $items, "Directory listing is empty." ); foreach ( $files as $file ) { // clean up @@ -2078,11 +2084,11 @@ class FileBackendTest extends MediaWikiTestCase { // no errors } - $items = is_array( $iter ) ? $iter : iterator_to_array( $iter ); + $items = $this->listToArray( $iter ); $this->assertEquals( array(), $items, "Directory listing is empty." ); $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/e/not/exists" ) ); - $items = is_array( $iter ) ? $iter : iterator_to_array( $iter ); + $items = $this->listToArray( $iter ); $this->assertEquals( array(), $items, "Directory listing is empty." ); } @@ -2187,6 +2193,11 @@ class FileBackendTest extends MediaWikiTestCase { "Scoped unlocking of files succeeded with OK status ($backendName)." ); } + // helper function + private function listToArray( $iter ) { + return is_array( $iter ) ? $iter : iterator_to_array( $iter ); + } + // test helper wrapper for backend prepare() function private function prepare( array $params ) { return $this->backend->prepare( $params ); -- 2.20.1