From: Aaron Schulz Date: Thu, 5 Apr 2012 05:56:08 +0000 (-0700) Subject: [FileBackend] Added some basic directory functions. X-Git-Tag: 1.31.0-rc.0~23828^2 X-Git-Url: http://git.cyclocoop.org/%40spipnet%40?a=commitdiff_plain;h=dd5326ccf12e526fcfac22a0943baffe52942579;p=lhc%2Fweb%2Fwiklou.git [FileBackend] Added some basic directory functions. * Added directoryExists() function to check if a dir exists. * Added getDirectoryList() and getTopDirectoryList() functions to list out subdirs. * Added getTopFileList() function to list out immediate files in a dir. This wraps getFileList(), which was modified. Change-Id: I087835f40d76c41f5d6af55b9a34a51e0f44a675 --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 60cbbaafb8..937f09c04f 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -510,10 +510,16 @@ $wgAutoloadLocalClasses = array( 'FileBackend' => 'includes/filerepo/backend/FileBackend.php', 'FileBackendStore' => 'includes/filerepo/backend/FileBackendStore.php', 'FileBackendStoreShardListIterator' => 'includes/filerepo/backend/FileBackendStore.php', + 'FileBackendStoreShardDirIterator' => 'includes/filerepo/backend/FileBackendStore.php', + 'FileBackendStoreShardFileIterator' => 'includes/filerepo/backend/FileBackendStore.php', 'FileBackendMultiWrite' => 'includes/filerepo/backend/FileBackendMultiWrite.php', 'FSFileBackend' => 'includes/filerepo/backend/FSFileBackend.php', + 'FSFileBackendList' => 'includes/filerepo/backend/FSFileBackend.php', + 'FSFileBackendDirList' => 'includes/filerepo/backend/FSFileBackend.php', 'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php', 'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php', + 'SwiftFileBackendList' => 'includes/filerepo/backend/SwiftFileBackend.php', + 'SwiftFileBackendDirList' => 'includes/filerepo/backend/SwiftFileBackend.php', 'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php', 'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php', 'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php', diff --git a/includes/filerepo/backend/FSFileBackend.php b/includes/filerepo/backend/FSFileBackend.php index 4a27ca117d..df822fdca7 100644 --- a/includes/filerepo/backend/FSFileBackend.php +++ b/includes/filerepo/backend/FSFileBackend.php @@ -7,11 +7,11 @@ /** * @brief Class for a file system (FS) based file backend. - * + * * All "containers" each map to a directory under the backend's base directory. * For backwards-compatibility, some container paths can be set to custom paths. * The wiki ID will not be used in any custom paths, so this should be avoided. - * + * * Having directories with thousands of files will diminish performance. * Sharding can be accomplished by using FileRepo-style hash paths. * @@ -76,7 +76,7 @@ class FSFileBackend extends FileBackendStore { /** * Sanity check a relative file system path for validity - * + * * @param $path string Normalized relative path * @return bool */ @@ -95,14 +95,14 @@ class FSFileBackend extends FileBackendStore { /** * Given the short (unresolved) and full (resolved) name of * a container, return the file system path of the container. - * + * * @param $shortCont string * @param $fullCont string - * @return string|null + * @return string|null */ protected function containerFSRoot( $shortCont, $fullCont ) { if ( isset( $this->containerPaths[$shortCont] ) ) { - return $this->containerPaths[$shortCont]; + return $this->containerPaths[$shortCont]; } elseif ( isset( $this->basePath ) ) { return "{$this->basePath}/{$fullCont}"; } @@ -111,7 +111,7 @@ class FSFileBackend extends FileBackendStore { /** * Get the absolute file system path for a storage path - * + * * @param $storagePath string Storage path * @return string|null */ @@ -439,6 +439,41 @@ class FSFileBackend extends FileBackendStore { clearstatcache(); // clear the PHP file stat cache } + /** + * @see FileBackendStore::doDirectoryExists() + * @return bool|null + */ + protected function doDirectoryExists( $fullCont, $dirRel, array $params ) { + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + + $this->trapWarnings(); // don't trust 'false' if there were errors + $exists = is_dir( $dir ); + $hadError = $this->untrapWarnings(); + + return $hadError ? null : $exists; + } + + /** + * @see FileBackendStore::getDirectoryListInternal() + * @return Array|null + */ + public function getDirectoryListInternal( $fullCont, $dirRel, array $params ) { + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + $exists = is_dir( $dir ); + if ( !$exists ) { + wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" ); + return array(); // nothing under this dir + } elseif ( !is_readable( $dir ) ) { + wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" ); + return null; // bad permissions? + } + return new FSFileBackendDirList( $dir, $params ); + } + /** * @see FileBackendStore::getFileListInternal() * @return array|FSFileBackendFileList|null @@ -451,13 +486,11 @@ class FSFileBackend extends FileBackendStore { if ( !$exists ) { wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" ); return array(); // nothing under this dir - } - $readable = is_readable( $dir ); - if ( !$readable ) { + } elseif ( !is_readable( $dir ) ) { wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" ); return null; // bad permissions? } - return new FSFileBackendFileList( $dir ); + return new FSFileBackendFileList( $dir, $params ); } /** @@ -543,53 +576,65 @@ class FSFileBackend extends FileBackendStore { } /** - * Wrapper around RecursiveDirectoryIterator that catches - * exception or does any custom behavoir that we may want. + * Wrapper around RecursiveDirectoryIterator/DirectoryIterator that + * catches exception or does any custom behavoir that we may want. * Do not use this class from places outside FSFileBackend. * * @ingroup FileBackend */ -class FSFileBackendFileList implements Iterator { - /** @var RecursiveIteratorIterator */ +abstract class FSFileBackendList implements Iterator { + /** @var Iterator */ protected $iter; protected $suffixStart; // integer protected $pos = 0; // integer + /** @var Array */ + protected $params = array(); /** * @param $dir string file system directory */ - public function __construct( $dir ) { + public function __construct( $dir, array $params ) { $dir = realpath( $dir ); // normalize $this->suffixStart = strlen( $dir ) + 1; // size of "path/to/dir/" + $this->params = $params; + try { + $this->iter = $this->initIterator( $dir ); + } catch ( UnexpectedValueException $e ) { + $this->iter = null; // bad permissions? deleted? + } + } + + /** + * Return an appropriate iterator object to wrap + * + * @param $dir string file system directory + * @return Iterator + */ + protected function initIterator( $dir ) { + if ( !empty( $this->params['topOnly'] ) ) { // non-recursive + # Get an iterator that will get direct sub-nodes + return new DirectoryIterator( $dir ); + } else { // recursive # Get an iterator that will return leaf nodes (non-directories) if ( MWInit::classExists( 'FilesystemIterator' ) ) { // PHP >= 5.3 # RecursiveDirectoryIterator extends FilesystemIterator. # FilesystemIterator::SKIP_DOTS default is inconsistent in PHP 5.3.x. - $flags = FilesystemIterator::CURRENT_AS_FILEINFO | FilesystemIterator::SKIP_DOTS; - $this->iter = new RecursiveIteratorIterator( - new RecursiveDirectoryIterator( $dir, $flags ) ); + $flags = FilesystemIterator::CURRENT_AS_SELF | FilesystemIterator::SKIP_DOTS; + return new RecursiveIteratorIterator( + new RecursiveDirectoryIterator( $dir, $flags ), + RecursiveIteratorIterator::CHILD_FIRST // include dirs + ); } else { // PHP < 5.3 # RecursiveDirectoryIterator extends DirectoryIterator - $this->iter = new RecursiveIteratorIterator( - new RecursiveDirectoryIterator( $dir ) ); + return new RecursiveIteratorIterator( + new RecursiveDirectoryIterator( $dir ), + RecursiveIteratorIterator::CHILD_FIRST // include dirs + ); } - } catch ( UnexpectedValueException $e ) { - $this->iter = null; // bad permissions? deleted? } } - /** - * @see Iterator::current() - * @return string|bool String or false - */ - public function current() { - // Return only the relative path and normalize slashes to FileBackend-style - // Make sure to use the realpath since the suffix is based upon that - return str_replace( '\\', '/', - substr( realpath( $this->iter->current() ), $this->suffixStart ) ); - } - /** * @see Iterator::key() * @return integer @@ -598,6 +643,14 @@ class FSFileBackendFileList implements Iterator { return $this->pos; } + /** + * @see Iterator::current() + * @return string|bool String or false + */ + public function current() { + return $this->getRelPath( $this->iter->current()->getPathname() ); + } + /** * @see Iterator::next() * @return void @@ -605,6 +658,7 @@ class FSFileBackendFileList implements Iterator { public function next() { try { $this->iter->next(); + $this->filterViaNext(); } catch ( UnexpectedValueException $e ) { $this->iter = null; } @@ -619,6 +673,7 @@ class FSFileBackendFileList implements Iterator { $this->pos = 0; try { $this->iter->rewind(); + $this->filterViaNext(); } catch ( UnexpectedValueException $e ) { $this->iter = null; } @@ -631,4 +686,44 @@ class FSFileBackendFileList implements Iterator { public function valid() { return $this->iter && $this->iter->valid(); } + + /** + * Filter out items by advancing to the next ones + */ + protected function filterViaNext() {} + + /** + * Return only the relative path and normalize slashes to FileBackend-style. + * Uses the "real path" since the suffix is based upon that. + * + * @param $path string + * @return string + */ + protected function getRelPath( $path ) { + return strtr( substr( realpath( $path ), $this->suffixStart ), '\\', '/' ); + } +} + +class FSFileBackendDirList extends FSFileBackendList { + protected function filterViaNext() { + while ( $this->iter->valid() ) { + if ( $this->iter->current()->isDot() || !$this->iter->current()->isDir() ) { + $this->iter->next(); // skip non-directories and dot files + } else { + break; + } + } + } +} + +class FSFileBackendFileList extends FSFileBackendList { + protected function filterViaNext() { + while ( $this->iter->valid() ) { + if ( !$this->iter->current()->isFile() ) { + $this->iter->next(); // skip non-files and dot files + } else { + break; + } + } + } } diff --git a/includes/filerepo/backend/FileBackend.php b/includes/filerepo/backend/FileBackend.php index b9821bfd31..f3b879a4dd 100644 --- a/includes/filerepo/backend/FileBackend.php +++ b/includes/filerepo/backend/FileBackend.php @@ -546,22 +546,89 @@ abstract class FileBackend { abstract public function getLocalCopy( array $params ); /** - * Get an iterator to list out all stored files under a storage directory. + * Check if a directory exists at a given storage path. + * Backends using key/value stores will check if the path is a + * virtual directory, meaning there are files under the given directory. + * + * Storage backends with eventual consistency might return stale data. + * + * $params include: + * dir : storage directory + * + * @return bool|null Returns null on failure + * @since 1.20 + */ + abstract public function directoryExists( array $params ); + + /** + * Get an iterator to list *all* directories under a storage directory. + * If the directory is of the form "mwstore://backend/container", + * then all directories in the container should be listed. + * If the directory is of form "mwstore://backend/container/dir", + * then all directories directly under that directory should be listed. + * Results should be storage directories relative to the given directory. + * + * Storage backends with eventual consistency might return stale data. + * + * $params include: + * dir : storage directory + * topOnly : only return direct child directories of the directory + * + * @return Traversable|Array|null Returns null on failure + * @since 1.20 + */ + abstract public function getDirectoryList( array $params ); + + /** + * Same as FileBackend::getDirectoryList() except only lists + * directories that are immediately under the given directory. + * + * Storage backends with eventual consistency might return stale data. + * + * $params include: + * dir : storage directory + * + * @return Traversable|Array|null Returns null on failure + * @since 1.20 + */ + final public function getTopDirectoryList( array $params ) { + return $this->getDirectoryList( array( 'topOnly' => true ) + $params ); + } + + /** + * Get an iterator to list *all* stored files under a storage directory. * If the directory is of the form "mwstore://backend/container", * then all files in the container should be listed. * If the directory is of form "mwstore://backend/container/dir", - * then all files under that container directory should be listed. + * then all files under that directory should be listed. * Results should be storage paths relative to the given directory. * * Storage backends with eventual consistency might return stale data. * * $params include: - * dir : storage path directory + * dir : storage directory + * topOnly : only return direct child files of the directory * * @return Traversable|Array|null Returns null on failure */ abstract public function getFileList( array $params ); + /** + * Same as FileBackend::getFileList() except only lists + * files that are immediately under the given directory. + * + * Storage backends with eventual consistency might return stale data. + * + * $params include: + * dir : storage directory + * + * @return Traversable|Array|null Returns null on failure + * @since 1.20 + */ + final public function getTopFileList( array $params ) { + return $this->getFileList( array( 'topOnly' => true ) + $params ); + } + /** * Invalidate any in-process file existence and property cache. * If $paths is given, then only the cache for those files will be cleared. @@ -708,6 +775,7 @@ abstract class FileBackend { * * @param $path string * @return bool + * @since 1.20 */ final public static function isPathTraversalFree( $path ) { return ( self::normalizeContainerPath( $path ) !== null ); diff --git a/includes/filerepo/backend/FileBackendMultiWrite.php b/includes/filerepo/backend/FileBackendMultiWrite.php index 9c3cf5b5e7..a902488486 100644 --- a/includes/filerepo/backend/FileBackendMultiWrite.php +++ b/includes/filerepo/backend/FileBackendMultiWrite.php @@ -7,7 +7,7 @@ /** * @brief Proxy backend that mirrors writes to several internal backends. - * + * * This class defines a multi-write backend. Multiple backends can be * registered to this proxy backend and it will act as a single backend. * Use this when all access to those backends is through this proxy backend. @@ -220,7 +220,7 @@ class FileBackendMultiWrite extends FileBackend { /** * Substitute the backend name in storage path parameters * for a set of operations with that of a given internal backend. - * + * * @param $ops Array List of file operation arrays * @param $backend FileBackendStore * @return Array @@ -241,7 +241,7 @@ class FileBackendMultiWrite extends FileBackend { /** * Same as substOpBatchPaths() but for a single operation - * + * * @param $op File operation array * @param $backend FileBackendStore * @return Array @@ -253,7 +253,7 @@ class FileBackendMultiWrite extends FileBackend { /** * Substitute the backend of storage paths with an internal backend's name - * + * * @param $paths Array|string List of paths or single string path * @param $backend FileBackendStore * @return Array|string @@ -268,7 +268,7 @@ class FileBackendMultiWrite extends FileBackend { /** * Substitute the backend of internal storage paths with the proxy backend's name - * + * * @param $paths Array|string List of paths or single string path * @return Array|string */ @@ -320,7 +320,7 @@ class FileBackendMultiWrite extends FileBackend { } /** - * @see FileBackend::getFileList() + * @see FileBackend::concatenate() */ public function concatenate( array $params ) { // We are writing to an FS file, so we don't need to do this per-backend @@ -408,6 +408,22 @@ class FileBackendMultiWrite extends FileBackend { return $this->backends[$this->masterIndex]->getLocalCopy( $realParams ); } + /** + * @see FileBackend::directoryExists() + */ + public function directoryExists( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->directoryExists( $realParams ); + } + + /** + * @see FileBackend::getSubdirectoryList() + */ + public function getDirectoryList( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getDirectoryList( $realParams ); + } + /** * @see FileBackend::getFileList() */ diff --git a/includes/filerepo/backend/FileBackendStore.php b/includes/filerepo/backend/FileBackendStore.php index 55dedc1e05..c41d0fc8bd 100644 --- a/includes/filerepo/backend/FileBackendStore.php +++ b/includes/filerepo/backend/FileBackendStore.php @@ -646,7 +646,78 @@ abstract class FileBackendStore extends FileBackend { } /** - * @copydoc FileBackend::getFileList() + * @see FileBackend::directoryExists() + * @return bool|null + */ + final public function directoryExists( array $params ) { + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { + return false; // invalid storage path + } + if ( $shard !== null ) { // confined to a single container/shard + return $this->doDirectoryExists( $fullCont, $dir, $params ); + } else { // directory is on several shards + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + $res = false; // response + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { + $exists = $this->doDirectoryExists( "{$fullCont}{$suffix}", $dir, $params ); + if ( $exists ) { + $res = true; + break; // found one! + } elseif ( $exists === null ) { // error? + $res = null; // if we don't find anything, it is indeterminate + } + } + return $res; + } + } + + /** + * @see FileBackendStore::directoryExists() + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return bool|null + */ + abstract protected function doDirectoryExists( $container, $dir, array $params ); + + /** + * @see FileBackend::getDirectoryList() + * @return Array|null|Traversable + */ + final public function getDirectoryList( array $params ) { + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { // invalid storage path + return null; + } + if ( $shard !== null ) { + // File listing is confined to a single container/shard + return $this->getDirectoryListInternal( $fullCont, $dir, $params ); + } else { + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + // File listing spans multiple containers/shards + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + return new FileBackendStoreShardDirIterator( $this, + $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); + } + } + + /** + * Do not call this function from places outside FileBackend + * + * @see FileBackendStore::getDirectoryList() + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return Traversable|Array|null + */ + abstract public function getDirectoryListInternal( $container, $dir, array $params ); + + /** + * @see FileBackend::getFileList() * @return Array|null|Traversable */ final public function getFileList( array $params ) { @@ -661,7 +732,7 @@ abstract class FileBackendStore extends FileBackend { wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); // File listing spans multiple containers/shards list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); - return new FileBackendStoreShardListIterator( $this, + return new FileBackendStoreShardFileIterator( $this, $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); } } @@ -962,6 +1033,19 @@ abstract class FileBackendStore extends FileBackend { return ''; // no sharding } + /** + * Check if a storage path maps to a single shard. + * Container dirs like "a", where the container shards on "x/xy", + * can reside on several shards. Such paths are tricky to handle. + * + * @param $storagePath string Storage path + * @return bool + */ + final public function isSingleShardPathInternal( $storagePath ) { + list( $c, $r, $shard ) = $this->resolveStoragePath( $storagePath ); + return ( $shard !== null ); + } + /** * Get the sharding config for a container. * If greater than 0, then all file storage paths within @@ -1044,26 +1128,29 @@ abstract class FileBackendStore extends FileBackend { } /** - * FileBackendStore helper function to handle file listings that span container shards. + * FileBackendStore helper function to handle listings that span container shards. * Do not use this class from places outside of FileBackendStore. * * @ingroup FileBackend */ -class FileBackendStoreShardListIterator implements Iterator { - /* @var FileBackendStore */ +abstract class FileBackendStoreShardListIterator implements Iterator { + /** @var FileBackendStore */ protected $backend; - /* @var Array */ + /** @var Array */ protected $params; - /* @var Array */ + /** @var Array */ protected $shardSuffixes; - protected $container; // string - protected $directory; // string + protected $container; // string; full container name + protected $directory; // string; resolved relative path - /* @var Traversable */ + /** @var Traversable */ protected $iter; protected $curShard = 0; // integer protected $pos = 0; // integer + /** @var Array */ + protected $multiShardPaths = array(); // (rel path => 1) + /** * @param $backend FileBackendStore * @param $container string Full storage container name @@ -1112,6 +1199,8 @@ class FileBackendStoreShardListIterator implements Iterator { } else { $this->iter->next(); } + // Filter out items that we already listed + $this->filterViaNext(); // Find the next non-empty shard if no elements are left $this->nextShardIteratorIfNotValid(); } @@ -1124,6 +1213,8 @@ class FileBackendStoreShardListIterator implements Iterator { $this->pos = 0; $this->curShard = 0; $this->setIteratorFromCurrentShard(); + // Filter out items that we already listed + $this->filterViaNext(); // Find the next non-empty shard if this one has no elements $this->nextShardIteratorIfNotValid(); } @@ -1142,6 +1233,25 @@ class FileBackendStoreShardListIterator implements Iterator { } } + /** + * Filter out duplicate items by advancing to the next ones + */ + protected function filterViaNext() { + while ( $this->iter->valid() ) { + $rel = $this->iter->current(); // path relative to given directory + $path = $this->params['dir'] . "/{$rel}"; // full storage path + if ( !$this->backend->isSingleShardPathInternal( $path ) ) { + // Don't keep listing paths that are on multiple shards + if ( isset( $this->multiShardPaths[$rel] ) ) { + $this->iter->next(); // we already listed this path + } else { + $this->multiShardPaths[$rel] = 1; + break; + } + } + } + } + /** * If the list iterator for this container shard is out of items, * then move on to the next container that has items. @@ -1161,7 +1271,35 @@ class FileBackendStoreShardListIterator implements Iterator { */ protected function setIteratorFromCurrentShard() { $suffix = $this->shardSuffixes[$this->curShard]; - $this->iter = $this->backend->getFileListInternal( + $this->iter = $this->listFromShard( "{$this->container}{$suffix}", $this->directory, $this->params ); } + + /** + * Get the list for a given container shard + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return Traversable|Array|null + */ + abstract protected function listFromShard( $container, $dir, array $params ); +} + +/** + * Iterator for listing directories + */ +class FileBackendStoreShardDirIterator extends FileBackendStoreShardListIterator { + protected function listFromShard( $container, $dir, array $params ) { + return $this->backend->getDirectoryListInternal( $container, $dir, $params ); + } +} + +/** + * Iterator for listing regular files + */ +class FileBackendStoreShardFileIterator extends FileBackendStoreShardListIterator { + protected function listFromShard( $container, $dir, array $params ) { + return $this->backend->getFileListInternal( $container, $dir, $params ); + } } diff --git a/includes/filerepo/backend/SwiftFileBackend.php b/includes/filerepo/backend/SwiftFileBackend.php index c7e40e8d08..af5d693dd8 100644 --- a/includes/filerepo/backend/SwiftFileBackend.php +++ b/includes/filerepo/backend/SwiftFileBackend.php @@ -535,12 +535,39 @@ class SwiftFileBackend extends FileBackendStore { return $data; } + /** + * @see FileBackendStore::doDirectoryExists() + * @return bool|null + */ + protected function doDirectoryExists( $fullCont, $dir, array $params ) { + try { + $container = $this->getContainer( $fullCont ); + $prefix = ( $dir == '' ) ? null : "{$dir}/"; + return ( count( $container->list_objects( 1, null, $prefix ) ) > 0 ); + } catch ( NoSuchContainerException $e ) { + return false; + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? + $this->logException( $e, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); + } + + return null; // error + } + + /** + * @see FileBackendStore::getDirectoryListInternal() + * @return SwiftFileBackendDirList + */ + public function getDirectoryListInternal( $fullCont, $dir, array $params ) { + return new SwiftFileBackendDirList( $this, $fullCont, $dir, $params ); + } + /** * @see FileBackendStore::getFileListInternal() * @return SwiftFileBackendFileList */ public function getFileListInternal( $fullCont, $dir, array $params ) { - return new SwiftFileBackendFileList( $this, $fullCont, $dir ); + return new SwiftFileBackendFileList( $this, $fullCont, $dir, $params ); } /** @@ -548,17 +575,96 @@ class SwiftFileBackend extends FileBackendStore { * * @param $fullCont string Resolved container name * @param $dir string Resolved storage directory with no trailing slash - * @param $after string Storage path of file to list items after + * @param $after string|null Storage path of file to list items after * @param $limit integer Max number of items to list - * @return Array + * @param $params Array Includes flag for 'topOnly' + * @return Array List of relative paths of dirs directly under $dir */ - public function getFileListPageInternal( $fullCont, $dir, $after, $limit ) { + public function getDirListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { + $dirs = array(); + + try { + $container = $this->getContainer( $fullCont ); + $prefix = ( $dir == '' ) ? null : "{$dir}/"; + // Non-recursive: only list dirs right under $dir + if ( !empty( $params['topOnly'] ) ) { + $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); + foreach ( $objects as $object ) { // files and dirs + if ( substr( $object, -1 ) === '/' ) { + $dirs[] = $object; // directories end in '/' + } + $after = $object; // update last item + } + // Recursive: list all dirs under $dir and its subdirs + } else { + // Get directory from last item of prior page + $lastDir = $this->getParentDir( $after ); // must be first page + $objects = $container->list_objects( $limit, $after, $prefix ); + foreach ( $objects as $object ) { // files + $objectDir = $this->getParentDir( $object ); // directory of object + if ( $objectDir !== false ) { // file has a parent dir + // Swift stores paths in UTF-8, using binary sorting. + // See function "create_container_table" in common/db.py. + // If a directory is not "greater" than the last one, + // then it was already listed by the calling iterator. + if ( $objectDir > $lastDir ) { + $pDir = $objectDir; + do { // add dir and all its parent dirs + $dirs[] = "{$pDir}/"; + $pDir = $this->getParentDir( $pDir ); + } while ( $pDir !== false // sanity + && $pDir > $lastDir // not done already + && strlen( $pDir ) > strlen( $dir ) // within $dir + ); + } + $lastDir = $objectDir; + } + $after = $object; // update last item + } + } + } catch ( NoSuchContainerException $e ) { + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? + $this->logException( $e, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); + } + + return $dirs; + } + + protected function getParentDir( $path ) { + return ( strpos( $path, '/' ) !== false ) ? dirname( $path ) : false; + } + + /** + * Do not call this function outside of SwiftFileBackendFileList + * + * @param $fullCont string Resolved container name + * @param $dir string Resolved storage directory with no trailing slash + * @param $after string|null Storage path of file to list items after + * @param $limit integer Max number of items to list + * @param $params Array Includes flag for 'topOnly' + * @return Array List of relative paths of files under $dir + */ + public function getFileListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { $files = array(); try { $container = $this->getContainer( $fullCont ); $prefix = ( $dir == '' ) ? null : "{$dir}/"; - $files = $container->list_objects( $limit, $after, $prefix ); + // Non-recursive: only list files right under $dir + if ( !empty( $params['topOnly'] ) ) { // files and dirs + $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); + foreach ( $objects as $object ) { + if ( substr( $object, -1 ) !== '/' ) { + $files[] = $object; // directories end in '/' + } + } + // Recursive: list all files under $dir and its subdirs + } else { // files + $files = $container->list_objects( $limit, $after, $prefix ); + } + $after = end( $files ); // update last item + reset( $files ); // reset pointer } catch ( NoSuchContainerException $e ) { } catch ( InvalidResponseException $e ) { } catch ( Exception $e ) { // some other exception? @@ -816,22 +922,24 @@ class SwiftFileBackend extends FileBackendStore { } /** - * SwiftFileBackend helper class to page through object listings. + * SwiftFileBackend helper class to page through listings. * Swift also has a listing limit of 10,000 objects for sanity. * Do not use this class from places outside SwiftFileBackend. * * @ingroup FileBackend */ -class SwiftFileBackendFileList implements Iterator { +abstract class SwiftFileBackendList implements Iterator { /** @var Array */ protected $bufferIter = array(); protected $bufferAfter = null; // string; list items *after* this path protected $pos = 0; // integer + /** @var Array */ + protected $params = array(); /** @var SwiftFileBackend */ protected $backend; - protected $container; // - protected $dir; // string storage directory + protected $container; // string; container name + protected $dir; // string; storage directory protected $suffixStart; // integer const PAGE_SIZE = 5000; // file listing buffer size @@ -840,8 +948,9 @@ class SwiftFileBackendFileList implements Iterator { * @param $backend SwiftFileBackend * @param $fullCont string Resolved container name * @param $dir string Resolved directory relative to container + * @param $params Array */ - public function __construct( SwiftFileBackend $backend, $fullCont, $dir ) { + public function __construct( SwiftFileBackend $backend, $fullCont, $dir, array $params ) { $this->backend = $backend; $this->container = $fullCont; $this->dir = $dir; @@ -853,14 +962,7 @@ class SwiftFileBackendFileList implements Iterator { } else { // dir within container $this->suffixStart = strlen( $this->dir ) + 1; // size of "path/to/dir/" } - } - - /** - * @see Iterator::current() - * @return string|bool String or false - */ - public function current() { - return substr( current( $this->bufferIter ), $this->suffixStart ); + $this->params = $params; } /** @@ -882,10 +984,9 @@ class SwiftFileBackendFileList implements Iterator { // Check if there are no files left in this page and // advance to the next page if this page was not empty. if ( !$this->valid() && count( $this->bufferIter ) ) { - $this->bufferAfter = end( $this->bufferIter ); - $this->bufferIter = $this->backend->getFileListPageInternal( - $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE - ); + $this->bufferIter = $this->pageFromList( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE, $this->params + ); // updates $this->bufferAfter } } @@ -896,9 +997,9 @@ class SwiftFileBackendFileList implements Iterator { public function rewind() { $this->pos = 0; $this->bufferAfter = null; - $this->bufferIter = $this->backend->getFileListPageInternal( - $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE - ); + $this->bufferIter = $this->pageFromList( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE, $this->params + ); // updates $this->bufferAfter } /** @@ -908,4 +1009,58 @@ class SwiftFileBackendFileList implements Iterator { public function valid() { return ( current( $this->bufferIter ) !== false ); // no paths can have this value } + + /** + * Get the given list portion (page) + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $after string|null + * @param $limit integer + * @param $params Array + * @return Traversable|Array|null + */ + abstract protected function pageFromList( $container, $dir, &$after, $limit, array $params ); +} + +/** + * Iterator for listing directories + */ +class SwiftFileBackendDirList extends SwiftFileBackendList { + /** + * @see Iterator::current() + * @return string|bool String (relative path) or false + */ + public function current() { + return substr( current( $this->bufferIter ), $this->suffixStart, -1 ); + } + + /** + * @see SwiftFileBackendList::pageFromList() + * @return Array + */ + protected function pageFromList( $container, $dir, &$after, $limit, array $params ) { + return $this->backend->getDirListPageInternal( $container, $dir, $after, $limit, $params ); + } +} + +/** + * Iterator for listing regular files + */ +class SwiftFileBackendFileList extends SwiftFileBackendList { + /** + * @see Iterator::current() + * @return string|bool String (relative path) or false + */ + public function current() { + return substr( current( $this->bufferIter ), $this->suffixStart ); + } + + /** + * @see SwiftFileBackendList::pageFromList() + * @return Array + */ + protected function pageFromList( $container, $dir, &$after, $limit, array $params ) { + return $this->backend->getFileListPageInternal( $container, $dir, $after, $limit, $params ); + } } diff --git a/tests/phpunit/includes/filerepo/FileBackendTest.php b/tests/phpunit/includes/filerepo/FileBackendTest.php index 2e95f551e9..612c368327 100644 --- a/tests/phpunit/includes/filerepo/FileBackendTest.php +++ b/tests/phpunit/includes/filerepo/FileBackendTest.php @@ -1307,6 +1307,26 @@ class FileBackendTest extends MediaWikiTestCase { $this->assertEquals( $expected, $list, "Correct file listing ($backendName), second iteration." ); + // Expected listing (top files only) + $expected = array( + "test1.txt", + "test2.txt", + "test3.txt", + "test4.txt", + "test5.txt" + ); + sort( $expected ); + + // Actual listing (top files only) + $list = array(); + $iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1/subdir2/subdir" ) ); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct top file listing ($backendName)." ); + foreach ( $files as $file ) { // clean up $this->backend->doOperation( array( 'op' => 'delete', 'src' => $file ) ); } @@ -1315,6 +1335,182 @@ class FileBackendTest extends MediaWikiTestCase { foreach ( $iter as $iter ) {} // no errors } + public function testGetDirectoryList() { + $this->backend = $this->singleBackend; + $this->tearDownFiles(); + $this->doTestGetDirectoryList(); + $this->tearDownFiles(); + + $this->backend = $this->multiBackend; + $this->tearDownFiles(); + $this->doTestGetDirectoryList(); + $this->tearDownFiles(); + } + + private function doTestGetDirectoryList() { + $backendName = $this->backendClass(); + + $base = $this->baseStorePath(); + $files = array( + "$base/unittest-cont1/test1.txt", + "$base/unittest-cont1/test2.txt", + "$base/unittest-cont1/test3.txt", + "$base/unittest-cont1/subdir1/test1.txt", + "$base/unittest-cont1/subdir1/test2.txt", + "$base/unittest-cont1/subdir2/test3.txt", + "$base/unittest-cont1/subdir2/test4.txt", + "$base/unittest-cont1/subdir2/subdir/test1.txt", + "$base/unittest-cont1/subdir3/subdir/test2.txt", + "$base/unittest-cont1/subdir4/subdir/test3.txt", + "$base/unittest-cont1/subdir4/subdir/test4.txt", + "$base/unittest-cont1/subdir4/subdir/test5.txt", + "$base/unittest-cont1/subdir4/subdir/sub/test0.txt", + "$base/unittest-cont1/subdir4/subdir/sub/120-px-file.txt", + ); + + // Add the files + $ops = array(); + foreach ( $files as $file ) { + $this->prepare( array( 'dir' => dirname( $file ) ) ); + $ops[] = array( 'op' => 'create', 'content' => 'xxy', 'dst' => $file ); + } + $status = $this->backend->doOperations( $ops ); + $this->assertEquals( array(), $status->errors, + "Creation of files succeeded ($backendName)." ); + $this->assertEquals( true, $status->isOK(), + "Creation of files succeeded with OK status ($backendName)." ); + + // Expected listing + $expected = array( + "subdir1", + "subdir2", + "subdir3", + "subdir4", + ); + sort( $expected ); + + $this->assertEquals( true, + $this->backend->directoryExists( array( 'dir' => "$base/unittest-cont1/subdir1" ) ), + "Directory exists in ($backendName)." ); + $this->assertEquals( true, + $this->backend->directoryExists( array( 'dir' => "$base/unittest-cont1/subdir2/subdir" ) ), + "Directory exists in ($backendName)." ); + $this->assertEquals( false, + $this->backend->directoryExists( array( 'dir' => "$base/unittest-cont1/subdir2/test1.txt" ) ), + "Directory does not exists in ($backendName)." ); + + // Actual listing (no trailing slash) + $list = array(); + $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1" ) ); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." ); + + // Actual listing (with trailing slash) + $list = array(); + $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1/" ) ); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." ); + + // Expected listing + $expected = array( + "subdir", + ); + sort( $expected ); + + // Actual listing (no trailing slash) + $list = array(); + $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1/subdir2" ) ); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." ); + + // Actual listing (with trailing slash) + $list = array(); + $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1/subdir2/" ) ); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." ); + + // Actual listing (using iterator second time) + $list = array(); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName), second iteration." ); + + // Expected listing (recursive) + $expected = array( + "subdir1", + "subdir2", + "subdir3", + "subdir4", + "subdir2/subdir", + "subdir3/subdir", + "subdir4/subdir", + "subdir4/subdir/sub", + ); + sort( $expected ); + + // Actual listing (recursive) + $list = array(); + $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/" ) ); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." ); + + // Expected listing (recursive) + $expected = array( + "subdir", + "subdir/sub", + ); + sort( $expected ); + + // Actual listing (recursive) + $list = array(); + $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/subdir4" ) ); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." ); + + // Actual listing (recursive, second time) + $list = array(); + foreach ( $iter as $file ) { + $list[] = $file; + } + sort( $list ); + + $this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." ); + + foreach ( $files as $file ) { // clean up + $this->backend->doOperation( array( 'op' => 'delete', 'src' => $file ) ); + } + + $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/not/exists" ) ); + foreach ( $iter as $iter ) {} // no errors + } + // test helper wrapper for backend prepare() function private function prepare( array $params ) { $this->dirsToPrune[] = $params['dir'];