[FileBackend] Added some basic directory functions.
authorAaron Schulz <aschulz@wikimedia.org>
Thu, 5 Apr 2012 05:56:08 +0000 (22:56 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Sun, 22 Apr 2012 04:33:41 +0000 (21:33 -0700)
* Added directoryExists() function to check if a dir exists.
* Added getDirectoryList() and getTopDirectoryList() functions to list out subdirs.
* Added getTopFileList() function to list out immediate files in a dir. This wraps getFileList(), which was modified.

Change-Id: I087835f40d76c41f5d6af55b9a34a51e0f44a675

includes/AutoLoader.php
includes/filerepo/backend/FSFileBackend.php
includes/filerepo/backend/FileBackend.php
includes/filerepo/backend/FileBackendMultiWrite.php
includes/filerepo/backend/FileBackendStore.php
includes/filerepo/backend/SwiftFileBackend.php
tests/phpunit/includes/filerepo/FileBackendTest.php

index 60cbbaa..937f09c 100644 (file)
@@ -510,10 +510,16 @@ $wgAutoloadLocalClasses = array(
        'FileBackend' => 'includes/filerepo/backend/FileBackend.php',
        'FileBackendStore' => 'includes/filerepo/backend/FileBackendStore.php',
        'FileBackendStoreShardListIterator' => 'includes/filerepo/backend/FileBackendStore.php',
+       'FileBackendStoreShardDirIterator' => 'includes/filerepo/backend/FileBackendStore.php',
+       'FileBackendStoreShardFileIterator' => 'includes/filerepo/backend/FileBackendStore.php',
        'FileBackendMultiWrite' => 'includes/filerepo/backend/FileBackendMultiWrite.php',
        'FSFileBackend' => 'includes/filerepo/backend/FSFileBackend.php',
+       'FSFileBackendList' => 'includes/filerepo/backend/FSFileBackend.php',
+       'FSFileBackendDirList' => 'includes/filerepo/backend/FSFileBackend.php',
        'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php',
        'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
+       'SwiftFileBackendList' => 'includes/filerepo/backend/SwiftFileBackend.php',
+       'SwiftFileBackendDirList' => 'includes/filerepo/backend/SwiftFileBackend.php',
        'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php',
        'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
        'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php',
index 4a27ca1..df822fd 100644 (file)
@@ -7,11 +7,11 @@
 
 /**
  * @brief Class for a file system (FS) based file backend.
- * 
+ *
  * All "containers" each map to a directory under the backend's base directory.
  * For backwards-compatibility, some container paths can be set to custom paths.
  * The wiki ID will not be used in any custom paths, so this should be avoided.
- * 
+ *
  * Having directories with thousands of files will diminish performance.
  * Sharding can be accomplished by using FileRepo-style hash paths.
  *
@@ -76,7 +76,7 @@ class FSFileBackend extends FileBackendStore {
 
        /**
         * Sanity check a relative file system path for validity
-        * 
+        *
         * @param $path string Normalized relative path
         * @return bool
         */
@@ -95,14 +95,14 @@ class FSFileBackend extends FileBackendStore {
        /**
         * Given the short (unresolved) and full (resolved) name of
         * a container, return the file system path of the container.
-        * 
+        *
         * @param $shortCont string
         * @param $fullCont string
-        * @return string|null 
+        * @return string|null
         */
        protected function containerFSRoot( $shortCont, $fullCont ) {
                if ( isset( $this->containerPaths[$shortCont] ) ) {
-                       return $this->containerPaths[$shortCont]; 
+                       return $this->containerPaths[$shortCont];
                } elseif ( isset( $this->basePath ) ) {
                        return "{$this->basePath}/{$fullCont}";
                }
@@ -111,7 +111,7 @@ class FSFileBackend extends FileBackendStore {
 
        /**
         * Get the absolute file system path for a storage path
-        * 
+        *
         * @param $storagePath string Storage path
         * @return string|null
         */
@@ -439,6 +439,41 @@ class FSFileBackend extends FileBackendStore {
                clearstatcache(); // clear the PHP file stat cache
        }
 
+       /**
+        * @see FileBackendStore::doDirectoryExists()
+        * @return bool|null
+        */
+       protected function doDirectoryExists( $fullCont, $dirRel, array $params ) {
+               list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] );
+               $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid
+               $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot;
+
+               $this->trapWarnings(); // don't trust 'false' if there were errors
+               $exists = is_dir( $dir );
+               $hadError = $this->untrapWarnings();
+
+               return $hadError ? null : $exists;
+       }
+
+       /**
+        * @see FileBackendStore::getDirectoryListInternal()
+        * @return Array|null
+        */
+       public function getDirectoryListInternal( $fullCont, $dirRel, array $params ) {
+               list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] );
+               $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid
+               $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot;
+               $exists = is_dir( $dir );
+               if ( !$exists ) {
+                       wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" );
+                       return array(); // nothing under this dir
+               } elseif ( !is_readable( $dir ) ) {
+                       wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" );
+                       return null; // bad permissions?
+               }
+               return new FSFileBackendDirList( $dir, $params );
+       }
+
        /**
         * @see FileBackendStore::getFileListInternal()
         * @return array|FSFileBackendFileList|null
@@ -451,13 +486,11 @@ class FSFileBackend extends FileBackendStore {
                if ( !$exists ) {
                        wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" );
                        return array(); // nothing under this dir
-               }
-               $readable = is_readable( $dir );
-               if ( !$readable ) {
+               } elseif ( !is_readable( $dir ) ) {
                        wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" );
                        return null; // bad permissions?
                }
-               return new FSFileBackendFileList( $dir );
+               return new FSFileBackendFileList( $dir, $params );
        }
 
        /**
@@ -543,53 +576,65 @@ class FSFileBackend extends FileBackendStore {
 }
 
 /**
- * Wrapper around RecursiveDirectoryIterator that catches
- * exception or does any custom behavoir that we may want.
+ * Wrapper around RecursiveDirectoryIterator/DirectoryIterator that
+ * catches exception or does any custom behavoir that we may want.
  * Do not use this class from places outside FSFileBackend.
  *
  * @ingroup FileBackend
  */
-class FSFileBackendFileList implements Iterator {
-       /** @var RecursiveIteratorIterator */
+abstract class FSFileBackendList implements Iterator {
+       /** @var Iterator */
        protected $iter;
        protected $suffixStart; // integer
        protected $pos = 0; // integer
+       /** @var Array */
+       protected $params = array();
 
        /**
         * @param $dir string file system directory
         */
-       public function __construct( $dir ) {
+       public function __construct( $dir, array $params ) {
                $dir = realpath( $dir ); // normalize
                $this->suffixStart = strlen( $dir ) + 1; // size of "path/to/dir/"
+               $this->params = $params;
+
                try {
+                       $this->iter = $this->initIterator( $dir );
+               } catch ( UnexpectedValueException $e ) {
+                       $this->iter = null; // bad permissions? deleted?
+               }
+       }
+
+       /**
+        * Return an appropriate iterator object to wrap
+        *
+        * @param $dir string file system directory
+        * @return Iterator
+        */
+       protected function initIterator( $dir ) {
+               if ( !empty( $this->params['topOnly'] ) ) { // non-recursive
+                       # Get an iterator that will get direct sub-nodes
+                       return new DirectoryIterator( $dir );
+               } else { // recursive
                        # Get an iterator that will return leaf nodes (non-directories)
                        if ( MWInit::classExists( 'FilesystemIterator' ) ) { // PHP >= 5.3
                                # RecursiveDirectoryIterator extends FilesystemIterator.
                                # FilesystemIterator::SKIP_DOTS default is inconsistent in PHP 5.3.x.
-                               $flags = FilesystemIterator::CURRENT_AS_FILEINFO | FilesystemIterator::SKIP_DOTS;
-                               $this->iter = new RecursiveIteratorIterator( 
-                                       new RecursiveDirectoryIterator( $dir, $flags ) );
+                               $flags = FilesystemIterator::CURRENT_AS_SELF | FilesystemIterator::SKIP_DOTS;
+                               return new RecursiveIteratorIterator(
+                                       new RecursiveDirectoryIterator( $dir, $flags ),
+                                       RecursiveIteratorIterator::CHILD_FIRST // include dirs
+                               );
                        } else { // PHP < 5.3
                                # RecursiveDirectoryIterator extends DirectoryIterator
-                               $this->iter = new RecursiveIteratorIterator( 
-                                       new RecursiveDirectoryIterator( $dir ) );
+                               return new RecursiveIteratorIterator(
+                                       new RecursiveDirectoryIterator( $dir ),
+                                       RecursiveIteratorIterator::CHILD_FIRST // include dirs
+                               );
                        }
-               } catch ( UnexpectedValueException $e ) {
-                       $this->iter = null; // bad permissions? deleted?
                }
        }
 
-       /**
-        * @see Iterator::current()
-        * @return string|bool String or false
-        */
-       public function current() {
-               // Return only the relative path and normalize slashes to FileBackend-style
-               // Make sure to use the realpath since the suffix is based upon that
-               return str_replace( '\\', '/',
-                       substr( realpath( $this->iter->current() ), $this->suffixStart ) );
-       }
-
        /**
         * @see Iterator::key()
         * @return integer
@@ -598,6 +643,14 @@ class FSFileBackendFileList implements Iterator {
                return $this->pos;
        }
 
+       /**
+        * @see Iterator::current()
+        * @return string|bool String or false
+        */
+       public function current() {
+               return $this->getRelPath( $this->iter->current()->getPathname() );
+       }
+
        /**
         * @see Iterator::next()
         * @return void
@@ -605,6 +658,7 @@ class FSFileBackendFileList implements Iterator {
        public function next() {
                try {
                        $this->iter->next();
+                       $this->filterViaNext();
                } catch ( UnexpectedValueException $e ) {
                        $this->iter = null;
                }
@@ -619,6 +673,7 @@ class FSFileBackendFileList implements Iterator {
                $this->pos = 0;
                try {
                        $this->iter->rewind();
+                       $this->filterViaNext();
                } catch ( UnexpectedValueException $e ) {
                        $this->iter = null;
                }
@@ -631,4 +686,44 @@ class FSFileBackendFileList implements Iterator {
        public function valid() {
                return $this->iter && $this->iter->valid();
        }
+
+       /**
+        * Filter out items by advancing to the next ones
+        */
+       protected function filterViaNext() {}
+
+       /**
+        * Return only the relative path and normalize slashes to FileBackend-style.
+        * Uses the "real path" since the suffix is based upon that.
+        *
+        * @param $path string
+        * @return string
+        */
+       protected function getRelPath( $path ) {
+               return strtr( substr( realpath( $path ), $this->suffixStart ), '\\', '/' );
+       }
+}
+
+class FSFileBackendDirList extends FSFileBackendList {
+       protected function filterViaNext() {
+               while ( $this->iter->valid() ) {
+                       if ( $this->iter->current()->isDot() || !$this->iter->current()->isDir() ) {
+                               $this->iter->next(); // skip non-directories and dot files
+                       } else {
+                               break;
+                       }
+               }
+       }
+}
+
+class FSFileBackendFileList extends FSFileBackendList {
+       protected function filterViaNext() {
+               while ( $this->iter->valid() ) {
+                       if ( !$this->iter->current()->isFile() ) {
+                               $this->iter->next(); // skip non-files and dot files
+                       } else {
+                               break;
+                       }
+               }
+       }
 }
index b9821bf..f3b879a 100644 (file)
@@ -546,22 +546,89 @@ abstract class FileBackend {
        abstract public function getLocalCopy( array $params );
 
        /**
-        * Get an iterator to list out all stored files under a storage directory.
+        * Check if a directory exists at a given storage path.
+        * Backends using key/value stores will check if the path is a
+        * virtual directory, meaning there are files under the given directory.
+        *
+        * Storage backends with eventual consistency might return stale data.
+        *
+        * $params include:
+        *     dir : storage directory
+        *
+        * @return bool|null Returns null on failure
+        * @since 1.20
+        */
+       abstract public function directoryExists( array $params );
+
+       /**
+        * Get an iterator to list *all* directories under a storage directory.
+        * If the directory is of the form "mwstore://backend/container",
+        * then all directories in the container should be listed.
+        * If the directory is of form "mwstore://backend/container/dir",
+        * then all directories directly under that directory should be listed.
+        * Results should be storage directories relative to the given directory.
+        *
+        * Storage backends with eventual consistency might return stale data.
+        *
+        * $params include:
+        *     dir     : storage directory
+        *     topOnly : only return direct child directories of the directory
+        *
+        * @return Traversable|Array|null Returns null on failure
+        * @since 1.20
+        */
+       abstract public function getDirectoryList( array $params );
+
+       /**
+        * Same as FileBackend::getDirectoryList() except only lists
+        * directories that are immediately under the given directory.
+        *
+        * Storage backends with eventual consistency might return stale data.
+        *
+        * $params include:
+        *     dir : storage directory
+        *
+        * @return Traversable|Array|null Returns null on failure
+        * @since 1.20
+        */
+       final public function getTopDirectoryList( array $params ) {
+               return $this->getDirectoryList( array( 'topOnly' => true ) + $params );
+       }
+
+       /**
+        * Get an iterator to list *all* stored files under a storage directory.
         * If the directory is of the form "mwstore://backend/container",
         * then all files in the container should be listed.
         * If the directory is of form "mwstore://backend/container/dir",
-        * then all files under that container directory should be listed.
+        * then all files under that directory should be listed.
         * Results should be storage paths relative to the given directory.
         *
         * Storage backends with eventual consistency might return stale data.
         *
         * $params include:
-        *     dir : storage path directory
+        *     dir     : storage directory
+        *     topOnly : only return direct child files of the directory
         *
         * @return Traversable|Array|null Returns null on failure
         */
        abstract public function getFileList( array $params );
 
+       /**
+        * Same as FileBackend::getFileList() except only lists
+        * files that are immediately under the given directory.
+        *
+        * Storage backends with eventual consistency might return stale data.
+        *
+        * $params include:
+        *     dir : storage directory
+        *
+        * @return Traversable|Array|null Returns null on failure
+        * @since 1.20
+        */
+       final public function getTopFileList( array $params ) {
+               return $this->getFileList( array( 'topOnly' => true ) + $params );
+       }
+
        /**
         * Invalidate any in-process file existence and property cache.
         * If $paths is given, then only the cache for those files will be cleared.
@@ -708,6 +775,7 @@ abstract class FileBackend {
         *
         * @param $path string
         * @return bool
+        * @since 1.20
         */
        final public static function isPathTraversalFree( $path ) {
                return ( self::normalizeContainerPath( $path ) !== null );
index 9c3cf5b..a902488 100644 (file)
@@ -7,7 +7,7 @@
 
 /**
  * @brief Proxy backend that mirrors writes to several internal backends.
- * 
+ *
  * This class defines a multi-write backend. Multiple backends can be
  * registered to this proxy backend and it will act as a single backend.
  * Use this when all access to those backends is through this proxy backend.
@@ -220,7 +220,7 @@ class FileBackendMultiWrite extends FileBackend {
        /**
         * Substitute the backend name in storage path parameters
         * for a set of operations with that of a given internal backend.
-        * 
+        *
         * @param $ops Array List of file operation arrays
         * @param $backend FileBackendStore
         * @return Array
@@ -241,7 +241,7 @@ class FileBackendMultiWrite extends FileBackend {
 
        /**
         * Same as substOpBatchPaths() but for a single operation
-        * 
+        *
         * @param $op File operation array
         * @param $backend FileBackendStore
         * @return Array
@@ -253,7 +253,7 @@ class FileBackendMultiWrite extends FileBackend {
 
        /**
         * Substitute the backend of storage paths with an internal backend's name
-        * 
+        *
         * @param $paths Array|string List of paths or single string path
         * @param $backend FileBackendStore
         * @return Array|string
@@ -268,7 +268,7 @@ class FileBackendMultiWrite extends FileBackend {
 
        /**
         * Substitute the backend of internal storage paths with the proxy backend's name
-        * 
+        *
         * @param $paths Array|string List of paths or single string path
         * @return Array|string
         */
@@ -320,7 +320,7 @@ class FileBackendMultiWrite extends FileBackend {
        }
 
        /**
-        * @see FileBackend::getFileList()
+        * @see FileBackend::concatenate()
         */
        public function concatenate( array $params ) {
                // We are writing to an FS file, so we don't need to do this per-backend
@@ -408,6 +408,22 @@ class FileBackendMultiWrite extends FileBackend {
                return $this->backends[$this->masterIndex]->getLocalCopy( $realParams );
        }
 
+       /**
+        * @see FileBackend::directoryExists()
+        */
+       public function directoryExists( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->directoryExists( $realParams );
+       }
+
+       /**
+        * @see FileBackend::getSubdirectoryList()
+        */
+       public function getDirectoryList( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getDirectoryList( $realParams );
+       }
+
        /**
         * @see FileBackend::getFileList()
         */
index 55dedc1..c41d0fc 100644 (file)
@@ -646,7 +646,78 @@ abstract class FileBackendStore extends FileBackend {
        }
 
        /**
-        * @copydoc FileBackend::getFileList()
+        * @see FileBackend::directoryExists()
+        * @return bool|null
+        */
+       final public function directoryExists( array $params ) {
+               list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+               if ( $dir === null ) {
+                       return false; // invalid storage path
+               }
+               if ( $shard !== null ) { // confined to a single container/shard
+                       return $this->doDirectoryExists( $fullCont, $dir, $params );
+               } else { // directory is on several shards
+                       wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+                       list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+                       $res = false; // response
+                       foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
+                               $exists = $this->doDirectoryExists( "{$fullCont}{$suffix}", $dir, $params );
+                               if ( $exists ) {
+                                       $res = true;
+                                       break; // found one!
+                               } elseif ( $exists === null ) { // error?
+                                       $res = null; // if we don't find anything, it is indeterminate
+                               }
+                       }
+                       return $res;
+               }
+       }
+
+       /**
+        * @see FileBackendStore::directoryExists()
+        *
+        * @param $container string Resolved container name
+        * @param $dir string Resolved path relative to container
+        * @param $params Array
+        * @return bool|null
+        */
+       abstract protected function doDirectoryExists( $container, $dir, array $params );
+
+       /**
+        * @see FileBackend::getDirectoryList()
+        * @return Array|null|Traversable
+        */
+       final public function getDirectoryList( array $params ) {
+               list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+               if ( $dir === null ) { // invalid storage path
+                       return null;
+               }
+               if ( $shard !== null ) {
+                       // File listing is confined to a single container/shard
+                       return $this->getDirectoryListInternal( $fullCont, $dir, $params );
+               } else {
+                       wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+                       // File listing spans multiple containers/shards
+                       list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+                       return new FileBackendStoreShardDirIterator( $this,
+                               $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params );
+               }
+       }
+
+       /**
+        * Do not call this function from places outside FileBackend
+        *
+        * @see FileBackendStore::getDirectoryList()
+        *
+        * @param $container string Resolved container name
+        * @param $dir string Resolved path relative to container
+        * @param $params Array
+        * @return Traversable|Array|null
+        */
+       abstract public function getDirectoryListInternal( $container, $dir, array $params );
+
+       /**
+        * @see FileBackend::getFileList()
         * @return Array|null|Traversable
         */
        final public function getFileList( array $params ) {
@@ -661,7 +732,7 @@ abstract class FileBackendStore extends FileBackend {
                        wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
                        // File listing spans multiple containers/shards
                        list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
-                       return new FileBackendStoreShardListIterator( $this,
+                       return new FileBackendStoreShardFileIterator( $this,
                                $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params );
                }
        }
@@ -962,6 +1033,19 @@ abstract class FileBackendStore extends FileBackend {
                return ''; // no sharding
        }
 
+       /**
+        * Check if a storage path maps to a single shard.
+        * Container dirs like "a", where the container shards on "x/xy",
+        * can reside on several shards. Such paths are tricky to handle.
+        *
+        * @param $storagePath string Storage path
+        * @return bool
+        */
+       final public function isSingleShardPathInternal( $storagePath ) {
+               list( $c, $r, $shard ) = $this->resolveStoragePath( $storagePath );
+               return ( $shard !== null );
+       }
+
        /**
         * Get the sharding config for a container.
         * If greater than 0, then all file storage paths within
@@ -1044,26 +1128,29 @@ abstract class FileBackendStore extends FileBackend {
 }
 
 /**
- * FileBackendStore helper function to handle file listings that span container shards.
+ * FileBackendStore helper function to handle listings that span container shards.
  * Do not use this class from places outside of FileBackendStore.
  *
  * @ingroup FileBackend
  */
-class FileBackendStoreShardListIterator implements Iterator {
-       /* @var FileBackendStore */
+abstract class FileBackendStoreShardListIterator implements Iterator {
+       /** @var FileBackendStore */
        protected $backend;
-       /* @var Array */
+       /** @var Array */
        protected $params;
-       /* @var Array */
+       /** @var Array */
        protected $shardSuffixes;
-       protected $container; // string
-       protected $directory; // string
+       protected $container; // string; full container name
+       protected $directory; // string; resolved relative path
 
-       /* @var Traversable */
+       /** @var Traversable */
        protected $iter;
        protected $curShard = 0; // integer
        protected $pos = 0; // integer
 
+       /** @var Array */
+       protected $multiShardPaths = array(); // (rel path => 1)
+
        /**
         * @param $backend FileBackendStore
         * @param $container string Full storage container name
@@ -1112,6 +1199,8 @@ class FileBackendStoreShardListIterator implements Iterator {
                } else {
                        $this->iter->next();
                }
+               // Filter out items that we already listed
+               $this->filterViaNext();
                // Find the next non-empty shard if no elements are left
                $this->nextShardIteratorIfNotValid();
        }
@@ -1124,6 +1213,8 @@ class FileBackendStoreShardListIterator implements Iterator {
                $this->pos = 0;
                $this->curShard = 0;
                $this->setIteratorFromCurrentShard();
+               // Filter out items that we already listed
+               $this->filterViaNext();
                // Find the next non-empty shard if this one has no elements
                $this->nextShardIteratorIfNotValid();
        }
@@ -1142,6 +1233,25 @@ class FileBackendStoreShardListIterator implements Iterator {
                }
        }
 
+       /**
+        * Filter out duplicate items by advancing to the next ones
+        */
+       protected function filterViaNext() {
+               while ( $this->iter->valid() ) {
+                       $rel = $this->iter->current(); // path relative to given directory
+                       $path = $this->params['dir'] . "/{$rel}"; // full storage path
+                       if ( !$this->backend->isSingleShardPathInternal( $path ) ) {
+                               // Don't keep listing paths that are on multiple shards
+                               if ( isset( $this->multiShardPaths[$rel] ) ) {
+                                       $this->iter->next(); // we already listed this path
+                               } else {
+                                       $this->multiShardPaths[$rel] = 1;
+                                       break;
+                               }
+                       }
+               }
+       }
+
        /**
         * If the list iterator for this container shard is out of items,
         * then move on to the next container that has items.
@@ -1161,7 +1271,35 @@ class FileBackendStoreShardListIterator implements Iterator {
         */
        protected function setIteratorFromCurrentShard() {
                $suffix = $this->shardSuffixes[$this->curShard];
-               $this->iter = $this->backend->getFileListInternal(
+               $this->iter = $this->listFromShard(
                        "{$this->container}{$suffix}", $this->directory, $this->params );
        }
+
+       /**
+        * Get the list for a given container shard
+        *
+        * @param $container string Resolved container name
+        * @param $dir string Resolved path relative to container
+        * @param $params Array
+        * @return Traversable|Array|null
+        */
+       abstract protected function listFromShard( $container, $dir, array $params );
+}
+
+/**
+ * Iterator for listing directories
+ */
+class FileBackendStoreShardDirIterator extends FileBackendStoreShardListIterator {
+       protected function listFromShard( $container, $dir, array $params ) {
+               return $this->backend->getDirectoryListInternal( $container, $dir, $params );
+       }
+}
+
+/**
+ * Iterator for listing regular files
+ */
+class FileBackendStoreShardFileIterator extends FileBackendStoreShardListIterator {
+       protected function listFromShard( $container, $dir, array $params ) {
+               return $this->backend->getFileListInternal( $container, $dir, $params );
+       }
 }
index c7e40e8..af5d693 100644 (file)
@@ -535,12 +535,39 @@ class SwiftFileBackend extends FileBackendStore {
                return $data;
        }
 
+       /**
+        * @see FileBackendStore::doDirectoryExists()
+        * @return bool|null
+        */
+       protected function doDirectoryExists( $fullCont, $dir, array $params ) {
+               try {
+                       $container = $this->getContainer( $fullCont );
+                       $prefix = ( $dir == '' ) ? null : "{$dir}/";
+                       return ( count( $container->list_objects( 1, null, $prefix ) ) > 0 );
+               } catch ( NoSuchContainerException $e ) {
+                       return false;
+               } catch ( InvalidResponseException $e ) {
+               } catch ( Exception $e ) { // some other exception?
+                       $this->logException( $e, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) );
+               }
+
+               return null; // error
+       }
+
+       /**
+        * @see FileBackendStore::getDirectoryListInternal()
+        * @return SwiftFileBackendDirList
+        */
+       public function getDirectoryListInternal( $fullCont, $dir, array $params ) {
+               return new SwiftFileBackendDirList( $this, $fullCont, $dir, $params );
+       }
+
        /**
         * @see FileBackendStore::getFileListInternal()
         * @return SwiftFileBackendFileList
         */
        public function getFileListInternal( $fullCont, $dir, array $params ) {
-               return new SwiftFileBackendFileList( $this, $fullCont, $dir );
+               return new SwiftFileBackendFileList( $this, $fullCont, $dir, $params );
        }
 
        /**
@@ -548,17 +575,96 @@ class SwiftFileBackend extends FileBackendStore {
         *
         * @param $fullCont string Resolved container name
         * @param $dir string Resolved storage directory with no trailing slash
-        * @param $after string Storage path of file to list items after
+        * @param $after string|null Storage path of file to list items after
         * @param $limit integer Max number of items to list
-        * @return Array
+        * @param $params Array Includes flag for 'topOnly'
+        * @return Array List of relative paths of dirs directly under $dir
         */
-       public function getFileListPageInternal( $fullCont, $dir, $after, $limit ) {
+       public function getDirListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) {
+               $dirs = array();
+
+               try {
+                       $container = $this->getContainer( $fullCont );
+                       $prefix = ( $dir == '' ) ? null : "{$dir}/";
+                       // Non-recursive: only list dirs right under $dir
+                       if ( !empty( $params['topOnly'] ) ) {
+                               $objects = $container->list_objects( $limit, $after, $prefix, null, '/' );
+                               foreach ( $objects as $object ) { // files and dirs
+                                       if ( substr( $object, -1 ) === '/' ) {
+                                               $dirs[] = $object; // directories end in '/'
+                                       }
+                                       $after = $object; // update last item
+                               }
+                       // Recursive: list all dirs under $dir and its subdirs
+                       } else {
+                               // Get directory from last item of prior page
+                               $lastDir = $this->getParentDir( $after ); // must be first page
+                               $objects = $container->list_objects( $limit, $after, $prefix );
+                               foreach ( $objects as $object ) { // files
+                                       $objectDir = $this->getParentDir( $object ); // directory of object
+                                       if ( $objectDir !== false ) { // file has a parent dir
+                                               // Swift stores paths in UTF-8, using binary sorting.
+                                               // See function "create_container_table" in common/db.py.
+                                               // If a directory is not "greater" than the last one,
+                                               // then it was already listed by the calling iterator.
+                                               if ( $objectDir > $lastDir ) {
+                                                       $pDir = $objectDir;
+                                                       do { // add dir and all its parent dirs
+                                                               $dirs[] = "{$pDir}/";
+                                                               $pDir = $this->getParentDir( $pDir );
+                                                       } while ( $pDir !== false // sanity
+                                                               && $pDir > $lastDir // not done already
+                                                               && strlen( $pDir ) > strlen( $dir ) // within $dir
+                                                       );
+                                               }
+                                               $lastDir = $objectDir;
+                                       }
+                                       $after = $object; // update last item
+                               }
+                       }
+               } catch ( NoSuchContainerException $e ) {
+               } catch ( InvalidResponseException $e ) {
+               } catch ( Exception $e ) { // some other exception?
+                       $this->logException( $e, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) );
+               }
+
+               return $dirs;
+       }
+
+       protected function getParentDir( $path ) {
+               return ( strpos( $path, '/' ) !== false ) ? dirname( $path ) : false;
+       }
+
+       /**
+        * Do not call this function outside of SwiftFileBackendFileList
+        *
+        * @param $fullCont string Resolved container name
+        * @param $dir string Resolved storage directory with no trailing slash
+        * @param $after string|null Storage path of file to list items after
+        * @param $limit integer Max number of items to list
+        * @param $params Array Includes flag for 'topOnly'
+        * @return Array List of relative paths of files under $dir
+        */
+       public function getFileListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) {
                $files = array();
 
                try {
                        $container = $this->getContainer( $fullCont );
                        $prefix = ( $dir == '' ) ? null : "{$dir}/";
-                       $files = $container->list_objects( $limit, $after, $prefix );
+                       // Non-recursive: only list files right under $dir
+                       if ( !empty( $params['topOnly'] ) ) { // files and dirs
+                               $objects = $container->list_objects( $limit, $after, $prefix, null, '/' );
+                               foreach ( $objects as $object ) {
+                                       if ( substr( $object, -1 ) !== '/' ) {
+                                               $files[] = $object; // directories end in '/'
+                                       }
+                               }
+                       // Recursive: list all files under $dir and its subdirs
+                       } else { // files
+                               $files = $container->list_objects( $limit, $after, $prefix );
+                       }
+                       $after = end( $files ); // update last item
+                       reset( $files ); // reset pointer
                } catch ( NoSuchContainerException $e ) {
                } catch ( InvalidResponseException $e ) {
                } catch ( Exception $e ) { // some other exception?
@@ -816,22 +922,24 @@ class SwiftFileBackend extends FileBackendStore {
 }
 
 /**
- * SwiftFileBackend helper class to page through object listings.
+ * SwiftFileBackend helper class to page through listings.
  * Swift also has a listing limit of 10,000 objects for sanity.
  * Do not use this class from places outside SwiftFileBackend.
  *
  * @ingroup FileBackend
  */
-class SwiftFileBackendFileList implements Iterator {
+abstract class SwiftFileBackendList implements Iterator {
        /** @var Array */
        protected $bufferIter = array();
        protected $bufferAfter = null; // string; list items *after* this path
        protected $pos = 0; // integer
+       /** @var Array */
+       protected $params = array();
 
        /** @var SwiftFileBackend */
        protected $backend;
-       protected $container; //
-       protected $dir; // string storage directory
+       protected $container; // string; container name
+       protected $dir; // string; storage directory
        protected $suffixStart; // integer
 
        const PAGE_SIZE = 5000; // file listing buffer size
@@ -840,8 +948,9 @@ class SwiftFileBackendFileList implements Iterator {
         * @param $backend SwiftFileBackend
         * @param $fullCont string Resolved container name
         * @param $dir string Resolved directory relative to container
+        * @param $params Array
         */
-       public function __construct( SwiftFileBackend $backend, $fullCont, $dir ) {
+       public function __construct( SwiftFileBackend $backend, $fullCont, $dir, array $params ) {
                $this->backend = $backend;
                $this->container = $fullCont;
                $this->dir = $dir;
@@ -853,14 +962,7 @@ class SwiftFileBackendFileList implements Iterator {
                } else { // dir within container
                        $this->suffixStart = strlen( $this->dir ) + 1; // size of "path/to/dir/"
                }
-       }
-
-       /**
-        * @see Iterator::current()
-        * @return string|bool String or false
-        */
-       public function current() {
-               return substr( current( $this->bufferIter ), $this->suffixStart );
+               $this->params = $params;
        }
 
        /**
@@ -882,10 +984,9 @@ class SwiftFileBackendFileList implements Iterator {
                // Check if there are no files left in this page and
                // advance to the next page if this page was not empty.
                if ( !$this->valid() && count( $this->bufferIter ) ) {
-                       $this->bufferAfter = end( $this->bufferIter );
-                       $this->bufferIter = $this->backend->getFileListPageInternal(
-                               $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE
-                       );
+                       $this->bufferIter = $this->pageFromList(
+                               $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE, $this->params
+                       ); // updates $this->bufferAfter
                }
        }
 
@@ -896,9 +997,9 @@ class SwiftFileBackendFileList implements Iterator {
        public function rewind() {
                $this->pos = 0;
                $this->bufferAfter = null;
-               $this->bufferIter = $this->backend->getFileListPageInternal(
-                       $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE
-               );
+               $this->bufferIter = $this->pageFromList(
+                       $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE, $this->params
+               ); // updates $this->bufferAfter
        }
 
        /**
@@ -908,4 +1009,58 @@ class SwiftFileBackendFileList implements Iterator {
        public function valid() {
                return ( current( $this->bufferIter ) !== false ); // no paths can have this value
        }
+
+       /**
+        * Get the given list portion (page)
+        *
+        * @param $container string Resolved container name
+        * @param $dir string Resolved path relative to container
+        * @param $after string|null
+        * @param $limit integer
+        * @param $params Array
+        * @return Traversable|Array|null
+        */
+       abstract protected function pageFromList( $container, $dir, &$after, $limit, array $params );
+}
+
+/**
+ * Iterator for listing directories
+ */
+class SwiftFileBackendDirList extends SwiftFileBackendList {
+       /**
+        * @see Iterator::current()
+        * @return string|bool String (relative path) or false
+        */
+       public function current() {
+               return substr( current( $this->bufferIter ), $this->suffixStart, -1 );
+       }
+
+       /**
+        * @see SwiftFileBackendList::pageFromList()
+        * @return Array
+        */
+       protected function pageFromList( $container, $dir, &$after, $limit, array $params ) {
+               return $this->backend->getDirListPageInternal( $container, $dir, $after, $limit, $params );
+       }
+}
+
+/**
+ * Iterator for listing regular files
+ */
+class SwiftFileBackendFileList extends SwiftFileBackendList {
+       /**
+        * @see Iterator::current()
+        * @return string|bool String (relative path) or false
+        */
+       public function current() {
+               return substr( current( $this->bufferIter ), $this->suffixStart );
+       }
+
+       /**
+        * @see SwiftFileBackendList::pageFromList()
+        * @return Array
+        */
+       protected function pageFromList( $container, $dir, &$after, $limit, array $params ) {
+               return $this->backend->getFileListPageInternal( $container, $dir, $after, $limit, $params );
+       }
 }
index 2e95f55..612c368 100644 (file)
@@ -1307,6 +1307,26 @@ class FileBackendTest extends MediaWikiTestCase {
 
                $this->assertEquals( $expected, $list, "Correct file listing ($backendName), second iteration." );
 
+               // Expected listing (top files only)
+               $expected = array(
+                       "test1.txt",
+                       "test2.txt",
+                       "test3.txt",
+                       "test4.txt",
+                       "test5.txt"
+               );
+               sort( $expected );
+
+               // Actual listing (top files only)
+               $list = array();
+               $iter = $this->backend->getTopFileList( array( 'dir' => "$base/unittest-cont1/subdir2/subdir" ) );
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct top file listing ($backendName)." );
+
                foreach ( $files as $file ) { // clean up
                        $this->backend->doOperation( array( 'op' => 'delete', 'src' => $file ) );
                }
@@ -1315,6 +1335,182 @@ class FileBackendTest extends MediaWikiTestCase {
                foreach ( $iter as $iter ) {} // no errors
        }
 
+       public function testGetDirectoryList() {
+               $this->backend = $this->singleBackend;
+               $this->tearDownFiles();
+               $this->doTestGetDirectoryList();
+               $this->tearDownFiles();
+
+               $this->backend = $this->multiBackend;
+               $this->tearDownFiles();
+               $this->doTestGetDirectoryList();
+               $this->tearDownFiles();
+       }
+
+       private function doTestGetDirectoryList() {
+               $backendName = $this->backendClass();
+
+               $base = $this->baseStorePath();
+               $files = array(
+                       "$base/unittest-cont1/test1.txt",
+                       "$base/unittest-cont1/test2.txt",
+                       "$base/unittest-cont1/test3.txt",
+                       "$base/unittest-cont1/subdir1/test1.txt",
+                       "$base/unittest-cont1/subdir1/test2.txt",
+                       "$base/unittest-cont1/subdir2/test3.txt",
+                       "$base/unittest-cont1/subdir2/test4.txt",
+                       "$base/unittest-cont1/subdir2/subdir/test1.txt",
+                       "$base/unittest-cont1/subdir3/subdir/test2.txt",
+                       "$base/unittest-cont1/subdir4/subdir/test3.txt",
+                       "$base/unittest-cont1/subdir4/subdir/test4.txt",
+                       "$base/unittest-cont1/subdir4/subdir/test5.txt",
+                       "$base/unittest-cont1/subdir4/subdir/sub/test0.txt",
+                       "$base/unittest-cont1/subdir4/subdir/sub/120-px-file.txt",
+               );
+
+               // Add the files
+               $ops = array();
+               foreach ( $files as $file ) {
+                       $this->prepare( array( 'dir' => dirname( $file ) ) );
+                       $ops[] = array( 'op' => 'create', 'content' => 'xxy', 'dst' => $file );
+               }
+               $status = $this->backend->doOperations( $ops );
+               $this->assertEquals( array(), $status->errors,
+                       "Creation of files succeeded ($backendName)." );
+               $this->assertEquals( true, $status->isOK(),
+                       "Creation of files succeeded with OK status ($backendName)." );
+
+               // Expected listing
+               $expected = array(
+                       "subdir1",
+                       "subdir2",
+                       "subdir3",
+                       "subdir4",
+               );
+               sort( $expected );
+
+               $this->assertEquals( true,
+                       $this->backend->directoryExists( array( 'dir' => "$base/unittest-cont1/subdir1" ) ),
+                       "Directory exists in ($backendName)." );
+               $this->assertEquals( true,
+                       $this->backend->directoryExists( array( 'dir' => "$base/unittest-cont1/subdir2/subdir" ) ),
+                       "Directory exists in ($backendName)." );
+               $this->assertEquals( false,
+                       $this->backend->directoryExists( array( 'dir' => "$base/unittest-cont1/subdir2/test1.txt" ) ),
+                       "Directory does not exists in ($backendName)." );
+
+               // Actual listing (no trailing slash)
+               $list = array();
+               $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1" ) );
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." );
+
+               // Actual listing (with trailing slash)
+               $list = array();
+               $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1/" ) );
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." );
+
+               // Expected listing
+               $expected = array(
+                       "subdir",
+               );
+               sort( $expected );
+
+               // Actual listing (no trailing slash)
+               $list = array();
+               $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1/subdir2" ) );
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." );
+
+               // Actual listing (with trailing slash)
+               $list = array();
+               $iter = $this->backend->getTopDirectoryList( array( 'dir' => "$base/unittest-cont1/subdir2/" ) );
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName)." );
+
+               // Actual listing (using iterator second time)
+               $list = array();
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct top dir listing ($backendName), second iteration." );
+
+               // Expected listing (recursive)
+               $expected = array(
+                       "subdir1",
+                       "subdir2",
+                       "subdir3",
+                       "subdir4",
+                       "subdir2/subdir",
+                       "subdir3/subdir",
+                       "subdir4/subdir",
+                       "subdir4/subdir/sub",
+               );
+               sort( $expected );
+
+               // Actual listing (recursive)
+               $list = array();
+               $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/" ) );
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." );
+
+               // Expected listing (recursive)
+               $expected = array(
+                       "subdir",
+                       "subdir/sub",
+               );
+               sort( $expected );
+
+               // Actual listing (recursive)
+               $list = array();
+               $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/subdir4" ) );
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." );
+
+               // Actual listing (recursive, second time)
+               $list = array();
+               foreach ( $iter as $file ) {
+                       $list[] = $file;
+               }
+               sort( $list );
+
+               $this->assertEquals( $expected, $list, "Correct dir listing ($backendName)." );
+
+               foreach ( $files as $file ) { // clean up
+                       $this->backend->doOperation( array( 'op' => 'delete', 'src' => $file ) );
+               }
+
+               $iter = $this->backend->getDirectoryList( array( 'dir' => "$base/unittest-cont1/not/exists" ) );
+               foreach ( $iter as $iter ) {} // no errors
+       }
+
        // test helper wrapper for backend prepare() function
        private function prepare( array $params ) {
                $this->dirsToPrune[] = $params['dir'];