Merge "[FileBackend] Added some basic directory functions."
author Brion VIBBER <brion@wikimedia.org>
Wed, 25 Apr 2012 19:38:12 +0000 (19:38 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Wed, 25 Apr 2012 19:38:12 +0000 (19:38 +0000)
1  2 
includes/filerepo/backend/FileBackendMultiWrite.php
includes/filerepo/backend/FileBackendStore.php

@@@ -84,7 -84,8 +84,7 @@@ class FileBackendMultiWrite extends Fil
                $status = Status::newGood();
  
                $performOps = array(); // list of FileOp objects
 -              $filesRead = array(); // storage paths read from
 -              $filesChanged = array(); // storage paths written to
 +              $paths = array(); // storage paths read from or written to
                // Build up a list of FileOps. The list will have all the ops
                // for one backend, then all the ops for the next, and so on.
                // These batches of ops are all part of a continuous array.
                foreach ( $this->backends as $index => $backend ) {
                        $backendOps = $this->substOpBatchPaths( $ops, $backend );
                        // Add on the operation batch for this backend
 -                      $performOps = array_merge( $performOps, $backend->getOperations( $backendOps ) );
 +                      $performOps = array_merge( $performOps,
 +                              $backend->getOperationsInternal( $backendOps ) );
                        if ( $index == 0 ) { // first batch
                                // Get the files used for these operations. Each backend has a batch of
                                // the same operations, so we only need to get them from the first batch.
 -                              foreach ( $performOps as $fileOp ) {
 -                                      $filesRead = array_merge( $filesRead, $fileOp->storagePathsRead() );
 -                                      $filesChanged = array_merge( $filesChanged, $fileOp->storagePathsChanged() );
 -                              }
 +                              $paths = $backend->getPathsToLockForOpsInternal( $performOps );
                                // Get the paths under the proxy backend's name
 -                              $filesRead = $this->unsubstPaths( $filesRead );
 -                              $filesChanged = $this->unsubstPaths( $filesChanged );
 +                              $paths['sh'] = $this->unsubstPaths( $paths['sh'] );
 +                              $paths['ex'] = $this->unsubstPaths( $paths['ex'] );
                        }
                }
  
                // Try to lock those files for the scope of this function...
                if ( empty( $opts['nonLocking'] ) ) {
 -                      $filesLockSh = array_diff( $filesRead, $filesChanged ); // optimization
 -                      $filesLockEx = $filesChanged;
 -                      // Get a shared lock on the parent directory of each path changed
 -                      $filesLockSh = array_merge( $filesLockSh, array_map( 'dirname', $filesLockEx ) );
                        // Try to lock those files for the scope of this function...
 -                      $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status );
 -                      $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status );
 +                      $scopeLockS = $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status );
 +                      $scopeLockE = $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status );
                        if ( !$status->isOK() ) {
                                return $status; // abort
                        }
  
                // Do a consistency check to see if the backends agree
                if ( count( $this->backends ) > 1 ) {
 -                      $status->merge( $this->consistencyCheck( array_merge( $filesRead, $filesChanged ) ) );
 +                      $status->merge( $this->consistencyCheck( array_merge( $paths['sh'], $paths['ex'] ) ) );
                        if ( !$status->isOK() ) {
                                return $status; // abort
                        }
        }
  
        /**
-        * @see FileBackend::getFileList()
+        * @see FileBackend::concatenate()
         */
        public function concatenate( array $params ) {
                // We are writing to an FS file, so we don't need to do this per-backend
                return $this->backends[$this->masterIndex]->getLocalCopy( $realParams );
        }
  
+       /**
+        * @see FileBackend::directoryExists()
+        */
+       public function directoryExists( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->directoryExists( $realParams );
+       }
+       /**
+        * @see FileBackend::getDirectoryList()
+        */
+       public function getDirectoryList( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getDirectoryList( $realParams );
+       }
        /**
         * @see FileBackend::getFileList()
         */
@@@ -646,7 -646,78 +646,78 @@@ abstract class FileBackendStore extend
        }
  
        /**
-        * @copydoc FileBackend::getFileList()
+        * @see FileBackend::directoryExists()
+        * @return bool|null
+        */
+       final public function directoryExists( array $params ) {
+               list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+               if ( $dir === null ) {
+                       return false; // invalid storage path
+               }
+               if ( $shard !== null ) { // confined to a single container/shard
+                       return $this->doDirectoryExists( $fullCont, $dir, $params );
+               } else { // directory is on several shards
+                       wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+                       list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+                       $res = false; // response
+                       foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
+                               $exists = $this->doDirectoryExists( "{$fullCont}{$suffix}", $dir, $params );
+                               if ( $exists ) {
+                                       $res = true;
+                                       break; // found one!
+                               } elseif ( $exists === null ) { // error?
+                                       $res = null; // if we don't find anything, it is indeterminate
+                               }
+                       }
+                       return $res;
+               }
+       }
+       /**
+        * @see FileBackendStore::directoryExists()
+        *
+        * @param $container string Resolved container name
+        * @param $dir string Resolved path relative to container
+        * @param $params Array
+        * @return bool|null
+        */
+       abstract protected function doDirectoryExists( $container, $dir, array $params );
+       /**
+        * @see FileBackend::getDirectoryList()
+        * @return Array|null|Traversable
+        */
+       final public function getDirectoryList( array $params ) {
+               list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+               if ( $dir === null ) { // invalid storage path
+                       return null;
+               }
+               if ( $shard !== null ) {
+                       // Directory listing is confined to a single container/shard
+                       return $this->getDirectoryListInternal( $fullCont, $dir, $params );
+               } else {
+                       wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+                       // Directory listing spans multiple containers/shards
+                       list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+                       return new FileBackendStoreShardDirIterator( $this,
+                               $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params );
+               }
+       }
+       /**
+        * Do not call this function from places outside FileBackend
+        *
+        * @see FileBackendStore::getDirectoryList()
+        *
+        * @param $container string Resolved container name
+        * @param $dir string Resolved path relative to container
+        * @param $params Array
+        * @return Traversable|Array|null
+        */
+       abstract public function getDirectoryListInternal( $container, $dir, array $params );
+       /**
+        * @see FileBackend::getFileList()
         * @return Array|null|Traversable
         */
        final public function getFileList( array $params ) {
                        wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
                        // File listing spans multiple containers/shards
                        list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
-                       return new FileBackendStoreShardListIterator( $this,
+                       return new FileBackendStoreShardFileIterator( $this,
                                $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params );
                }
        }
         * @return Array List of FileOp objects
         * @throws MWException
         */
 -      final public function getOperations( array $ops ) {
 +      final public function getOperationsInternal( array $ops ) {
                $supportedOps = $this->supportedOperations();
  
                $performOps = array(); // array of FileOp objects
                                // Append the FileOp class
                                $performOps[] = new $class( $this, $params );
                        } else {
 -                              throw new MWException( "Operation `$opName` is not supported." );
 +                              throw new MWException( "Operation '$opName' is not supported." );
                        }
                }
  
                return $performOps;
        }
  
 +      /**
 +       * Get a list of storage paths to lock for a list of operations
 +       * Returns an array with 'sh' (shared) and 'ex' (exclusive) keys,
 +       * each corresponding to a list of storage paths to be locked.
 +       *
 +       * @param $performOps Array List of FileOp objects
 +       * @return Array ('sh' => list of paths, 'ex' => list of paths)
 +       */
 +      final public function getPathsToLockForOpsInternal( array $performOps ) {
 +              // Build up a list of files to lock...
 +              $paths = array( 'sh' => array(), 'ex' => array() );
 +              foreach ( $performOps as $fileOp ) {
 +                      $paths['sh'] = array_merge( $paths['sh'], $fileOp->storagePathsRead() );
 +                      $paths['ex'] = array_merge( $paths['ex'], $fileOp->storagePathsChanged() );
 +              }
 +              // Optimization: if doing an EX lock anyway, don't also set an SH one
 +              $paths['sh'] = array_diff( $paths['sh'], $paths['ex'] );
 +              // Get a shared lock on the parent directory of each path changed
 +              $paths['sh'] = array_merge( $paths['sh'], array_map( 'dirname', $paths['ex'] ) );
 +
 +              return $paths;
 +      }
 +
        /**
         * @see FileBackend::doOperationsInternal()
         * @return Status
                $status = Status::newGood();
  
                // Build up a list of FileOps...
 -              $performOps = $this->getOperations( $ops );
 +              $performOps = $this->getOperationsInternal( $ops );
  
                // Acquire any locks as needed...
                if ( empty( $opts['nonLocking'] ) ) {
                        // Build up a list of files to lock...
 -                      $filesLockEx = $filesLockSh = array();
 -                      foreach ( $performOps as $fileOp ) {
 -                              $filesLockSh = array_merge( $filesLockSh, $fileOp->storagePathsRead() );
 -                              $filesLockEx = array_merge( $filesLockEx, $fileOp->storagePathsChanged() );
 -                      }
 -                      // Optimization: if doing an EX lock anyway, don't also set an SH one
 -                      $filesLockSh = array_diff( $filesLockSh, $filesLockEx );
 -                      // Get a shared lock on the parent directory of each path changed
 -                      $filesLockSh = array_merge( $filesLockSh, array_map( 'dirname', $filesLockEx ) );
 +                      $paths = $this->getPathsToLockForOpsInternal( $performOps );
                        // Try to lock those files for the scope of this function...
 -                      $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status );
 -                      $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status );
 +                      $scopeLockS = $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status );
 +                      $scopeLockE = $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status );
                        if ( !$status->isOK() ) {
                                wfProfileOut( __METHOD__ . '-' . $this->name );
                                wfProfileOut( __METHOD__ );
                return ''; // no sharding
        }
  
+       /**
+        * Check if a storage path maps to a single shard.
+        * Container dirs like "a", where the container shards on "x/xy",
+        * can reside on several shards. Such paths are tricky to handle.
+        *
+        * @param $storagePath string Storage path
+        * @return bool
+        */
+       final public function isSingleShardPathInternal( $storagePath ) {
+               list( $c, $r, $shard ) = $this->resolveStoragePath( $storagePath );
+               return ( $shard !== null );
+       }
        /**
         * Get the sharding config for a container.
         * If greater than 0, then all file storage paths within
  }
  
  /**
-  * FileBackendStore helper function to handle file listings that span container shards.
+  * FileBackendStore helper function to handle listings that span container shards.
   * Do not use this class from places outside of FileBackendStore.
   *
   * @ingroup FileBackend
   */
- class FileBackendStoreShardListIterator implements Iterator {
-       /* @var FileBackendStore */
+ abstract class FileBackendStoreShardListIterator implements Iterator {
+       /** @var FileBackendStore */
        protected $backend;
-       /* @var Array */
+       /** @var Array */
        protected $params;
-       /* @var Array */
+       /** @var Array */
        protected $shardSuffixes;
-       protected $container; // string
-       protected $directory; // string
+       protected $container; // string; full container name
+       protected $directory; // string; resolved relative path
  
-       /* @var Traversable */
+       /** @var Traversable */
        protected $iter;
        protected $curShard = 0; // integer
        protected $pos = 0; // integer
  
+       /** @var Array */
+       protected $multiShardPaths = array(); // (rel path => 1)
        /**
         * @param $backend FileBackendStore
         * @param $container string Full storage container name
                } else {
                        $this->iter->next();
                }
+               // Filter out items that we already listed
+               $this->filterViaNext();
                // Find the next non-empty shard if no elements are left
                $this->nextShardIteratorIfNotValid();
        }
                $this->pos = 0;
                $this->curShard = 0;
                $this->setIteratorFromCurrentShard();
+               // Filter out items that we already listed
+               $this->filterViaNext();
                // Find the next non-empty shard if this one has no elements
                $this->nextShardIteratorIfNotValid();
        }
                }
        }
  
+       /**
+        * Filter out duplicate items by advancing to the next ones.
+        *
+        * Stops at the first item that is either confined to a single shard
+        * or is a multi-shard path that has not been listed yet (which is
+        * then recorded in $multiShardPaths). Multi-shard paths that were
+        * already listed are skipped.
+        */
+       protected function filterViaNext() {
+               while ( $this->iter->valid() ) {
+                       $rel = $this->iter->current(); // path relative to given directory
+                       $path = $this->params['dir'] . "/{$rel}"; // full storage path
+                       if ( $this->backend->isSingleShardPathInternal( $path ) ) {
+                               // Nothing to filter here; without this break the loop would
+                               // spin forever, as the iterator is never advanced for such items
+                               break;
+                       } elseif ( isset( $this->multiShardPaths[$rel] ) ) {
+                               // Don't keep listing paths that are on multiple shards
+                               $this->iter->next(); // we already listed this path
+                       } else {
+                               $this->multiShardPaths[$rel] = 1;
+                               break;
+                       }
+               }
+       }
        /**
         * If the list iterator for this container shard is out of items,
         * then move on to the next container that has items.
         */
        protected function setIteratorFromCurrentShard() {
                $suffix = $this->shardSuffixes[$this->curShard];
-               $this->iter = $this->backend->getFileListInternal(
+               $this->iter = $this->listFromShard(
                        "{$this->container}{$suffix}", $this->directory, $this->params );
        }
+       /**
+        * Get the list for a given container shard
+        *
+        * @param $container string Resolved container name
+        * @param $dir string Resolved path relative to container
+        * @param $params Array
+        * @return Traversable|Array|null
+        */
+       abstract protected function listFromShard( $container, $dir, array $params );
+ }
+ /**
+  * Iterator for listing directories
+  */
+ class FileBackendStoreShardDirIterator extends FileBackendStoreShardListIterator {
+       protected function listFromShard( $container, $dir, array $params ) {
+               return $this->backend->getDirectoryListInternal( $container, $dir, $params );
+       }
+ }
+ /**
+  * Iterator for listing regular files
+  */
+ class FileBackendStoreShardFileIterator extends FileBackendStoreShardListIterator {
+       protected function listFromShard( $container, $dir, array $params ) {
+               return $this->backend->getFileListInternal( $container, $dir, $params );
+       }
  }