Merge "[FileBackend] Rewrote FileBackendStoreShardListIterator to actually work."
author Tim Starling <tstarling@wikimedia.org>
Fri, 29 Jun 2012 06:23:42 +0000 (06:23 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Fri, 29 Jun 2012 06:23:42 +0000 (06:23 +0000)
1  2 
includes/filerepo/backend/FileBackendStore.php

@@@ -532,12 -532,14 +532,12 @@@ abstract class FileBackendStore extend
         * @return bool
         */
        final public function getFileStat( array $params ) {
 -              wfProfileIn( __METHOD__ );
 -              wfProfileIn( __METHOD__ . '-' . $this->name );
                $path = self::normalizeStoragePath( $params['src'] );
                if ( $path === null ) {
 -                      wfProfileOut( __METHOD__ . '-' . $this->name );
 -                      wfProfileOut( __METHOD__ );
                        return false; // invalid storage path
                }
 +              wfProfileIn( __METHOD__ );
 +              wfProfileIn( __METHOD__ . '-' . $this->name );
                $latest = !empty( $params['latest'] ); // use latest data?
                if ( !isset( $this->cache[$path]['stat'] ) ) {
                        $this->primeFileCache( array( $path ) ); // check persistent cache
                        $this->trimCache(); // limit memory
                        $this->cache[$path]['stat'] = $stat;
                        $this->setFileCache( $path, $stat ); // update persistent cache
 +                      if ( isset( $stat['sha1'] ) ) { // some backends store SHA-1 as metadata
 +                              $this->trimCache(); // limit memory
 +                              $this->cache[$path]['sha1'] =
 +                                      array( 'hash' => $stat['sha1'], 'latest' => $latest );
 +                      }
                } else {
                        wfDebug( __METHOD__ . ": File $path does not exist.\n" );
                }
         * @return bool|string
         */
        final public function getFileSha1Base36( array $params ) {
 +              $path = self::normalizeStoragePath( $params['src'] );
 +              if ( $path === null ) {
 +                      return false; // invalid storage path
 +              }
                wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ . '-' . $this->name );
 -              $path = $params['src'];
 +              $latest = !empty( $params['latest'] ); // use latest data?
                if ( isset( $this->cache[$path]['sha1'] ) ) {
 -                      $this->pingCache( $path ); // LRU
 -                      wfProfileOut( __METHOD__ . '-' . $this->name );
 -                      wfProfileOut( __METHOD__ );
 -                      return $this->cache[$path]['sha1'];
 +                      // If we want the latest data, check that this cached
 +                      // value was in fact fetched with the latest available data.
 +                      if ( !$latest || $this->cache[$path]['sha1']['latest'] ) {
 +                              $this->pingCache( $path ); // LRU
 +                              wfProfileOut( __METHOD__ . '-' . $this->name );
 +                              wfProfileOut( __METHOD__ );
 +                              return $this->cache[$path]['sha1']['hash'];
 +                      }
                }
                wfProfileIn( __METHOD__ . '-miss' );
                wfProfileIn( __METHOD__ . '-miss-' . $this->name );
                wfProfileOut( __METHOD__ . '-miss' );
                if ( $hash ) { // don't cache negatives
                        $this->trimCache(); // limit memory
 -                      $this->cache[$path]['sha1'] = $hash;
 +                      $this->cache[$path]['sha1'] = array( 'hash' => $hash, 'latest' => $latest );
                }
                wfProfileOut( __METHOD__ . '-' . $this->name );
                wfProfileOut( __METHOD__ );
  
        /**
         * @see FileBackendStore::getFileSha1Base36()
 -       * @return bool
 +       * @return bool|string
         */
        protected function doGetFileSha1Base36( array $params ) {
                $fsFile = $this->getLocalReference( $params );
         * @return TempFSFile|null
         */
        public function getLocalReference( array $params ) {
 +              $path = self::normalizeStoragePath( $params['src'] );
 +              if ( $path === null ) {
 +                      return null; // invalid storage path
 +              }
                wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ . '-' . $this->name );
 -              $path = $params['src'];
 +              $latest = !empty( $params['latest'] ); // use latest data?
                if ( isset( $this->expensiveCache[$path]['localRef'] ) ) {
 -                      $this->pingExpensiveCache( $path );
 -                      wfProfileOut( __METHOD__ . '-' . $this->name );
 -                      wfProfileOut( __METHOD__ );
 -                      return $this->expensiveCache[$path]['localRef'];
 +                      // If we want the latest data, check that this cached
 +                      // value was in fact fetched with the latest available data.
 +                      if ( !$latest || $this->expensiveCache[$path]['localRef']['latest'] ) {
 +                              $this->pingExpensiveCache( $path );
 +                              wfProfileOut( __METHOD__ . '-' . $this->name );
 +                              wfProfileOut( __METHOD__ );
 +                              return $this->expensiveCache[$path]['localRef']['object'];
 +                      }
                }
                $tmpFile = $this->getLocalCopy( $params );
                if ( $tmpFile ) { // don't cache negatives
                        $this->trimExpensiveCache(); // limit memory
 -                      $this->expensiveCache[$path]['localRef'] = $tmpFile;
 +                      $this->expensiveCache[$path]['localRef'] =
 +                              array( 'object' => $tmpFile, 'latest' => $latest );
                }
                wfProfileOut( __METHOD__ . '-' . $this->name );
                wfProfileOut( __METHOD__ );
                $values = $this->memCache->getMulti( array_keys( $pathNames ) );
                foreach ( $values as $cacheKey => $val ) {
                        if ( is_array( $val ) ) {
 +                              $path = $pathNames[$cacheKey];
                                $this->trimCache(); // limit memory
 -                              $this->cache[$pathNames[$cacheKey]]['stat'] = $val;
 +                              $this->cache[$path]['stat'] = $val;
 +                              if ( isset( $val['sha1'] ) ) { // some backends store SHA-1 as metadata
 +                                      $this->trimCache(); // limit memory
 +                                      $this->cache[$path]['sha1'] =
 +                                              array( 'hash' => $val['sha1'], 'latest' => $val['latest'] );
 +                              }
                        }
                }
  
@@@ -1600,23 -1574,34 +1600,34 @@@ abstract class FileBackendStoreShardLis
        }
  
        /**
-        * @see Iterator::current()
-        * @return string|bool String or false
+        * @see Iterator::key()
+        * @return integer
         */
-       public function current() {
-               if ( is_array( $this->iter ) ) {
-                       return current( $this->iter );
-               } else {
-                       return $this->iter->current();
+       public function key() {
+               return $this->pos;
+       }
+       /**
+        * @see Iterator::valid()
+        * @return bool
+        */
+       public function valid() {
+               if ( $this->iter instanceof Iterator ) {
+                       return $this->iter->valid();
+               } elseif ( is_array( $this->iter ) ) {
+                       return ( current( $this->iter ) !== false ); // no paths can have this value
                }
+               return false; // some failure?
        }
  
        /**
-        * @see Iterator::key()
-        * @return integer
+        * @see Iterator::current()
+        * @return string|bool String or false
         */
-       public function key() {
-               return $this->pos;
+       public function current() {
+               return ( $this->iter instanceof Iterator )
+                       ? $this->iter->current()
+                       : current( $this->iter );
        }
  
        /**
         */
        public function next() {
                ++$this->pos;
-               if ( is_array( $this->iter ) ) {
-                       next( $this->iter );
-               } else {
-                       $this->iter->next();
-               }
-               // Filter out items that we already listed
-               $this->filterViaNext();
-               // Find the next non-empty shard if no elements are left
-               $this->nextShardIteratorIfNotValid();
+               ( $this->iter instanceof Iterator ) ? $this->iter->next() : next( $this->iter );
+               do {
+                       $continue = false; // keep scanning shards?
+                       $this->filterViaNext(); // filter out duplicates
+                       // Find the next non-empty shard if no elements are left
+                       if ( !$this->valid() ) {
+                               $this->nextShardIteratorIfNotValid();
+                               $continue = $this->valid(); // re-filter unless we ran out of shards
+                       }
+               } while ( $continue );
        }
  
        /**
                $this->pos = 0;
                $this->curShard = 0;
                $this->setIteratorFromCurrentShard();
-               // Filter out items that we already listed
-               $this->filterViaNext();
-               // Find the next non-empty shard if this one has no elements
-               $this->nextShardIteratorIfNotValid();
-       }
-       /**
-        * @see Iterator::valid()
-        * @return bool
-        */
-       public function valid() {
-               if ( $this->iter === null ) {
-                       return false; // some failure?
-               } elseif ( is_array( $this->iter ) ) {
-                       return ( current( $this->iter ) !== false ); // no paths can have this value
-               } else {
-                       return $this->iter->valid();
-               }
+               do {
+                       $continue = false; // keep scanning shards?
+                       $this->filterViaNext(); // filter out duplicates
+                       // Find the next non-empty shard if no elements are left
+                       if ( !$this->valid() ) {
+                               $this->nextShardIteratorIfNotValid();
+                               $continue = $this->valid(); // re-filter unless we ran out of shards
+                       }
+               } while ( $continue );
        }
  
        /**
         * Filter out duplicate items by advancing to the next ones
         */
        protected function filterViaNext() {
-               while ( $this->iter->valid() ) {
+               while ( $this->valid() ) {
                        $rel = $this->iter->current(); // path relative to given directory
                        $path = $this->params['dir'] . "/{$rel}"; // full storage path
-                       if ( !$this->backend->isSingleShardPathInternal( $path ) ) {
+                       if ( $this->backend->isSingleShardPathInternal( $path ) ) {
+                               break; // path is only on one shard; no issue with duplicates
+                       } elseif ( isset( $this->multiShardPaths[$rel] ) ) {
                                // Don't keep listing paths that are on multiple shards
-                               if ( isset( $this->multiShardPaths[$rel] ) ) {
-                                       $this->iter->next(); // we already listed this path
-                               } else {
-                                       $this->multiShardPaths[$rel] = 1;
-                                       break;
-                               }
+                               ( $this->iter instanceof Iterator ) ? $this->iter->next() : next( $this->iter );
+                       } else {
+                               $this->multiShardPaths[$rel] = 1;
+                               break;
                        }
                }
        }
         * If there are none, then it advances to the last container.
         */
        protected function nextShardIteratorIfNotValid() {
-               while ( !$this->valid() ) {
-                       if ( ++$this->curShard >= count( $this->shardSuffixes ) ) {
-                               break; // no more container shards
-                       }
+               while ( !$this->valid() && ++$this->curShard < count( $this->shardSuffixes ) ) {
                        $this->setIteratorFromCurrentShard();
                }
        }
         * Set the list iterator to that of the current container shard
         */
        protected function setIteratorFromCurrentShard() {
-               $suffix = $this->shardSuffixes[$this->curShard];
                $this->iter = $this->listFromShard(
-                       "{$this->container}{$suffix}", $this->directory, $this->params );
+                       $this->container . $this->shardSuffixes[$this->curShard],
+                       $this->directory, $this->params );
+               // Start loading results so that current() works
+               if ( $this->iter ) {
+                       ( $this->iter instanceof Iterator ) ? $this->iter->rewind() : reset( $this->iter );
+               }
        }
  
        /**
   */
  class FileBackendStoreShardDirIterator extends FileBackendStoreShardListIterator {
        /**
-        * @param string $container
-        * @param string $dir
-        * @param array $params
+        * @see FileBackendStoreShardListIterator::listFromShard()
         * @return Array|null|Traversable
         */
        protected function listFromShard( $container, $dir, array $params ) {
   */
  class FileBackendStoreShardFileIterator extends FileBackendStoreShardListIterator {
        /**
-        * @param string $container
-        * @param string $dir
-        * @param array $params
+        * @see FileBackendStoreShardListIterator::listFromShard()
         * @return Array|null|Traversable
         */
        protected function listFromShard( $container, $dir, array $params ) {