Merge "[FileBackend] Process cache fixes and cleanups."
author Hashar <hashar@free.fr>
Fri, 15 Jun 2012 21:09:34 +0000 (21:09 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Fri, 15 Jun 2012 21:09:34 +0000 (21:09 +0000)
1  2 
includes/filerepo/backend/FileBackendStore.php
includes/filerepo/backend/SwiftFileBackend.php

@@@ -532,14 -532,12 +532,12 @@@ abstract class FileBackendStore extend
         * @return bool
         */
        final public function getFileStat( array $params ) {
-               wfProfileIn( __METHOD__ );
-               wfProfileIn( __METHOD__ . '-' . $this->name );
                $path = self::normalizeStoragePath( $params['src'] );
                if ( $path === null ) {
-                       wfProfileOut( __METHOD__ . '-' . $this->name );
-                       wfProfileOut( __METHOD__ );
                        return false; // invalid storage path
                }
+               wfProfileIn( __METHOD__ );
+               wfProfileIn( __METHOD__ . '-' . $this->name );
                $latest = !empty( $params['latest'] ); // use latest data?
                if ( !isset( $this->cache[$path]['stat'] ) ) {
                        $this->primeFileCache( array( $path ) ); // check persistent cache
                        $this->trimCache(); // limit memory
                        $this->cache[$path]['stat'] = $stat;
                        $this->setFileCache( $path, $stat ); // update persistent cache
+                       if ( isset( $stat['sha1'] ) ) { // some backends store SHA-1 as metadata
+                               $this->trimCache(); // limit memory
+                               $this->cache[$path]['sha1'] =
+                                       array( 'hash' => $stat['sha1'], 'latest' => $latest );
+                       }
                } else {
                        wfDebug( __METHOD__ . ": File $path does not exist.\n" );
                }
         * @return bool|string
         */
        final public function getFileSha1Base36( array $params ) {
+               $path = self::normalizeStoragePath( $params['src'] );
+               if ( $path === null ) {
+                       return false; // invalid storage path
+               }
                wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ . '-' . $this->name );
-               $path = $params['src'];
+               $latest = !empty( $params['latest'] ); // use latest data?
                if ( isset( $this->cache[$path]['sha1'] ) ) {
-                       $this->pingCache( $path ); // LRU
-                       wfProfileOut( __METHOD__ . '-' . $this->name );
-                       wfProfileOut( __METHOD__ );
-                       return $this->cache[$path]['sha1'];
+                       // If we want the latest data, check that this cached
+                       // value was in fact fetched with the latest available data.
+                       if ( !$latest || $this->cache[$path]['sha1']['latest'] ) {
+                               $this->pingCache( $path ); // LRU
+                               wfProfileOut( __METHOD__ . '-' . $this->name );
+                               wfProfileOut( __METHOD__ );
+                               return $this->cache[$path]['sha1']['hash'];
+                       }
                }
                wfProfileIn( __METHOD__ . '-miss' );
                wfProfileIn( __METHOD__ . '-miss-' . $this->name );
                wfProfileOut( __METHOD__ . '-miss' );
                if ( $hash ) { // don't cache negatives
                        $this->trimCache(); // limit memory
-                       $this->cache[$path]['sha1'] = $hash;
+                       $this->cache[$path]['sha1'] = array( 'hash' => $hash, 'latest' => $latest );
                }
                wfProfileOut( __METHOD__ . '-' . $this->name );
                wfProfileOut( __METHOD__ );
  
        /**
         * @see FileBackendStore::getFileSha1Base36()
-        * @return bool
+        * @return bool|string
         */
        protected function doGetFileSha1Base36( array $params ) {
                $fsFile = $this->getLocalReference( $params );
         * @return TempFSFile|null
         */
        public function getLocalReference( array $params ) {
+               $path = self::normalizeStoragePath( $params['src'] );
+               if ( $path === null ) {
+                       return null; // invalid storage path
+               }
                wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ . '-' . $this->name );
-               $path = $params['src'];
+               $latest = !empty( $params['latest'] ); // use latest data?
                if ( isset( $this->expensiveCache[$path]['localRef'] ) ) {
-                       $this->pingExpensiveCache( $path );
-                       wfProfileOut( __METHOD__ . '-' . $this->name );
-                       wfProfileOut( __METHOD__ );
-                       return $this->expensiveCache[$path]['localRef'];
+                       // If we want the latest data, check that this cached
+                       // value was in fact fetched with the latest available data.
+                       if ( !$latest || $this->expensiveCache[$path]['localRef']['latest'] ) {
+                               $this->pingExpensiveCache( $path );
+                               wfProfileOut( __METHOD__ . '-' . $this->name );
+                               wfProfileOut( __METHOD__ );
+                               return $this->expensiveCache[$path]['localRef']['object'];
+                       }
                }
                $tmpFile = $this->getLocalCopy( $params );
                if ( $tmpFile ) { // don't cache negatives
                        $this->trimExpensiveCache(); // limit memory
-                       $this->expensiveCache[$path]['localRef'] = $tmpFile;
+                       $this->expensiveCache[$path]['localRef'] =
+                               array( 'object' => $tmpFile, 'latest' => $latest );
                }
                wfProfileOut( __METHOD__ . '-' . $this->name );
                wfProfileOut( __METHOD__ );
         * @param $val mixed Information to cache
         */
        final protected function setContainerCache( $container, $val ) {
 -              $this->memCache->set( $this->containerCacheKey( $container ), $val, 14*86400 );
 +              $this->memCache->add( $this->containerCacheKey( $container ), $val, 14*86400 );
        }
  
        /**
 -       * Delete the cached info for a container
 +       * Delete the cached info for a container.
 +       * The cache key is salted for a while to prevent race conditions.
         *
         * @param $container string Resolved container name
         */
        final protected function deleteContainerCache( $container ) {
 -              if ( !$this->memCache->delete( $this->containerCacheKey( $container ) ) ) {
 +              if ( !$this->memCache->set( $this->containerCacheKey( $container ), 'PURGED', 300 ) ) {
                        trigger_error( "Unable to delete stat cache for container $container." );
                }
        }
         * @param $val mixed Information to cache
         */
        final protected function setFileCache( $path, $val ) {
 -              $this->memCache->set( $this->fileCacheKey( $path ), $val, 7*86400 );
 +              $this->memCache->add( $this->fileCacheKey( $path ), $val, 7*86400 );
        }
  
        /**
 -       * Delete the cached stat info for a file path
 +       * Delete the cached stat info for a file path.
 +       * The cache key is salted for a while to prevent race conditions.
         *
         * @param $path string Storage path
         */
        final protected function deleteFileCache( $path ) {
 -              if ( !$this->memCache->delete( $this->fileCacheKey( $path ) ) ) {
 +              if ( !$this->memCache->set( $this->fileCacheKey( $path ), 'PURGED', 300 ) ) {
                        trigger_error( "Unable to delete stat cache for file $path." );
                }
        }
                $values = $this->memCache->getMulti( array_keys( $pathNames ) );
                foreach ( $values as $cacheKey => $val ) {
                        if ( is_array( $val ) ) {
+                               $path = $pathNames[$cacheKey];
                                $this->trimCache(); // limit memory
-                               $this->cache[$pathNames[$cacheKey]]['stat'] = $val;
+                               $this->cache[$path]['stat'] = $val;
+                               if ( isset( $val['sha1'] ) ) { // some backends store SHA-1 as metadata
+                                       $this->trimCache(); // limit memory
+                                       $this->cache[$path]['sha1'] =
+                                               array( 'hash' => $val['sha1'], 'latest' => $val['latest'] );
+                               }
                        }
                }
  
@@@ -42,9 -42,6 +42,9 @@@ class SwiftFileBackend extends FileBack
        protected $authTTL; // integer seconds
        protected $swiftAnonUser; // string; username to handle unauthenticated requests
        protected $swiftUseCDN; // boolean; whether CloudFiles CDN is enabled
 +      protected $swiftCDNExpiry; // integer; how long to cache things in the CDN
 +      protected $swiftCDNPurgable; // boolean; whether object CDN purging is enabled
 +
        protected $maxContCacheSize = 300; // integer; max containers with entries
  
        /** @var CF_Connection */
         *    swiftAuthTTL       : Swift authentication TTL (seconds)
         *    swiftAnonUser      : Swift user used for end-user requests (account:username)
         *    swiftUseCDN        : Whether a Cloud Files Content Delivery Network is set up
 +       *    swiftCDNExpiry     : How long (in seconds) to store content in the CDN.
 +       *                         If files may likely change, this should probably not exceed
 +       *                         a few days. For example, deletions may take this long to apply.
 +       *                         If object purging is enabled, however, this is not an issue.
 +       *    swiftCDNPurgable   : Whether object purge requests are allowed by the CDN.
         *    shardViaHashLevels : Map of container names to sharding config with:
         *                         'base'   : base of hash characters, 16 or 36
         *                         'levels' : the number of hash levels (and digits)
                $this->swiftUseCDN = isset( $config['swiftUseCDN'] )
                        ? $config['swiftUseCDN']
                        : false;
 +              $this->swiftCDNExpiry = isset( $config['swiftCDNExpiry'] )
 +                      ? $config['swiftCDNExpiry']
 +                      : 3600; // hour
 +              $this->swiftCDNPurgable = isset( $config['swiftCDNPurgable'] )
 +                      ? $config['swiftCDNPurgable']
 +                      : true;
                // Cache container info to mask latency
                $this->memCache = wfGetMainCache();
        }
                                ) );
                        }
                        if ( $this->swiftUseCDN ) { // Rackspace style CDN
 -                              $contObj->make_public();
 +                              $contObj->make_public( $this->swiftCDNExpiry );
                        }
                } catch ( CDNNotEnabledException $e ) {
                        // CDN not enabled; nothing to see here
                        $stat = array(
                                // Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW
                                'mtime' => wfTimestamp( TS_MW, $srcObj->last_modified ),
-                               'size'  => $srcObj->content_length,
+                               'size'  => (int)$srcObj->content_length,
                                'sha1'  => $srcObj->metadata['Sha1base36']
                        );
                } catch ( NoSuchContainerException $e ) {
        }
  
        /**
 -       * Purge the CDN cache of affected objects if CDN caching is enabled
 +       * Purge the CDN cache of affected objects if CDN caching is enabled.
 +       * This is for Rackspace/Akamai CDNs.
         *
         * @param $objects Array List of CF_Object items
         * @return void
         */
        public function purgeCDNCache( array $objects ) {
 -              if ( $this->swiftUseCDN ) { // Rackspace style CDN
 +              if ( $this->swiftUseCDN && $this->swiftCDNPurgable ) {
                        foreach ( $objects as $object ) {
                                try {
                                        $object->purge_from_cdn();
         *
         * @param $container string Container name
         * @return CF_Container
 -       * @throws InvalidResponseException
 +       * @throws CloudFilesException
         */
        protected function createContainer( $container ) {
                $conn = $this->getConnection(); // Swift proxy connection
         *
         * @param $container string Container name
         * @return void
 -       * @throws InvalidResponseException
 +       * @throws CloudFilesException
         */
        protected function deleteContainer( $container ) {
                $conn = $this->getConnection(); // Swift proxy connection
 -              $conn->delete_container( $container );
                unset( $this->connContainers[$container] ); // purge cache
 +              $conn->delete_container( $container );
        }
  
        /**