* This adds a new preloadFileStat() method to FileBackend.
* FileBackend::doOperations() makes use of this instead of primeFileCache().
The latter was mostly useless due to the 'latest' flag, which stat entries
rarely had (and since they use add(), 'latest' ones do not override the
non-'latest' ones).
* Simplified primeContainerCache/primeFileCache to no longer accept
FileOp objects and made sure nothing passes those in anymore.
Change-Id: Iee3878cadd69c3a594c701c0aff81aab5f473de5
}
/**
- * Preload persistent file stat and property cache into in-process cache.
+ * Preload persistent file stat cache and property cache into in-process cache.
* This should be used when stat calls will be made on a known list of many files.
*
+ * @see FileBackend::getFileStat()
+ *
* @param array $paths Storage paths
*/
public function preloadCache( array $paths ) {
* Invalidate any in-process file stat and property cache.
* If $paths is given, then only the cache for those files will be cleared.
*
+ * @see FileBackend::getFileStat()
+ *
* @param array $paths Storage paths (optional)
*/
public function clearCache( array $paths = null ) {
}
+ /**
+ * Preload file stat information (concurrently if possible) into in-process cache.
+ * This should be used when stat calls will be made on a known list of many files.
+ *
+ * @see FileBackend::getFileStat()
+ *
+ * @param array $params Parameters include:
+ * - srcs : list of source storage paths
+ * - latest : use the latest available data
+ * @since 1.23
+ */
+ public function preloadFileStat( array $params ) {
+ // Intentionally empty: this definition performs no preloading by itself
+ }
+
/**
* Lock the files at the given storage paths in the backend.
* This will either lock all the files or none (on failure).
}
}
+ /**
+ * Preload file stat information by delegating to the master backend.
+ *
+ * The storage paths in $params are translated for the master backend first,
+ * the same way getScopedLocksForOps() translates operation paths before
+ * handing them to the master backend.
+ *
+ * @see FileBackend::preloadFileStat()
+ *
+ * @param array $params Parameters include:
+ * - srcs : list of source storage paths
+ * - latest : use the latest available data
+ */
+ public function preloadFileStat( array $params ) {
+ $master = $this->backends[$this->masterIndex];
+ // substOpBatchPaths() takes a list of operations, so wrap $params and unwrap the result.
+ // NOTE(review): assumes it rewrites the 'srcs' key as it does for operation batches - confirm
+ $realOps = $this->substOpBatchPaths( array( $params ), $master );
+ $realParams = reset( $realOps );
+ $master->preloadFileStat( $realParams );
+ }
+
public function getScopedLocksForOps( array $ops, Status $status ) {
$realOps = $this->substOpBatchPaths( $ops, $this->backends[$this->masterIndex] );
$fileOps = $this->backends[$this->masterIndex]->getOperationsInternal( $realOps );
$this->clearCache();
}
- // Load from the persistent file and container caches
- $this->primeFileCache( $performOps );
- $this->primeContainerCache( $performOps );
+ // Build the list of paths involved
+ $paths = array();
+ foreach ( $performOps as $op ) {
+ $paths = array_merge( $paths, $op->storagePathsRead() );
+ $paths = array_merge( $paths, $op->storagePathsChanged() );
+ }
+ // Load from the persistent container caches
+ $this->primeContainerCache( $paths );
+ // Get the latest stat info for all the files (having locked them)
+ $this->preloadFileStat( array( 'srcs' => $paths, 'latest' => true ) );
// Actually attempt the operation batch...
$opts = $this->setConcurrencyFlags( $opts );
protected function doClearCache( array $paths = null ) {
}
+ /**
+ * Fetch stat info for a list of files in one batch and store the results
+ * in the in-process cache (and, for existing files, the persistent cache).
+ * Does nothing when the backend has no batch stat support.
+ *
+ * @see FileBackend::getFileStat()
+ *
+ * @param array $params Parameters include:
+ * - srcs : list of source storage paths
+ * - latest : use the latest available data
+ */
+ final public function preloadFileStat( array $params ) {
+ $profiler = new ProfileSection( __METHOD__ . "-{$this->name}" );
+
+ // Only ask for concurrent requests when parallelization is enabled
+ if ( $this->parallelize !== 'off' ) {
+ $params['concurrency'] = $this->concurrency;
+ } else {
+ $params['concurrency'] = 1;
+ }
+
+ $statMap = $this->doGetFileStatMulti( $params );
+ if ( $statMap === null ) {
+ return; // not supported
+ }
+
+ $isLatest = !empty( $params['latest'] ); // use latest data?
+ foreach ( $statMap as $rawPath => $info ) {
+ $path = FileBackend::normalizeStoragePath( $rawPath );
+ if ( $path === null ) {
+ continue; // this shouldn't happen
+ }
+
+ if ( $info === false ) { // file does not exist
+ $missingMarker = $isLatest ? 'NOT_EXIST_LATEST' : 'NOT_EXIST';
+ $this->cheapCache->set( $path, 'stat', $missingMarker );
+ $this->cheapCache->set( $path, 'xattr', array( 'map' => false, 'latest' => $isLatest ) );
+ $this->cheapCache->set( $path, 'sha1', array( 'hash' => false, 'latest' => $isLatest ) );
+ wfDebug( __METHOD__ . ": File $path does not exist.\n" );
+ continue;
+ }
+
+ if ( !is_array( $info ) ) { // an error occurred
+ wfDebug( __METHOD__ . ": Could not stat file $path.\n" );
+ continue;
+ }
+
+ // The file exists: record its stat info in both cache layers
+ $info['latest'] = $isLatest;
+ $this->cheapCache->set( $path, 'stat', $info );
+ $this->setFileCache( $path, $info ); // update persistent cache
+ if ( isset( $info['sha1'] ) ) { // some backends store SHA-1 as metadata
+ $sha1Entry = array( 'hash' => $info['sha1'], 'latest' => $isLatest );
+ $this->cheapCache->set( $path, 'sha1', $sha1Entry );
+ }
+ if ( isset( $info['xattr'] ) ) { // some backends store headers/metadata
+ $xattrMap = self::normalizeXAttributes( $info['xattr'] );
+ $this->cheapCache->set( $path, 'xattr', array( 'map' => $xattrMap, 'latest' => $isLatest ) );
+ }
+ }
+ }
+
+ /**
+ * Get file stat information (concurrently if possible) for several files
+ *
+ * This default implementation returns null to report that batch stat
+ * lookups are not supported. preloadFileStat() treats each per-path value
+ * in the returned map as follows: an array means the file exists, false
+ * means it does not exist, and anything else means the lookup failed.
+ *
+ * @see FileBackend::getFileStat()
+ *
+ * @param array $params Parameters include:
+ * - srcs : list of source storage paths
+ * - latest : use the latest available data
+ * - concurrency : set by preloadFileStat() before calling this method
+ * @return array|null Map of storage paths to array|bool|null (returns null if not supported)
+ * @since 1.23
+ */
+ protected function doGetFileStatMulti( array $params ) {
+ return null; // not supported
+ }
+
/**
* Is this a key/value store where directories are just virtual?
* Virtual directories exist in so much as files exist that are
/**
* Do a batch lookup from cache for container stats for all containers
- * used in a list of container names, storage paths, or FileOp objects.
+ * used in a list of container names or storage paths.
* This loads the persistent cache values into the process cache.
*
* @param array $items
$contNames = array(); // (cache key => resolved container name)
// Get all the paths/containers from the items...
foreach ( $items as $item ) {
- if ( $item instanceof FileOp ) {
- $paths = array_merge( $paths, $item->storagePathsRead() );
- $paths = array_merge( $paths, $item->storagePathsChanged() );
- } elseif ( self::isStoragePath( $item ) ) {
+ if ( self::isStoragePath( $item ) ) {
$paths[] = $item;
} elseif ( is_string( $item ) ) { // full container name
$contNames[$this->containerCacheKey( $item )] = $item;
* used in a list of storage paths.
* This loads the persistent cache values into the process cache.
*
- * @param array $items List of storage paths or FileOps
+ * @param array $items List of storage paths
*/
final protected function primeFileCache( array $items ) {
$section = new ProfileSection( __METHOD__ . "-{$this->name}" );
$pathNames = array(); // (cache key => storage path)
// Get all the paths/containers from the items...
foreach ( $items as $item ) {
- if ( $item instanceof FileOp ) {
- $paths = array_merge( $paths, $item->storagePathsRead() );
- $paths = array_merge( $paths, $item->storagePathsChanged() );
- } elseif ( self::isStoragePath( $item ) ) {
+ if ( self::isStoragePath( $item ) ) {
$paths[] = FileBackend::normalizeStoragePath( $item );
}
}
}
protected function doGetFileStat( array $params ) {
- list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] );
- if ( $srcRel === null ) {
- return false; // invalid storage path
- }
-
- $auth = $this->getAuthentication();
- if ( !$auth ) {
- return null;
- }
+ // Reuse the batch stat code path for this single file; one request needs no concurrency
+ $params['concurrency'] = 1;
+ $stats = $this->doGetFileStatMulti( array( 'srcs' => array( $params['src'] ) ) + $params );
- // (a) Check the container
- $cstat = $this->getContainerStat( $srcCont, true );
- if ( $cstat === false ) {
- return false; // ok, nothing to do
- } elseif ( !is_array( $cstat ) ) {
- return null;
- }
-
- // (b) Check the file
- list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $this->http->run( array(
- 'method' => 'HEAD',
- 'url' => $this->storageUrl( $auth, $srcCont, $srcRel ),
- 'headers' => $this->authTokenHeaders( $auth ) + $this->headersFromParams( $params )
- ) );
- if ( $rcode === 200 || $rcode === 204 ) {
- // Update the object if it is missing some headers
- $rhdrs = $this->addMissingMetadata( $rhdrs, $params['src'] );
- // Fetch all of the custom metadata headers
- $metadata = array();
- foreach ( $rhdrs as $name => $value ) {
- if ( strpos( $name, 'x-object-meta-' ) === 0 ) {
- $metadata[substr( $name, strlen( 'x-object-meta-' ) )] = $value;
- }
- }
- // Fetch all of the custom raw HTTP headers
- $headers = $this->sanitizeHdrs( array( 'headers' => $rhdrs ) );
- $stat = array(
- // Convert various random Swift dates to TS_MW
- 'mtime' => $this->convertSwiftDate( $rhdrs['last-modified'], TS_MW ),
- // Empty objects actually return no content-length header in Ceph
- 'size' => isset( $rhdrs['content-length'] ) ? (int)$rhdrs['content-length'] : 0,
- 'sha1' => $rhdrs['x-object-meta-sha1base36'],
- 'md5' => ctype_xdigit( $rhdrs['etag'] ) ? $rhdrs['etag'] : null,
- 'xattr' => array( 'metadata' => $metadata, 'headers' => $headers )
- );
- } elseif ( $rcode === 404 ) {
- $stat = false;
- } else {
- $stat = null;
- $this->onError( null, __METHOD__, $params, $rerr, $rcode, $rdesc );
- }
-
- return $stat;
+ // Only one path was requested, so the first map entry is this file's result
+ return reset( $stats );
}
/**
}
$stat = array(
// Convert various random Swift dates to TS_MW
- 'mtime' => $this->convertSwiftDate( $object->last_modified, TS_MW ),
- 'size' => (int)$object->bytes,
- 'md5' => ctype_xdigit( $object->hash ) ? $object->hash : null,
+ 'mtime' => $this->convertSwiftDate( $object->last_modified, TS_MW ),
+ 'size' => (int)$object->bytes,
+ // Note: manifest ETags are not an MD5 of the file
+ 'md5' => ctype_xdigit( $object->hash ) ? $object->hash : null,
'latest' => false // eventually consistent
);
$names[] = array( $object->name, $stat );
}
}
+ /**
+ * Get stat information for several files using one batch of HEAD requests.
+ *
+ * Paths that cannot be queried (invalid path, no authentication, missing or
+ * unreadable container) get their result immediately; every other path gets
+ * a HEAD request, and all requests are run together via runMulti().
+ *
+ * @param array $params Parameters include:
+ * - srcs : list of source storage paths
+ * - concurrency : passed to runMulti() as 'maxConnsPerHost'
+ * The whole array is also passed to headersFromParams() for request headers.
+ * @return array Map of storage paths to a stat array (file exists),
+ * false (file or container not found, or invalid path) or null (error)
+ */
+ protected function doGetFileStatMulti( array $params ) {
+ $stats = array();
+
+ $auth = $this->getAuthentication();
+
+ $reqs = array();
+ foreach ( $params['srcs'] as $path ) {
+ list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $path );
+ if ( $srcRel === null ) {
+ $stats[$path] = false;
+ continue; // invalid storage path
+ } elseif ( !$auth ) {
+ $stats[$path] = null;
+ continue; // no credentials available, so nothing can be requested
+ }
+
+ // (a) Check the container
+ $cstat = $this->getContainerStat( $srcCont, true );
+ if ( $cstat === false ) {
+ $stats[$path] = false;
+ continue; // ok, nothing to do
+ } elseif ( !is_array( $cstat ) ) {
+ $stats[$path] = null;
+ continue; // container stat failed
+ }
+
+ // Queue a HEAD request for this file, keyed by its storage path
+ $reqs[$path] = array(
+ 'method' => 'HEAD',
+ 'url' => $this->storageUrl( $auth, $srcCont, $srcRel ),
+ 'headers' => $this->authTokenHeaders( $auth ) + $this->headersFromParams( $params )
+ );
+ }
+
+ // Run all queued requests; each entry comes back with a 'response' key
+ $opts = array( 'maxConnsPerHost' => $params['concurrency'] );
+ $reqs = $this->http->runMulti( $reqs, $opts );
+
+ foreach ( $params['srcs'] as $path ) {
+ if ( array_key_exists( $path, $stats ) ) {
+ // Result already known: settled in the first loop (without a request),
+ // or this is a repeated path that was handled earlier in this loop
+ continue;
+ }
+ // (b) Check the file
+ list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $reqs[$path]['response'];
+ if ( $rcode === 200 || $rcode === 204 ) {
+ // Update the object if it is missing some headers
+ $rhdrs = $this->addMissingMetadata( $rhdrs, $path );
+ // Fetch all of the custom metadata headers
+ $metadata = array();
+ foreach ( $rhdrs as $name => $value ) {
+ if ( strpos( $name, 'x-object-meta-' ) === 0 ) {
+ $metadata[substr( $name, strlen( 'x-object-meta-' ) )] = $value;
+ }
+ }
+ // Fetch all of the custom raw HTTP headers
+ $headers = $this->sanitizeHdrs( array( 'headers' => $rhdrs ) );
+ $stat = array(
+ // Convert various random Swift dates to TS_MW
+ 'mtime' => $this->convertSwiftDate( $rhdrs['last-modified'], TS_MW ),
+ // Empty objects actually return no content-length header in Ceph
+ 'size' => isset( $rhdrs['content-length'] ) ? (int)$rhdrs['content-length'] : 0,
+ 'sha1' => $rhdrs[ 'x-object-meta-sha1base36'],
+ // Note: manifest ETags are not an MD5 of the file
+ 'md5' => ctype_xdigit( $rhdrs['etag'] ) ? $rhdrs['etag'] : null,
+ 'xattr' => array( 'metadata' => $metadata, 'headers' => $headers )
+ );
+ } elseif ( $rcode === 404 ) {
+ $stat = false;
+ } else {
+ // Any other response code is reported as an error for this path
+ $stat = null;
+ $this->onError( null, __METHOD__, $params, $rerr, $rcode, $rdesc );
+ }
+ $stats[$path] = $stat;
+ }
+
+ return $stats;
+ }
+
/**
* @return array|null Credential map
*/