enhance filerepo doc structure
[lhc/web/wiklou.git] / includes / filerepo / backend / FileBackend.php
1 <?php
2 /**
3 * @defgroup FileBackend File backend
4 * @ingroup FileRepo
5 *
6 * This module groups the classes that MediaWiki uses to interact with file storage backends.
7 */
8
9 /**
10 * @file
11 * @ingroup FileBackend
12 * @author Aaron Schulz
13 */
14
15 /**
16 * Base class for all file backend classes (including multi-write backends).
17 *
18 * This class defines the methods as abstract that subclasses must implement.
19 * Outside callers can assume that all backends will have these functions.
20 *
21 * All "storage paths" are of the format "mwstore://backend/container/path".
22 * The paths use UNIX file system (FS) notation, though any particular backend may
23 * not actually be using a local filesystem. Therefore, the paths are only virtual.
24 *
25 * Backend contents are stored under wiki-specific container names by default.
26 * For legacy reasons, this has no effect for the FS backend class, and per-wiki
27 * segregation must be done by setting the container paths appropriately.
28 *
29 * FS-based backends are somewhat more restrictive due to the existence of real
30 * directory files; a regular file cannot have the same name as a directory. Other
31 * backends with virtual directories may not have this limitation. Callers should
32 * store files in such a way that no files and directories are under the same path.
33 *
34 * Methods should avoid throwing exceptions at all costs.
35 * As a corollary, external dependencies should be kept to a minimum.
36 *
37 * @ingroup FileBackend
38 * @since 1.19
39 */
40 abstract class FileBackend {
/** @var string Unique backend name */
protected $name;
/** @var string Unique wiki name; used as the prefix for container names */
protected $wikiId;
/** @var string Read-only explanation message; empty string when writes are allowed */
protected $readOnly;
/** @var LockManager Lock manager that lockFiles(), unlockFiles() and getScopedFileLocks() delegate to */
protected $lockManager;
46
/**
 * Build a backend instance from its configuration array.
 * Only FileBackendGroup is expected to instantiate backends.
 *
 * Recognised $config keys:
 *     'name'        : Unique name of this backend.
 *                     It should consist of alphanumeric, '-', and '_' characters.
 *     'wikiId'      : Wiki-unique prefix for container names; same character
 *                     rules as 'name'. Falls back to wfWikiID() when not set.
 *     'lockManager' : Either a LockManager instance, or the registered name of
 *                     one, which is then looked up through LockManagerGroup.
 *     'readOnly'    : Any non-empty string disallows write operations; it
 *                     should explain why the backend is read-only.
 *
 * @param $config Array
 */
public function __construct( array $config ) {
	$this->name = $config['name'];

	if ( isset( $config['wikiId'] ) ) {
		$this->wikiId = $config['wikiId'];
	} else {
		$this->wikiId = wfWikiID(); // e.g. "my_wiki-en_"
	}

	// Accept a ready-made lock manager, otherwise resolve it by registered name
	if ( $config['lockManager'] instanceof LockManager ) {
		$this->lockManager = $config['lockManager'];
	} else {
		$this->lockManager = LockManagerGroup::singleton()->get( $config['lockManager'] );
	}

	// Always stored as a string so that '' reliably means "writable"
	if ( isset( $config['readOnly'] ) ) {
		$this->readOnly = (string)$config['readOnly'];
	} else {
		$this->readOnly = '';
	}
}
74
/**
 * Get the unique name of this backend.
 * Several backends of the same type can coexist (for example two Swift
 * backends that use different proxies), so the name is what tells them apart.
 *
 * @return string
 */
final public function getName() {
	return $this->name;
}
85
/**
 * Tell whether this backend currently refuses write operations.
 *
 * @return bool True when a read-only explanation message is set
 */
final public function isReadOnly() {
	return !( $this->readOnly == '' );
}
94
/**
 * Get the message explaining why this backend is read-only.
 *
 * @return string|false Returns false if the backend is not read-only
 */
final public function getReadOnlyReason() {
	if ( $this->readOnly != '' ) {
		return $this->readOnly;
	}
	return false;
}
103
104 /**
105 * This is the main entry point into the backend for write operations.
106 * Callers supply an ordered list of operations to perform as a transaction.
107 * Files will be locked, the stat cache cleared, and then the operations attempted.
108 * If any serious errors occur, all attempted operations will be rolled back.
109 *
110 * $ops is an array of arrays. The outer array holds a list of operations.
111 * Each inner array is a set of key value pairs that specify an operation.
112 *
113 * Supported operations and their parameters:
114 * a) Create a new file in storage with the contents of a string
115 * array(
116 * 'op' => 'create',
117 * 'dst' => <storage path>,
118 * 'content' => <string of new file contents>,
119 * 'overwrite' => <boolean>,
120 * 'overwriteSame' => <boolean>
121 * )
122 * b) Copy a file system file into storage
123 * array(
124 * 'op' => 'store',
125 * 'src' => <file system path>,
126 * 'dst' => <storage path>,
127 * 'overwrite' => <boolean>,
128 * 'overwriteSame' => <boolean>
129 * )
130 * c) Copy a file within storage
131 * array(
132 * 'op' => 'copy',
133 * 'src' => <storage path>,
134 * 'dst' => <storage path>,
135 * 'overwrite' => <boolean>,
136 * 'overwriteSame' => <boolean>
137 * )
138 * d) Move a file within storage
139 * array(
140 * 'op' => 'move',
141 * 'src' => <storage path>,
142 * 'dst' => <storage path>,
143 * 'overwrite' => <boolean>,
144 * 'overwriteSame' => <boolean>
145 * )
146 * e) Delete a file within storage
147 * array(
148 * 'op' => 'delete',
149 * 'src' => <storage path>,
150 * 'ignoreMissingSource' => <boolean>
151 * )
152 * f) Do nothing (no-op)
153 * array(
154 * 'op' => 'null',
155 * )
156 *
157 * Boolean flags for operations (operation-specific):
158 * 'ignoreMissingSource' : The operation will simply succeed and do
159 * nothing if the source file does not exist.
160 * 'overwrite' : Any destination file will be overwritten.
161 * 'overwriteSame' : An error will not be given if a file already
162 * exists at the destination that has the same
163 * contents as the new contents to be written there.
164 *
165 * $opts is an associative array of boolean flags, including:
166 * 'force' : Errors that would normally cause a rollback do not.
167 * The remaining operations are still attempted if any fail.
168 * 'nonLocking' : No locks are acquired for the operations.
169 * This can increase performance for non-critical writes.
170 * This has no effect unless the 'force' flag is set.
171 * 'allowStale' : Don't require the latest available data.
172 * This can increase performance for non-critical writes.
173 * This has no effect unless the 'force' flag is set.
174 *
175 * Remarks on locking:
176 * File system paths given to operations should refer to files that are
177 * already locked or otherwise safe from modification from other processes.
178 * Normally these files will be new temp files, which should be adequate.
179 *
180 * Return value:
181 * This returns a Status, which contains all warnings and fatals that occurred
182 * during the operation. The 'failCount', 'successCount', and 'success' members
183 * will reflect each operation attempted. The status will be "OK" unless any
184 * of the operations failed and the 'force' parameter was not set.
185 *
186 * @param $ops Array List of operations to execute in order
187 * @param $opts Array Batch operation options
188 * @return Status
189 */
final public function doOperations( array $ops, array $opts = array() ) {
	// Refuse every write while the backend is flagged read-only
	if ( $this->isReadOnly() ) {
		return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
	}
	// 'nonLocking' and 'allowStale' are only honoured together with 'force'
	if ( empty( $opts['force'] ) ) {
		unset( $opts['nonLocking'], $opts['allowStale'] );
	}
	return $this->doOperationsInternal( $ops, $opts );
}
200
/**
 * Backend-specific implementation of a batch of write operations.
 * doOperations() calls this after its read-only check, and after it has
 * dropped the 'nonLocking' and 'allowStale' options when 'force' is not set.
 *
 * @see FileBackend::doOperations()
 *
 * @param $ops Array List of operations to execute in order
 * @param $opts Array Batch operation options
 * @return Status
 */
abstract protected function doOperationsInternal( array $ops, array $opts );
205
/**
 * Run a single operation through doOperations().
 * For a batch that must succeed or fail as a whole, call
 * doOperations() with the full list instead.
 *
 * @see FileBackend::doOperations()
 *
 * @param $op Array Operation
 * @param $opts Array Operation options
 * @return Status
 */
final public function doOperation( array $op, array $opts = array() ) {
	$batch = array( $op ); // a one-element operation list
	return $this->doOperations( $batch, $opts );
}
220
/**
 * Perform a single 'create' operation.
 * The 'op' key of $params is set to 'create' before handing off to doOperation().
 *
 * @see FileBackend::doOperation()
 *
 * @param $params Array Operation parameters
 * @param $opts Array Operation options
 * @return Status
 */
final public function create( array $params, array $opts = array() ) {
	$op = $params;
	$op['op'] = 'create';
	return $this->doOperation( $op, $opts );
}
235
/**
 * Perform a single 'store' operation.
 * The 'op' key of $params is set to 'store' before handing off to doOperation().
 *
 * @see FileBackend::doOperation()
 *
 * @param $params Array Operation parameters
 * @param $opts Array Operation options
 * @return Status
 */
final public function store( array $params, array $opts = array() ) {
	$op = $params;
	$op['op'] = 'store';
	return $this->doOperation( $op, $opts );
}
250
/**
 * Perform a single 'copy' operation.
 * The 'op' key of $params is set to 'copy' before handing off to doOperation().
 *
 * @see FileBackend::doOperation()
 *
 * @param $params Array Operation parameters
 * @param $opts Array Operation options
 * @return Status
 */
final public function copy( array $params, array $opts = array() ) {
	$op = $params;
	$op['op'] = 'copy';
	return $this->doOperation( $op, $opts );
}
265
/**
 * Perform a single 'move' operation.
 * The 'op' key of $params is set to 'move' before handing off to doOperation().
 *
 * @see FileBackend::doOperation()
 *
 * @param $params Array Operation parameters
 * @param $opts Array Operation options
 * @return Status
 */
final public function move( array $params, array $opts = array() ) {
	$op = $params;
	$op['op'] = 'move';
	return $this->doOperation( $op, $opts );
}
280
/**
 * Perform a single 'delete' operation.
 * The 'op' key of $params is set to 'delete' before handing off to doOperation().
 *
 * @see FileBackend::doOperation()
 *
 * @param $params Array Operation parameters
 * @param $opts Array Operation options
 * @return Status
 */
final public function delete( array $params, array $opts = array() ) {
	$op = $params;
	$op['op'] = 'delete';
	return $this->doOperation( $op, $opts );
}
295
296 /**
297 * Concatenate a list of storage files into a single file system file.
298 * The target path should refer to a file that is already locked or
299 * otherwise safe from modification from other processes. Normally,
300 * the file will be a new temp file, which should be adequate.
301 * $params include:
302 * srcs : ordered source storage paths (e.g. chunk1, chunk2, ...)
303 * dst : file system path to 0-byte temp file
304 *
305 * @param $params Array Operation parameters
306 * @return Status
307 */
abstract public function concatenate( array $params ); // abstract here; subclasses provide the implementation
309
/**
 * Make a storage directory ready for use.
 * Any required containers and parent directories get created.
 * Backends built on key/value stores only need to create the container.
 *
 * $params include:
 *     dir : storage directory
 *
 * @param $params Array
 * @return Status Fatal 'backend-fail-readonly' status if the backend is read-only
 */
final public function prepare( array $params ) {
	$status = $this->isReadOnly()
		? Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly )
		: $this->doPrepare( $params );
	return $status;
}
327
/**
 * Backend-specific implementation of prepare().
 * Called by prepare() and by secure(), in both cases only after the
 * read-only check has passed.
 *
 * @see FileBackend::prepare()
 *
 * @param $params Array Same parameters as prepare()
 * @return Status
 */
abstract protected function doPrepare( array $params );
332
/**
 * Try to block web access to a storage directory and to the container
 * that holds it. FS backends might drop in .htaccess files, whereas
 * key/value store backends might restrict container access to the auth
 * user that represents end-users in web requests.
 * There is no guarantee that this actually does anything.
 *
 * $params include:
 *     dir       : storage directory
 *     noAccess  : try to deny file access
 *     noListing : try to deny file listing
 *
 * @param $params Array
 * @return Status Fatal 'backend-fail-readonly' status if the backend is read-only
 */
final public function secure( array $params ) {
	if ( $this->isReadOnly() ) {
		return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
	}
	// The directory has to exist before access to it can be restricted
	$status = $this->doPrepare( $params );
	if ( !$status->isOK() ) {
		return $status;
	}
	$status->merge( $this->doSecure( $params ) );
	return $status;
}
358
/**
 * Backend-specific implementation of secure().
 * secure() only calls this once doPrepare() has returned an OK status,
 * and merges the returned status into its own result.
 *
 * @see FileBackend::secure()
 *
 * @param $params Array Same parameters as secure()
 * @return Status
 */
abstract protected function doSecure( array $params );
363
/**
 * Remove a storage directory, provided it is empty.
 * Backends built on key/value stores may do nothing unless the directory
 * is that of an empty container, in which case it should be deleted.
 *
 * $params include:
 *     dir : storage directory
 *
 * @param $params Array
 * @return Status Fatal 'backend-fail-readonly' status if the backend is read-only
 */
final public function clean( array $params ) {
	$status = $this->isReadOnly()
		? Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly )
		: $this->doClean( $params );
	return $status;
}
381
/**
 * Backend-specific implementation of clean().
 * Only called by clean() after the read-only check has passed.
 *
 * @see FileBackend::clean()
 *
 * @param $params Array Same parameters as clean()
 * @return Status
 */
abstract protected function doClean( array $params );
386
387 /**
388 * Check if a file exists at a storage path in the backend.
389 * This returns false if only a directory exists at the path.
390 *
391 * $params include:
392 * src : source storage path
393 * latest : use the latest available data
394 *
395 * @param $params Array
396 * @return bool|null Returns null on failure
397 */
abstract public function fileExists( array $params ); // abstract; subclasses provide the implementation
399
400 /**
401 * Get the last-modified timestamp of the file at a storage path.
402 *
403 * $params include:
404 * src : source storage path
405 * latest : use the latest available data
406 *
407 * @param $params Array
408 * @return string|false TS_MW timestamp or false on failure
409 */
abstract public function getFileTimestamp( array $params ); // abstract; subclasses provide the implementation
411
412 /**
413 * Get the contents of a file at a storage path in the backend.
414 * This should be avoided for potentially large files.
415 *
416 * $params include:
417 * src : source storage path
418 * latest : use the latest available data
419 *
420 * @param $params Array
421 * @return string|false Returns false on failure
422 */
abstract public function getFileContents( array $params ); // abstract; subclasses provide the implementation
424
425 /**
426 * Get the size (bytes) of a file at a storage path in the backend.
427 *
428 * $params include:
429 * src : source storage path
430 * latest : use the latest available data
431 *
432 * @param $params Array
433 * @return integer|false Returns false on failure
434 */
abstract public function getFileSize( array $params ); // abstract; subclasses provide the implementation
436
437 /**
438 * Get quick information about a file at a storage path in the backend.
439 * If the file does not exist, then this returns false.
440 * Otherwise, the result is an associative array that includes:
441 * mtime : the last-modified timestamp (TS_MW)
442 * size : the file size (bytes)
443 * Additional values may be included for internal use only.
444 *
445 * $params include:
446 * src : source storage path
447 * latest : use the latest available data
448 *
449 * @param $params Array
450 * @return Array|false|null Returns null on failure
451 */
abstract public function getFileStat( array $params ); // abstract; subclasses provide the implementation
453
454 /**
455 * Get a SHA-1 hash of the file at a storage path in the backend.
456 *
457 * $params include:
458 * src : source storage path
459 * latest : use the latest available data
460 *
461 * @param $params Array
462 * @return string|false Hash string or false on failure
463 */
abstract public function getFileSha1Base36( array $params ); // abstract; subclasses provide the implementation
465
466 /**
467 * Get the properties of the file at a storage path in the backend.
468 * Returns FSFile::placeholderProps() on failure.
469 *
470 * $params include:
471 * src : source storage path
472 * latest : use the latest available data
473 *
474 * @param $params Array
475 * @return Array
476 */
abstract public function getFileProps( array $params ); // abstract; subclasses provide the implementation
478
/**
 * Stream the file at a storage path in the backend.
 * If the file does not exist, a 404 error will be given.
 * Appropriate HTTP headers (Status, Content-Type, Content-Length)
 * must be sent if streaming began, while none should be sent otherwise.
 * Implementations should flush the output buffer before sending data.
 *
 * $params include:
 *     src     : source storage path
 *     headers : additional HTTP headers to send on success
 *     latest  : use the latest available data
 *
 * @param $params Array
 * @return Status
 */
abstract public function streamFile( array $params );
495
/**
 * Returns a file system file, identical to the file at a storage path.
 * The file returned is either:
 *     a) A local copy of the file at a storage path in the backend.
 *        The temporary copy will have the same extension as the source.
 *     b) An original of the file at a storage path in the backend.
 * Temporary files may be purged when the file object falls out of scope.
 *
 * Write operations should *never* be done on this file as some backends
 * may do internal tracking or may be instances of FileBackendMultiWrite.
 * In the latter case, there are copies of the file that must stay in sync.
 * Additionally, further calls to this function may return the same file.
 *
 * $params include:
 *     src    : source storage path
 *     latest : use the latest available data
 *
 * @param $params Array
 * @return FSFile|null Returns null on failure
 */
abstract public function getLocalReference( array $params );
517
518 /**
519 * Get a local copy on disk of the file at a storage path in the backend.
520 * The temporary copy will have the same file extension as the source.
521 * Temporary files may be purged when the file object falls out of scope.
522 *
523 * $params include:
524 * src : source storage path
525 * latest : use the latest available data
526 *
527 * @param $params Array
528 * @return TempFSFile|null Returns null on failure
529 */
abstract public function getLocalCopy( array $params ); // abstract; subclasses provide the implementation
531
/**
 * Get an iterator to list out all stored files under a storage directory.
 * If the directory is of the form "mwstore://container", then all items in
 * the container should be listed. If of the form "mwstore://container/dir",
 * then all items under that container directory should be listed.
 * Results should be storage paths relative to the given directory.
 *
 * $params include:
 *     dir : storage path directory
 *
 * @param $params Array
 * @return Traversable|Array|null Returns null on failure
 */
abstract public function getFileList( array $params );
545
/**
 * Invalidate any in-process file existence and property cache.
 * When $paths is given, only the cache entries for those files are cleared.
 *
 * @param $paths Array Storage paths (optional)
 * @return void
 */
public function clearCache( array $paths = null ) {
	// Intentionally empty: this base class keeps no cache of its own
}
554
/**
 * Lock the files at the given storage paths in the backend.
 * Either every file gets locked or, on failure, none of them.
 *
 * Callers should consider using getScopedFileLocks() instead.
 *
 * @param $paths Array Storage paths
 * @param $type integer LockManager::LOCK_* constant
 * @return Status
 */
final public function lockFiles( array $paths, $type ) {
	$manager = $this->lockManager;
	return $manager->lock( $paths, $type );
}
568
/**
 * Release the locks on the files at the given storage paths in the backend.
 *
 * @param $paths Array Storage paths
 * @param $type integer LockManager::LOCK_* constant
 * @return Status
 */
final public function unlockFiles( array $paths, $type ) {
	$manager = $this->lockManager;
	return $manager->unlock( $paths, $type );
}
579
/**
 * Lock the files at the given storage paths in the backend.
 * Either every file gets locked or, on failure, none of them.
 * On failure, the status object will be updated with errors.
 *
 * Once the return value goes out of scope, the locks will be released and
 * the status updated. Unlock fatals will not change the status "OK" value.
 *
 * @param $paths Array Storage paths
 * @param $type integer LockManager::LOCK_* constant
 * @param $status Status Status to update on lock/unlock
 * @return ScopedLock|null Returns null on failure
 */
final public function getScopedFileLocks( array $paths, $type, Status $status ) {
	$scopedLock = ScopedLock::factory( $this->lockManager, $paths, $type, $status );
	return $scopedLock;
}
596
/**
 * Tell whether a path starts with the "mwstore://" scheme.
 * Nothing else is validated and no existence check is made.
 *
 * @param $path string
 * @return bool
 */
final public static function isStoragePath( $path ) {
	// 10 is the length of the "mwstore://" prefix
	return ( strncmp( $path, 'mwstore://', 10 ) === 0 );
}
607
/**
 * Break a storage path up into a backend name, a container name, and a
 * relative file path. The relative path may be the empty string.
 * No path normalization or traversal checking is done here.
 *
 * @param $storagePath string
 * @return Array (backend, container, rel object) or (null, null, null)
 */
final public static function splitStoragePath( $storagePath ) {
	$invalid = array( null, null, null );
	if ( !self::isStoragePath( $storagePath ) ) {
		return $invalid;
	}
	// Strip the 10-character "mwstore://" prefix, then split into at most 3 pieces
	$parts = explode( '/', substr( $storagePath, 10 ), 3 );
	// Both the backend and the container name must be present and non-empty
	if ( count( $parts ) < 2 || $parts[0] == '' || $parts[1] == '' ) {
		return $invalid;
	}
	if ( count( $parts ) == 3 ) {
		return $parts; // e.g. "backend/container/path"
	}
	return array( $parts[0], $parts[1], '' ); // e.g. "backend/container"
}
630
/**
 * Normalize a storage path by cleaning up its directory separators.
 * Null is returned when the path does not have the format of a valid
 * storage path, or when its relative part involves directory traversal.
 *
 * @param $storagePath string
 * @return string|null
 */
final public static function normalizeStoragePath( $storagePath ) {
	list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
	if ( $relPath === null ) {
		return null; // not a well-formed storage path
	}
	$relPath = self::normalizeContainerPath( $relPath );
	if ( $relPath === null ) {
		return null; // relative part was rejected
	}
	$normalized = "mwstore://{$backend}/{$container}";
	if ( $relPath != '' ) {
		$normalized .= "/{$relPath}";
	}
	return $normalized;
}
650
/**
 * Validate and normalize a relative storage path.
 * Null is returned if the path involves directory traversal.
 * Traversal is insecure for FS backends and broken for others.
 *
 * Backslashes are converted to forward slashes, runs of separators are
 * collapsed, and any leading separator is removed. A "." or ".." path
 * segment at the start, in the middle, or at the end of the path counts
 * as traversal.
 *
 * @param $path string Storage path relative to a container
 * @return string|null Normalized path, or null if it involves traversal
 */
final protected static function normalizeContainerPath( $path ) {
	// Normalize directory separators
	$path = strtr( $path, '\\', '/' );
	// Collapse any consecutive directory separators
	$path = preg_replace( '![/]{2,}!', '/', $path );
	// Remove any leading directory separator
	$path = ltrim( $path, '/' );
	// Use the same traversal protection as Title::secureAndSplit()
	if ( strpos( $path, '.' ) !== false ) {
		if (
			$path === '.' ||
			$path === '..' ||
			strpos( $path, './' ) === 0 ||
			strpos( $path, '../' ) === 0 ||
			strpos( $path, '/./' ) !== false ||
			strpos( $path, '/../' ) !== false ||
			// A trailing "." or ".." segment (e.g. "dir/..") is traversal too
			substr( $path, -2 ) === '/.' ||
			substr( $path, -3 ) === '/..'
		) {
			return null;
		}
	}
	return $path;
}
681
/**
 * Get the parent storage directory of a storage path.
 * The result looks like "mwstore://backend/container" or
 * "mwstore://backend/container/...", or is null if there is no parent.
 *
 * @param $storagePath string
 * @return string|null
 */
final public static function parentStoragePath( $storagePath ) {
	$parent = dirname( $storagePath );
	// The parent only counts if it is still a well-formed storage path
	$parts = self::splitStoragePath( $parent );
	if ( $parts[2] === null ) {
		return null;
	}
	return $parent;
}
695
/**
 * Get the final extension from a storage or FS path.
 *
 * The extension is whatever follows the last '.' in the path, lower-cased.
 * An empty string is returned when the path has no '.', when its only '.'
 * is the very first character, or when the last '.' belongs to a directory
 * name rather than to the file name (e.g. "dir.d/file").
 *
 * @param $path string
 * @return string Lower-case extension without the dot, or '' if there is none
 */
final public static function extensionFromPath( $path ) {
	$i = strrpos( $path, '.' );
	if ( !$i ) {
		return ''; // no dot at all, or the dot is the first character
	}
	$ext = (string)substr( $path, $i + 1 );
	// A directory separator after the dot means the dot was part of a
	// directory name, so the file itself has no extension
	if ( strpbrk( $ext, '/\\' ) !== false ) {
		return '';
	}
	return strtolower( $ext );
}
706 }
707
708 /**
709 * @brief Base class for all backends associated with a particular storage medium.
710 *
711 * This class defines the methods as abstract that subclasses must implement.
712 * Outside callers should *not* use functions with "Internal" in the name.
713 *
714 * The FileBackend operations are implemented using basic functions
715 * such as storeInternal(), copyInternal(), deleteInternal() and the like.
716 * This class is also responsible for path resolution and sanitization.
717 *
718 * @ingroup FileBackend
719 * @since 1.19
720 */
721 abstract class FileBackendStore extends FileBackend {
/** @var Array Map of paths to small (RAM/disk) cache items; (storage path => key => value) */
protected $cache = array();
/** @var integer Maximum number of paths with entries in $cache */
protected $maxCacheSize = 100;
/** @var Array Map of paths to large (RAM/disk) cache items; (storage path => key => value) */
protected $expensiveCache = array();
/** @var integer Maximum number of paths with entries in $expensiveCache */
protected $maxExpensiveCacheSize = 10;

/** @var Array Map of container names to sharding settings; (container name => config array) */
protected $shardViaHashLevels = array();

/** @var integer Maximum file size in bytes (1GB) */
protected $maxFileSize = 1000000000;
733
/**
 * Get the largest file size this backend accepts, given the restrictions
 * of the storage medium and basic performance constraints.
 * Do not call this function from places outside FileBackend and FileOp.
 *
 * @return integer Bytes
 */
final public function maxFileSizeInternal() {
	return $this->maxFileSize;
}
744
745 /**
746 * Check if a file can be created at a given storage path.
747 * FS backends should check if the parent directory exists and the file is writable.
748 * Backends using key/value stores should check if the container exists.
749 *
750 * @param $storagePath string
751 * @return bool
752 */
abstract public function isPathUsableInternal( $storagePath ); // abstract; subclasses provide the implementation
754
/**
 * Create a file in the backend with the given contents.
 * Content longer than maxFileSizeInternal() bytes is rejected with a
 * fatal 'backend-fail-create' status.
 * Do not call this function from places outside FileBackend and FileOp.
 *
 * $params include:
 *     content   : the raw file contents
 *     dst       : destination storage path
 *     overwrite : overwrite any file that exists at the destination
 *
 * @param $params Array
 * @return Status
 */
final public function createInternal( array $params ) {
	wfProfileIn( __METHOD__ );
	$tooBig = strlen( $params['content'] ) > $this->maxFileSizeInternal();
	if ( !$tooBig ) {
		$status = $this->doCreateInternal( $params );
		// Cached information about the destination is now stale
		$this->clearCache( array( $params['dst'] ) );
	} else {
		$status = Status::newFatal( 'backend-fail-create', $params['dst'] );
	}
	wfProfileOut( __METHOD__ );
	return $status;
}
778
/**
 * Backend-specific implementation of createInternal().
 * Only called once the content has passed the maximum file size check.
 *
 * @see FileBackendStore::createInternal()
 *
 * @param $params Array Same parameters as createInternal()
 * @return Status
 */
abstract protected function doCreateInternal( array $params );
783
/**
 * Store a file into the backend from a file on disk.
 * A source file larger than maxFileSizeInternal() bytes is rejected with
 * a fatal 'backend-fail-store' status.
 * Do not call this function from places outside FileBackend and FileOp.
 *
 * $params include:
 *     src       : source path on disk
 *     dst       : destination storage path
 *     overwrite : overwrite any file that exists at the destination
 *
 * @param $params Array
 * @return Status
 */
final public function storeInternal( array $params ) {
	wfProfileIn( __METHOD__ );
	$tooBig = filesize( $params['src'] ) > $this->maxFileSizeInternal();
	if ( !$tooBig ) {
		$status = $this->doStoreInternal( $params );
		// Cached information about the destination is now stale
		$this->clearCache( array( $params['dst'] ) );
	} else {
		$status = Status::newFatal( 'backend-fail-store', $params['dst'] );
	}
	wfProfileOut( __METHOD__ );
	return $status;
}
807
/**
 * Backend-specific implementation of storeInternal().
 * Only called once the source file has passed the maximum file size check.
 *
 * @see FileBackendStore::storeInternal()
 *
 * @param $params Array Same parameters as storeInternal()
 * @return Status
 */
abstract protected function doStoreInternal( array $params );
812
/**
 * Copy a file from one storage path to another in the backend.
 * Do not call this function from places outside FileBackend and FileOp.
 *
 * $params include:
 *     src       : source storage path
 *     dst       : destination storage path
 *     overwrite : overwrite any file that exists at the destination
 *
 * @param $params Array
 * @return Status
 */
final public function copyInternal( array $params ) {
	wfProfileIn( __METHOD__ );
	$result = $this->doCopyInternal( $params );
	// Cached information about the destination is now stale
	$this->clearCache( array( $params['dst'] ) );
	wfProfileOut( __METHOD__ );
	return $result;
}
832
/**
 * Backend-specific implementation of copyInternal().
 *
 * @see FileBackendStore::copyInternal()
 *
 * @param $params Array Same parameters as copyInternal()
 * @return Status
 */
abstract protected function doCopyInternal( array $params );
837
/**
 * Delete a file at the storage path.
 * Do not call this function from places outside FileBackend and FileOp.
 *
 * $params include:
 *     src                 : source storage path
 *     ignoreMissingSource : do nothing if the source file does not exist
 *
 * @param $params Array
 * @return Status
 */
final public function deleteInternal( array $params ) {
	wfProfileIn( __METHOD__ );
	$result = $this->doDeleteInternal( $params );
	// Cached information about the deleted path is now stale
	$this->clearCache( array( $params['src'] ) );
	wfProfileOut( __METHOD__ );
	return $result;
}
856
/**
 * Backend-specific implementation of deleteInternal().
 *
 * @see FileBackendStore::deleteInternal()
 *
 * @param $params Array Same parameters as deleteInternal()
 * @return Status
 */
abstract protected function doDeleteInternal( array $params );
861
/**
 * Move a file from one storage path to another in the backend.
 * Do not call this function from places outside FileBackend and FileOp.
 *
 * $params include:
 *     src       : source storage path
 *     dst       : destination storage path
 *     overwrite : overwrite any file that exists at the destination
 *
 * @param $params Array
 * @return Status
 */
final public function moveInternal( array $params ) {
	wfProfileIn( __METHOD__ );
	$result = $this->doMoveInternal( $params );
	// Both ends of the move may have changed
	$touched = array( $params['src'], $params['dst'] );
	$this->clearCache( $touched );
	wfProfileOut( __METHOD__ );
	return $result;
}
881
/**
 * Default move implementation: copy the source to the destination, then
 * delete the source. If the copy fails, its status is returned and nothing
 * is deleted. Errors from the delete step are merged into the status, but
 * the status is then set back to OK, so they do not fail the move.
 *
 * @see FileBackendStore::moveInternal()
 *
 * @param $params Array Same parameters as moveInternal()
 * @return Status
 */
protected function doMoveInternal( array $params ) {
	// Copy source to dest
	$status = $this->copyInternal( $params );
	if ( !$status->isOK() ) {
		return $status;
	}
	// Delete source (only fails due to races or medium going down)
	$deleteStatus = $this->deleteInternal( array( 'src' => $params['src'] ) );
	$status->merge( $deleteStatus );
	$status->setResult( true, $status->value ); // ignore delete() errors
	return $status;
}
895
/**
 * Concatenate storage files into a file system file, holding
 * LockManager::LOCK_UW locks on the sources while doConcatenate() runs.
 * If the locks cannot be obtained, nothing is concatenated.
 *
 * @see FileBackend::concatenate()
 *
 * @param $params Array Operation parameters ('srcs' and 'dst')
 * @return Status
 */
final public function concatenate( array $params ) {
	wfProfileIn( __METHOD__ );
	$status = Status::newGood();

	// Keep the scoped lock in a local variable so that the source files
	// stay locked until this function returns
	$sourceLocks = $this->getScopedFileLocks( $params['srcs'], LockManager::LOCK_UW, $status );
	if ( $status->isOK() ) {
		// Actually do the concatenation
		$concatStatus = $this->doConcatenate( $params );
		$status->merge( $concatStatus );
	}

	wfProfileOut( __METHOD__ );
	return $status;
}
913
/**
 * Default concatenation implementation: append each source chunk, in
 * order, to the destination temp file.
 *
 * The destination ($params['dst']) must be an existing, 0-byte file on
 * the local file system. Each source is obtained with getLocalReference()
 * and appended with stream_copy_to_stream(). The destination handle is
 * closed on every return path once it has been opened.
 *
 * @see FileBackendStore::concatenate()
 *
 * @param $params Array Operation parameters:
 *     srcs : ordered source storage paths (e.g. chunk1, chunk2, ...)
 *     dst  : file system path to 0-byte temp file
 * @return Status Fatal on the first chunk or temp file error encountered
 */
protected function doConcatenate( array $params ) {
	$status = Status::newGood();
	$tmpPath = $params['dst']; // convenience

	// Check that the specified temp file is valid...
	wfSuppressWarnings();
	$ok = ( is_file( $tmpPath ) && !filesize( $tmpPath ) );
	wfRestoreWarnings();
	if ( !$ok ) { // not present or not empty
		$status->fatal( 'backend-fail-opentemp', $tmpPath );
		return $status;
	}

	// Build up the temp file using the source chunks (in order)...
	$tmpHandle = fopen( $tmpPath, 'ab' );
	if ( $tmpHandle === false ) {
		$status->fatal( 'backend-fail-opentemp', $tmpPath );
		return $status;
	}
	foreach ( $params['srcs'] as $virtualSource ) {
		// Get a local FS version of the chunk
		$tmpFile = $this->getLocalReference( array( 'src' => $virtualSource ) );
		if ( !$tmpFile ) {
			fclose( $tmpHandle ); // don't leak the destination handle
			$status->fatal( 'backend-fail-read', $virtualSource );
			return $status;
		}
		// Get a handle to the local FS version (binary-safe on all platforms)
		$sourceHandle = fopen( $tmpFile->getPath(), 'rb' );
		if ( $sourceHandle === false ) {
			fclose( $tmpHandle );
			$status->fatal( 'backend-fail-read', $virtualSource );
			return $status;
		}
		// Append chunk to file. stream_copy_to_stream() returns the number of
		// bytes copied, so only a strict false is an error; 0 just means the
		// chunk was empty.
		$copied = stream_copy_to_stream( $sourceHandle, $tmpHandle );
		fclose( $sourceHandle );
		if ( $copied === false ) {
			fclose( $tmpHandle );
			$status->fatal( 'backend-fail-writetemp', $tmpPath );
			return $status;
		}
	}
	if ( !fclose( $tmpHandle ) ) {
		$status->fatal( 'backend-fail-closetemp', $tmpPath );
		return $status;
	}

	clearstatcache(); // temp file changed

	return $status;
}
968
969 /**
970 * @see FileBackend::doPrepare()
971 */
final protected function doPrepare( array $params ) {
	wfProfileIn( __METHOD__ );

	$status = Status::newGood();
	list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
	if ( $dir === null ) { // invalid storage path
		$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
		wfProfileOut( __METHOD__ );
		return $status;
	}

	if ( $shard === null ) {
		// No single shard could be determined for this directory, so the
		// backend-specific step is run against every shard of the container
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		$shortCont = $pathParts[1];
		foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
			$shardStatus = $this->doPrepareInternal( "{$fullCont}{$suffix}", $dir, $params );
			$status->merge( $shardStatus );
		}
	} else {
		// Confined to a single container/shard
		$status->merge( $this->doPrepareInternal( $fullCont, $dir, $params ) );
	}

	wfProfileOut( __METHOD__ );
	return $status;
}
996
997 /**
998 * @see FileBackendStore::doPrepare()
999 */
protected function doPrepareInternal( $container, $dir, array $params ) {
	// Default implementation: nothing to prepare, so just report success.
	// doPrepare() calls this once per resolved container (or container shard).
	$status = Status::newGood();

	return $status;
}
1003
1004 /**
1005 * @see FileBackend::doSecure()
1006 */
final protected function doSecure( array $params ) {
	wfProfileIn( __METHOD__ );
	$status = Status::newGood();

	list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
	if ( $dir === null ) { // invalid storage path
		$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
		wfProfileOut( __METHOD__ );
		return $status;
	}

	if ( $shard === null ) {
		// No single shard could be determined for this directory, so the
		// backend-specific step is run against every shard of the container
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		$shortCont = $pathParts[1];
		foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
			$shardStatus = $this->doSecureInternal( "{$fullCont}{$suffix}", $dir, $params );
			$status->merge( $shardStatus );
		}
	} else {
		// Confined to a single container/shard
		$status->merge( $this->doSecureInternal( $fullCont, $dir, $params ) );
	}

	wfProfileOut( __METHOD__ );
	return $status;
}
1031
1032 /**
1033 * @see FileBackendStore::doSecure()
1034 */
protected function doSecureInternal( $container, $dir, array $params ) {
	// Default implementation: nothing to secure, so just report success.
	// doSecure() calls this once per resolved container (or container shard).
	$status = Status::newGood();

	return $status;
}
1038
1039 /**
1040 * @see FileBackend::doClean()
1041 */
final protected function doClean( array $params ) {
	wfProfileIn( __METHOD__ );
	$status = Status::newGood();

	list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
	if ( $dir === null ) { // invalid storage path
		$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
		wfProfileOut( __METHOD__ );
		return $status;
	}

	// Take an exclusive lock on the directory. The lock object is kept in a
	// local variable so that it stays referenced until this function returns.
	$lockPaths = array( $params['dir'] );
	$scopedLockE = $this->getScopedFileLocks( $lockPaths, LockManager::LOCK_EX, $status );
	if ( !$status->isOK() ) {
		wfProfileOut( __METHOD__ );
		return $status; // abort
	}

	if ( $shard === null ) {
		// No single shard could be determined for this directory, so the
		// backend-specific step is run against every shard of the container
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		$shortCont = $pathParts[1];
		foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
			$shardStatus = $this->doCleanInternal( "{$fullCont}{$suffix}", $dir, $params );
			$status->merge( $shardStatus );
		}
	} else {
		// Confined to a single container/shard
		$status->merge( $this->doCleanInternal( $fullCont, $dir, $params ) );
	}

	wfProfileOut( __METHOD__ );
	return $status;
}
1074
1075 /**
1076 * @see FileBackendStore::doClean()
1077 */
protected function doCleanInternal( $container, $dir, array $params ) {
	// Default implementation: nothing to clean up, so just report success.
	// doClean() calls this once per resolved container (or container shard).
	$status = Status::newGood();

	return $status;
}
1081
1082 /**
1083 * @see FileBackend::fileExists()
1084 */
final public function fileExists( array $params ) {
	wfProfileIn( __METHOD__ );
	$stat = $this->getFileStat( $params );
	wfProfileOut( __METHOD__ );

	if ( $stat === null ) {
		return null; // null => failure
	}
	// Any other stat result is collapsed to a plain boolean
	return (bool)$stat;
}
1091
1092 /**
1093 * @see FileBackend::getFileTimestamp()
1094 */
final public function getFileTimestamp( array $params ) {
	wfProfileIn( __METHOD__ );
	$stat = $this->getFileStat( $params );
	wfProfileOut( __METHOD__ );

	// No usable stat info means no timestamp
	if ( !$stat ) {
		return false;
	}
	return $stat['mtime'];
}
1101
1102 /**
1103 * @see FileBackend::getFileSize()
1104 */
final public function getFileSize( array $params ) {
	wfProfileIn( __METHOD__ );
	$stat = $this->getFileStat( $params );
	wfProfileOut( __METHOD__ );

	// No usable stat info means no size
	if ( !$stat ) {
		return false;
	}
	return $stat['size'];
}
1111
1112 /**
1113 * @see FileBackend::getFileStat()
1114 */
final public function getFileStat( array $params ) {
	// Returns the stat info for $params['src'], served from the in-process
	// cache when possible. Returns false for an invalid storage path;
	// otherwise whatever doGetFileStat() returns.
	wfProfileIn( __METHOD__ );
	// Cache entries are keyed by the normalized path, as in clearCache()
	$path = self::normalizeStoragePath( $params['src'] );
	if ( $path === null ) {
		// Keep profiling balanced on this early exit as well
		wfProfileOut( __METHOD__ );
		return false; // invalid storage path
	}
	$latest = !empty( $params['latest'] );
	if ( isset( $this->cache[$path]['stat'] ) ) {
		// If we want the latest data, check that this cached
		// value was in fact fetched with the latest available data.
		if ( !$latest || $this->cache[$path]['stat']['latest'] ) {
			wfProfileOut( __METHOD__ );
			return $this->cache[$path]['stat'];
		}
	}
	$stat = $this->doGetFileStat( $params );
	if ( is_array( $stat ) ) { // don't cache negatives
		$this->trimCache(); // limit memory
		$this->cache[$path]['stat'] = $stat;
		// Remember whether this entry was fetched with the 'latest' flag
		$this->cache[$path]['stat']['latest'] = $latest;
	}
	wfProfileOut( __METHOD__ );
	return $stat;
}
1139
1140 /**
1141 * @see FileBackendStore::getFileStat()
1142 */
1143 abstract protected function doGetFileStat( array $params ); // Backend-specific lookup used by getFileStat() on a cache miss; only array results get cached
1144
1145 /**
1146 * @see FileBackend::getFileContents()
1147 */
public function getFileContents( array $params ) {
	wfProfileIn( __METHOD__ );

	$contents = false; // returned as-is when no local reference is available
	$localFile = $this->getLocalReference( $params );
	if ( $localFile ) {
		// Read the whole local file; warnings from the read are suppressed
		wfSuppressWarnings();
		$contents = file_get_contents( $localFile->getPath() );
		wfRestoreWarnings();
	}

	wfProfileOut( __METHOD__ );
	return $contents;
}
1161
1162 /**
1163 * @see FileBackend::getFileSha1Base36()
1164 */
final public function getFileSha1Base36( array $params ) {
	// Returns the hash from doGetFileSha1Base36() for $params['src'], served
	// from the in-process cache when possible. Returns false on failure.
	wfProfileIn( __METHOD__ );
	// Key the cache by the normalized path: clearCache() unsets entries by
	// normalized path, so a raw key could never be purged for a single path.
	$path = self::normalizeStoragePath( $params['src'] );
	if ( $path === null ) {
		wfProfileOut( __METHOD__ );
		return false; // invalid storage path
	}
	if ( isset( $this->cache[$path]['sha1'] ) ) {
		wfProfileOut( __METHOD__ );
		return $this->cache[$path]['sha1'];
	}
	$hash = $this->doGetFileSha1Base36( $params );
	if ( $hash ) { // don't cache negatives
		$this->trimCache(); // limit memory
		$this->cache[$path]['sha1'] = $hash;
	}
	wfProfileOut( __METHOD__ );
	return $hash;
}
1180
1181 /**
1182 * @see FileBackendStore::getFileSha1Base36()
1183 */
protected function doGetFileSha1Base36( array $params ) {
	// Default implementation: hash a local reference of the file.
	$localFile = $this->getLocalReference( $params );

	// false signals that no local reference could be obtained
	return $localFile ? $localFile->getSha1Base36() : false;
}
1192
1193 /**
1194 * @see FileBackend::getFileProps()
1195 */
final public function getFileProps( array $params ) {
	wfProfileIn( __METHOD__ );

	$localFile = $this->getLocalReference( $params );
	if ( $localFile ) {
		$props = $localFile->getProps();
	} else {
		// No local reference available: fall back to placeholder properties
		$props = FSFile::placeholderProps();
	}

	wfProfileOut( __METHOD__ );
	return $props;
}
1203
1204 /**
1205 * @see FileBackend::getLocalReference()
1206 */
public function getLocalReference( array $params ) {
	// Returns a local file object for $params['src'], reusing a cached one
	// when available; otherwise obtains one via getLocalCopy(). Returns null
	// for an invalid storage path, or getLocalCopy()'s falsy result on failure.
	wfProfileIn( __METHOD__ );
	// Key the cache by the normalized path: clearCache() unsets entries by
	// normalized path, so a raw key could never be purged for a single path.
	$path = self::normalizeStoragePath( $params['src'] );
	if ( $path === null ) {
		wfProfileOut( __METHOD__ );
		return null; // invalid storage path
	}
	if ( isset( $this->expensiveCache[$path]['localRef'] ) ) {
		wfProfileOut( __METHOD__ );
		return $this->expensiveCache[$path]['localRef'];
	}
	$tmpFile = $this->getLocalCopy( $params );
	if ( $tmpFile ) { // don't cache negatives
		$this->trimExpensiveCache(); // limit memory
		$this->expensiveCache[$path]['localRef'] = $tmpFile;
	}
	wfProfileOut( __METHOD__ );
	return $tmpFile;
}
1222
1223 /**
1224 * @see FileBackend::streamFile()
1225 */
final public function streamFile( array $params ) {
	// Streams the file at $params['src'] to the client and returns a Status.
	// $params['headers'] is optional and supplies extra HTTP headers.
	wfProfileIn( __METHOD__ );
	$status = Status::newGood();

	$info = $this->getFileStat( $params );
	if ( !$info ) { // let StreamFile handle the 404
		$status->fatal( 'backend-fail-notexists', $params['src'] );
	}

	// Set output buffer and HTTP headers for stream.
	// 'headers' may be absent; check with isset() to avoid an undefined
	// index notice, and keep treating any falsy value as "no extra headers".
	$extraHeaders = ( isset( $params['headers'] ) && $params['headers'] )
		? $params['headers']
		: array();
	$res = StreamFile::prepareForStream( $params['src'], $info, $extraHeaders );
	if ( $res == StreamFile::NOT_MODIFIED ) {
		// do nothing; client cache is up to date
	} elseif ( $res == StreamFile::READY_STREAM ) {
		$status = $this->doStreamFile( $params );
	} else {
		$status->fatal( 'backend-fail-stream', $params['src'] );
	}

	wfProfileOut( __METHOD__ );
	return $status;
}
1249
1250 /**
1251 * @see FileBackendStore::streamFile()
1252 */
protected function doStreamFile( array $params ) {
	// Default implementation: output the contents of a local reference.
	$status = Status::newGood();

	$localFile = $this->getLocalReference( $params );
	// Short-circuit: readfile() is only attempted when a local file exists.
	// Both failure modes are reported with the same message.
	if ( !$localFile || !readfile( $localFile->getPath() ) ) {
		$status->fatal( 'backend-fail-stream', $params['src'] );
	}

	return $status;
}
1265
1266 /**
1267 * @copydoc FileBackend::getFileList()
1268 */
final public function getFileList( array $params ) {
	list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
	if ( $dir === null ) {
		return null; // invalid storage path
	}

	if ( $shard === null ) {
		// File listing spans multiple containers/shards, so hand back an
		// iterator that walks the listing of each shard in turn
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		$suffixes = $this->getContainerSuffixes( $pathParts[1] );

		return new FileBackendStoreShardListIterator( $this,
			$fullCont, $dir, $suffixes, $params );
	}

	// File listing is confined to a single container/shard
	return $this->getFileListInternal( $fullCont, $dir, $params );
}
1285
1286 /**
1287 * Do not call this function from places outside FileBackend
1288 *
1289 * @see FileBackendStore::getFileList()
1290 *
1291 * @param $container string Resolved container name
1292 * @param $dir string Resolved path relative to container
1293 * @param $params Array
1294 * @return Traversable|Array|null
1295 */
1296 abstract public function getFileListInternal( $container, $dir, array $params ); // Called by getFileList() for a single shard and by FileBackendStoreShardListIterator once per shard
1297
1298 /**
1299 * Get the list of supported operations and their corresponding FileOp classes.
1300 *
1301 * @return Array
1302 */
protected function supportedOperations() {
	// Map of operation name => FileOp class name, as consumed by getOperations()
	$opClasses = array();
	$opClasses['store'] = 'StoreFileOp';
	$opClasses['copy'] = 'CopyFileOp';
	$opClasses['move'] = 'MoveFileOp';
	$opClasses['delete'] = 'DeleteFileOp';
	$opClasses['create'] = 'CreateFileOp';
	$opClasses['null'] = 'NullFileOp';

	return $opClasses;
}
1313
1314 /**
1315 * Return a list of FileOp objects from a list of operations.
1316 * Do not call this function from places outside FileBackend.
1317 *
1318 * The result must have the same number of items as the input.
1319 * An exception is thrown if an unsupported operation is requested.
1320 *
1321 * @param $ops Array Same format as doOperations()
1322 * @return Array List of FileOp objects
1323 * @throws MWException
1324 */
final public function getOperations( array $ops ) {
	$opClasses = $this->supportedOperations();

	// Build the FileOp objects in the same order as the input
	$fileOps = array();
	foreach ( $ops as $operation ) {
		$opName = $operation['op'];
		if ( !isset( $opClasses[$opName] ) ) {
			throw new MWException( "Operation `$opName` is not supported." );
		}
		// The whole operation description is passed along as the op's params
		$class = $opClasses[$opName];
		$fileOps[] = new $class( $this, $operation );
	}

	return $fileOps;
}
1345
1346 /**
1347 * @see FileBackend::doOperationsInternal()
1348 */
protected function doOperationsInternal( array $ops, array $opts ) {
	wfProfileIn( __METHOD__ );
	$status = Status::newGood();

	// Turn the raw operation descriptions into FileOp objects
	$fileOps = $this->getOperations( $ops );

	// Locks are taken unless a non-empty 'nonLocking' option was given
	$wantLocks = empty( $opts['nonLocking'] );
	if ( $wantLocks ) {
		// Collect the paths each operation reads and the paths it changes
		$exclusivePaths = array();
		$sharedPaths = array();
		foreach ( $fileOps as $fileOp ) {
			$sharedPaths = array_merge( $sharedPaths, $fileOp->storagePathsRead() );
			$exclusivePaths = array_merge( $exclusivePaths, $fileOp->storagePathsChanged() );
		}
		// Optimization: a path that gets an EX lock does not also need the other lock
		$sharedPaths = array_diff( $sharedPaths, $exclusivePaths );
		// The parent directory of each changed path is locked as well
		$parentDirs = array_map( 'dirname', $exclusivePaths );
		$sharedPaths = array_merge( $sharedPaths, $parentDirs );
		// The lock objects are kept in locals so that they are held for the
		// scope of this function
		$scopeLockS = $this->getScopedFileLocks( $sharedPaths, LockManager::LOCK_UW, $status );
		$scopeLockE = $this->getScopedFileLocks( $exclusivePaths, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			wfProfileOut( __METHOD__ );
			return $status; // abort
		}
	}

	// Clear any cache entries (after locks acquired)
	$this->clearCache();

	// Actually attempt the operation batch...
	$batchStatus = FileOp::attemptBatch( $fileOps, $opts );

	// Merge errors into status fields
	$status->merge( $batchStatus );
	$status->success = $batchStatus->success; // not done in merge()

	wfProfileOut( __METHOD__ );
	return $status;
}
1390
1391 /**
1392 * @see FileBackend::clearCache()
1393 */
final public function clearCache( array $paths = null ) {
	if ( is_array( $paths ) ) {
		// Normalize every path, then drop the ones that failed to normalize
		$normalized = array_map( 'FileBackend::normalizeStoragePath', $paths );
		$paths = array_filter( $normalized, 'strlen' ); // remove nulls
	}

	if ( $paths !== null ) {
		// Purge only the entries for the given paths
		foreach ( $paths as $path ) {
			unset( $this->cache[$path] );
			unset( $this->expensiveCache[$path] );
		}
	} else {
		// No path list given: wipe both caches entirely
		$this->cache = array();
		$this->expensiveCache = array();
	}

	// Let the hook clear any additional caches with the same path list
	$this->doClearCache( $paths );
}
1410
1411 /**
1412 * Clears any additional stat caches for storage paths
1413 *
1414 * @see FileBackend::clearCache()
1415 *
1416 * @param $paths Array Storage paths (optional)
1417 * @return void
1418 */
1419 protected function doClearCache( array $paths = null ) {} // Intentionally empty hook; clearCache() calls it with the normalized path list, or null for "everything"
1420
1421 /**
1422 * Prune the inexpensive cache if it is too big to add an item
1423 *
1424 * @return void
1425 */
protected function trimCache() {
	if ( count( $this->cache ) < $this->maxCacheSize ) {
		return; // still room for another entry
	}
	// Evict the first entry of the array to make room
	reset( $this->cache );
	$firstKey = key( $this->cache );
	unset( $this->cache[$firstKey] );
}
1432
1433 /**
1434 * Prune the expensive cache if it is too big to add an item
1435 *
1436 * @return void
1437 */
protected function trimExpensiveCache() {
	if ( count( $this->expensiveCache ) < $this->maxExpensiveCacheSize ) {
		return; // still room for another entry
	}
	// Evict the first entry of the array to make room
	reset( $this->expensiveCache );
	$firstKey = key( $this->expensiveCache );
	unset( $this->expensiveCache[$firstKey] );
}
1444
1445 /**
1446 * Check if a container name is valid.
1447 * This checks for length and illegal characters.
1448 *
1449 * @param $container string
1450 * @return bool
1451 */
final protected static function isValidContainerName( $container ) {
	// The pattern accounts for Swift and S3 restrictions while leaving room
	// for things like '.xxx' (hex shard chars) or '.seg' (segments).
	// It allows 1 to 200 characters: an alphanumeric first character, then
	// alphanumerics, '-' or '_' (case-insensitive). That rules out directory
	// separators and traversal characters.
	// Note that matching strings URL encode to the same string;
	// in Swift, the length restriction is *after* URL encoding.
	$pattern = '/^[a-z0-9][a-z0-9-_]{0,199}$/i';

	return preg_match( $pattern, $container );
}
1460
1461 /**
1462 * Splits a storage path into an internal container name,
1463 * an internal relative file name, and a container shard suffix.
1464 * Any shard suffix is already appended to the internal container name.
1465 * This also checks that the storage path is valid and within this backend.
1466 *
1467 * If the container is sharded but a suffix could not be determined,
1468 * this means that the path can only refer to a directory and can only
1469 * be scanned by looking in all the container shards.
1470 *
1471 * @param $storagePath string
1472 * @return Array (container, path, container suffix) or (null, null, null) if invalid
1473 */
final protected function resolveStoragePath( $storagePath ) {
	$invalid = array( null, null, null );

	list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
	if ( $backend !== $this->name ) {
		return $invalid; // must be for this backend
	}

	$relPath = self::normalizeContainerPath( $relPath );
	if ( $relPath === null ) {
		return $invalid;
	}

	// Get shard for the normalized path if this container is sharded.
	// This is done before the backend-specific path resolution below.
	$cShard = $this->getContainerShard( $container, $relPath );

	// Validate and sanitize the relative path (backend-specific)
	$relPath = $this->resolveContainerPath( $container, $relPath );
	if ( $relPath === null ) {
		return $invalid;
	}

	// Prepend any wiki ID prefix to the container name
	$container = $this->fullContainerName( $container );
	if ( !self::isValidContainerName( $container ) ) {
		return $invalid;
	}

	// Validate and sanitize the container name (backend-specific)
	$container = $this->resolveContainerName( "{$container}{$cShard}" );
	if ( $container === null ) {
		return $invalid;
	}

	return array( $container, $relPath, $cShard );
}
1498
1499 /**
1500 * Like resolveStoragePath() except null values are returned if
1501 * the container is sharded and the shard could not be determined.
1502 *
1503 * @see FileBackendStore::resolveStoragePath()
1504 *
1505 * @param $storagePath string
1506 * @return Array (container, path) or (null, null) if invalid
1507 */
final protected function resolveStoragePathReal( $storagePath ) {
	list( $container, $relPath, $cShard ) = $this->resolveStoragePath( $storagePath );

	// A null shard means the path was invalid or the shard is undetermined
	if ( $cShard === null ) {
		return array( null, null );
	}

	return array( $container, $relPath );
}
1515
1516 /**
1517 * Get the container name shard suffix for a given path.
1518 * An empty suffix means the container is not sharded.
1519 *
1520 * @param $container string Container name
1521 * @param $relPath string Storage path relative to the container
1522 * @return string|null Returns null if shard could not be determined
1523 */
final protected function getContainerShard( $container, $relPath ) {
	list( $levels, $base, $repeat ) = $this->getContainerHashLevels( $container );
	if ( $levels != 1 && $levels != 2 ) {
		return ''; // no sharding
	}

	// Hash characters are either base 16 or 36
	$char = ( $base == 36 ) ? '[0-9a-z]' : '[0-9a-f]';

	// Build a regex for the shard portion of paths.
	// The concatenation of its captures gives us the shard.
	if ( $levels === 1 ) {
		// 16 or 36 shards per container
		$hashDirRegex = "({$char})";
	} elseif ( $repeat ) {
		// 256 or 1296 shards; verbose hash dir format (e.g. "a/ab/abc")
		$hashDirRegex = "{$char}/({$char}{2})";
	} else {
		// 256 or 1296 shards; short hash dir format (e.g. "a/b/c")
		$hashDirRegex = "({$char})/({$char})";
	}

	// Allow certain directories to be above the hash dirs so as
	// to work with FileRepo (e.g. "archive/a/ab" or "temp/a/ab").
	// They must be 2+ chars to avoid any hash directory ambiguity.
	$matches = array();
	if ( !preg_match( "!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $matches ) ) {
		return null; // failed to match
	}

	// Drop the full match and join the captured hash characters
	$captures = array_slice( $matches, 1 );

	return '.' . implode( '', $captures );
}
1551
1552 /**
1553 * Get the sharding config for a container.
1554 * If greater than 0, then all file storage paths within
1555 * the container are required to be hashed accordingly.
1556 *
1557 * @param $container string
1558 * @return Array (integer levels, integer base, repeat flag) or (0, 0, false)
1559 */
final protected function getContainerHashLevels( $container ) {
	$noSharding = array( 0, 0, false );

	if ( !isset( $this->shardViaHashLevels[$container] ) ) {
		return $noSharding; // nothing configured for this container
	}
	$config = $this->shardViaHashLevels[$container];

	// Only 1 or 2 hash levels are accepted
	$hashLevels = (int)$config['levels'];
	if ( $hashLevels != 1 && $hashLevels != 2 ) {
		return $noSharding;
	}

	// Only base 16 or base 36 is accepted
	$hashBase = (int)$config['base'];
	if ( $hashBase != 16 && $hashBase != 36 ) {
		return $noSharding;
	}

	return array( $hashLevels, $hashBase, $config['repeat'] );
}
1573
1574 /**
1575 * Get a list of full container shard suffixes for a container
1576 *
1577 * @param $container string
1578 * @return Array
1579 */
final protected function getContainerSuffixes( $container ) {
	list( $digits, $base ) = $this->getContainerHashLevels( $container );
	if ( $digits <= 0 ) {
		return array(); // container is not sharded
	}

	// One suffix per possible hash value: '.' followed by the value in the
	// configured base, as produced by wfBaseConvert() with $digits digits
	$suffixes = array();
	$total = pow( $base, $digits );
	for ( $i = 0; $i < $total; ++$i ) {
		$suffixes[] = '.' . wfBaseConvert( $i, 10, $base, $digits );
	}

	return $suffixes;
}
1591
1592 /**
1593 * Get the full container name, including the wiki ID prefix
1594 *
1595 * @param $container string
1596 * @return string
1597 */
final protected function fullContainerName( $container ) {
	// Prefix the container with "<wiki ID>-" unless the wiki ID is empty
	return ( $this->wikiId != '' )
		? "{$this->wikiId}-$container"
		: $container;
}
1605
1606 /**
1607 * Resolve a container name, checking if it's allowed by the backend.
1608 * This is intended for internal use, such as encoding illegal chars.
1609 * Subclasses can override this to be more restrictive.
1610 *
1611 * @param $container string
1612 * @return string|null
1613 */
1614 protected function resolveContainerName( $container ) {
// Default: accept the name unchanged. resolveStoragePath() passes in the full
// container name with any shard suffix appended, and treats a null return
// as an invalid storage path.
1615 return $container;
1616 }
1617
1618 /**
1619 * Resolve a relative storage path, checking if it's allowed by the backend.
1620 * This is intended for internal use, such as encoding illegal chars or perhaps
1621 * getting absolute paths (e.g. FS based backends). Note that the relative path
1622 * may be the empty string (e.g. the path is simply to the container).
1623 *
1624 * @param $container string Container name
1625 * @param $relStoragePath string Storage path relative to the container
1626 * @return string|null Path or null if not valid
1627 */
1628 protected function resolveContainerPath( $container, $relStoragePath ) {
// Default: accept the path unchanged. resolveStoragePath() passes in the
// already-normalized relative path and treats a null return as an invalid
// storage path.
1629 return $relStoragePath;
1630 }
1631 }
1632
1633 /**
1634 * FileBackendStore helper function to handle file listings that span container shards.
1635 * Do not use this class from places outside of FileBackendStore.
1636 *
1637 * @ingroup FileBackend
1638 */
class FileBackendStoreShardListIterator implements Iterator {
	/** @var FileBackendStore Backend whose shards are listed */
	protected $backend;
	/** @var Array Parameters passed through to getFileListInternal() */
	protected $params;
	/** @var Array List of container shard suffixes to walk through */
	protected $shardSuffixes;
	/** @var string Full container name (without shard suffix) */
	protected $container;
	/** @var string Directory relative to the container */
	protected $directory;

	/** @var Traversable|Array|null File list of the current shard */
	protected $iter;
	/** @var integer Index into $shardSuffixes of the current shard */
	protected $curShard = 0;
	/** @var integer Number of items advanced past so far; used as the key */
	protected $pos = 0;

	/**
	 * @param $backend FileBackendStore
	 * @param $container string Full storage container name
	 * @param $dir string Storage directory relative to container
	 * @param $suffixes Array List of container shard suffixes
	 * @param $params Array
	 */
	public function __construct(
		FileBackendStore $backend, $container, $dir, array $suffixes, array $params
	) {
		$this->backend = $backend;
		$this->params = $params;
		$this->shardSuffixes = $suffixes;
		$this->container = $container;
		$this->directory = $dir;
	}

	/**
	 * @return mixed The current item of the current shard's file list
	 */
	public function current() {
		return is_array( $this->iter )
			? current( $this->iter )
			: $this->iter->current();
	}

	/**
	 * @return integer Running position across all shards
	 */
	public function key() {
		return $this->pos;
	}

	/**
	 * Advance by one item, moving on to later shards when the current
	 * shard has no items left.
	 */
	public function next() {
		++$this->pos;
		if ( is_array( $this->iter ) ) {
			next( $this->iter );
		} else {
			$this->iter->next();
		}
		// Find the next non-empty shard if no elements are left
		$this->nextShardIteratorIfNotValid();
	}

	/**
	 * If the iterator for this container shard is out of items,
	 * then move on to the next container shard that has items.
	 * If there are none, this stops once the shard index runs
	 * past the end of the suffix list.
	 */
	protected function nextShardIteratorIfNotValid() {
		while ( !$this->valid() ) {
			$this->curShard++;
			if ( $this->curShard >= count( $this->shardSuffixes ) ) {
				return; // no more container shards
			}
			$this->setIteratorFromCurrentShard();
		}
	}

	/**
	 * Load the file list of the shard that $curShard points at.
	 */
	protected function setIteratorFromCurrentShard() {
		$suffix = $this->shardSuffixes[$this->curShard];
		$shardContainer = "{$this->container}{$suffix}";
		$this->iter = $this->backend->getFileListInternal(
			$shardContainer, $this->directory, $this->params );
	}

	/**
	 * Restart from the first shard, skipping ahead to the first shard
	 * that actually has items.
	 */
	public function rewind() {
		$this->pos = 0;
		$this->curShard = 0;
		$this->setIteratorFromCurrentShard();
		// Find the next non-empty shard if this one has no elements
		$this->nextShardIteratorIfNotValid();
	}

	/**
	 * @return bool Whether the current shard's list has a current item
	 */
	public function valid() {
		if ( $this->iter == null ) {
			return false; // some failure?
		}
		if ( is_array( $this->iter ) ) {
			// no paths can have the value false
			return ( current( $this->iter ) !== false );
		}
		return $this->iter->valid();
	}
}