[FileBackend] r113704: updated FileBackend constructor documentation
[lhc/web/wiklou.git] / includes / filerepo / backend / FileBackend.php
1 <?php
2 /**
3 * @defgroup FileBackend File backend
4 * @ingroup FileRepo
5 *
6 * File backend is used to interact with file storage systems,
7 * such as the local file system, NFS, or cloud storage systems.
8 */
9
10 /**
11 * @file
12 * @ingroup FileBackend
13 * @author Aaron Schulz
14 */
15
16 /**
17 * @brief Base class for all file backend classes (including multi-write backends).
18 *
19 * This class defines the methods as abstract that subclasses must implement.
20 * Outside callers can assume that all backends will have these functions.
21 *
22 * All "storage paths" are of the format "mwstore://<backend>/<container>/<path>".
23 * The <path> portion is a relative path that uses UNIX file system (FS) notation,
24 * though any particular backend may not actually be using a local filesystem.
25 * Therefore, the relative paths are only virtual.
26 *
27 * Backend contents are stored under wiki-specific container names by default.
28 * For legacy reasons, this has no effect for the FS backend class, and per-wiki
29 * segregation must be done by setting the container paths appropriately.
30 *
31 * FS-based backends are somewhat more restrictive due to the existence of real
32 * directory files; a regular file cannot have the same name as a directory. Other
33 * backends with virtual directories may not have this limitation. Callers should
34 * store files in such a way that no files and directories are under the same path.
35 *
36 * Methods should avoid throwing exceptions at all costs.
37 * As a corollary, external dependencies should be kept to a minimum.
38 *
39 * @ingroup FileBackend
40 * @since 1.19
41 */
abstract class FileBackend {
	protected $name; // string; unique backend name
	protected $wikiId; // string; unique wiki name
	protected $readOnly; // string; read-only explanation message ('' if writable)
	/** @var LockManager */
	protected $lockManager;
	/** @var FileJournal */
	protected $fileJournal;

	/**
	 * Create a new backend instance from configuration.
	 * This should only be called from within FileBackendGroup.
	 *
	 * $config includes:
	 *     'name'        : The unique name of this backend.
	 *                     This should consist of alphanumeric, '-', and '_' characters.
	 *                     This name should not be changed after use.
	 *     'wikiId'      : Prefix to container names that is unique to this wiki.
	 *                     It should only consist of alphanumeric, '-', and '_' characters.
	 *                     Defaults to wfWikiID() when not given.
	 *     'lockManager' : Registered name of a file lock manager to use,
	 *                     or an already constructed LockManager instance.
	 *     'fileJournal' : File journal configuration; see FileJournal::factory().
	 *                     Journals simply log changes to files stored in the backend.
	 *                     A NullFileJournal is used when this is not given.
	 *     'readOnly'    : Write operations are disallowed if this is a non-empty string.
	 *                     It should be an explanation for the backend being read-only.
	 *
	 * @param $config Array
	 * @throws MWException If the backend name is invalid
	 */
	public function __construct( array $config ) {
		$this->name = $config['name'];
		// The "D" modifier makes "$" match only at the very end of the string;
		// without it a name with a trailing newline would pass validation.
		if ( !preg_match( '!^[a-zA-Z0-9_-]{1,255}$!D', $this->name ) ) {
			throw new MWException( "Backend name `{$this->name}` is invalid." );
		}
		$this->wikiId = isset( $config['wikiId'] )
			? $config['wikiId']
			: wfWikiID(); // e.g. "my_wiki-en_"
		// Accept either a ready-made lock manager or the name of a registered one
		$this->lockManager = ( $config['lockManager'] instanceof LockManager )
			? $config['lockManager']
			: LockManagerGroup::singleton()->get( $config['lockManager'] );
		$this->fileJournal = isset( $config['fileJournal'] )
			? FileJournal::factory( $config['fileJournal'], $this->name )
			: FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );
		$this->readOnly = isset( $config['readOnly'] )
			? (string)$config['readOnly']
			: '';
	}

	/**
	 * Get the unique backend name.
	 * We may have multiple different backends of the same type.
	 * For example, we can have two Swift backends using different proxies.
	 *
	 * @return string
	 */
	final public function getName() {
		return $this->name;
	}

	/**
	 * Check if this backend is read-only
	 *
	 * @return bool
	 */
	final public function isReadOnly() {
		return ( $this->readOnly != '' );
	}

	/**
	 * Get an explanatory message if this backend is read-only
	 *
	 * @return string|bool Returns false if the backend is not read-only
	 */
	final public function getReadOnlyReason() {
		return ( $this->readOnly != '' ) ? $this->readOnly : false;
	}

	/**
	 * This is the main entry point into the backend for write operations.
	 * Callers supply an ordered list of operations to perform as a transaction.
	 * Files will be locked, the stat cache cleared, and then the operations attempted.
	 * If any serious errors occur, all attempted operations will be rolled back.
	 *
	 * $ops is an array of arrays. The outer array holds a list of operations.
	 * Each inner array is a set of key value pairs that specify an operation.
	 *
	 * Supported operations and their parameters:
	 * a) Create a new file in storage with the contents of a string
	 *     array(
	 *         'op'                  => 'create',
	 *         'dst'                 => <storage path>,
	 *         'content'             => <string of new file contents>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * b) Copy a file system file into storage
	 *     array(
	 *         'op'                  => 'store',
	 *         'src'                 => <file system path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * c) Copy a file within storage
	 *     array(
	 *         'op'                  => 'copy',
	 *         'src'                 => <storage path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * d) Move a file within storage
	 *     array(
	 *         'op'                  => 'move',
	 *         'src'                 => <storage path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * e) Delete a file within storage
	 *     array(
	 *         'op'                  => 'delete',
	 *         'src'                 => <storage path>,
	 *         'ignoreMissingSource' => <boolean>
	 *     )
	 * f) Do nothing (no-op)
	 *     array(
	 *         'op'                  => 'null',
	 *     )
	 *
	 * Boolean flags for operations (operation-specific):
	 * 'ignoreMissingSource' : The operation will simply succeed and do
	 *                         nothing if the source file does not exist.
	 * 'overwrite'           : Any destination file will be overwritten.
	 * 'overwriteSame'       : An error will not be given if a file already
	 *                         exists at the destination that has the same
	 *                         contents as the new contents to be written there.
	 *
	 * $opts is an associative array of boolean flags, including:
	 * 'force'               : Errors that would normally cause a rollback do not.
	 *                         The remaining operations are still attempted if any fail.
	 * 'nonLocking'          : No locks are acquired for the operations.
	 *                         This can increase performance for non-critical writes.
	 *                         This has no effect unless the 'force' flag is set.
	 * 'allowStale'          : Don't require the latest available data.
	 *                         This can increase performance for non-critical writes.
	 *                         This has no effect unless the 'force' flag is set.
	 * 'nonJournaled'        : Don't log this operation batch in the file journal.
	 *                         This limits the ability of recovery scripts.
	 *
	 * Remarks on locking:
	 * File system paths given to operations should refer to files that are
	 * already locked or otherwise safe from modification from other processes.
	 * Normally these files will be new temp files, which should be adequate.
	 *
	 * Return value:
	 * This returns a Status, which contains all warnings and fatals that occurred
	 * during the operation. The 'failCount', 'successCount', and 'success' members
	 * will reflect each operation attempted. The status will be "OK" unless:
	 *     a) unexpected operation errors occurred (network partitions, disk full...)
	 *     b) significant operation errors occurred and 'force' was not set
	 *
	 * @param $ops Array List of operations to execute in order
	 * @param $opts Array Batch operation options
	 * @return Status
	 */
	final public function doOperations( array $ops, array $opts = array() ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		if ( empty( $opts['force'] ) ) { // sanity
			// These flags are only honoured together with 'force'
			unset( $opts['nonLocking'] );
			unset( $opts['allowStale'] );
		}
		return $this->doOperationsInternal( $ops, $opts );
	}

	/**
	 * @see FileBackend::doOperations()
	 */
	abstract protected function doOperationsInternal( array $ops, array $opts );

	/**
	 * Same as doOperations() except it takes a single operation.
	 * If you are doing a batch of operations that should either
	 * all succeed or all fail, then use that function instead.
	 *
	 * @see FileBackend::doOperations()
	 *
	 * @param $op Array Operation
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function doOperation( array $op, array $opts = array() ) {
		return $this->doOperations( array( $op ), $opts );
	}

	/**
	 * Performs a single create operation.
	 * This sets $params['op'] to 'create' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function create( array $params, array $opts = array() ) {
		$params['op'] = 'create';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single store operation.
	 * This sets $params['op'] to 'store' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function store( array $params, array $opts = array() ) {
		$params['op'] = 'store';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single copy operation.
	 * This sets $params['op'] to 'copy' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function copy( array $params, array $opts = array() ) {
		$params['op'] = 'copy';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single move operation.
	 * This sets $params['op'] to 'move' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function move( array $params, array $opts = array() ) {
		$params['op'] = 'move';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single delete operation.
	 * This sets $params['op'] to 'delete' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function delete( array $params, array $opts = array() ) {
		$params['op'] = 'delete';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Concatenate a list of storage files into a single file system file.
	 * The target path should refer to a file that is already locked or
	 * otherwise safe from modification from other processes. Normally,
	 * the file will be a new temp file, which should be adequate.
	 * $params include:
	 *     srcs : ordered source storage paths (e.g. chunk1, chunk2, ...)
	 *     dst  : file system path to 0-byte temp file
	 *
	 * @param $params Array Operation parameters
	 * @return Status
	 */
	abstract public function concatenate( array $params );

	/**
	 * Prepare a storage directory for usage.
	 * This will create any required containers and parent directories.
	 * Backends using key/value stores only need to create the container.
	 *
	 * $params include:
	 *     dir : storage directory
	 *
	 * @param $params Array
	 * @return Status
	 */
	final public function prepare( array $params ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		return $this->doPrepare( $params );
	}

	/**
	 * @see FileBackend::prepare()
	 */
	abstract protected function doPrepare( array $params );

	/**
	 * Take measures to block web access to a storage directory and
	 * the container it belongs to. FS backends might add .htaccess
	 * files whereas key/value store backends might restrict container
	 * access to the auth user that represents end-users in web request.
	 * This is not guaranteed to actually do anything.
	 *
	 * $params include:
	 *     dir       : storage directory
	 *     noAccess  : try to deny file access
	 *     noListing : try to deny file listing
	 *
	 * @param $params Array
	 * @return Status
	 */
	final public function secure( array $params ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		$status = $this->doPrepare( $params ); // dir must exist to restrict it
		if ( $status->isOK() ) {
			$status->merge( $this->doSecure( $params ) );
		}
		return $status;
	}

	/**
	 * @see FileBackend::secure()
	 */
	abstract protected function doSecure( array $params );

	/**
	 * Delete a storage directory if it is empty.
	 * Backends using key/value stores may do nothing unless the directory
	 * is that of an empty container, in which case it should be deleted.
	 *
	 * $params include:
	 *     dir : storage directory
	 *
	 * @param $params Array
	 * @return Status
	 */
	final public function clean( array $params ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		return $this->doClean( $params );
	}

	/**
	 * @see FileBackend::clean()
	 */
	abstract protected function doClean( array $params );

	/**
	 * Check if a file exists at a storage path in the backend.
	 * This returns false if only a directory exists at the path.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return bool|null Returns null on failure
	 */
	abstract public function fileExists( array $params );

	/**
	 * Get the last-modified timestamp of the file at a storage path.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return string|bool TS_MW timestamp or false on failure
	 */
	abstract public function getFileTimestamp( array $params );

	/**
	 * Get the contents of a file at a storage path in the backend.
	 * This should be avoided for potentially large files.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return string|bool Returns false on failure
	 */
	abstract public function getFileContents( array $params );

	/**
	 * Get the size (bytes) of a file at a storage path in the backend.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return integer|bool Returns false on failure
	 */
	abstract public function getFileSize( array $params );

	/**
	 * Get quick information about a file at a storage path in the backend.
	 * If the file does not exist, then this returns false.
	 * Otherwise, the result is an associative array that includes:
	 *     mtime : the last-modified timestamp (TS_MW)
	 *     size  : the file size (bytes)
	 * Additional values may be included for internal use only.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return Array|bool|null Returns null on failure
	 */
	abstract public function getFileStat( array $params );

	/**
	 * Get a SHA-1 hash of the file at a storage path in the backend.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return string|bool Hash string or false on failure
	 */
	abstract public function getFileSha1Base36( array $params );

	/**
	 * Get the properties of the file at a storage path in the backend.
	 * Returns FSFile::placeholderProps() on failure.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return Array
	 */
	abstract public function getFileProps( array $params );

	/**
	 * Stream the file at a storage path in the backend.
	 * If the file does not exist, a 404 error will be given.
	 * Appropriate HTTP headers (Status, Content-Type, Content-Length)
	 * must be sent if streaming began, while none should be sent otherwise.
	 * Implementations should flush the output buffer before sending data.
	 *
	 * $params include:
	 *     src     : source storage path
	 *     headers : additional HTTP headers to send on success
	 *     latest  : use the latest available data
	 *
	 * @param $params Array
	 * @return Status
	 */
	abstract public function streamFile( array $params );

	/**
	 * Returns a file system file, identical to the file at a storage path.
	 * The file returned is either:
	 * a) A local copy of the file at a storage path in the backend.
	 *    The temporary copy will have the same extension as the source.
	 * b) An original of the file at a storage path in the backend.
	 * Temporary files may be purged when the file object falls out of scope.
	 *
	 * Write operations should *never* be done on this file as some backends
	 * may do internal tracking or may be instances of FileBackendMultiWrite.
	 * In that latter case, there are copies of the file that must stay in sync.
	 * Additionally, further calls to this function may return the same file.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return FSFile|null Returns null on failure
	 */
	abstract public function getLocalReference( array $params );

	/**
	 * Get a local copy on disk of the file at a storage path in the backend.
	 * The temporary copy will have the same file extension as the source.
	 * Temporary files may be purged when the file object falls out of scope.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return TempFSFile|null Returns null on failure
	 */
	abstract public function getLocalCopy( array $params );

	/**
	 * Get an iterator to list out all stored files under a storage directory.
	 * If the directory is of the form "mwstore://backend/container",
	 * then all files in the container should be listed.
	 * If the directory is of form "mwstore://backend/container/dir",
	 * then all files under that container directory should be listed.
	 * Results should be storage paths relative to the given directory.
	 *
	 * Storage backends with eventual consistency might return stale data.
	 *
	 * $params include:
	 *     dir : storage path directory
	 *
	 * @param $params Array
	 * @return Traversable|Array|null Returns null on failure
	 */
	abstract public function getFileList( array $params );

	/**
	 * Invalidate any in-process file existence and property cache.
	 * If $paths is given, then only the cache for those files will be cleared.
	 * The base implementation does nothing.
	 *
	 * @param $paths Array Storage paths (optional)
	 * @return void
	 */
	public function clearCache( array $paths = null ) {}

	/**
	 * Lock the files at the given storage paths in the backend.
	 * This will either lock all the files or none (on failure).
	 *
	 * Callers should consider using getScopedFileLocks() instead.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function lockFiles( array $paths, $type ) {
		return $this->lockManager->lock( $paths, $type );
	}

	/**
	 * Unlock the files at the given storage paths in the backend.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function unlockFiles( array $paths, $type ) {
		return $this->lockManager->unlock( $paths, $type );
	}

	/**
	 * Lock the files at the given storage paths in the backend.
	 * This will either lock all the files or none (on failure).
	 * On failure, the status object will be updated with errors.
	 *
	 * Once the return value goes out of scope, the locks will be released and
	 * the status updated. Unlock fatals will not change the status "OK" value.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @param $status Status Status to update on lock/unlock
	 * @return ScopedLock|null Returns null on failure
	 */
	final public function getScopedFileLocks( array $paths, $type, Status $status ) {
		return ScopedLock::factory( $this->lockManager, $paths, $type, $status );
	}

	/**
	 * Check if a given path is a "mwstore://" path.
	 * This does not do any further validation or any existence checks.
	 *
	 * @param $path string
	 * @return bool
	 */
	final public static function isStoragePath( $path ) {
		return ( strpos( $path, 'mwstore://' ) === 0 );
	}

	/**
	 * Split a storage path into a backend name, a container name,
	 * and a relative file path. The relative path may be the empty string.
	 * This does not do any path normalization or traversal checks.
	 *
	 * @param $storagePath string
	 * @return Array (backend, container, rel object) or (null, null, null)
	 */
	final public static function splitStoragePath( $storagePath ) {
		if ( self::isStoragePath( $storagePath ) ) {
			// Remove the "mwstore://" prefix (10 characters) and split the path
			$parts = explode( '/', substr( $storagePath, 10 ), 3 );
			if ( count( $parts ) >= 2 && $parts[0] != '' && $parts[1] != '' ) {
				if ( count( $parts ) == 3 ) {
					return $parts; // e.g. "backend/container/path"
				} else {
					return array( $parts[0], $parts[1], '' ); // e.g. "backend/container"
				}
			}
		}
		return array( null, null, null );
	}

	/**
	 * Normalize a storage path by cleaning up directory separators.
	 * Returns null if the path is not of the format of a valid storage path.
	 *
	 * @param $storagePath string
	 * @return string|null
	 */
	final public static function normalizeStoragePath( $storagePath ) {
		list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
		if ( $relPath !== null ) { // must be a well-formed storage path
			$relPath = self::normalizeContainerPath( $relPath );
			if ( $relPath !== null ) {
				return ( $relPath != '' )
					? "mwstore://{$backend}/{$container}/{$relPath}"
					: "mwstore://{$backend}/{$container}";
			}
		}
		return null;
	}

	/**
	 * Get the parent storage directory of a storage path.
	 * This returns a path like "mwstore://backend/container",
	 * "mwstore://backend/container/...", or null if there is no parent.
	 *
	 * @param $storagePath string
	 * @return string|null
	 */
	final public static function parentStoragePath( $storagePath ) {
		$storagePath = dirname( $storagePath );
		// Only the relative part is needed, to tell whether the parent is still valid
		list( , , $rel ) = self::splitStoragePath( $storagePath );
		return ( $rel === null ) ? null : $storagePath;
	}

	/**
	 * Get the final extension from a storage or FS path.
	 *
	 * The extension is the lower-cased text after the last '.' in the path.
	 * An empty string is returned if the path has no '.', if the only '.' is
	 * the very first character, or if the last '.' belongs to a directory
	 * name rather than to the file name (e.g. "dir.d/file").
	 *
	 * @param $path string
	 * @return string
	 */
	final public static function extensionFromPath( $path ) {
		$i = strrpos( $path, '.' );
		$ext = $i ? (string)substr( $path, $i + 1 ) : '';
		// A directory separator after the last dot means the dot is part of
		// a directory name and the file itself has no extension.
		if ( strpos( $ext, '/' ) !== false || strpos( $ext, '\\' ) !== false ) {
			return '';
		}
		return strtolower( $ext );
	}

	/**
	 * Validate and normalize a relative storage path.
	 * Null is returned if the path involves directory traversal.
	 * Traversal is insecure for FS backends and broken for others.
	 *
	 * @param $path string Storage path relative to a container
	 * @return string|null
	 */
	final protected static function normalizeContainerPath( $path ) {
		// Normalize directory separators
		$path = strtr( $path, '\\', '/' );
		// Collapse any consecutive directory separators
		$path = preg_replace( '![/]{2,}!', '/', $path );
		// Remove any leading directory separator
		$path = ltrim( $path, '/' );
		// Use the same traversal protection as Title::secureAndSplit(),
		// including "." and ".." as the final path segment
		if ( strpos( $path, '.' ) !== false ) {
			if (
				$path === '.' ||
				$path === '..' ||
				strpos( $path, './' ) === 0 ||
				strpos( $path, '../' ) === 0 ||
				strpos( $path, '/./' ) !== false ||
				strpos( $path, '/../' ) !== false ||
				substr( $path, -2 ) === '/.' ||
				substr( $path, -3 ) === '/..'
			) {
				return null;
			}
		}
		return $path;
	}
}