added unit tests for WikitextContent
[lhc/web/wiklou.git] / includes / filerepo / backend / FileBackendMultiWrite.php
index 114424c..9c3cf5b 100644 (file)
@@ -6,52 +6,61 @@
  */
 
 /**
+ * @brief Proxy backend that mirrors writes to several internal backends.
+ * 
  * This class defines a multi-write backend. Multiple backends can be
  * registered to this proxy backend and it will act as a single backend.
  * Use this when all access to those backends is through this proxy backend.
  * At least one of the backends must be declared the "master" backend.
  *
  * Only use this class when transitioning from one storage system to another.
- * 
- * The order that the backends are defined sets the priority of which
- * backend is read from or written to first. Functions like fileExists()
- * and getFileProps() will return information based on the first backend
- * that has the file. Special cases are listed below:
- *     a) getFileTimestamp() will always check only the master backend to
- *        avoid confusing and inconsistent results.
- * 
- * All write operations are performed on all backends.
+ *
+ * Read operations are only done on the 'master' backend for consistency.
+ * Write operations are performed on all backends, in the order defined.
  * If an operation fails on one backend it will be rolled back from the others.
  *
  * @ingroup FileBackend
+ * @since 1.19
  */
-class FileBackendMultiWrite extends FileBackendBase {
-       /** @var Array Prioritized list of FileBackend objects */
-       protected $fileBackends = array(); // array of (backend index => backends)
-       protected $masterIndex = -1; // index of master backend
+class FileBackendMultiWrite extends FileBackend {
+       /** @var Array Prioritized list of FileBackendStore objects */
+       protected $backends = array(); // array of (backend index => backends)
+       protected $masterIndex = -1; // integer; index of master backend
+       protected $syncChecks = 0; // integer bitfield
+
+       /* Possible internal backend consistency checks */
+       const CHECK_SIZE = 1;
+       const CHECK_TIME = 2;
 
        /**
         * Construct a proxy backend that consists of several internal backends.
-        * $config contains:
-        *     'name'        : The name of the proxy backend
-        *     'lockManager' : Registered name of the file lock manager to use
+        * Additional $config params include:
         *     'backends'    : Array of backend config and multi-backend settings.
         *                     Each value is the config used in the constructor of a
-        *                     FileBackend class, but with these additional settings:
+        *                     FileBackendStore class, but with these additional settings:
         *                         'class'         : The name of the backend class
         *                         'isMultiMaster' : This must be set for one backend.
+        *     'syncChecks'  : Integer bitfield of internal backend sync checks to perform.
+        *                     Possible bits include self::CHECK_SIZE and self::CHECK_TIME.
+        *                     The checks are done before allowing any file operations.
         * @param $config Array
         */
        public function __construct( array $config ) {
                parent::__construct( $config );
+               $namesUsed = array();
                // Construct backends here rather than via registration
                // to keep these backends hidden from outside the proxy.
                foreach ( $config['backends'] as $index => $config ) {
+                       $name = $config['name'];
+                       if ( isset( $namesUsed[$name] ) ) { // don't break FileOp predicates
+                               throw new MWException( "Two or more backends defined with the name $name." );
+                       }
+                       $namesUsed[$name] = 1;
                        if ( !isset( $config['class'] ) ) {
                                throw new MWException( 'No class given for a backend config.' );
                        }
                        $class = $config['class'];
-                       $this->fileBackends[$index] = new $class( $config );
+                       $this->backends[$index] = new $class( $config );
                        if ( !empty( $config['isMultiMaster'] ) ) {
                                if ( $this->masterIndex >= 0 ) {
                                        throw new MWException( 'More than one master backend defined.' );
@@ -62,75 +71,167 @@ class FileBackendMultiWrite extends FileBackendBase {
                if ( $this->masterIndex < 0 ) { // need backends and must have a master
                        throw new MWException( 'No master backend defined.' );
                }
+               $this->syncChecks = isset( $config['syncChecks'] )
+                       ? $config['syncChecks']
+                       : self::CHECK_SIZE;
        }
 
        /**
-        * @see FileBackendBase::doOperationsInternal()
+        * @see FileBackend::doOperationsInternal()
+        * @return Status
         */
        final protected function doOperationsInternal( array $ops, array $opts ) {
                $status = Status::newGood();
 
                $performOps = array(); // list of FileOp objects
-               $filesLockEx = $filesLockSh = array(); // storage paths to lock
+               $filesRead = array(); // storage paths read from
+               $filesChanged = array(); // storage paths written to
                // Build up a list of FileOps. The list will have all the ops
                // for one backend, then all the ops for the next, and so on.
                // These batches of ops are all part of a continuous array.
-               // Also build up a list of files to lock...
-               foreach ( $this->fileBackends as $index => $backend ) {
-                       $backendOps = $this->substOpPaths( $ops, $backend );
+               // Also build up a list of files read/changed...
+               foreach ( $this->backends as $index => $backend ) {
+                       $backendOps = $this->substOpBatchPaths( $ops, $backend );
+                       // Add on the operation batch for this backend
                        $performOps = array_merge( $performOps, $backend->getOperations( $backendOps ) );
-                       if ( $index == 0 && empty( $opts['nonLocking'] ) ) {
-                               // Set "files to lock" from the first batch so we don't try to set all
-                               // locks two or three times over (depending on the number of backends).
-                               // A lock on one storage path is a lock on all the backends.
+                       if ( $index == 0 ) { // first batch
+                               // Get the files used for these operations. Each backend has a batch of
+                               // the same operations, so we only need to get them from the first batch.
                                foreach ( $performOps as $fileOp ) {
-                                       $filesLockSh = array_merge( $filesLockSh, $fileOp->storagePathsRead() );
-                                       $filesLockEx = array_merge( $filesLockEx, $fileOp->storagePathsChanged() );
+                                       $filesRead = array_merge( $filesRead, $fileOp->storagePathsRead() );
+                                       $filesChanged = array_merge( $filesChanged, $fileOp->storagePathsChanged() );
                                }
-                               // Optimization: if doing an EX lock anyway, don't also set an SH one
-                               $filesLockSh = array_diff( $filesLockSh, $filesLockEx );
-                               // Lock the paths under the proxy backend's name
-                               $this->unsubstPaths( $filesLockSh );
-                               $this->unsubstPaths( $filesLockEx );
+                               // Get the paths under the proxy backend's name
+                               $filesRead = $this->unsubstPaths( $filesRead );
+                               $filesChanged = $this->unsubstPaths( $filesChanged );
                        }
                }
 
                // Try to lock those files for the scope of this function...
-               $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status );
-               $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status );
-               if ( !$status->isOK() ) {
-                       return $status; // abort
+               if ( empty( $opts['nonLocking'] ) ) {
+                       $filesLockSh = array_diff( $filesRead, $filesChanged ); // optimization
+                       $filesLockEx = $filesChanged;
+                       // Get a shared lock on the parent directory of each path changed
+                       $filesLockSh = array_merge( $filesLockSh, array_map( 'dirname', $filesLockEx ) );
+                       // Try to lock those files for the scope of this function...
+                       $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status );
+                       $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status );
+                       if ( !$status->isOK() ) {
+                               return $status; // abort
+                       }
                }
 
                // Clear any cache entries (after locks acquired)
-               foreach ( $this->fileBackends as $backend ) {
-                       $backend->clearCache();
+               $this->clearCache();
+
+               // Do a consistency check to see if the backends agree
+               if ( count( $this->backends ) > 1 ) {
+                       $status->merge( $this->consistencyCheck( array_merge( $filesRead, $filesChanged ) ) );
+                       if ( !$status->isOK() ) {
+                               return $status; // abort
+                       }
                }
+
                // Actually attempt the operation batch...
-               $status->merge( FileOp::attemptBatch( $performOps, $opts ) );
+               $subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
+
+               $success = array();
+               $failCount = 0;
+               $successCount = 0;
+               // Make 'success', 'successCount', and 'failCount' fields reflect
+               // the overall operation, rather than all the batches for each backend.
+               // Do this by only using success values from the master backend's batch.
+               $batchStart = $this->masterIndex * count( $ops );
+               $batchEnd = $batchStart + count( $ops ) - 1;
+               for ( $i = $batchStart; $i <= $batchEnd; $i++ ) {
+                       if ( !isset( $subStatus->success[$i] ) ) {
+                               break; // failed out before trying this op
+                       } elseif ( $subStatus->success[$i] ) {
+                               ++$successCount;
+                       } else {
+                               ++$failCount;
+                       }
+                       $success[] = $subStatus->success[$i];
+               }
+               $subStatus->success = $success;
+               $subStatus->successCount = $successCount;
+               $subStatus->failCount = $failCount;
+
+               // Merge errors into status fields
+               $status->merge( $subStatus );
+               $status->success = $subStatus->success; // not done in merge()
+
+               return $status;
+       }
+
+       /**
+        * Check that a set of files are consistent across all internal backends
+        *
+        * @param $paths Array
+        * @return Status
+        */
+       public function consistencyCheck( array $paths ) {
+               $status = Status::newGood();
+               if ( $this->syncChecks == 0 ) {
+                       return $status; // skip checks
+               }
+
+               $mBackend = $this->backends[$this->masterIndex];
+               foreach ( array_unique( $paths ) as $path ) {
+                       $params = array( 'src' => $path, 'latest' => true );
+                       // Stat the file on the 'master' backend
+                       $mStat = $mBackend->getFileStat( $this->substOpPaths( $params, $mBackend ) );
+                       // Check if all clone backends agree with the master...
+                       foreach ( $this->backends as $index => $cBackend ) {
+                               if ( $index === $this->masterIndex ) {
+                                       continue; // master
+                               }
+                               $cStat = $cBackend->getFileStat( $this->substOpPaths( $params, $cBackend ) );
+                               if ( $mStat ) { // file is in master
+                                       if ( !$cStat ) { // file should exist
+                                               $status->fatal( 'backend-fail-synced', $path );
+                                               continue;
+                                       }
+                                       if ( $this->syncChecks & self::CHECK_SIZE ) {
+                                               if ( $cStat['size'] != $mStat['size'] ) { // wrong size
+                                                       $status->fatal( 'backend-fail-synced', $path );
+                                                       continue;
+                                               }
+                                       }
+                                       if ( $this->syncChecks & self::CHECK_TIME ) {
+                                               $mTs = wfTimestamp( TS_UNIX, $mStat['mtime'] );
+                                               $cTs = wfTimestamp( TS_UNIX, $cStat['mtime'] );
+                                               if ( abs( $mTs - $cTs ) > 30 ) { // outdated file somewhere
+                                                       $status->fatal( 'backend-fail-synced', $path );
+                                                       continue;
+                                               }
+                                       }
+                               } else { // file is not in master
+                                       if ( $cStat ) { // file should not exist
+                                               $status->fatal( 'backend-fail-synced', $path );
+                                       }
+                               }
+                       }
+               }
 
                return $status;
        }
 
        /**
         * Substitute the backend name in storage path parameters
-        * for a set of operations with a that of a given backend.
+        * for a set of operations with that of a given internal backend.
         * 
         * @param $ops Array List of file operation arrays
-        * @param $backend FileBackend
+        * @param $backend FileBackendStore
         * @return Array
         */
-       protected function substOpPaths( array $ops, FileBackend $backend ) {
+       protected function substOpBatchPaths( array $ops, FileBackendStore $backend ) {
                $newOps = array(); // operations
                foreach ( $ops as $op ) {
                        $newOp = $op; // operation
-                       foreach ( array( 'src', 'srcs', 'dst' ) as $par ) {
-                               if ( isset( $newOp[$par] ) ) {
-                                       $newOp[$par] = preg_replace(
-                                               '!^mwstore://' . preg_quote( $this->name ) . '/!',
-                                               'mwstore://' . $backend->getName() . '/',
-                                               $newOp[$par] // string or array
-                                       );
+                       foreach ( array( 'src', 'srcs', 'dst', 'dir' ) as $par ) {
+                               if ( isset( $newOp[$par] ) ) { // string or array
+                                       $newOp[$par] = $this->substPaths( $newOp[$par], $backend );
                                }
                        }
                        $newOps[] = $newOp;
@@ -139,169 +240,189 @@ class FileBackendMultiWrite extends FileBackendBase {
        }
 
        /**
-        * Replace the backend part of storage paths with this backend's name
+        * Same as substOpBatchPaths() but for a single operation
         * 
-        * @param &$paths Array
-        * @return void 
+        * @param $ops Array File operation array
+        * @param $backend FileBackendStore
+        * @return Array
         */
-       protected function unsubstPaths( array &$paths ) {
-               foreach ( $paths as &$path ) {
-                       $path = preg_replace( '!^mwstore://([^/]+)!', "mwstore://{$this->name}", $path );
-               }
+       protected function substOpPaths( array $ops, FileBackendStore $backend ) {
+               $newOps = $this->substOpBatchPaths( array( $ops ), $backend );
+               return $newOps[0];
+       }
+
+       /**
+        * Substitute the backend of storage paths with an internal backend's name
+        * 
+        * @param $paths Array|string List of paths or single string path
+        * @param $backend FileBackendStore
+        * @return Array|string
+        */
+       protected function substPaths( $paths, FileBackendStore $backend ) {
+               return preg_replace(
+                       '!^mwstore://' . preg_quote( $this->name ) . '/!',
+                       StringUtils::escapeRegexReplacement( "mwstore://{$backend->getName()}/" ),
+                       $paths // string or array
+               );
+       }
+
+       /**
+        * Substitute the backend of internal storage paths with the proxy backend's name
+        * 
+        * @param $paths Array|string List of paths or single string path
+        * @return Array|string
+        */
+       protected function unsubstPaths( $paths ) {
+               return preg_replace(
+                       '!^mwstore://([^/]+)!',
+                       StringUtils::escapeRegexReplacement( "mwstore://{$this->name}" ),
+                       $paths // string or array
+               );
        }
 
        /**
-        * @see FileBackendBase::prepare()
+        * @see FileBackend::doPrepare()
+        * @return Status
         */
-       function prepare( array $params ) {
+       protected function doPrepare( array $params ) {
                $status = Status::newGood();
                foreach ( $this->backends as $backend ) {
                        $realParams = $this->substOpPaths( $params, $backend );
-                       $status->merge( $backend->prepare( $realParams ) );
+                       $status->merge( $backend->doPrepare( $realParams ) );
                }
                return $status;
        }
 
        /**
-        * @see FileBackendBase::secure()
+        * @see FileBackend::doSecure()
+        * @return Status
         */
-       function secure( array $params ) {
+       protected function doSecure( array $params ) {
                $status = Status::newGood();
                foreach ( $this->backends as $backend ) {
                        $realParams = $this->substOpPaths( $params, $backend );
-                       $status->merge( $backend->secure( $realParams ) );
+                       $status->merge( $backend->doSecure( $realParams ) );
                }
                return $status;
        }
 
        /**
-        * @see FileBackendBase::clean()
+        * @see FileBackend::doClean()
+        * @return Status
         */
-       function clean( array $params ) {
+       protected function doClean( array $params ) {
                $status = Status::newGood();
                foreach ( $this->backends as $backend ) {
                        $realParams = $this->substOpPaths( $params, $backend );
-                       $status->merge( $backend->clean( $realParams ) );
+                       $status->merge( $backend->doClean( $realParams ) );
                }
                return $status;
        }
 
        /**
-        * @see FileBackendBase::fileExists()
+        * @see FileBackend::concatenate()
         */
-       function fileExists( array $params ) {
-               # Hit all backends in case of failed operations (out of sync)
-               foreach ( $this->backends as $backend ) {
-                       $realParams = $this->substOpPaths( $params, $backend );
-                       if ( $backend->fileExists( $realParams ) ) {
-                               return true;
-                       }
-               }
-               return false;
+       public function concatenate( array $params ) {
+               // We are writing to an FS file, so we don't need to do this per-backend
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->concatenate( $realParams );
+       }
+
+       /**
+        * @see FileBackend::fileExists()
+        */
+       public function fileExists( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->fileExists( $realParams );
        }
 
        /**
-        * @see FileBackendBase::getFileTimestamp()
+        * @see FileBackend::getFileTimestamp()
         */
-       function getFileTimestamp( array $params ) {
-               // Skip non-master for consistent timestamps
+       public function getFileTimestamp( array $params ) {
                $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
                return $this->backends[$this->masterIndex]->getFileTimestamp( $realParams );
        }
 
        /**
-        * @see FileBackendBase::getFileSha1Base36()
+        * @see FileBackend::getFileSize()
         */
-       function getFileSha1Base36( array $params ) {
-               # Hit all backends in case of failed operations (out of sync)
-               foreach ( $this->backends as $backend ) {
-                       $realParams = $this->substOpPaths( $params, $backend );
-                       $hash = $backend->getFileSha1Base36( $realParams );
-                       if ( $hash !== false ) {
-                               return $hash;
-                       }
-               }
-               return false;
+       public function getFileSize( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getFileSize( $realParams );
        }
 
        /**
-        * @see FileBackendBase::getFileProps()
+        * @see FileBackend::getFileStat()
         */
-       function getFileProps( array $params ) {
-               # Hit all backends in case of failed operations (out of sync)
-               foreach ( $this->backends as $backend ) {
-                       $realParams = $this->substOpPaths( $params, $backend );
-                       $props = $backend->getFileProps( $realParams );
-                       if ( $props !== null ) {
-                               return $props;
-                       }
-               }
-               return null;
+       public function getFileStat( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getFileStat( $realParams );
        }
 
        /**
-        * @see FileBackendBase::streamFile()
+        * @see FileBackend::getFileContents()
         */
-       function streamFile( array $params ) {
-               $status = Status::newGood();
-               foreach ( $this->backends as $backend ) {
-                       $realParams = $this->substOpPaths( $params, $backend );
-                       $subStatus = $backend->streamFile( $realParams );
-                       $status->merge( $subStatus );
-                       if ( $subStatus->isOK() ) {
-                               // Pass isOK() despite fatals from other backends
-                               $status->setResult( true );
-                               return $status;
-                       } else { // failure
-                               if ( headers_sent() ) {
-                                       return $status; // died mid-stream...so this is already fubar
-                               } elseif ( strval( ob_get_contents() ) !== '' ) {
-                                       ob_clean(); // output was buffered but not sent; clear it
-                               }
-                       }
-               }
-               return $status;
+       public function getFileContents( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getFileContents( $realParams );
        }
 
        /**
-        * @see FileBackendBase::getLocalReference()
+        * @see FileBackend::getFileSha1Base36()
         */
-       function getLocalReference( array $params ) {
-               # Hit all backends in case of failed operations (out of sync)
-               foreach ( $this->backends as $backend ) {
-                       $realParams = $this->substOpPaths( $params, $backend );
-                       $fsFile = $backend->getLocalReference( $realParams );
-                       if ( $fsFile ) {
-                               return $fsFile;
-                       }
-               }
-               return null;
+       public function getFileSha1Base36( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getFileSha1Base36( $realParams );
        }
 
        /**
-        * @see FileBackendBase::getLocalCopy()
+        * @see FileBackend::getFileProps()
         */
-       function getLocalCopy( array $params ) {
-               # Hit all backends in case of failed operations (out of sync)
-               foreach ( $this->backends as $backend ) {
-                       $realParams = $this->substOpPaths( $params, $backend );
-                       $tmpFile = $backend->getLocalCopy( $realParams );
-                       if ( $tmpFile ) {
-                               return $tmpFile;
-                       }
-               }
-               return null;
+       public function getFileProps( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getFileProps( $realParams );
+       }
+
+       /**
+        * @see FileBackend::streamFile()
+        */
+       public function streamFile( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->streamFile( $realParams );
+       }
+
+       /**
+        * @see FileBackend::getLocalReference()
+        */
+       public function getLocalReference( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getLocalReference( $realParams );
+       }
+
+       /**
+        * @see FileBackend::getLocalCopy()
+        */
+       public function getLocalCopy( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getLocalCopy( $realParams );
        }
 
        /**
-        * @see FileBackendBase::getFileList()
+        * @see FileBackend::getFileList()
         */
-       function getFileList( array $params ) {
+       public function getFileList( array $params ) {
+               $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
+               return $this->backends[$this->masterIndex]->getFileList( $realParams );
+       }
+
+       /**
+        * @see FileBackend::clearCache()
+        */
+       public function clearCache( array $paths = null ) {
                foreach ( $this->backends as $backend ) {
-                       # Get results from the first backend
-                       $realParams = $this->substOpPaths( $params, $backend );
-                       return $backend->getFileList( $realParams );
+                       $realPaths = is_array( $paths ) ? $this->substPaths( $paths, $backend ) : null;
+                       $backend->clearCache( $realPaths );
                }
-               return array(); // sanity
        }
 }