* Added FileJournal class to log file changes for file backends. This can be used for migrations (like moving to Swift), syncing mirror repos, consistency checks, finishing/reverting operation batches, and such. The default journal is the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal (MySQL, SQLite).
Change-Id: I33c9f9a598ba1f164c862b9dc3c718f9172db02b
'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php',
'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php',
+ 'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
+ 'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php',
+ 'NullFileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
'LockManagerGroup' => 'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
'LockManager' => 'includes/filerepo/backend/lockmanager/LockManager.php',
'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',
protected $readOnly; // string; read-only explanation message
/** @var LockManager */
protected $lockManager;
+ /** @var FileJournal */
+ protected $fileJournal;
/**
* Create a new backend instance from configuration.
$this->lockManager = ( $config['lockManager'] instanceof LockManager )
? $config['lockManager']
: LockManagerGroup::singleton()->get( $config['lockManager'] );
+ $this->fileJournal = isset( $config['fileJournal'] )
+ ? FileJournal::factory( $config['fileJournal'], $this->name )
+ : FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );
$this->readOnly = isset( $config['readOnly'] )
? (string)$config['readOnly']
: '';
* 'allowStale' : Don't require the latest available data.
* This can increase performance for non-critical writes.
* This has no effect unless the 'force' flag is set.
+ * 'nonJournaled' : Don't log this operation batch in the file journal.
+ * This limits the ability of recovery scripts.
*
* Remarks on locking:
* File system paths given to operations should refer to files that are
}
// Actually attempt the operation batch...
- $subStatus = FileOp::attemptBatch( $performOps, $opts );
+ $subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
$success = array();
$failCount = 0;
$this->clearCache();
// Actually attempt the operation batch...
- $subStatus = FileOp::attemptBatch( $performOps, $opts );
+ $subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
// Merge errors into status fields
$status->merge( $subStatus );
protected $state = self::STATE_NEW; // integer
protected $failed = false; // boolean
protected $useLatest = true; // boolean
+ protected $batchId; // string
protected $sourceSha1; // string
protected $destSameAsSource; // boolean
$this->params = $params;
}
+ /**
+ * Record the UUID of the operation batch that this operation is part of.
+ * The stored value is included in the debug log line that is written when
+ * the operation fails, so a failure can be tied back to its batch.
+ *
+ * @param $batchId string Batch UUID
+ * @return void
+ */
+ final protected function setBatchId( $batchId ) {
+ $this->batchId = $batchId;
+ }
+
/**
* Whether to allow stale data for file reads and stat checks
*
}
/**
- * Attempt a series of file operations.
+ * Attempt to perform a series of file operations.
* Callers are responsible for handling file locking.
*
* $opts is an array of options, including:
- * 'force' : Errors that would normally cause a rollback do not.
- * The remaining operations are still attempted if any fail.
- * 'allowStale' : Don't require the latest available data.
- * This can increase performance for non-critical writes.
- * This has no effect unless the 'force' flag is set.
- *
+ * 'force' : Errors that would normally cause a rollback do not.
+ * The remaining operations are still attempted if any fail.
+ * 'allowStale' : Don't require the latest available data.
+ * This can increase performance for non-critical writes.
+ * This has no effect unless the 'force' flag is set.
+ * 'nonJournaled' : Don't log this operation batch in the file journal.
+ *
* The resulting Status will be "OK" unless:
* a) unexpected operation errors occurred (network partitions, disk full...)
* b) significant operation errors occurred and 'force' was not set
*
* @param $performOps Array List of FileOp operations
* @param $opts Array Batch operation options
+ * @param $journal FileJournal Journal to log operations to
* @return Status
*/
- final public static function attemptBatch( array $performOps, array $opts ) {
+ final public static function attemptBatch(
+ array $performOps, array $opts, FileJournal $journal
+ ) {
$status = Status::newGood();
- $allowStale = !empty( $opts['allowStale'] );
- $ignoreErrors = !empty( $opts['force'] );
-
$n = count( $performOps );
if ( $n > self::MAX_BATCH_SIZE ) {
$status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE );
return $status;
}
+ $batchId = $journal->getTimestampedUUID();
+ $allowStale = !empty( $opts['allowStale'] );
+ $ignoreErrors = !empty( $opts['force'] );
+ $journaled = empty( $opts['nonJournaled'] );
+
+ $entries = array(); // file journal entries
$predicates = FileOp::newPredicates(); // account for previous op in prechecks
// Do pre-checks for each operation; abort on failure...
foreach ( $performOps as $index => $fileOp ) {
+ $fileOp->setBatchId( $batchId );
$fileOp->allowStaleReads( $allowStale );
- $subStatus = $fileOp->precheck( $predicates );
+ $oldPredicates = $predicates;
+ $subStatus = $fileOp->precheck( $predicates ); // updates $predicates
$status->merge( $subStatus );
- if ( !$subStatus->isOK() ) { // operation failed?
+ if ( $subStatus->isOK() ) {
+ if ( $journaled ) { // journal log entry
+ $entries = array_merge( $entries,
+ self::getJournalEntries( $fileOp, $oldPredicates, $predicates ) );
+ }
+ } else { // operation failed?
$status->success[$index] = false;
++$status->failCount;
if ( !$ignoreErrors ) {
}
}
- if ( $ignoreErrors ) {
- # Treat all precheck() fatals as merely warnings
+ // Log the operations in file journal...
+ if ( count( $entries ) ) {
+ $subStatus = $journal->logChangeBatch( $entries, $batchId );
+ if ( !$subStatus->isOK() ) {
+ return $subStatus; // abort
+ }
+ }
+
+ if ( $ignoreErrors ) { // treat precheck() fatals as mere warnings
$status->setResult( true, $status->value );
}
return $status;
}
+	/**
+	 * Get the file journal entries for a single file operation.
+	 *
+	 * The entries are returned in three groups, in this order:
+	 *   - one 'null' entry per distinct path read or changed by the operation,
+	 *     holding the value of $fileOp->fileSha1() evaluated against
+	 *     $oPredicates (assertion for recovery)
+	 *   - 'create'/'update' entries for changed paths whose post-op SHA-1 is set
+	 *   - 'delete' entries for changed paths whose post-op SHA-1 is false
+	 *
+	 * @param $fileOp FileOp
+	 * @param $oPredicates Array Pre-op information about files
+	 * @param $nPredicates Array Post-op information about files
+	 * @return Array List of entries, each with 'op', 'path' and 'newSha1' keys
+	 */
+	final protected static function getJournalEntries(
+		FileOp $fileOp, array $oPredicates, array $nPredicates
+	) {
+		$nullEntries = array();
+		$updateEntries = array();
+		$deleteEntries = array();
+		// A path can be both read and changed by the same operation; de-duplicate
+		// so that each path gets exactly one 'null' assertion entry.
+		$pathsUsed = array_unique(
+			array_merge( $fileOp->storagePathsRead(), $fileOp->storagePathsChanged() )
+		);
+		foreach ( $pathsUsed as $path ) {
+			$nullEntries[] = array( // assertion for recovery
+				'op'      => 'null',
+				'path'    => $path,
+				'newSha1' => $fileOp->fileSha1( $path, $oPredicates )
+			);
+		}
+		foreach ( $fileOp->storagePathsChanged() as $path ) {
+			if ( $nPredicates['sha1'][$path] === false ) { // deleted
+				$deleteEntries[] = array(
+					'op'      => 'delete',
+					'path'    => $path,
+					'newSha1' => ''
+				);
+			} else { // created/updated
+				$updateEntries[] = array(
+					// 'update' if the file existed before this operation, else 'create'
+					'op'      => $fileOp->fileExists( $path, $oPredicates ) ? 'update' : 'create',
+					'path'    => $path,
+					'newSha1' => $nPredicates['sha1'][$path]
+				);
+			}
+		}
+		return array_merge( $nullEntries, $updateEntries, $deleteEntries );
+	}
+
/**
* Get the value of the parameter with the given name
*
$params = $this->params;
$params['failedAction'] = $action;
try {
- wfDebugLog( 'FileOperation',
- get_class( $this ) . ' failed: ' . FormatJson::encode( $params ) );
+ wfDebugLog( 'FileOperation', get_class( $this ) .
+ " failed (batch #{$this->batchId}): " . FormatJson::encode( $params ) );
} catch ( Exception $e ) {
// bad config? debug log error?
}
--- /dev/null
+<?php
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * Version of FileJournal that logs to a DB table
+ * @since 1.20
+ */
+class DBFileJournal extends FileJournal {
+	protected $wiki = false; // string; wiki DB name
+
+	/**
+	 * Construct a new instance from configuration.
+	 * $config includes:
+	 *     'wiki' : wiki name to use for LoadBalancer (optional; the declared
+	 *              default of false is kept when it is not given)
+	 *
+	 * @param $config Array
+	 */
+	protected function __construct( array $config ) {
+		parent::__construct( $config );
+
+		// Only override the default when the key is present, rather than
+		// reading an undefined array index.
+		if ( isset( $config['wiki'] ) ) {
+			$this->wiki = $config['wiki'];
+		}
+	}
+
+	/**
+	 * Insert one 'filejournal' row per entry inside a single transaction.
+	 *
+	 * @see FileJournal::logChangeBatch()
+	 * @param $entries Array List of entries, each with 'op', 'path' and 'newSha1' keys
+	 * @param $batchId string UUID string that identifies the operation batch
+	 * @return Status Fatal if the DB connection or the write failed
+	 */
+	protected function doLogChangeBatch( array $entries, $batchId ) {
+		$status = Status::newGood();
+
+		$dbw = $this->getMasterDB();
+		if ( !$dbw ) {
+			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+			return $status;
+		}
+		// All rows of a batch share one timestamp, so format it only once
+		$dbTimestamp = $dbw->timestamp( wfTimestamp( TS_UNIX ) );
+
+		$data = array();
+		foreach ( $entries as $entry ) {
+			$data[] = array(
+				'fj_batch_uuid' => $batchId,
+				'fj_backend'    => $this->backend,
+				'fj_op'         => $entry['op'],
+				'fj_path'       => $entry['path'],
+				'fj_path_sha1'  => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
+				'fj_new_sha1'   => $entry['newSha1'],
+				'fj_timestamp'  => $dbTimestamp
+			);
+		}
+
+		try {
+			$dbw->begin();
+			$dbw->insert( 'filejournal', $data, __METHOD__ );
+			$dbw->commit();
+		} catch ( DBError $e ) {
+			// Do not leave a half-finished transaction open on the connection
+			$this->rollbackQuietly( $dbw, __METHOD__ );
+			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
+			return $status;
+		}
+
+		return $status;
+	}
+
+	/**
+	 * Delete the rows whose timestamp is older than 'ttlDays' days.
+	 *
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status Fatal if the DB connection or the delete failed
+	 */
+	protected function doPurgeOldLogs() {
+		$status = Status::newGood();
+		if ( $this->ttlDays <= 0 ) {
+			return $status; // nothing to do
+		}
+
+		$dbw = $this->getMasterDB();
+		if ( !$dbw ) {
+			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+			return $status;
+		}
+		$dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays );
+
+		try {
+			$dbw->begin();
+			$dbw->delete( 'filejournal',
+				array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ),
+				__METHOD__
+			);
+			$dbw->commit();
+		} catch ( DBError $e ) {
+			// Do not leave a half-finished transaction open on the connection
+			$this->rollbackQuietly( $dbw, __METHOD__ );
+			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
+			return $status;
+		}
+
+		return $status;
+	}
+
+	/**
+	 * Get a master connection to the logging DB
+	 *
+	 * @return DatabaseBase|null Null if the connection attempt failed
+	 */
+	protected function getMasterDB() {
+		try {
+			$lb = wfGetLBFactory()->newMainLB();
+			return $lb->getConnection( DB_MASTER, array(), $this->wiki );
+		} catch ( DBConnectionError $e ) {
+			return null;
+		}
+	}
+
+	/**
+	 * Roll back the transaction of a failed write, ignoring any DB error
+	 * raised by the rollback itself (the caller already reports the failure).
+	 *
+	 * @param $dbw DatabaseBase
+	 * @param $fname string Name of the calling method
+	 * @return void
+	 */
+	private function rollbackQuietly( $dbw, $fname ) {
+		try {
+			$dbw->rollback( $fname );
+		} catch ( DBError $e ) {
+			// Nothing more can be done here
+		}
+	}
+}
--- /dev/null
+<?php
+/**
+ * @defgroup FileJournal File journal
+ * @ingroup FileBackend
+ */
+
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * @brief Class for handling file operation journaling.
+ *
+ * Subclasses should avoid throwing exceptions at all costs.
+ *
+ * @ingroup FileJournal
+ * @since 1.20
+ */
+abstract class FileJournal {
+	protected $backend; // string
+	protected $ttlDays; // integer
+
+	/**
+	 * Construct a new instance from configuration.
+	 * $config includes:
+	 *     'ttlDays' : days to keep log entries around (false means "forever")
+	 *
+	 * @param $config Array
+	 */
+	protected function __construct( array $config ) {
+		$this->ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false;
+	}
+
+	/**
+	 * Create an appropriate FileJournal object from config.
+	 * $config must include:
+	 *     'class' : name of the FileJournal subclass to instantiate
+	 * The whole $config array is passed on to the constructor of that class.
+	 *
+	 * @param $config Array
+	 * @param $backend string A registered file backend name
+	 * @return FileJournal
+	 * @throws MWException If no class name is given or the class is not a FileJournal
+	 */
+	final public static function factory( array $config, $backend ) {
+		// Fail with a catchable exception rather than a PHP error on bad config
+		if ( !isset( $config['class'] ) || !is_string( $config['class'] ) ) {
+			throw new MWException( "No FileJournal class name was given." );
+		}
+		$class = $config['class'];
+		$jrn = new $class( $config );
+		if ( !$jrn instanceof self ) {
+			throw new MWException( "Class given is not an instance of FileJournal." );
+		}
+		$jrn->backend = $backend;
+		return $jrn;
+	}
+
+	/**
+	 * Get a statistically unique ID string
+	 *
+	 * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars>
+	 */
+	final public function getTimestampedUUID() {
+		// Build a random seed out of five random integers...
+		$s = '';
+		for ( $i = 0; $i < 5; $i++ ) {
+			$s .= mt_rand( 0, 2147483647 );
+		}
+		// ...and hash it into a base 36 string
+		$s = wfBaseConvert( sha1( $s ), 16, 36, 31 );
+		// Prefix the timestamp and cut the result down to 31 characters in total
+		return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 );
+	}
+
+	/**
+	 * Log changes made by a batch file operation.
+	 * $entries is an array of log entries, each of which contains:
+	 *     op      : Basic operation name (null, create, update, delete)
+	 *     path    : The storage path of the file
+	 *     newSha1 : The final base 36 SHA-1 of the file
+	 * Note that 'false' should be used as the SHA-1 for non-existing files.
+	 *
+	 * An empty list of entries is a no-op that yields a good Status.
+	 *
+	 * @param $entries Array List of file operations (each an array of parameters)
+	 * @param $batchId string UUID string that identifies the operation batch
+	 * @return Status
+	 */
+	final public function logChangeBatch( array $entries, $batchId ) {
+		if ( !count( $entries ) ) {
+			return Status::newGood();
+		}
+		return $this->doLogChangeBatch( $entries, $batchId );
+	}
+
+	/**
+	 * @see FileJournal::logChangeBatch()
+	 *
+	 * @param $entries Array List of file operations (each an array of parameters)
+	 * @param $batchId string UUID string that identifies the operation batch
+	 * @return Status
+	 */
+	abstract protected function doLogChangeBatch( array $entries, $batchId );
+
+	/**
+	 * Purge any old log entries
+	 *
+	 * @return Status
+	 */
+	final public function purgeOldLogs() {
+		return $this->doPurgeOldLogs();
+	}
+
+	/**
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status
+	 */
+	abstract protected function doPurgeOldLogs();
+}
+
+/**
+ * Simple version of FileJournal that does nothing.
+ * Both hooks return a good Status without recording or deleting anything.
+ * This is the journal used for backends that have no 'fileJournal' configured.
+ * @since 1.20
+ */
+class NullFileJournal extends FileJournal {
+ /**
+ * Accept the batch without storing it anywhere.
+ *
+ * @see FileJournal::logChangeBatch()
+ * @param $entries Array List of file operations (ignored)
+ * @param $batchId string UUID string that identifies the operation batch (ignored)
+ * @return Status Always good
+ */
+ protected function doLogChangeBatch( array $entries, $batchId ) {
+ return Status::newGood();
+ }
+
+ /**
+ * There are never any entries to purge.
+ *
+ * @see FileJournal::purgeOldLogs()
+ * @return Status Always good
+ */
+ protected function doPurgeOldLogs() {
+ return Status::newGood();
+ }
+}
'backend-fail-contenttype' => 'Could not determine the content type of the file to store at "$1".',
'backend-fail-batchsize' => 'Storage backend given a batch of $1 file {{PLURAL:$1|operation|operations}}; the limit is $2 {{PLURAL:$2|operation|operations}}.',
+# File journal
+'filejournal-fail-dbconnect' => 'Could not connect to the journal database for storage backend "$1".',
+'filejournal-fail-dbquery' => 'Could not update the journal database for storage backend "$1".',
+
# Lock manager
'lockmanager-notlocked' => 'Could not unlock "$1"; it is not locked.',
'lockmanager-fail-closelock' => 'Could not close lock file for "$1".',
--- /dev/null
+-- File backend operation journal.
+-- DBFileJournal inserts one row per journal entry of an operation batch.
+CREATE TABLE /*_*/filejournal (
+ -- Unique ID for each journal entry
+ fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
+ -- UUID of the batch this operation belongs to
+ -- (31 characters, as generated by FileJournal::getTimestampedUUID())
+ fj_batch_uuid varbinary(32) NOT NULL,
+ -- The registered file backend name
+ fj_backend varchar(255) NOT NULL,
+ -- The storage path that was affected (may be internal paths)
+ fj_path blob NOT NULL,
+ -- SHA-1 file path hash in base-36
+ fj_path_sha1 varbinary(32) NOT NULL default '',
+ -- Primitive operation description (null/create/update/delete);
+ -- 'null' rows are assertions about a path's state before the operation
+ fj_op varchar(16) NOT NULL default '',
+ -- SHA-1 file content hash in base-36; empty for 'delete' rows
+ fj_new_sha1 varbinary(32) NOT NULL default '',
+ -- Timestamp of the batch operation (the same for all rows of one batch)
+ fj_timestamp varbinary(14) NOT NULL default ''
+);
+
+-- Entries of one batch, by entry ID
+CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
+-- Entries for one storage path (by path hash), by entry ID
+CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
+-- Entries that resulted in a given content hash, by entry ID
+CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
+-- Supports the timestamp range delete in DBFileJournal::doPurgeOldLogs()
+CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);
'backend-fail-batchsize'
),
+ 'filejournal-errors' => array(
+ 'filejournal-fail-dbconnect',
+ 'filejournal-fail-dbquery'
+ ),
+
'lockmanager-errors' => array(
'lockmanager-notlocked',
'lockmanager-fail-closelock',