From: Aaron Schulz
Date: Tue, 13 Mar 2012 01:46:33 +0000 (+0000)
Subject: [FileBackend]
X-Git-Tag: 1.31.0-rc.0~24278
X-Git-Url: http://git.cyclocoop.org/%27.parametre_url%28%20%20%20generer_action_auteur%28%27charger_plugin%27%2C%20%27update_flux%27%29%2C%27update_flux%27%2C%20%27oui%27%29.%27?a=commitdiff_plain;h=d19f54602fb7a51184a1c7d53c572b7ff5288255;p=lhc%2Fweb%2Fwiklou.git

[FileBackend]
* Added a FileJournal class to log file changes made by file backends. This can
  be used for migrations (such as moving to Swift), syncing mirror repos,
  consistency checks, finishing or reverting operation batches, and so on.
  The default journal is the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal
  (MySQL, SQLite).
---

diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php
index 91c6c2e1cb..b59efe669e 100644
--- a/includes/AutoLoader.php
+++ b/includes/AutoLoader.php
@@ -507,6 +507,9 @@ $wgAutoloadLocalClasses = array(
 	'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php',
 	'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
 	'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php',
+	'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
+	'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php',
+	'NullFileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
 	'LockManagerGroup' => 'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
 	'LockManager' => 'includes/filerepo/backend/lockmanager/LockManager.php',
 	'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',
diff --git a/includes/filerepo/backend/FileBackend.php b/includes/filerepo/backend/FileBackend.php
index 7371cc9560..e0f654a03f 100644
--- a/includes/filerepo/backend/FileBackend.php
+++ b/includes/filerepo/backend/FileBackend.php
@@ -45,6 +45,8 @@ abstract class FileBackend {
 	protected $readOnly; // string; read-only explanation message
 	/** @var LockManager */
 	protected $lockManager;
+	/** @var FileJournal */
+	protected $fileJournal;
 
 	/**
 	 * Create a new backend instance from configuration.
@@ -73,6 +75,9 @@ abstract class FileBackend {
 		$this->lockManager = ( $config['lockManager'] instanceof LockManager )
 			? $config['lockManager']
 			: LockManagerGroup::singleton()->get( $config['lockManager'] );
+		$this->fileJournal = isset( $config['fileJournal'] )
+			? FileJournal::factory( $config['fileJournal'], $this->name )
+			: FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );
 		$this->readOnly = isset( $config['readOnly'] )
 			? (string)$config['readOnly']
 			: '';
@@ -177,6 +182,8 @@ abstract class FileBackend {
 	 * 'allowStale'   : Don't require the latest available data.
 	 *                  This can increase performance for non-critical writes.
 	 *                  This has no effect unless the 'force' flag is set.
+	 * 'nonJournaled' : Don't log this operation batch in the file journal.
+	 *                  This limits the ability of recovery scripts.
 	 *
 	 * Remarks on locking:
 	 * File system paths given to operations should refer to files that are
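To illustrate the constructor change above: the new 'fileJournal' key of a backend's configuration is handed to FileJournal::factory(), and the journal falls back to NullFileJournal when the key is absent. A minimal sketch of a backend wired to the DB-backed journal might look as follows; the backend name, lock manager name and container paths are illustrative values and not part of this commit, while 'class', 'wiki' and 'ttlDays' are the keys read by FileJournal::factory(), DBFileJournal and FileJournal in the files added further down.

<?php
// Sketch only: a backend configuration that enables the DB-backed file journal.
// 'name', 'lockManager' and 'containerPaths' values here are assumptions for
// the example; 'fileJournal' is the new key handled by the FileBackend
// constructor shown above.
$backend = new FSFileBackend( array(
    'name'           => 'local-backend',  // registered backend name (assumed)
    'lockManager'    => 'fsLockManager',  // registered lock manager name (assumed)
    'containerPaths' => array( 'media-public' => '/srv/uploads/public' ), // assumed
    'fileJournal'    => array(
        'class'   => 'DBFileJournal', // class instantiated by FileJournal::factory()
        'wiki'    => 'mywiki',        // DB name read by DBFileJournal ($config['wiki'])
        'ttlDays' => 90               // used by purgeOldLogs(); false means never purge
    )
) );
// Without a 'fileJournal' key the constructor uses the no-op journal:
// FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );

Any operation batch run through such a backend is then recorded by the journal unless the caller passes the new 'nonJournaled' batch option.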
diff --git a/includes/filerepo/backend/FileBackendMultiWrite.php b/includes/filerepo/backend/FileBackendMultiWrite.php
index 52c71d6f02..9c3cf5b5e7 100644
--- a/includes/filerepo/backend/FileBackendMultiWrite.php
+++ b/includes/filerepo/backend/FileBackendMultiWrite.php
@@ -133,7 +133,7 @@ class FileBackendMultiWrite extends FileBackend {
 		}
 
 		// Actually attempt the operation batch...
-		$subStatus = FileOp::attemptBatch( $performOps, $opts );
+		$subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
 
 		$success = array();
 		$failCount = 0;
diff --git a/includes/filerepo/backend/FileBackendStore.php b/includes/filerepo/backend/FileBackendStore.php
index e96f257c8f..ff32925f05 100644
--- a/includes/filerepo/backend/FileBackendStore.php
+++ b/includes/filerepo/backend/FileBackendStore.php
@@ -708,7 +708,7 @@ abstract class FileBackendStore extends FileBackend {
 		$this->clearCache();
 
 		// Actually attempt the operation batch...
-		$subStatus = FileOp::attemptBatch( $performOps, $opts );
+		$subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
 
 		// Merge errors into status fields
 		$status->merge( $subStatus );
diff --git a/includes/filerepo/backend/FileOp.php b/includes/filerepo/backend/FileOp.php
index 825a666b55..6cee9f9a85 100644
--- a/includes/filerepo/backend/FileOp.php
+++ b/includes/filerepo/backend/FileOp.php
@@ -24,6 +24,7 @@ abstract class FileOp {
 	protected $state = self::STATE_NEW; // integer
 	protected $failed = false; // boolean
 	protected $useLatest = true; // boolean
+	protected $batchId; // string
 
 	protected $sourceSha1; // string
 	protected $destSameAsSource; // boolean
@@ -62,6 +63,16 @@ abstract class FileOp {
 		$this->params = $params;
 	}
 
+	/**
+	 * Set the batch UUID this operation belongs to
+	 *
+	 * @param $batchId string
+	 * @return void
+	 */
+	final protected function setBatchId( $batchId ) {
+		$this->batchId = $batchId;
+	}
+
 	/**
 	 * Whether to allow stale data for file reads and stat checks
 	 *
@@ -73,43 +84,57 @@ abstract class FileOp {
 	}
 
 	/**
-	 * Attempt a series of file operations.
+	 * Attempt to perform a series of file operations.
 	 * Callers are responsible for handling file locking.
 	 *
 	 * $opts is an array of options, including:
-	 * 'force'      : Errors that would normally cause a rollback do not.
-	 *                The remaining operations are still attempted if any fail.
-	 * 'allowStale' : Don't require the latest available data.
-	 *                This can increase performance for non-critical writes.
-	 *                This has no effect unless the 'force' flag is set.
-	 *
+	 * 'force'        : Errors that would normally cause a rollback do not.
+	 *                  The remaining operations are still attempted if any fail.
+	 * 'allowStale'   : Don't require the latest available data.
+	 *                  This can increase performance for non-critical writes.
+	 *                  This has no effect unless the 'force' flag is set.
+	 * 'nonJournaled' : Don't log this operation batch in the file journal.
+	 *
 	 * The resulting Status will be "OK" unless:
 	 * a) unexpected operation errors occurred (network partitions, disk full...)
 	 * b) significant operation errors occurred and 'force' was not set
 	 *
 	 * @param $performOps Array List of FileOp operations
 	 * @param $opts Array Batch operation options
+	 * @param $journal FileJournal Journal to log operations to
 	 * @return Status
 	 */
-	final public static function attemptBatch( array $performOps, array $opts ) {
+	final public static function attemptBatch(
+		array $performOps, array $opts, FileJournal $journal
+	) {
 		$status = Status::newGood();
 
-		$allowStale = !empty( $opts['allowStale'] );
-		$ignoreErrors = !empty( $opts['force'] );
-
 		$n = count( $performOps );
 		if ( $n > self::MAX_BATCH_SIZE ) {
 			$status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE );
 			return $status;
 		}
 
+		$batchId = $journal->getTimestampedUUID();
+		$allowStale = !empty( $opts['allowStale'] );
+		$ignoreErrors = !empty( $opts['force'] );
+		$journaled = empty( $opts['nonJournaled'] );
+
+		$entries = array(); // file journal entries
 		$predicates = FileOp::newPredicates(); // account for previous op in prechecks
 		// Do pre-checks for each operation; abort on failure...
 		foreach ( $performOps as $index => $fileOp ) {
+			$fileOp->setBatchId( $batchId );
 			$fileOp->allowStaleReads( $allowStale );
-			$subStatus = $fileOp->precheck( $predicates );
+			$oldPredicates = $predicates;
+			$subStatus = $fileOp->precheck( $predicates ); // updates $predicates
 			$status->merge( $subStatus );
-			if ( !$subStatus->isOK() ) { // operation failed?
+			if ( $subStatus->isOK() ) {
+				if ( $journaled ) { // journal log entry
+					$entries = array_merge( $entries,
+						self::getJournalEntries( $fileOp, $oldPredicates, $predicates ) );
+				}
+			} else { // operation failed?
 				$status->success[$index] = false;
 				++$status->failCount;
 				if ( !$ignoreErrors ) {
@@ -118,8 +143,15 @@ abstract class FileOp {
 			}
 		}
 
-		if ( $ignoreErrors ) {
-			# Treat all precheck() fatals as merely warnings
+		// Log the operations in file journal...
+		if ( count( $entries ) ) {
+			$subStatus = $journal->logChangeBatch( $entries, $batchId );
+			if ( !$subStatus->isOK() ) {
+				return $subStatus; // abort
+			}
+		}
+
+		if ( $ignoreErrors ) { // treat precheck() fatals as mere warnings
 			$status->setResult( true, $status->value );
 		}
 
@@ -154,6 +186,46 @@ abstract class FileOp {
 		return $status;
 	}
 
+	/**
+	 * Get the file journal entries for a single file operation
+	 *
+	 * @param $fileOp FileOp
+	 * @param $oPredicates Array Pre-op information about files
+	 * @param $nPredicates Array Post-op information about files
+	 * @return Array
+	 */
+	final protected static function getJournalEntries(
+		FileOp $fileOp, array $oPredicates, array $nPredicates
+	) {
+		$nullEntries = array();
+		$updateEntries = array();
+		$deleteEntries = array();
+		$pathsUsed = array_merge( $fileOp->storagePathsRead(), $fileOp->storagePathsChanged() );
+		foreach ( $pathsUsed as $path ) {
+			$nullEntries[] = array( // assertion for recovery
+				'op' => 'null',
+				'path' => $path,
+				'newSha1' => $fileOp->fileSha1( $path, $oPredicates )
+			);
+		}
+		foreach ( $fileOp->storagePathsChanged() as $path ) {
+			if ( $nPredicates['sha1'][$path] === false ) { // deleted
+				$deleteEntries[] = array(
+					'op' => 'delete',
+					'path' => $path,
+					'newSha1' => ''
+				);
+			} else { // created/updated
+				$updateEntries[] = array(
+					'op' => $fileOp->fileExists( $path, $oPredicates ) ? 'update' : 'create',
+					'path' => $path,
+					'newSha1' => $nPredicates['sha1'][$path]
+				);
+			}
+		}
+		return array_merge( $nullEntries, $updateEntries, $deleteEntries );
+	}
+
 	/**
 	 * Get the value of the parameter with the given name
 	 *
@@ -352,8 +424,8 @@ abstract class FileOp {
 		$params = $this->params;
 		$params['failedAction'] = $action;
 		try {
-			wfDebugLog( 'FileOperation',
-				get_class( $this ) . ' failed: ' . FormatJson::encode( $params ) );
+			wfDebugLog( 'FileOperation', get_class( $this ) .
+				" failed (batch #{$this->batchId}): " . FormatJson::encode( $params ) );
 		} catch ( Exception $e ) { // bad config? debug log error?
 		}
 	}
diff --git a/includes/filerepo/backend/filejournal/DBFileJournal.php b/includes/filerepo/backend/filejournal/DBFileJournal.php
new file mode 100644
index 0000000000..1eb9ecada1
--- /dev/null
+++ b/includes/filerepo/backend/filejournal/DBFileJournal.php
@@ -0,0 +1,112 @@
+wiki = $config['wiki'];
+	}
+
+	/**
+	 * @see FileJournal::logChangeBatch()
+	 * @return Status
+	 */
+	protected function doLogChangeBatch( array $entries, $batchId ) {
+		$status = Status::newGood();
+
+		$dbw = $this->getMasterDB();
+		if ( !$dbw ) {
+			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+			return $status;
+		}
+		$now = wfTimestamp( TS_UNIX );
+
+		$data = array();
+		foreach ( $entries as $entry ) {
+			$data[] = array(
+				'fj_batch_uuid' => $batchId,
+				'fj_backend' => $this->backend,
+				'fj_op' => $entry['op'],
+				'fj_path' => $entry['path'],
+				'fj_path_sha1' => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
+				'fj_new_sha1' => $entry['newSha1'],
+				'fj_timestamp' => $dbw->timestamp( $now )
+			);
+		}
+
+		try {
+			$dbw->begin();
+			$dbw->insert( 'filejournal', $data, __METHOD__ );
+			$dbw->commit();
+		} catch ( DBError $e ) {
+			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
+			return $status;
+		}
+
+		return $status;
+	}
+
+	/**
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status
+	 */
+	protected function doPurgeOldLogs() {
+		$status = Status::newGood();
+		if ( $this->ttlDays <= 0 ) {
+			return $status; // nothing to do
+		}
+
+		$dbw = $this->getMasterDB();
+		if ( !$dbw ) {
+			$status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+			return $status;
+		}
+		$dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays );
+
+		try {
+			$dbw->begin();
+			$dbw->delete( 'filejournal',
+				array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ),
+				__METHOD__
+			);
+			$dbw->commit();
+		} catch ( DBError $e ) {
+			$status->fatal( 'filejournal-fail-dbquery', $this->backend );
+			return $status;
+		}
+
+		return $status;
+	}
+
+	/**
+	 * Get a master connection to the logging DB
+	 *
+	 * @return DatabaseBase|null
+	 */
+	protected function getMasterDB() {
+		try {
+			$lb = wfGetLBFactory()->newMainLB();
+			return $lb->getConnection( DB_MASTER, array(), $this->wiki );
+		} catch ( DBConnectionError $e ) {
+			return null;
+		}
+	}
+}
diff --git a/includes/filerepo/backend/filejournal/FileJournal.php b/includes/filerepo/backend/filejournal/FileJournal.php
new file mode 100644
index 0000000000..f60b7f9b4e
--- /dev/null
+++ b/includes/filerepo/backend/filejournal/FileJournal.php
@@ -0,0 +1,131 @@
+ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false;
+	}
+
+	/**
+	 * Create an appropriate FileJournal object from config
+	 *
+	 * @param $config Array
+	 * @param $backend string A registered file backend name
+	 * @return FileJournal
+	 */
+	final public static function factory( array $config, $backend ) {
+		$class = $config['class'];
+		$jrn = new $class( $config );
+		if ( !$jrn instanceof self ) {
+			throw new MWException( "Class given is not an instance of FileJournal." );
+		}
+		$jrn->backend = $backend;
+		return $jrn;
+	}
+
+	/**
+	 * Get a statistically unique ID string
+	 *
+	 * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars>
+	 */
+	final public function getTimestampedUUID() {
+		$s = '';
+		for ( $i = 0; $i < 5; $i++ ) {
+			$s .= mt_rand( 0, 2147483647 );
+		}
+		$s = wfBaseConvert( sha1( $s ), 16, 36, 31 );
+		return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 );
+	}
+
+	/**
+	 * Log changes made by a batch file operation.
+	 * $entries is an array of log entries, each of which contains:
+	 *   op      : Basic operation name (create, store, copy, delete)
+	 *   path    : The storage path of the file
+	 *   newSha1 : The final base 36 SHA-1 of the file
+	 * Note that 'false' should be used as the SHA-1 for non-existing files.
+	 *
+	 * @param $entries Array List of file operations (each an array of parameters)
+	 * @param $batchId string UUID string that identifies the operation batch
+	 * @return Status
+	 */
+	final public function logChangeBatch( array $entries, $batchId ) {
+		if ( !count( $entries ) ) {
+			return Status::newGood();
+		}
+		return $this->doLogChangeBatch( $entries, $batchId );
+	}
+
+	/**
+	 * @see FileJournal::logChangeBatch()
+	 *
+	 * @param $entries Array List of file operations (each an array of parameters)
+	 * @param $batchId string UUID string that identifies the operation batch
+	 * @return Status
+	 */
+	abstract protected function doLogChangeBatch( array $entries, $batchId );
+
+	/**
+	 * Purge any old log entries
+	 *
+	 * @return Status
+	 */
+	final public function purgeOldLogs() {
+		return $this->doPurgeOldLogs();
+	}
+
+	/**
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status
+	 */
+	abstract protected function doPurgeOldLogs();
+}
+
+/**
+ * Simple version of FileJournal that does nothing
+ * @since 1.20
+ */
+class NullFileJournal extends FileJournal {
+	/**
+	 * @see FileJournal::logChangeBatch()
+	 * @return Status
+	 */
+	protected function doLogChangeBatch( array $entries, $batchId ) {
+		return Status::newGood();
+	}
+
+	/**
+	 * @see FileJournal::purgeOldLogs()
+	 * @return Status
+	 */
+	protected function doPurgeOldLogs() {
+		return Status::newGood();
+	}
+}
diff --git a/languages/messages/MessagesEn.php b/languages/messages/MessagesEn.php
index c6d09242a7..61b466fa74 100644
--- a/languages/messages/MessagesEn.php
+++ b/languages/messages/MessagesEn.php
@@ -2274,6 +2274,10 @@ If the problem persists, contact an [[Special:ListUsers/sysop|administrator]].',
 'backend-fail-contenttype' => 'Could not determine the content type of the file to store at "$1".',
 'backend-fail-batchsize' => 'Storage backend given a batch of $1 file {{PLURAL:$1|operation|operations}}; the limit is $2 {{PLURAL:$2|operation|operations}}.',
 
+# File journal
+'filejournal-fail-dbconnect' => 'Could not connect to the journal database for storage backend "$1".',
+'filejournal-fail-dbquery' => 'Could not update the journal database for storage backend "$1".',
+
 # Lock manager
 'lockmanager-notlocked' => 'Could not unlock "$1"; it is not locked.',
 'lockmanager-fail-closelock' => 'Could not close lock file for "$1".',
diff --git a/maintenance/archives/patch-filejournal.sql b/maintenance/archives/patch-filejournal.sql
new file mode 100644
index 0000000000..b7a7d09f7b
--- /dev/null
+++ b/maintenance/archives/patch-filejournal.sql
@@ -0,0 +1,24 @@
+-- File backend operation journal
+CREATE TABLE /*_*/filejournal (
+  -- Unique ID for each file operation
+  fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
+  -- UUID of the batch this operation belongs to
+  fj_batch_uuid varbinary(32) NOT NULL,
+  -- The registered file backend name
+  fj_backend varchar(255) NOT NULL,
+  -- The storage path that was affected (may be internal paths)
+  fj_path blob NOT NULL,
+  -- SHA-1 file path hash in base-36
+  fj_path_sha1 varbinary(32) NOT NULL default '',
+  -- Primitive operation description (create/update/delete)
+  fj_op varchar(16) NOT NULL default '',
+  -- SHA-1 file content hash in base-36
+  fj_new_sha1 varbinary(32) NOT NULL default '',
+  -- Timestamp of the batch operation
+  fj_timestamp varbinary(14) NOT NULL default ''
+);
+
+CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
+CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
+CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
+CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);
diff --git a/maintenance/language/messages.inc b/maintenance/language/messages.inc
index c9afbfa87e..6e6e8d0bc5 100644
--- a/maintenance/language/messages.inc
+++ b/maintenance/language/messages.inc
@@ -1377,6 +1377,11 @@ $wgMessageStructure = array(
 		'backend-fail-batchsize'
 	),
 
+	'filejournal-errors' => array(
+		'filejournal-fail-dbconnect',
+		'filejournal-fail-dbquery'
+	),
+
 	'lockmanager-errors' => array(
 		'lockmanager-notlocked',
 		'lockmanager-fail-closelock',
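Taken together, FileOp::attemptBatch() builds the journal entries, FileJournal::logChangeBatch() validates and forwards them, and DBFileJournal writes one filejournal row per entry. Below is a hedged sketch of what a two-operation batch (create one file, delete another) would log, assuming $journal is a DBFileJournal instance built by FileJournal::factory(); the storage paths and SHA-1 values are invented for illustration.

<?php
// Sketch of the entries attemptBatch() assembles via getJournalEntries() for a
// batch that creates .../a.png and deletes .../b.png. All literal path and
// SHA-1 values are examples only.
$batchId = $journal->getTimestampedUUID(); // 31 chars: 9-char base-36 timestamp + 22 random chars

$entries = array(
	// Create op: a 'null' assertion row with the pre-op SHA-1 (false, since
	// the file did not exist yet), followed by the change row.
	array( 'op' => 'null',   'path' => 'mwstore://local-backend/media-public/a.png', 'newSha1' => false ),
	array( 'op' => 'create', 'path' => 'mwstore://local-backend/media-public/a.png', 'newSha1' => 'exampleb36sha1aaaa' ),
	// Delete op: the pre-op SHA-1 assertion, then the deletion, which is
	// logged with an empty SHA-1.
	array( 'op' => 'null',   'path' => 'mwstore://local-backend/media-public/b.png', 'newSha1' => 'exampleb36sha1bbbb' ),
	array( 'op' => 'delete', 'path' => 'mwstore://local-backend/media-public/b.png', 'newSha1' => '' )
);

$status = $journal->logChangeBatch( $entries, $batchId );
// A failed insert makes attemptBatch() abort the whole batch before any
// operation is performed. On success, DBFileJournal::doLogChangeBatch() has
// written one filejournal row per entry:
//   fj_batch_uuid => $batchId, fj_backend => the backend name,
//   fj_op => the entry's op, fj_path => the storage path,
//   fj_path_sha1 => wfBaseConvert( sha1( $path ), 16, 36, 31 ),
//   fj_new_sha1 => the entry's newSha1, fj_timestamp => current DB timestamp

Recovery or audit scripts can then follow a batch via the fj_batch_uuid index, or the history of a single path via the fj_path_sha1 index defined in patch-filejournal.sql above.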