[FileBackend]
author Aaron Schulz <aaron@users.mediawiki.org>
Tue, 13 Mar 2012 01:46:33 +0000 (01:46 +0000)
committer Aaron Schulz <aaron@users.mediawiki.org>
Tue, 13 Mar 2012 01:46:33 +0000 (01:46 +0000)
* Added FileJournal class to log file changes for file backends. This can be used for migrations (like moving to Swift), syncing mirror repos, consistency checks, finishing/reverting operation batches, and such. The default journal is the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal (MySQL, SQLite).

includes/AutoLoader.php
includes/filerepo/backend/FileBackend.php
includes/filerepo/backend/FileBackendMultiWrite.php
includes/filerepo/backend/FileBackendStore.php
includes/filerepo/backend/FileOp.php
includes/filerepo/backend/filejournal/DBFileJournal.php [new file with mode: 0644]
includes/filerepo/backend/filejournal/FileJournal.php [new file with mode: 0644]
languages/messages/MessagesEn.php
maintenance/archives/patch-filejournal.sql [new file with mode: 0644]
maintenance/language/messages.inc

index 91c6c2e..b59efe6 100644 (file)
@@ -507,6 +507,9 @@ $wgAutoloadLocalClasses = array(
        'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php',
        'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
        'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php',
+       'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
+       'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php',
+       'NullFileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
        'LockManagerGroup' => 'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
        'LockManager' => 'includes/filerepo/backend/lockmanager/LockManager.php',
        'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',
index 7371cc9..e0f654a 100644 (file)
@@ -45,6 +45,8 @@ abstract class FileBackend {
        protected $readOnly; // string; read-only explanation message
        /** @var LockManager */
        protected $lockManager;
+       /** @var FileJournal */
+       protected $fileJournal;
 
        /**
         * Create a new backend instance from configuration.
@@ -73,6 +75,9 @@ abstract class FileBackend {
                $this->lockManager = ( $config['lockManager'] instanceof LockManager )
                        ? $config['lockManager']
                        : LockManagerGroup::singleton()->get( $config['lockManager'] );
+               $this->fileJournal = isset( $config['fileJournal'] )
+                       ? FileJournal::factory( $config['fileJournal'], $this->name )
+                       : FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );
                $this->readOnly = isset( $config['readOnly'] )
                        ? (string)$config['readOnly']
                        : '';
@@ -177,6 +182,8 @@ abstract class FileBackend {
         * 'allowStale'          : Don't require the latest available data.
         *                         This can increase performance for non-critical writes.
         *                         This has no effect unless the 'force' flag is set.
+        * 'nonJournaled'        : Don't log this operation batch in the file journal.
+        *                         This limits the ability of recovery scripts.
         * 
         * Remarks on locking:
         * File system paths given to operations should refer to files that are
index 52c71d6..9c3cf5b 100644 (file)
@@ -133,7 +133,7 @@ class FileBackendMultiWrite extends FileBackend {
                }
 
                // Actually attempt the operation batch...
-               $subStatus = FileOp::attemptBatch( $performOps, $opts );
+               $subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
 
                $success = array();
                $failCount = 0;
index e96f257..ff32925 100644 (file)
@@ -708,7 +708,7 @@ abstract class FileBackendStore extends FileBackend {
                $this->clearCache();
 
                // Actually attempt the operation batch...
-               $subStatus = FileOp::attemptBatch( $performOps, $opts );
+               $subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
 
                // Merge errors into status fields
                $status->merge( $subStatus );
index 825a666..6cee9f9 100644 (file)
@@ -24,6 +24,7 @@ abstract class FileOp {
        protected $state = self::STATE_NEW; // integer
        protected $failed = false; // boolean
        protected $useLatest = true; // boolean
+       protected $batchId; // string
 
        protected $sourceSha1; // string
        protected $destSameAsSource; // boolean
@@ -62,6 +63,16 @@ abstract class FileOp {
                $this->params = $params;
        }
 
+       /**
+        * Record the UUID of the batch this operation is being run as part of.
+        * The stored ID is included in the debug log line written when the operation fails.
+        * @param $batchId string Batch UUID (attemptBatch() passes FileJournal::getTimestampedUUID())
+        * @return void
+        */
+       final protected function setBatchId( $batchId ) {
+               $this->batchId = $batchId;
+       }
+
        /**
         * Whether to allow stale data for file reads and stat checks
         *
@@ -73,43 +84,57 @@ abstract class FileOp {
        }
 
        /**
-        * Attempt a series of file operations.
+        * Attempt to perform a series of file operations.
         * Callers are responsible for handling file locking.
         * 
         * $opts is an array of options, including:
-        * 'force'      : Errors that would normally cause a rollback do not.
-        *                The remaining operations are still attempted if any fail.
-        * 'allowStale' : Don't require the latest available data.
-        *                This can increase performance for non-critical writes.
-        *                This has no effect unless the 'force' flag is set.
-        *
+        * 'force'        : Errors that would normally cause a rollback do not.
+        *                  The remaining operations are still attempted if any fail.
+        * 'allowStale'   : Don't require the latest available data.
+        *                  This can increase performance for non-critical writes.
+        *                  This has no effect unless the 'force' flag is set.
+        * 'nonJournaled' : Don't log this operation batch in the file journal.
+        * 
         * The resulting Status will be "OK" unless:
         *     a) unexpected operation errors occurred (network partitions, disk full...)
         *     b) significant operation errors occured and 'force' was not set
         * 
         * @param $performOps Array List of FileOp operations
         * @param $opts Array Batch operation options
+        * @param $journal FileJournal Journal to log operations to
         * @return Status 
         */
-       final public static function attemptBatch( array $performOps, array $opts ) {
+       final public static function attemptBatch(
+               array $performOps, array $opts, FileJournal $journal
+       ) {
                $status = Status::newGood();
 
-               $allowStale = !empty( $opts['allowStale'] );
-               $ignoreErrors = !empty( $opts['force'] );
-
                $n = count( $performOps );
                if ( $n > self::MAX_BATCH_SIZE ) {
                        $status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE );
                        return $status;
                }
 
+               $batchId = $journal->getTimestampedUUID();
+               $allowStale = !empty( $opts['allowStale'] );
+               $ignoreErrors = !empty( $opts['force'] );
+               $journaled = empty( $opts['nonJournaled'] );
+
+               $entries = array(); // file journal entries
                $predicates = FileOp::newPredicates(); // account for previous op in prechecks
                // Do pre-checks for each operation; abort on failure...
                foreach ( $performOps as $index => $fileOp ) {
+                       $fileOp->setBatchId( $batchId );
                        $fileOp->allowStaleReads( $allowStale );
-                       $subStatus = $fileOp->precheck( $predicates );
+                       $oldPredicates = $predicates;
+                       $subStatus = $fileOp->precheck( $predicates ); // updates $predicates
                        $status->merge( $subStatus );
-                       if ( !$subStatus->isOK() ) { // operation failed?
+                       if ( $subStatus->isOK() ) {
+                               if ( $journaled ) { // journal log entry
+                                       $entries = array_merge( $entries,
+                                               self::getJournalEntries( $fileOp, $oldPredicates, $predicates ) );
+                               }
+                       } else { // operation failed?
                                $status->success[$index] = false;
                                ++$status->failCount;
                                if ( !$ignoreErrors ) {
@@ -118,8 +143,15 @@ abstract class FileOp {
                        }
                }
 
-               if ( $ignoreErrors ) {
-                       # Treat all precheck() fatals as merely warnings
+               // Log the operations in file journal...
+               if ( count( $entries ) ) {
+                       $subStatus = $journal->logChangeBatch( $entries, $batchId );
+                       if ( !$subStatus->isOK() ) {
+                               return $subStatus; // abort
+                       }
+               }
+
+               if ( $ignoreErrors ) { // treat precheck() fatals as mere warnings
                        $status->setResult( true, $status->value );
                }
 
@@ -154,6 +186,46 @@ abstract class FileOp {
                return $status;
        }
 
+       /**
+        * Build the journal entries for one file operation: a 'null' entry for each path
+        * read or changed, then the 'create'/'update' entries, then the 'delete' entries.
+        * @param $fileOp FileOp
+        * @param $oPredicates Array File predicates as they were before this op's precheck()
+        * @param $nPredicates Array File predicates as updated by this op's precheck()
+        * @return Array List of entries, each with 'op', 'path' and 'newSha1' keys
+        */
+       final protected static function getJournalEntries(
+               FileOp $fileOp, array $oPredicates, array $nPredicates
+       ) {
+               $nullEntries = array();
+               $updateEntries = array();
+               $deleteEntries = array();
+               $pathsUsed = array_merge( $fileOp->storagePathsRead(), $fileOp->storagePathsChanged() );
+               foreach ( $pathsUsed as $path ) {
+                       $nullEntries[] = array( // assertion for recovery: SHA-1 looked up with the pre-op predicates
+                               'op'      => 'null',
+                               'path'    => $path,
+                               'newSha1' => $fileOp->fileSha1( $path, $oPredicates )
+                       );
+               }
+               foreach ( $fileOp->storagePathsChanged() as $path ) { // NOTE(review): assumes precheck() set $nPredicates['sha1'] for every changed path - confirm
+                       if ( $nPredicates['sha1'][$path] === false ) { // deleted
+                               $deleteEntries[] = array(
+                                       'op'      => 'delete',
+                                       'path'    => $path,
+                                       'newSha1' => ''
+                               );
+                       } else { // created/updated, depending on whether the path existed per the pre-op predicates
+                               $updateEntries[] = array(
+                                       'op'      => $fileOp->fileExists( $path, $oPredicates ) ? 'update' : 'create',
+                                       'path'    => $path,
+                                       'newSha1' => $nPredicates['sha1'][$path]
+                               );
+                       }
+               }
+               return array_merge( $nullEntries, $updateEntries, $deleteEntries );
+       }
+
        /**
         * Get the value of the parameter with the given name
         * 
@@ -352,8 +424,8 @@ abstract class FileOp {
                $params = $this->params;
                $params['failedAction'] = $action;
                try {
-                       wfDebugLog( 'FileOperation',
-                               get_class( $this ) . ' failed: ' . FormatJson::encode( $params ) );
+                       wfDebugLog( 'FileOperation', get_class( $this ) .
+                               " failed (batch #{$this->batchId}): " . FormatJson::encode( $params ) );
                } catch ( Exception $e ) {
                        // bad config? debug log error?
                }
diff --git a/includes/filerepo/backend/filejournal/DBFileJournal.php b/includes/filerepo/backend/filejournal/DBFileJournal.php
new file mode 100644 (file)
index 0000000..1eb9eca
--- /dev/null
@@ -0,0 +1,112 @@
+<?php
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * Version of FileJournal that logs to a DB table
+ * @since 1.20
+ */
+class DBFileJournal extends FileJournal {
+       protected $wiki = false; // string; wiki DB name
+
+       /**
+        * Construct a new instance from configuration.
+        * $config includes:
+        *     'wiki' : wiki name to use for LoadBalancer (optional; defaults to false)
+        * 
+        * @param $config Array
+        */
+       protected function __construct( array $config ) {
+               parent::__construct( $config );
+
+               $this->wiki = isset( $config['wiki'] ) ? $config['wiki'] : false; // keep the declared default when 'wiki' is not configured
+       }
+
+       /**
+        * Insert one 'filejournal' row per entry in one transaction; @see FileJournal::logChangeBatch()
+        * @return Status Fatal if the master DB is unreachable or the insert throws a DBError
+        */
+       protected function doLogChangeBatch( array $entries, $batchId ) {
+               $status = Status::newGood();
+
+               $dbw = $this->getMasterDB();
+               if ( !$dbw ) {
+                       $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+                       return $status;
+               }
+               $now = wfTimestamp( TS_UNIX ); // one timestamp shared by every row of the batch
+
+               $data = array();
+               foreach ( $entries as $entry ) {
+                       $data[] = array(
+                               'fj_batch_uuid' => $batchId,
+                               'fj_backend'    => $this->backend,
+                               'fj_op'         => $entry['op'],
+                               'fj_path'       => $entry['path'],
+                               'fj_path_sha1'  => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
+                               'fj_new_sha1'   => $entry['newSha1'],
+                               'fj_timestamp'  => $dbw->timestamp( $now )
+                       );
+               }
+
+               try {
+                       $dbw->begin();
+                       $dbw->insert( 'filejournal', $data, __METHOD__ );
+                       $dbw->commit();
+               } catch ( DBError $e ) { // NOTE(review): no rollback() here - confirm the open transaction is cleaned up
+                       $status->fatal( 'filejournal-fail-dbquery', $this->backend );
+                       return $status;
+               }
+
+               return $status;
+       }
+
+       /**
+        * Delete 'filejournal' rows older than ttlDays days; @see FileJournal::purgeOldLogs()
+        * @return Status Fatal if the master DB is unreachable or the delete throws a DBError
+        */
+       protected function doPurgeOldLogs() {
+               $status = Status::newGood();
+               if ( $this->ttlDays <= 0 ) {
+                       return $status; // purging is off when ttlDays is false, zero or negative
+               }
+
+               $dbw = $this->getMasterDB();
+               if ( !$dbw ) {
+                       $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+                       return $status;
+               }
+               $dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays ); // 86400 seconds per day
+
+               try {
+                       $dbw->begin();
+                       $dbw->delete( 'filejournal',
+                               array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ), // NOTE(review): not limited to fj_backend - confirm purging all backends' rows is intended
+                               __METHOD__
+                       );
+                       $dbw->commit();
+               } catch ( DBError $e ) {
+                       $status->fatal( 'filejournal-fail-dbquery', $this->backend );
+                       return $status;
+               }
+
+               return $status;
+       }
+
+       /**
+        * Get a master (DB_MASTER) connection for $this->wiki from a newly created main load balancer
+        * 
+        * @return DatabaseBase|null Null if a DBConnectionError was thrown while connecting
+        */
+       protected function getMasterDB() {
+               try {
+                       $lb = wfGetLBFactory()->newMainLB(); // a new load balancer is requested on every call
+                       return $lb->getConnection( DB_MASTER, array(), $this->wiki );
+               } catch ( DBConnectionError $e ) {
+                       return null; // callers turn this into a 'filejournal-fail-dbconnect' fatal
+               }
+       }
+}
diff --git a/includes/filerepo/backend/filejournal/FileJournal.php b/includes/filerepo/backend/filejournal/FileJournal.php
new file mode 100644 (file)
index 0000000..f60b7f9
--- /dev/null
@@ -0,0 +1,131 @@
+<?php
+/**
+ * @defgroup FileJournal File journal
+ * @ingroup FileBackend
+ */
+
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * @brief Class for handling file operation journaling.
+ *
+ * Subclasses should avoid throwing exceptions at all costs.
+ *
+ * @ingroup FileJournal
+ * @since 1.20
+ */
+abstract class FileJournal {
+       protected $backend; // string
+       protected $ttlDays; // integer
+
+       /**
+        * Construct a new instance from configuration (instances are created via factory()).
+        * $config includes:
+        *     'ttlDays' : days to keep log entries around (false means "forever")
+        * 
+        * @param $config Array
+        */
+       protected function __construct( array $config ) {
+               $this->ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false; // unset => keep forever
+       }
+
+       /**
+        * Create an appropriate FileJournal object from config
+        * @param $config Array Must include 'class', the name of a FileJournal subclass
+        * @param $backend string A registered file backend name
+        * @return FileJournal
+        * @throws MWException If 'class' is missing or does not name a FileJournal subclass
+        */
+       final public static function factory( array $config, $backend ) {
+               $class = isset( $config['class'] ) ? $config['class'] : null;
+               if ( !is_string( $class ) || !is_subclass_of( $class, __CLASS__ ) ) { // validate before instantiating anything
+                       throw new MWException( "Class given is not an instance of FileJournal." );
+               }
+               $jrn = new $class( $config );
+               $jrn->backend = $backend;
+               return $jrn;
+       }
+
+       /**
+        * Get a statistically unique ID string (built from mt_rand(), so not a cryptographic token)
+        * 
+        * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars>
+        */
+       final public function getTimestampedUUID() {
+               $s = '';
+               for ( $i = 0; $i < 5; $i++ ) { // concatenate five mt_rand() values as the text to hash
+                       $s .= mt_rand( 0, 2147483647 );
+               }
+               $s = wfBaseConvert( sha1( $s ), 16, 36, 31 ); // SHA-1 of that text, re-encoded from base 16 to base 36
+               return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 ); // timestamp prefix + hash, cut to 31 chars
+       }
+
+       /**
+        * Log changes made by a batch file operation.
+        * $entries is an array of log entries, each of which contains:
+        *     op      : Basic operation name (null, create, update, delete)
+        *     path    : The storage path of the file
+        *     newSha1 : The final base 36 SHA-1 of the file
+        * Note that 'false' should be used as the SHA-1 for non-existing files.
+        * 
+        * @param $entries Array List of file operations (each an array of parameters)
+        * @param $batchId string UUID string that identifies the operation batch
+        * @return Status Good status, without calling doLogChangeBatch(), if $entries is empty
+        */
+       final public function logChangeBatch( array $entries, $batchId ) {
+               if ( !count( $entries ) ) {
+                       return Status::newGood(); // nothing to log
+               }
+               return $this->doLogChangeBatch( $entries, $batchId );
+       }
+
+       /**
+        * Subclass hook that stores the entries; logChangeBatch() only calls it for a non-empty list.
+        * @see FileJournal::logChangeBatch()
+        * @param $entries Array List of file operations (each an array of parameters)
+        * @param $batchId string UUID string that identifies the operation batch
+        * @return Status
+        */
+       abstract protected function doLogChangeBatch( array $entries, $batchId );
+
+       /**
+        * Purge any old log entries (delegates to the subclass's doPurgeOldLogs())
+        * 
+        * @return Status 
+        */
+       final public function purgeOldLogs() {
+               return $this->doPurgeOldLogs();
+       }
+
+       /**
+        * Subclass hook that does the purging for purgeOldLogs(); @see FileJournal::purgeOldLogs()
+        * @return Status
+        */
+       abstract protected function doPurgeOldLogs();
+}
+
+/**
+ * Simple version of FileJournal that does nothing: both hooks just report success
+ * @since 1.20
+ */
+class NullFileJournal extends FileJournal {
+       /**
+        * Discard the entries; @see FileJournal::logChangeBatch()
+        * @return Status Always a good status
+        */
+       protected function doLogChangeBatch( array $entries, $batchId ) {
+               return Status::newGood();
+       }
+
+       /**
+        * Nothing is stored, so nothing is purged; @see FileJournal::purgeOldLogs()
+        * @return Status Always a good status
+        */
+       protected function doPurgeOldLogs() {
+               return Status::newGood();
+       }
+}
index c6d0924..61b466f 100644 (file)
@@ -2274,6 +2274,10 @@ If the problem persists, contact an [[Special:ListUsers/sysop|administrator]].',
 'backend-fail-contenttype'   => 'Could not determine the content type of the file to store at "$1".',
 'backend-fail-batchsize'     => 'Storage backend given a batch of $1 file {{PLURAL:$1|operation|operations}}; the limit is $2 {{PLURAL:$2|operation|operations}}.',
 
+# File journal
+'filejournal-fail-dbconnect' => 'Could not connect to the journal database for storage backend "$1".',
+'filejournal-fail-dbquery'   => 'Could not update the journal database for storage backend "$1".',
+
 # Lock manager
 'lockmanager-notlocked'        => 'Could not unlock "$1"; it is not locked.',
 'lockmanager-fail-closelock'   => 'Could not close lock file for "$1".',
diff --git a/maintenance/archives/patch-filejournal.sql b/maintenance/archives/patch-filejournal.sql
new file mode 100644 (file)
index 0000000..b7a7d09
--- /dev/null
@@ -0,0 +1,24 @@
+-- File backend operation journal
+CREATE TABLE /*_*/filejournal (
+  -- Unique ID for each file operation
+  fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
+  -- UUID of the batch this operation belongs to
+  fj_batch_uuid varbinary(32) NOT NULL,
+  -- The registered file backend name
+  fj_backend varchar(255) NOT NULL,
+  -- The storage path that was affected (may be internal paths)
+  fj_path blob NOT NULL,
+  -- SHA-1 file path hash in base-36
+  fj_path_sha1 varbinary(32) NOT NULL default '',
+  -- Primitive operation description (null/create/update/delete)
+  fj_op varchar(16) NOT NULL default '',
+  -- SHA-1 file content hash in base-36
+  fj_new_sha1 varbinary(32) NOT NULL default '',
+  -- Timestamp of the batch operation
+  fj_timestamp varbinary(14) NOT NULL default ''
+);
+
+CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
+CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
+CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
+CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);
index c9afbfa..6e6e8d0 100644 (file)
@@ -1377,6 +1377,11 @@ $wgMessageStructure = array(
                'backend-fail-batchsize'
        ),
 
+       'filejournal-errors' => array(
+               'filejournal-fail-dbconnect',
+               'filejournal-fail-dbquery'
+       ),
+
        'lockmanager-errors' => array(
                'lockmanager-notlocked',
                'lockmanager-fail-closelock',