From f90a9ec93ae7112cb15ca49d81ca8933cfd91be1 Mon Sep 17 00:00:00 2001 From: Bryan Davis Date: Fri, 4 Oct 2013 16:38:09 -0600 Subject: [PATCH] Add script to purge changed files from cache This maintenance script enhances the prior purgeDeletedFiles script to optionally also process create and modify events that may have left stale information in front-end caches. Changes from purgeDeletedFiles: * `--starttime` & `--endtime` are now required * `--type` allows selecting 'created' and/or 'modified' events in addition to 'deleted' events. * `--dry-run` to list affected files but perform no purges * `--htcp-dest` option to restrict HTCP broadcast messages Change-Id: I7181ea461ba9758747bff4fd70530d00dab492cc --- maintenance/purgeChangedFiles.php | 255 ++++++++++++++++++++++++++++++ maintenance/purgeDeletedFiles.php | 125 --------------- 2 files changed, 255 insertions(+), 125 deletions(-) create mode 100644 maintenance/purgeChangedFiles.php delete mode 100644 maintenance/purgeDeletedFiles.php diff --git a/maintenance/purgeChangedFiles.php b/maintenance/purgeChangedFiles.php new file mode 100644 index 0000000000..9f83ee7f9b --- /dev/null +++ b/maintenance/purgeChangedFiles.php @@ -0,0 +1,255 @@ + array( + 'upload' => array( 'upload' ), + 'import' => array( 'upload', 'interwiki' ), + ), + 'deleted' => array( + 'delete' => array( 'delete', 'revision' ), + 'suppress' => array( 'delete', 'revision' ), + ), + 'modified' => array( + 'upload' => array( 'overwrite', 'revert' ), + 'move' => array( 'move', 'move_redir' ), + ), + ); + + /** + * @var string + */ + private $startTimestamp; + + /** + * @var string + */ + private $endTimestamp; + + public function __construct() { + parent::__construct(); + $this->mDescription = "Scan the logging table and purge files and thumbnails."; + $this->addOption( 'starttime', 'Starting timestamp', true, true ); + $this->addOption( 'endtime', 'Ending timestamp', true, true ); + $this->addOption( 'type', 'Comma-separated list of types of changes to send purges for (' . + implode( ',', array_keys( self::$typeMappings ) ) . ',all)', false, true ); + $this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true ); + $this->addOption( 'dry-run', 'Do not send purge requests' ); + $this->addOption( 'verbose', 'Show more output', false, false, 'v' ); + } + + public function execute() { + global $wgHTCPRouting; + + if ( $this->hasOption( 'htcp-dest' ) ) { + $parts = explode( ':', $this->getOption( 'htcp-dest' ) ); + if ( count( $parts ) < 2 ) { + // Add default htcp port + $parts[] = '4827'; + } + + // Route all HTCP messages to provided host:port + $wgHTCPRouting = array( + '' => array( 'host' => $parts[0], 'port' => $parts[1] ), + ); + $this->verbose( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" ); + } + + // Find out which actions we should be concerned with + $typeOpt = $this->getOption( 'type', 'all' ); + $validTypes = array_keys( self::$typeMappings ); + if ( $typeOpt === 'all' ) { + // Convert 'all' to all registered types + $typeOpt = implode( ',', $validTypes ); + } + $typeList = explode( ',', $typeOpt ); + foreach ( $typeList as $type ) { + if ( !in_array( $type, $validTypes ) ) { + $this->error( "\nERROR: Unknown type: {$type}\n" ); + $this->maybeHelp( true ); + } + } + + // Validate the timestamps + $dbr = $this->getDB( DB_SLAVE ); + $this->startTimestamp = $dbr->timestamp( $this->getOption( 'starttime' ) ); + $this->endTimestamp = $dbr->timestamp( $this->getOption( 'endtime' ) ); + + if ( $this->startTimestamp > $this->endTimestamp ) { + $this->error( "\nERROR: starttime after endtime\n" ); + $this->maybeHelp( true ); + } + + // Turn on verbose when dry-run is enabled + if ( $this->hasOption( 'dry-run' ) ) { + $this->mOptions['verbose'] = 1; + } + + $this->verbose( 'Purging files that were: ' . implode( ', ', $typeList ) . "\n"); + foreach ( $typeList as $type ) { + $this->verbose( "Checking for {$type} files...\n" ); + $this->purgeFromLogType( $type ); + if ( !$this->hasOption( 'dry-run' ) ) { + $this->verbose( "...{$type} files purged.\n\n" ); + } + } + } + + /** + * Purge cache and thumbnails for changes of the given type. + * + * @param string $type Type of change to find + */ + protected function purgeFromLogType( $type ) { + $repo = RepoGroup::singleton()->getLocalRepo(); + $dbr = $this->getDB( DB_SLAVE ); + + foreach ( self::$typeMappings[$type] as $logType => $logActions ) { + $this->verbose( "Scanning for {$logType}/" . implode( ',', $logActions ) . "\n" ); + + $res = $dbr->select( + 'logging', + array( 'log_title', 'log_timestamp', 'log_params' ), + array( + 'log_namespace' => NS_FILE, + 'log_type' => $logType, + 'log_action' => $logActions, + 'log_timestamp >= ' . $dbr->addQuotes( $this->startTimestamp ), + 'log_timestamp <= ' . $dbr->addQuotes( $this->endTimestamp ), + ), + __METHOD__ + ); + + foreach ( $res as $row ) { + $file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) ); + + if ( $this->hasOption( 'dry-run' ) ) { + $this->verbose( "{$type}[{$row->log_timestamp}]: {$row->log_title}\n" ); + continue; + } + + // Purge current version and any versions in oldimage table + $file->purgeCache(); + $file->purgeHistory(); + + if ( $logType === 'delete' ) { + // If there is an orphaned storage file... delete it + if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) { + $dpath = $this->getDeletedPath( $repo, $file ); + if ( $repo->fileExists( $dpath ) ) { + // Sanity check to avoid data loss + $repo->getBackend()->delete( array( 'src' => $file->getPath() ) ); + $this->verbose( "Deleted orphan file: {$file->getPath()}.\n" ); + + } else { + $this->error( "File was not deleted: {$file->getPath()}.\n" ); + } + } + + // Purge items from fileachive table (rows are likely here) + $this->purgeFromArchiveTable( $repo, $file ); + + } else if ( $logType === 'move' ) { + // Purge the target file as well + + $params = unserialize( $row->log_params ); + if ( isset( $params['4::target'] ) ) { + $target = $params['4::target']; + $targetFile = $repo->newFile( Title::makeTitle( NS_FILE, $target ) ); + $targetFile->purgeCache(); + $targetFile->purgeHistory(); + $this->verbose( "Purged file {$target}; move target @{$row->log_timestamp}.\n" ); + } + } + + $this->verbose( "Purged file {$row->log_title}; {$type} @{$row->log_timestamp}.\n" ); + } + } + } + + protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) { + $dbr = $repo->getSlaveDB(); + $res = $dbr->select( + 'filearchive', + array( 'fa_archive_name' ), + array( 'fa_name' => $file->getName() ), + __METHOD__ + ); + + foreach ( $res as $row ) { + if ( $row->fa_archive_name === null ) { + // Was not an old version (current version names checked already) + continue; + } + $ofile = $repo->newFromArchiveName( $file->getTitle(), $row->fa_archive_name ); + // If there is an orphaned storage file still there...delete it + if ( !$file->exists() && $repo->fileExists( $ofile->getPath() ) ) { + $dpath = $this->getDeletedPath( $repo, $ofile ); + if ( $repo->fileExists( $dpath ) ) { + // Sanity check to avoid data loss + $repo->getBackend()->delete( array( 'src' => $ofile->getPath() ) ); + $this->output( "Deleted orphan file: {$ofile->getPath()}.\n" ); + + } else { + $this->error( "File was not deleted: {$ofile->getPath()}.\n" ); + } + } + $file->purgeOldThumbnails( $row->fa_archive_name ); + } + } + + protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) { + $hash = $repo->getFileSha1( $file->getPath() ); + $key = "{$hash}.{$file->getExtension()}"; + return $repo->getDeletedHashPath( $key ) . $key; + } + + /** + * Send an output message iff the 'verbose' option has been provided. + * + * @param string $msg Message to output + */ + protected function verbose( $msg ) { + if ( $this->hasOption( 'verbose' ) ) { + $this->output( $msg ); + } + } + +} + +$maintClass = "PurgeChangedFiles"; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/purgeDeletedFiles.php b/maintenance/purgeDeletedFiles.php deleted file mode 100644 index 9f2af33d71..0000000000 --- a/maintenance/purgeDeletedFiles.php +++ /dev/null @@ -1,125 +0,0 @@ -mDescription = "Scan the logging table and purge files that where deleted."; - $this->addOption( 'starttime', 'Starting timestamp', false, true ); - $this->addOption( 'endtime', 'Ending timestamp', false, true ); - } - - public function execute() { - $this->output( "Purging cache and thumbnails for deleted files...\n" ); - $this->purgeFromLogType( 'delete' ); - $this->output( "...deleted files purged.\n\n" ); - - $this->output( "Purging cache and thumbnails for suppressed files...\n" ); - $this->purgeFromLogType( 'suppress' ); - $this->output( "...suppressed files purged.\n" ); - } - - protected function purgeFromLogType( $logType ) { - $repo = RepoGroup::singleton()->getLocalRepo(); - $db = $repo->getSlaveDB(); - - $conds = array( - 'log_namespace' => NS_FILE, - 'log_type' => $logType, - 'log_action' => array( 'delete', 'revision' ) - ); - $start = $this->getOption( 'starttime' ); - if ( $start ) { - $conds[] = 'log_timestamp >= ' . $db->addQuotes( $db->timestamp( $start ) ); - } - $end = $this->getOption( 'endtime' ); - if ( $end ) { - $conds[] = 'log_timestamp <= ' . $db->addQuotes( $db->timestamp( $end ) ); - } - - $res = $db->select( 'logging', array( 'log_title', 'log_timestamp' ), $conds, __METHOD__ ); - foreach ( $res as $row ) { - $file = $repo->newFile( Title::makeTitle( NS_FILE, $row->log_title ) ); - // If there is an orphaned storage file still there...delete it - if ( !$file->exists() && $repo->fileExists( $file->getPath() ) ) { - $dpath = $this->getDeletedPath( $repo, $file ); - if ( $repo->fileExists( $dpath ) ) { // sanity check to avoid data loss - $repo->getBackend()->delete( array( 'src' => $file->getPath() ) ); - $this->output( "Deleted orphan file: {$file->getPath()}.\n" ); - } else { - $this->error( "File was not deleted: {$file->getPath()}.\n" ); - } - } - // Purge current version and any versions in oldimage table - $file->purgeCache(); - $file->purgeHistory(); - // Purge items from fileachive table (rows are likely here) - $this->purgeFromArchiveTable( $repo, $file ); - - $this->output( "Purged file {$row->log_title}; deleted on {$row->log_timestamp}.\n" ); - } - } - - protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile $file ) { - $db = $repo->getSlaveDB(); - $res = $db->select( 'filearchive', - array( 'fa_archive_name' ), - array( 'fa_name' => $file->getName() ), - __METHOD__ - ); - foreach ( $res as $row ) { - if ( $row->fa_archive_name === null ) { - continue; // was not an old version (current version names checked already) - } - $ofile = $repo->newFromArchiveName( $file->getTitle(), $row->fa_archive_name ); - // If there is an orphaned storage file still there...delete it - if ( !$file->exists() && $repo->fileExists( $ofile->getPath() ) ) { - $dpath = $this->getDeletedPath( $repo, $ofile ); - if ( $repo->fileExists( $dpath ) ) { // sanity check to avoid data loss - $repo->getBackend()->delete( array( 'src' => $ofile->getPath() ) ); - $this->output( "Deleted orphan file: {$ofile->getPath()}.\n" ); - } else { - $this->error( "File was not deleted: {$ofile->getPath()}.\n" ); - } - } - $file->purgeOldThumbnails( $row->fa_archive_name ); - } - } - - protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) { - $hash = $repo->getFileSha1( $file->getPath() ); - $key = "{$hash}.{$file->getExtension()}"; - return $repo->getDeletedHashPath( $key ) . $key; - } -} - -$maintClass = "PurgeDeletedFiles"; -require_once RUN_MAINTENANCE_IF_MAIN; -- 2.20.1