<?php
/**
 * Find unregistered ("orphaned") files in the 'public' zone of the local file repo.
 *
 * Provenance of this file (commit metadata):
 *   From: Aaron Schulz
 *   Date: Wed, 28 Oct 2015 20:01:50 +0000 (-0700)
 *   Subject: Add script to find orphaned LocalRepo files
 *   X-Git-Tag: 1.31.0-rc.0~9127^2
 *   X-Git-Url: https://git.cyclocoop.org/%242?a=commitdiff_plain;h=828b5989113d2b3efdb98c47ee42a7cee3ebdbe5;p=lhc%2Fweb%2Fwiklou.git
 *   Bug: T111838
 *   Change-Id: I60ac7f3451c8240a0716933dcfff413669458206
 *
 * The same commit registers the class in autoload.php ($wgAutoloadLocalClasses),
 * between the 'FindMissingFiles' and 'FixBug20757' entries:
 *   'FindOrphanedFiles' => __DIR__ . '/maintenance/findOrphanedFiles.php',
 *
 * NOTE(review): the top of the script (opening tag, any license header, the
 * require of Maintenance.php, the class declaration and the first statements
 * of the constructor up to "$this->mDescription") was lost when the commit
 * page was extracted. Those lines are reconstructed below from $maintClass,
 * the autoload entry and RUN_MAINTENANCE_IF_MAIN -- confirm against the
 * repository before relying on them.
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that lists files stored in the local repo's 'public'
 * zone that have no matching row in the `image` or `oldimage` tables.
 */
class FindOrphanedFiles extends Maintenance {
	/**
	 * Declare the script description, its options and the batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Find unregistered files in the 'public' repo zone.";
		$this->addOption( 'subdir',
			'Only scan files in this subdirectory (e.g. "a/a0")', false, true );
		$this->addOption( 'verbose', "Mention file paths checked" );
		$this->setBatchSize( 500 );
	}

	/**
	 * List the files under the public zone (optionally restricted to --subdir)
	 * and check them against the database in batches of mBatchSize names.
	 */
	public function execute() {
		$subdir = $this->getOption( 'subdir', '' );
		$verbose = $this->hasOption( 'verbose' );

		$repo = RepoGroup::singleton()->getLocalRepo();
		if ( $repo->hasSha1Storage() ) {
			// The checks below look files up by their stored base name, so
			// SHA-1 storage names are explicitly refused.
			$this->error( "Local repo uses SHA-1 file storage names; aborting.", 1 );
		}

		$directory = $repo->getZonePath( 'public' );
		if ( $subdir !== '' ) {
			$directory .= "/$subdir/";
		}

		if ( $verbose ) {
			$this->output( "Scanning files under $directory:\n" );
		}

		$list = $repo->getBackend()->getFileList( array( 'dir' => $directory ) );
		if ( $list === null ) {
			$this->error( "Could not get file listing.", 1 );
		}

		$nameBatch = array();
		foreach ( $list as $path ) {
			if ( preg_match( '#^(thumb|deleted)/#', $path ) ) {
				continue; // handle ugly nested containers on stock installs
			}

			// Only the base name is kept; the directory part is discarded.
			$nameBatch[] = basename( $path );
			if ( count( $nameBatch ) >= $this->mBatchSize ) {
				$this->checkFiles( $repo, $nameBatch, $verbose );
				$nameBatch = array();
			}
		}
		// Flush the final, possibly partial batch (a no-op when it is empty).
		$this->checkFiles( $repo, $nameBatch, $verbose );
	}

	/**
	 * Report every name in $names that is found neither in `image` (by
	 * img_name) nor in `oldimage` (by oi_name + oi_archive_name).
	 *
	 * A name containing '!' is treated as an archive name, anything else as a
	 * current file name.
	 * NOTE(review): a *current* file whose own name contains '!' is therefore
	 * looked up as an archived version; telling the two apart would need the
	 * directory part of the path, which is not passed to this method.
	 *
	 * For each orphan the name and its public URL are printed, followed by a
	 * blank line.
	 *
	 * @param LocalRepo $repo Repo used for the DB connection and file objects
	 * @param array $names File base names to check
	 * @param bool $verbose Whether to print each name as it is classified
	 */
	protected function checkFiles( LocalRepo $repo, array $names, $verbose ) {
		if ( !count( $names ) ) {
			return;
		}

		$dbr = $repo->getSlaveDB();

		$imgIN = array();
		$oiWheres = array();
		foreach ( $names as $name ) {
			if ( strpos( $name, '!' ) !== false ) {
				if ( $verbose ) {
					$this->output( "Checking old file $name\n" );
				}

				$oiWheres[] = $dbr->makeList(
					array(
						'oi_name' => $this->getArchiveBaseName( $name ),
						'oi_archive_name' => $name
					),
					LIST_AND
				);
			} else {
				if ( $verbose ) {
					$this->output( "Checking current file $name\n" );
				}

				$imgIN[] = $name;
			}
		}

		// A batch may consist solely of current names or solely of archive
		// names. An empty name list cannot form a usable IN() condition, and
		// an empty OR-list would leave the oldimage query without any filter,
		// so fall back to an always-false condition for the unused side.
		$imgConds = $imgIN ? array( 'img_name' => $imgIN ) : '1=0';
		$oiConds = $oiWheres ? $dbr->makeList( $oiWheres, LIST_OR ) : '1=0';

		$res = $dbr->query(
			$dbr->unionQueries(
				array(
					$dbr->selectSQLText(
						'image',
						array( 'name' => 'img_name' ),
						$imgConds
					),
					$dbr->selectSQLText(
						'oldimage',
						array( 'name' => 'oi_archive_name' ),
						$oiConds
					)
				),
				true // UNION ALL (performance)
			),
			__METHOD__
		);

		$namesFound = array();
		foreach ( $res as $row ) {
			$namesFound[] = $row->name;
		}

		$namesOrphans = array_diff( $names, $namesFound );
		foreach ( $namesOrphans as $name ) {
			// Print name and public URL to ease recovery
			if ( strpos( $name, '!' ) !== false ) {
				$base = $this->getArchiveBaseName( $name );
				$file = $repo->newFromArchiveName( Title::makeTitle( NS_FILE, $base ), $name );
			} else {
				$file = $repo->newFile( $name );
			}
			$this->output( $name . "\n" . $file->getUrl() . "\n\n" );
		}
	}

	/**
	 * Get the base file name out of an archive name of the form
	 * "<prefix>!<base name>".
	 *
	 * Only the first '!' separates the two parts, so a base name that itself
	 * contains '!' is returned intact.
	 *
	 * @param string $archiveName Name known to contain at least one '!'
	 * @return string Everything after the first '!'
	 */
	private function getArchiveBaseName( $archiveName ) {
		list( , $base ) = explode( '!', $archiveName, 2 );

		return $base;
	}
}

$maintClass = 'FindOrphanedFiles';
require_once RUN_MAINTENANCE_IF_MAIN;