From 6362adf3f52f569358e765b93d5ae72c1a01c3fa Mon Sep 17 00:00:00 2001 From: Matthias Mullie Date: Mon, 18 Sep 2017 15:35:42 +0200 Subject: [PATCH] Block same-file reuploads MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When uploading a file, there are a few ways of checking for and blocking (or at least warning about) duplicate uploads already. However, it occasionally seems to happen that files get uploaded twice. The exact same file, usually - submitted at (almost) the exact same time (possibly some error in whatever submits the file upload, but still) Given 2 uploads at (almost) the exact same time, both of them are stored, even if they are the exact same files. The last upload also ends up with a `logging` entry with `log_page = 0`. I don’t believe such upload should go through: if we do find that a file is an exact duplicate of something that already exists, I don’t see any reason it should go through. Note that with this patch, it will become impossible to reupload a file with the exact same hash (which was possible before.) If we still want to allow same-file reuploads while also blocking these kind of race-condition same-uploads, we could make the check more strict (e.g. also check timestamps, or check if page already exists, or …) Bug: T158480 Change-Id: I76cbd2c64c3b893997f1f85974d6f82cbfe121e1 --- includes/filerepo/file/LocalFile.php | 50 ++++++++++++++++++++-------- maintenance/importImages.php | 2 +- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/includes/filerepo/file/LocalFile.php b/includes/filerepo/file/LocalFile.php index 96e7a7e7f9..c7aa40e380 100644 --- a/includes/filerepo/file/LocalFile.php +++ b/includes/filerepo/file/LocalFile.php @@ -1244,8 +1244,22 @@ class LocalFile extends File { // Once the second operation goes through, then the current version was // updated and we must therefore update the DB too. $oldver = $status->value; - if ( !$this->recordUpload2( $oldver, $comment, $pageText, $props, $timestamp, $user, $tags ) ) { - $status->fatal( 'filenotfound', $srcPath ); + $uploadStatus = $this->recordUpload2( + $oldver, + $comment, + $pageText, + $props, + $timestamp, + $user, + $tags + ); + if ( !$uploadStatus->isOK() ) { + if ( $uploadStatus->hasMessage( 'filenotfound' ) ) { + // update filenotfound error with more specific path + $status->fatal( 'filenotfound', $srcPath ); + } else { + $status->merge( $uploadStatus ); + } } } @@ -1275,7 +1289,7 @@ class LocalFile extends File { $pageText = SpecialUpload::getInitialPageText( $desc, $license, $copyStatus, $source ); - if ( !$this->recordUpload2( $oldver, $desc, $pageText, false, $timestamp, $user ) ) { + if ( !$this->recordUpload2( $oldver, $desc, $pageText, false, $timestamp, $user )->isOK() ) { return false; } @@ -1295,7 +1309,7 @@ class LocalFile extends File { * @param string|bool $timestamp * @param null|User $user * @param string[] $tags - * @return bool + * @return Status */ function recordUpload2( $oldver, $comment, $pageText, $props = false, $timestamp = false, $user = null, $tags = [] @@ -1329,7 +1343,7 @@ class LocalFile extends File { if ( !$this->fileExists ) { wfDebug( __METHOD__ . ": File " . $this->getRel() . " went missing!\n" ); - return false; + return Status::newFatal( 'filenotfound', $this->getRel() ); } $dbw->startAtomic( __METHOD__ ); @@ -1362,16 +1376,24 @@ class LocalFile extends File { $reupload = ( $dbw->affectedRows() == 0 ); if ( $reupload ) { + $row = $dbw->selectRow( + 'image', + [ 'img_timestamp', 'img_sha1' ], + [ 'img_name' => $this->getName() ], + __METHOD__, + [ 'LOCK IN SHARE MODE' ] + ); + + if ( $row && $row->img_sha1 === $this->sha1 ) { + $dbw->endAtomic( __METHOD__ ); + wfDebug( __METHOD__ . ": File " . $this->getRel() . " already exists!\n" ); + $title = Title::newFromText( $this->getName(), NS_FILE ); + return Status::newFatal( 'fileexists-no-change', $title->getPrefixedText() ); + } + if ( $allowTimeKludge ) { # Use LOCK IN SHARE MODE to ignore any transaction snapshotting - $ltimestamp = $dbw->selectField( - 'image', - 'img_timestamp', - [ 'img_name' => $this->getName() ], - __METHOD__, - [ 'LOCK IN SHARE MODE' ] - ); - $lUnixtime = $ltimestamp ? wfTimestamp( TS_UNIX, $ltimestamp ) : false; + $lUnixtime = $row ? wfTimestamp( TS_UNIX, $row->img_timestamp ) : false; # Avoid a timestamp that is not newer than the last version # TODO: the image/oldimage tables should be like page/revision with an ID field if ( $lUnixtime && wfTimestamp( TS_UNIX, $timestamp ) <= $lUnixtime ) { @@ -1642,7 +1664,7 @@ class LocalFile extends File { # Invalidate cache for all pages using this file DeferredUpdates::addUpdate( new HTMLCacheUpdate( $this->getTitle(), 'imagelinks' ) ); - return true; + return Status::newGood(); } /** diff --git a/maintenance/importImages.php b/maintenance/importImages.php index 625e1f70de..e733b9a7df 100644 --- a/maintenance/importImages.php +++ b/maintenance/importImages.php @@ -334,7 +334,7 @@ class ImportImages extends Maintenance { $commentText, $props, $timestamp - ) ) { + )->isOK() ) { # We're done! $this->output( "done.\n" ); -- 2.20.1