From fb2c1c645bdc2f74477e4a4dd9c7b08c5afe4744 Mon Sep 17 00:00:00 2001 From: umherirrender Date: Sun, 14 Oct 2012 20:58:25 +0200 Subject: [PATCH] Add separate fa_sha1 field to filearchive table This allows sha1 searches with the api in miser mode for deleted files Added script to populate the rows Adding new field to selects and handle it in all places, where needed Using a 10 byte index for the new field per http://lists.wikimedia.org/pipermail/wikitech-l/2012-September/063429.html Change-Id: Ie54a513fe361202e63df44be44a0fdd91926c974 --- RELEASE-NOTES-1.21 | 2 + includes/AutoLoader.php | 1 + includes/api/ApiQueryFilearchive.php | 15 ++-- includes/filerepo/file/ArchivedFile.php | 86 ++++++++++++---------- includes/filerepo/file/LocalFile.php | 11 ++- includes/installer/DatabaseUpdater.php | 1 + includes/installer/MysqlUpdater.php | 1 + includes/installer/SqliteUpdater.php | 1 + includes/specials/SpecialUndelete.php | 3 +- maintenance/archives/patch-fa_sha1.sql | 4 + maintenance/deleteArchivedFiles.inc | 9 ++- maintenance/populateFilearchiveSha1.php | 97 +++++++++++++++++++++++++ maintenance/tables.sql | 7 +- 13 files changed, 185 insertions(+), 53 deletions(-) create mode 100644 maintenance/archives/patch-fa_sha1.sql create mode 100644 maintenance/populateFilearchiveSha1.php diff --git a/RELEASE-NOTES-1.21 b/RELEASE-NOTES-1.21 index c0853f2221..ade0b68585 100644 --- a/RELEASE-NOTES-1.21 +++ b/RELEASE-NOTES-1.21 @@ -24,6 +24,8 @@ production. * Added new backend to represent and store information about sites and site specific configuration. * jQuery UI upgraded from 1.8.23 to 1.8.24. +* Added separate fa_sha1 field to filearchive table. This allows sha1 + searches with the api in miser mode for deleted files. === Bug fixes in 1.21 === * (bug 40353) SpecialDoubleRedirect should support interwiki redirects. 
diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index e8df8d4650..0e310e2ef7 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -1051,6 +1051,7 @@ $wgAutoloadLocalClasses = array( 'FixExtLinksProtocolRelative' => 'maintenance/fixExtLinksProtocolRelative.php', 'PopulateCategory' => 'maintenance/populateCategory.php', 'PopulateImageSha1' => 'maintenance/populateImageSha1.php', + 'PopulateFilearchiveSha1' => 'maintenance/populateFilearchiveSha1.php', 'PopulateLogSearch' => 'maintenance/populateLogSearch.php', 'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php', 'PopulateParentId' => 'maintenance/populateParentId.php', diff --git a/includes/api/ApiQueryFilearchive.php b/includes/api/ApiQueryFilearchive.php index a5486ef4a0..dbca1d96db 100644 --- a/includes/api/ApiQueryFilearchive.php +++ b/includes/api/ApiQueryFilearchive.php @@ -64,7 +64,7 @@ class ApiQueryFilearchive extends ApiQueryBase { $this->addTables( 'filearchive' ); $this->addFields( array( 'fa_name', 'fa_deleted' ) ); - $this->addFieldsIf( 'fa_storage_key', $fld_sha1 ); + $this->addFieldsIf( 'fa_sha1', $fld_sha1 ); $this->addFieldsIf( 'fa_timestamp', $fld_timestamp ); $this->addFieldsIf( array( 'fa_user', 'fa_user_text' ), $fld_user ); $this->addFieldsIf( array( 'fa_height', 'fa_width', 'fa_size' ), $fld_dimensions || $fld_size ); @@ -101,11 +101,6 @@ class ApiQueryFilearchive extends ApiQueryBase { $sha1Set = isset( $params['sha1'] ); $sha1base36Set = isset( $params['sha1base36'] ); if ( $sha1Set || $sha1base36Set ) { - global $wgMiserMode; - if ( $wgMiserMode ) { - $this->dieUsage( 'Search by hash disabled in Miser Mode', 'hashsearchdisabled' ); - } - $sha1 = false; if ( $sha1Set ) { if ( !$this->validateSha1Hash( $params['sha1'] ) ) { @@ -119,7 +114,7 @@ class ApiQueryFilearchive extends ApiQueryBase { $sha1 = $params['sha1base36']; } if ( $sha1 ) { - $this->addWhere( 'fa_storage_key ' . 
$db->buildLike( "{$sha1}.", $db->anyString() ) ); + $this->addWhereFld( 'fa_sha1', $sha1 ); } } @@ -155,7 +150,7 @@ class ApiQueryFilearchive extends ApiQueryBase { self::addTitleInfo( $file, $title ); if ( $fld_sha1 ) { - $file['sha1'] = wfBaseConvert( LocalRepo::getHashFromKey( $row->fa_storage_key ), 36, 16, 40 ); + $file['sha1'] = wfBaseConvert( $row->fa_sha1, 36, 16, 40 ); } if ( $fld_timestamp ) { $file['timestamp'] = wfTimestamp( TS_ISO_8601, $row->fa_timestamp ); @@ -276,8 +271,8 @@ class ApiQueryFilearchive extends ApiQueryBase { 'prefix' => 'Search for all image titles that begin with this value', 'dir' => 'The direction in which to list', 'limit' => 'How many images to return in total', - 'sha1' => "SHA1 hash of image. Overrides {$this->getModulePrefix()}sha1base36. Disabled in Miser Mode", - 'sha1base36' => 'SHA1 hash of image in base 36 (used in MediaWiki). Disabled in Miser Mode', + 'sha1' => "SHA1 hash of image. Overrides {$this->getModulePrefix()}sha1base36", + 'sha1base36' => 'SHA1 hash of image in base 36 (used in MediaWiki)', 'prop' => array( 'What image information to get:', ' sha1 - Adds SHA-1 hash for the image', diff --git a/includes/filerepo/file/ArchivedFile.php b/includes/filerepo/file/ArchivedFile.php index c9751bec29..694623b627 100644 --- a/includes/filerepo/file/ArchivedFile.php +++ b/includes/filerepo/file/ArchivedFile.php @@ -47,6 +47,7 @@ class ArchivedFile { $timestamp, # time of upload $dataLoaded, # Whether or not all this has been loaded from the database (loadFromXxx) $deleted, # Bitfield akin to rev_deleted + $sha1, # sha1 hash of file content $pageCount, $archive_name; @@ -87,6 +88,7 @@ class ArchivedFile { $this->deleted = 0; $this->dataLoaded = false; $this->exists = false; + $this->sha1 = ''; if( $title instanceof Title ) { $this->title = File::normalizeTitle( $title, 'exception' ); @@ -153,7 +155,8 @@ class ArchivedFile { 'fa_user', 'fa_user_text', 'fa_timestamp', - 'fa_deleted' ), + 'fa_deleted', + 'fa_sha1' ), $conds, 
__METHOD__,
 				array( 'ORDER BY' => 'fa_timestamp DESC' ) );
@@ -165,23 +168,7 @@ class ArchivedFile {
 			$row = $ret->fetchObject();
 
 			// initialize fields for filestore image object
-			$this->id = intval($row->fa_id);
-			$this->name = $row->fa_name;
-			$this->archive_name = $row->fa_archive_name;
-			$this->group = $row->fa_storage_group;
-			$this->key = $row->fa_storage_key;
-			$this->size = $row->fa_size;
-			$this->bits = $row->fa_bits;
-			$this->width = $row->fa_width;
-			$this->height = $row->fa_height;
-			$this->metadata = $row->fa_metadata;
-			$this->mime = "$row->fa_major_mime/$row->fa_minor_mime";
-			$this->media_type = $row->fa_media_type;
-			$this->description = $row->fa_description;
-			$this->user = $row->fa_user;
-			$this->user_text = $row->fa_user_text;
-			$this->timestamp = $row->fa_timestamp;
-			$this->deleted = $row->fa_deleted;
+			$this->loadFromRow( $row );
 		} else {
 			throw new MWException( 'This title does not correspond to an image page.' );
 		}
@@ -200,28 +187,42 @@ class ArchivedFile {
 	 */
 	public static function newFromRow( $row ) {
 		$file = new ArchivedFile( Title::makeTitle( NS_FILE, $row->fa_name ) );
-
-		$file->id = intval($row->fa_id);
-		$file->name = $row->fa_name;
-		$file->archive_name = $row->fa_archive_name;
-		$file->group = $row->fa_storage_group;
-		$file->key = $row->fa_storage_key;
-		$file->size = $row->fa_size;
-		$file->bits = $row->fa_bits;
-		$file->width = $row->fa_width;
-		$file->height = $row->fa_height;
-		$file->metadata = $row->fa_metadata;
-		$file->mime = "$row->fa_major_mime/$row->fa_minor_mime";
-		$file->media_type = $row->fa_media_type;
-		$file->description = $row->fa_description;
-		$file->user = $row->fa_user;
-		$file->user_text = $row->fa_user_text;
-		$file->timestamp = $row->fa_timestamp;
-		$file->deleted = $row->fa_deleted;
-
+		$file->loadFromRow( $row );
 		return $file;
 	}
 
+	/**
+	 * Load ArchivedFile object fields from a DB row.
+	 *
+	 * @param $row Object database row
+	 * @since 1.21
+	 */
+	public function loadFromRow( $row ) {
+		$this->id = intval($row->fa_id);
+		$this->name = $row->fa_name;
+		$this->archive_name = $row->fa_archive_name;
+		$this->group = $row->fa_storage_group;
+		$this->key = $row->fa_storage_key;
+		$this->size = $row->fa_size;
+		$this->bits = $row->fa_bits;
+		$this->width = $row->fa_width;
+		$this->height = $row->fa_height;
+		$this->metadata = $row->fa_metadata;
+		$this->mime = "$row->fa_major_mime/$row->fa_minor_mime";
+		$this->media_type = $row->fa_media_type;
+		$this->description = $row->fa_description;
+		$this->user = $row->fa_user;
+		$this->user_text = $row->fa_user_text;
+		$this->timestamp = $row->fa_timestamp;
+		$this->deleted = $row->fa_deleted;
+		if( isset( $row->fa_sha1 ) && $row->fa_sha1 !== '' ) {
+			$this->sha1 = $row->fa_sha1;
+		} else {
+			// fa_sha1 not selected, or still the column default '': derive it from the key
+			$this->sha1 = LocalRepo::getHashFromKey( $this->key );
+		}
+	}
+
 	/**
 	 * Return the associated title object
 	 *
@@ -381,6 +382,17 @@ class ArchivedFile {
 		return wfTimestamp( TS_MW, $this->timestamp );
 	}
 
+	/**
+	 * Get the SHA-1 base 36 hash of the file
+	 *
+	 * @return string
+	 * @since 1.21
+	 */
+	function getSha1() {
+		$this->load();
+		return $this->sha1;
+	}
+
 	/**
 	 * Return the user ID of the uploader.
*
diff --git a/includes/filerepo/file/LocalFile.php b/includes/filerepo/file/LocalFile.php
index 05958d617c..caa93a42c6 100644
--- a/includes/filerepo/file/LocalFile.php
+++ b/includes/filerepo/file/LocalFile.php
@@ -1774,7 +1774,8 @@ class LocalFileDeleteBatch {
 					'fa_description' => 'img_description',
 					'fa_user' => 'img_user',
 					'fa_user_text' => 'img_user_text',
-					'fa_timestamp' => 'img_timestamp'
+					'fa_timestamp' => 'img_timestamp',
+					'fa_sha1' => 'img_sha1',
 				), $where, __METHOD__ );
 		}
 
@@ -1806,6 +1807,7 @@ class LocalFileDeleteBatch {
 					'fa_user' => 'oi_user',
 					'fa_user_text' => 'oi_user_text',
 					'fa_timestamp' => 'oi_timestamp',
+					'fa_sha1' => 'oi_sha1',
 				), $where, __METHOD__ );
 		}
 	}
@@ -2038,7 +2040,12 @@ class LocalFileRestoreBatch {
 			$deletedRel = $this->file->repo->getDeletedHashPath( $row->fa_storage_key ) . $row->fa_storage_key;
 			$deletedUrl = $this->file->repo->getVirtualUrl() . '/deleted/' . $deletedRel;
 
-			$sha1 = substr( $row->fa_storage_key, 0, strcspn( $row->fa_storage_key, '.' ) );
+			if( isset( $row->fa_sha1 ) && $row->fa_sha1 !== '' ) {
+				$sha1 = $row->fa_sha1;
+			} else {
+				// fa_sha1 not selected, or still the column default '': derive it from the key
+				$sha1 = LocalRepo::getHashFromKey( $row->fa_storage_key );
+			}
 
 			# Fix leading zero
 			if ( strlen( $sha1 ) == 32 && $sha1[0] == '0' ) {
diff --git a/includes/installer/DatabaseUpdater.php b/includes/installer/DatabaseUpdater.php
index e00aef517c..7223003180 100644
--- a/includes/installer/DatabaseUpdater.php
+++ b/includes/installer/DatabaseUpdater.php
@@ -64,6 +64,7 @@ abstract class DatabaseUpdater {
 		'PopulateRevisionSha1',
 		'PopulateImageSha1',
 		'FixExtLinksProtocolRelative',
+		'PopulateFilearchiveSha1',
 	);
 
 	/**
diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php
index c9ee446400..a6cb13f0ad 100644
--- a/includes/installer/MysqlUpdater.php
+++ b/includes/installer/MysqlUpdater.php
@@ -224,6 +224,7 @@ class MysqlUpdater extends DatabaseUpdater {
 			array( 'dropField', 'site_stats', 'ss_admins', 'patch-drop-ss_admins.sql' ),
 			array( 'dropField', 'recentchanges', 'rc_moved_to_title', 'patch-rc_moved.sql' ),
 			array( 'addTable', 'sites', 'patch-sites.sql' ),
+			array( 'addField', 'filearchive', 'fa_sha1', 'patch-fa_sha1.sql' ),
 		);
 	}
 
diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php
index 6ec47063e2..e7f39396c9 100644
--- a/includes/installer/SqliteUpdater.php
+++ b/includes/installer/SqliteUpdater.php
@@ -104,6 +104,7 @@ class SqliteUpdater extends DatabaseUpdater {
 			array( 'dropField', 'site_stats', 'ss_admins', 'patch-drop-ss_admins.sql' ),
 			array( 'dropField', 'recentchanges', 'rc_moved_to_title', 'patch-rc_moved.sql' ),
 			array( 'addTable', 'sites', 'patch-sites.sql' ),
+			array( 'addField', 'filearchive', 'fa_sha1', 'patch-fa_sha1.sql' ),
 		);
 	}
 
diff --git a/includes/specials/SpecialUndelete.php b/includes/specials/SpecialUndelete.php
index b735b18593..036b867153 100644
--- a/includes/specials/SpecialUndelete.php
+++ b/includes/specials/SpecialUndelete.php
@@ -175,7 +175,8 @@ class PageArchive {
 				'fa_user',
 				'fa_user_text',
 				'fa_timestamp',
-				'fa_deleted' ),
+				'fa_deleted',
+				'fa_sha1' ),
 			array( 'fa_name' => $this->title->getDBkey() ),
 			__METHOD__,
 			array( 'ORDER BY' => 'fa_timestamp DESC' ) );
diff --git a/maintenance/archives/patch-fa_sha1.sql b/maintenance/archives/patch-fa_sha1.sql
new file mode 100644
index 0000000000..931bc44de8
--- /dev/null
+++ b/maintenance/archives/patch-fa_sha1.sql
@@ -0,0 +1,4 @@
+-- Add fa_sha1 and related index
+ALTER TABLE /*$wgDBprefix*/filearchive
+  ADD COLUMN fa_sha1 varbinary(32) NOT NULL default '';
+CREATE INDEX /*i*/fa_sha1 ON /*$wgDBprefix*/filearchive (fa_sha1(10));
diff --git a/maintenance/deleteArchivedFiles.inc b/maintenance/deleteArchivedFiles.inc
index e638b17cb4..cc0970367b 100644
--- a/maintenance/deleteArchivedFiles.inc
+++ b/maintenance/deleteArchivedFiles.inc
@@ -35,14 +35,19 @@ class DeleteArchivedFilesImplementation {
 		$repo = RepoGroup::singleton()->getLocalRepo();
 		# Get "active" revisions from the filearchive table
$output->handleOutput( "Searching for and deleting archived files...\n" );
-		$res = $dbw->query( "SELECT fa_id,fa_storage_group,fa_storage_key FROM $tbl_arch" );
+		$res = $dbw->query( "SELECT fa_id,fa_storage_group,fa_storage_key,fa_sha1 FROM $tbl_arch" );
 		$count = 0;
 		foreach ( $res as $row ) {
 			$key = $row->fa_storage_key;
 			$group = $row->fa_storage_group;
 			$id = $row->fa_id;
 			$path = $repo->getZonePath( 'deleted' ) . '/' . $repo->getDeletedHashPath( $key ) . $key;
-			$sha1 = substr( $key, 0, strcspn( $key, '.' ) );
+			if( isset( $row->fa_sha1 ) && $row->fa_sha1 !== '' ) {
+				$sha1 = $row->fa_sha1;
+			} else {
+				// fa_sha1 still holds the column default '': derive it from the key
+				$sha1 = LocalRepo::getHashFromKey( $key );
+			}
 			// Check if the file is used anywhere...
 			$inuse = $dbw->selectField( 'oldimage', '1',
 				array( 'oi_sha1' => $sha1,
diff --git a/maintenance/populateFilearchiveSha1.php b/maintenance/populateFilearchiveSha1.php
new file mode 100644
index 0000000000..e9baef9aa0
--- /dev/null
+++ b/maintenance/populateFilearchiveSha1.php
@@ -0,0 +1,105 @@
+mDescription = "Populate the fa_sha1 field from fa_storage_key";
+	}
+
+	protected function getUpdateKey() {
+		return 'populate fa_sha1';
+	}
+
+	protected function updateSkippedMessage() {
+		return 'fa_sha1 column of filearchive table already populated.';
+	}
+
+	/**
+	 * Fill fa_sha1 for filearchive rows that still hold the column
+	 * default (''), deriving the hash from fa_storage_key.
+	 *
+	 * @return bool Always true
+	 */
+	public function doDBUpdates() {
+		$startTime = microtime( true );
+		$dbw = wfGetDB( DB_MASTER );
+		$table = 'filearchive';
+		// Rows with an empty storage key would keep fa_sha1 = '' after the update and
+		// be selected again on every pass, so a full batch of them would never finish.
+		$conds = array( 'fa_sha1' => '', 'fa_storage_key IS NOT NULL', "fa_storage_key != ''" );
+		$this->output( "Populating fa_sha1 field from fa_storage_key\n" );
+		$endId = $dbw->selectField( $table, 'MAX(fa_id)', false, __METHOD__ );
+
+		$batchSize = $this->mBatchSize;
+		$done = 0;
+
+		do {
+			$res = $dbw->select(
+				$table,
+				array( 'fa_id', 'fa_storage_key' ),
+				$conds,
+				__METHOD__,
+				array( 'LIMIT' => $batchSize )
+			);
+
+			$i = 0;
+			foreach ( $res as $row ) {
+				$sha1 = LocalRepo::getHashFromKey( $row->fa_storage_key );
+				$dbw->update( $table,
+					array( 'fa_sha1' => $sha1 ),
+					array( 'fa_id' => $row->fa_id ),
+					__METHOD__
+				);
+				$lastId = $row->fa_id;
+				$i++;
+			}
+
+			$done += $i;
+			if( $i !== $batchSize ) {
+				break;
+			}
+
+			// print status and let slaves catch up
+			$this->output( sprintf(
+				"id %d done (up to %d), %5.3f%% \r", $lastId, $endId, $lastId / $endId * 100 ) );
+			wfWaitForSlaves();
+		} while( true );
+
+		$processingTime = microtime( true ) - $startTime;
+		$this->output( sprintf( "\nDone %d files in %.1f seconds\n", $done, $processingTime ) );
+
+		return true; // success; presumably records the update key as done - confirm
+	}
+}
+
+$maintClass = "PopulateFilearchiveSha1";
+require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index 51115f15a6..0da3c7592a 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -945,7 +945,10 @@ CREATE TABLE /*_*/filearchive (
   fa_timestamp binary(14) default '',
 
   -- Visibility of deleted revisions, bitfield
-  fa_deleted tinyint unsigned NOT NULL default 0
+  fa_deleted tinyint unsigned NOT NULL default 0,
+
+  -- sha1 hash of file content
+  fa_sha1 varbinary(32) NOT NULL default ''
 ) /*$wgDBTableOptions*/;
 
 -- pick out by image name
@@ -956,6 +959,8 @@ CREATE INDEX /*i*/fa_storage_group ON /*_*/filearchive (fa_storage_group, fa_sto
 CREATE INDEX /*i*/fa_deleted_timestamp ON /*_*/filearchive (fa_deleted_timestamp);
 -- sort by uploader
 CREATE INDEX /*i*/fa_user_timestamp ON /*_*/filearchive (fa_user_text,fa_timestamp);
+-- find file by sha1, 10 bytes will be enough for hashes to be indexed
+CREATE INDEX /*i*/fa_sha1 ON /*_*/filearchive (fa_sha1(10));
 
 
 --
-- 
2.20.1