Add separate fa_sha1 field to filearchive table
author umherirrender <umherirrender_de.wp@web.de>
Sun, 14 Oct 2012 18:58:25 +0000 (20:58 +0200)
committer umherirrender <umherirrender_de.wp@web.de>
Sun, 14 Oct 2012 18:58:25 +0000 (20:58 +0200)
This allows sha1 searches with the api in miser mode for deleted files

Added a maintenance script to populate the existing rows.
Added the new field to the selects and handled it in all places where needed.

Using a 10 byte index for the new field per
http://lists.wikimedia.org/pipermail/wikitech-l/2012-September/063429.html

Change-Id: Ie54a513fe361202e63df44be44a0fdd91926c974

13 files changed:
RELEASE-NOTES-1.21
includes/AutoLoader.php
includes/api/ApiQueryFilearchive.php
includes/filerepo/file/ArchivedFile.php
includes/filerepo/file/LocalFile.php
includes/installer/DatabaseUpdater.php
includes/installer/MysqlUpdater.php
includes/installer/SqliteUpdater.php
includes/specials/SpecialUndelete.php
maintenance/archives/patch-fa_sha1.sql [new file with mode: 0644]
maintenance/deleteArchivedFiles.inc
maintenance/populateFilearchiveSha1.php [new file with mode: 0644]
maintenance/tables.sql

index c0853f2..ade0b68 100644 (file)
@@ -24,6 +24,8 @@ production.
 * Added new backend to represent and store information about sites and site
   specific configuration.
 * jQuery UI upgraded from 1.8.23 to 1.8.24.
+* Added separate fa_sha1 field to filearchive table. This allows sha1
+  searches with the api in miser mode for deleted files.
 
 === Bug fixes in 1.21 ===
 * (bug 40353) SpecialDoubleRedirect should support interwiki redirects.
index e8df8d4..0e310e2 100644 (file)
@@ -1051,6 +1051,7 @@ $wgAutoloadLocalClasses = array(
        'FixExtLinksProtocolRelative' => 'maintenance/fixExtLinksProtocolRelative.php',
        'PopulateCategory' => 'maintenance/populateCategory.php',
        'PopulateImageSha1' => 'maintenance/populateImageSha1.php',
+       'PopulateFilearchiveSha1' => 'maintenance/populateFilearchiveSha1.php',
        'PopulateLogSearch' => 'maintenance/populateLogSearch.php',
        'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php',
        'PopulateParentId' => 'maintenance/populateParentId.php',
index a5486ef..dbca1d9 100644 (file)
@@ -64,7 +64,7 @@ class ApiQueryFilearchive extends ApiQueryBase {
                $this->addTables( 'filearchive' );
 
                $this->addFields( array( 'fa_name', 'fa_deleted' ) );
-               $this->addFieldsIf( 'fa_storage_key', $fld_sha1 );
+               $this->addFieldsIf( 'fa_sha1', $fld_sha1 );
                $this->addFieldsIf( 'fa_timestamp', $fld_timestamp );
                $this->addFieldsIf( array( 'fa_user', 'fa_user_text' ), $fld_user );
                $this->addFieldsIf( array( 'fa_height', 'fa_width', 'fa_size' ), $fld_dimensions || $fld_size );
@@ -101,11 +101,6 @@ class ApiQueryFilearchive extends ApiQueryBase {
                $sha1Set = isset( $params['sha1'] );
                $sha1base36Set = isset( $params['sha1base36'] );
                if ( $sha1Set || $sha1base36Set ) {
-                       global $wgMiserMode;
-                       if ( $wgMiserMode  ) {
-                               $this->dieUsage( 'Search by hash disabled in Miser Mode', 'hashsearchdisabled' );
-                       }
-
                        $sha1 = false;
                        if ( $sha1Set ) {
                                if ( !$this->validateSha1Hash( $params['sha1'] ) ) {
@@ -119,7 +114,7 @@ class ApiQueryFilearchive extends ApiQueryBase {
                                $sha1 = $params['sha1base36'];
                        }
                        if ( $sha1 ) {
-                               $this->addWhere( 'fa_storage_key ' . $db->buildLike( "{$sha1}.", $db->anyString() ) );
+                               $this->addWhereFld( 'fa_sha1', $sha1 );
                        }
                }
 
@@ -155,7 +150,7 @@ class ApiQueryFilearchive extends ApiQueryBase {
                        self::addTitleInfo( $file, $title );
 
                        if ( $fld_sha1 ) {
-                               $file['sha1'] = wfBaseConvert( LocalRepo::getHashFromKey( $row->fa_storage_key ), 36, 16, 40 );
+                               $file['sha1'] = wfBaseConvert( $row->fa_sha1, 36, 16, 40 );
                        }
                        if ( $fld_timestamp ) {
                                $file['timestamp'] = wfTimestamp( TS_ISO_8601, $row->fa_timestamp );
@@ -276,8 +271,8 @@ class ApiQueryFilearchive extends ApiQueryBase {
                        'prefix' => 'Search for all image titles that begin with this value',
                        'dir' => 'The direction in which to list',
                        'limit' => 'How many images to return in total',
-                       'sha1' => "SHA1 hash of image. Overrides {$this->getModulePrefix()}sha1base36. Disabled in Miser Mode",
-                       'sha1base36' => 'SHA1 hash of image in base 36 (used in MediaWiki). Disabled in Miser Mode',
+                       'sha1' => "SHA1 hash of image. Overrides {$this->getModulePrefix()}sha1base36",
+                       'sha1base36' => 'SHA1 hash of image in base 36 (used in MediaWiki)',
                        'prop' => array(
                                'What image information to get:',
                                ' sha1              - Adds SHA-1 hash for the image',
index c9751be..694623b 100644 (file)
@@ -47,6 +47,7 @@ class ArchivedFile {
                $timestamp, # time of upload
                $dataLoaded, # Whether or not all this has been loaded from the database (loadFromXxx)
                $deleted, # Bitfield akin to rev_deleted
+               $sha1, # sha1 hash of file content
                $pageCount,
                $archive_name;
 
@@ -87,6 +88,7 @@ class ArchivedFile {
                $this->deleted = 0;
                $this->dataLoaded = false;
                $this->exists = false;
+               $this->sha1 = '';
 
                if( $title instanceof Title ) {
                        $this->title = File::normalizeTitle( $title, 'exception' );
@@ -153,7 +155,8 @@ class ArchivedFile {
                                        'fa_user',
                                        'fa_user_text',
                                        'fa_timestamp',
-                                       'fa_deleted' ),
+                                       'fa_deleted',
+                                       'fa_sha1' ),
                                $conds,
                                __METHOD__,
                                array( 'ORDER BY' => 'fa_timestamp DESC' ) );
@@ -165,23 +168,7 @@ class ArchivedFile {
                        $row = $ret->fetchObject();
 
                        // initialize fields for filestore image object
-                       $this->id = intval($row->fa_id);
-                       $this->name = $row->fa_name;
-                       $this->archive_name = $row->fa_archive_name;
-                       $this->group = $row->fa_storage_group;
-                       $this->key = $row->fa_storage_key;
-                       $this->size = $row->fa_size;
-                       $this->bits = $row->fa_bits;
-                       $this->width = $row->fa_width;
-                       $this->height = $row->fa_height;
-                       $this->metadata = $row->fa_metadata;
-                       $this->mime = "$row->fa_major_mime/$row->fa_minor_mime";
-                       $this->media_type = $row->fa_media_type;
-                       $this->description = $row->fa_description;
-                       $this->user = $row->fa_user;
-                       $this->user_text = $row->fa_user_text;
-                       $this->timestamp = $row->fa_timestamp;
-                       $this->deleted = $row->fa_deleted;
+                       $this->loadFromRow( $row );
                } else {
                        throw new MWException( 'This title does not correspond to an image page.' );
                }
@@ -200,28 +187,42 @@ class ArchivedFile {
         */
        public static function newFromRow( $row ) {
                $file = new ArchivedFile( Title::makeTitle( NS_FILE, $row->fa_name ) );
-
-               $file->id = intval($row->fa_id);
-               $file->name = $row->fa_name;
-               $file->archive_name = $row->fa_archive_name;
-               $file->group = $row->fa_storage_group;
-               $file->key = $row->fa_storage_key;
-               $file->size = $row->fa_size;
-               $file->bits = $row->fa_bits;
-               $file->width = $row->fa_width;
-               $file->height = $row->fa_height;
-               $file->metadata = $row->fa_metadata;
-               $file->mime = "$row->fa_major_mime/$row->fa_minor_mime";
-               $file->media_type = $row->fa_media_type;
-               $file->description = $row->fa_description;
-               $file->user = $row->fa_user;
-               $file->user_text = $row->fa_user_text;
-               $file->timestamp = $row->fa_timestamp;
-               $file->deleted = $row->fa_deleted;
-
+               $file->loadFromRow( $row );
                return $file;
        }
 
+       /**
+        * Load ArchivedFile object fields from a DB row.
+        *
+        * The sha1 hash is taken from fa_sha1 when the row carries a non-empty
+        * value; otherwise it is derived from the storage key.
+        *
+        * @param $row Object database row
+        * @since 1.21
+        */
+       public function loadFromRow( $row ) {
+               $this->id = intval( $row->fa_id );
+               $this->name = $row->fa_name;
+               $this->archive_name = $row->fa_archive_name;
+               $this->group = $row->fa_storage_group;
+               $this->key = $row->fa_storage_key;
+               $this->size = $row->fa_size;
+               $this->bits = $row->fa_bits;
+               $this->width = $row->fa_width;
+               $this->height = $row->fa_height;
+               $this->metadata = $row->fa_metadata;
+               $this->mime = "$row->fa_major_mime/$row->fa_minor_mime";
+               $this->media_type = $row->fa_media_type;
+               $this->description = $row->fa_description;
+               $this->user = $row->fa_user;
+               $this->user_text = $row->fa_user_text;
+               $this->timestamp = $row->fa_timestamp;
+               $this->deleted = $row->fa_deleted;
+               if( isset( $row->fa_sha1 ) && $row->fa_sha1 !== '' ) {
+                       $this->sha1 = $row->fa_sha1;
+               } else {
+                       // Either the field was not selected, or the row has not been
+                       // populated yet (the column defaults to ''): derive the hash
+                       // from the storage key instead of exposing an empty hash.
+                       $this->sha1 = LocalRepo::getHashFromKey( $this->key );
+               }
+       }
+
        /**
         * Return the associated title object
         *
@@ -381,6 +382,17 @@ class ArchivedFile {
                return wfTimestamp( TS_MW, $this->timestamp );
        }
 
+       /**
+        * Return the SHA-1 hash of the file content, encoded in base 36.
+        *
+        * The object is loaded first, so the value is available even when
+        * nothing has been read from the database yet.
+        *
+        * @return string
+        * @since 1.21
+        */
+       public function getSha1() {
+               $this->load();
+
+               return $this->sha1;
+       }
+
        /**
         * Return the user ID of the uploader.
         *
index 05958d6..caa93a4 100644 (file)
@@ -1774,7 +1774,8 @@ class LocalFileDeleteBatch {
                                        'fa_description'  => 'img_description',
                                        'fa_user'         => 'img_user',
                                        'fa_user_text'    => 'img_user_text',
-                                       'fa_timestamp'    => 'img_timestamp'
+                                       'fa_timestamp'    => 'img_timestamp',
+                                       'fa_sha1'         => 'img_sha1',
                                ), $where, __METHOD__ );
                }
 
@@ -1806,6 +1807,7 @@ class LocalFileDeleteBatch {
                                        'fa_user'         => 'oi_user',
                                        'fa_user_text'    => 'oi_user_text',
                                        'fa_timestamp'    => 'oi_timestamp',
+                                       'fa_sha1'         => 'oi_sha1',
                                ), $where, __METHOD__ );
                }
        }
@@ -2038,7 +2040,12 @@ class LocalFileRestoreBatch {
                        $deletedRel = $this->file->repo->getDeletedHashPath( $row->fa_storage_key ) . $row->fa_storage_key;
                        $deletedUrl = $this->file->repo->getVirtualUrl() . '/deleted/' . $deletedRel;
 
-                       $sha1 = substr( $row->fa_storage_key, 0, strcspn( $row->fa_storage_key, '.' ) );
+                       if( isset( $row->fa_sha1 ) ) {
+                               $sha1 = $row->fa_sha1;
+                       } else {
+                               // old row, populate from key
+                               $sha1 = LocalRepo::getHashFromKey( $row->fa_storage_key );
+                       }
 
                        # Fix leading zero
                        if ( strlen( $sha1 ) == 32 && $sha1[0] == '0' ) {
index e00aef5..7223003 100644 (file)
@@ -64,6 +64,7 @@ abstract class DatabaseUpdater {
                'PopulateRevisionSha1',
                'PopulateImageSha1',
                'FixExtLinksProtocolRelative',
+               'PopulateFilearchiveSha1',
        );
 
        /**
index c9ee446..a6cb13f 100644 (file)
@@ -224,6 +224,7 @@ class MysqlUpdater extends DatabaseUpdater {
                        array( 'dropField', 'site_stats',   'ss_admins',        'patch-drop-ss_admins.sql' ),
                        array( 'dropField', 'recentchanges', 'rc_moved_to_title',            'patch-rc_moved.sql' ),
                        array( 'addTable', 'sites',                            'patch-sites.sql' ),
+                       array( 'addField', 'filearchive',   'fa_sha1',          'patch-fa_sha1.sql' ),
                );
        }
 
index 6ec4706..e7f3939 100644 (file)
@@ -104,6 +104,7 @@ class SqliteUpdater extends DatabaseUpdater {
                        array( 'dropField', 'site_stats',    'ss_admins',         'patch-drop-ss_admins.sql' ),
                        array( 'dropField', 'recentchanges', 'rc_moved_to_title', 'patch-rc_moved.sql' ),
                        array( 'addTable', 'sites',                            'patch-sites.sql' ),
+                       array( 'addField', 'filearchive',   'fa_sha1',          'patch-fa_sha1.sql' ),
                );
        }
 
index b735b18..036b867 100644 (file)
@@ -175,7 +175,8 @@ class PageArchive {
                                        'fa_user',
                                        'fa_user_text',
                                        'fa_timestamp',
-                                       'fa_deleted' ),
+                                       'fa_deleted',
+                                       'fa_sha1' ),
                                array( 'fa_name' => $this->title->getDBkey() ),
                                __METHOD__,
                                array( 'ORDER BY' => 'fa_timestamp DESC' ) );
diff --git a/maintenance/archives/patch-fa_sha1.sql b/maintenance/archives/patch-fa_sha1.sql
new file mode 100644 (file)
index 0000000..931bc44
--- /dev/null
@@ -0,0 +1,4 @@
+-- Add fa_sha1 (sha1 hash of the file content) to filearchive, and the related index
+ALTER TABLE /*$wgDBprefix*/filearchive
+  ADD COLUMN fa_sha1 varbinary(32) NOT NULL default '';
+-- Find files by sha1; only the first 10 bytes of the hash are indexed
+CREATE INDEX /*i*/fa_sha1 ON /*$wgDBprefix*/filearchive (fa_sha1(10));
index e638b17..cc09703 100644 (file)
@@ -35,14 +35,19 @@ class DeleteArchivedFilesImplementation {
                $repo = RepoGroup::singleton()->getLocalRepo();
                # Get "active" revisions from the filearchive table
                $output->handleOutput( "Searching for and deleting archived files...\n" );
-               $res = $dbw->query( "SELECT fa_id,fa_storage_group,fa_storage_key FROM $tbl_arch" );
+               $res = $dbw->query( "SELECT fa_id,fa_storage_group,fa_storage_key,fa_sha1 FROM $tbl_arch" );
                $count = 0;
                foreach ( $res as $row ) {
                        $key = $row->fa_storage_key;
                        $group = $row->fa_storage_group;
                        $id = $row->fa_id;
                        $path = $repo->getZonePath( 'deleted' ) . '/' . $repo->getDeletedHashPath( $key ) . $key;
-                       $sha1 = substr( $key, 0, strcspn( $key, '.' ) );
+                       if( isset( $row->fa_sha1 ) ) {
+                               $sha1 = $row->fa_sha1;
+                       } else {
+                               // old row, populate from key
+                               $sha1 = LocalRepo::getHashFromKey( $key );
+                       }
                        // Check if the file is used anywhere...
                        $inuse = $dbw->selectField( 'oldimage', '1',
                                array( 'oi_sha1' => $sha1,
diff --git a/maintenance/populateFilearchiveSha1.php b/maintenance/populateFilearchiveSha1.php
new file mode 100644 (file)
index 0000000..e9baef9
--- /dev/null
@@ -0,0 +1,97 @@
+<?php
+/**
+ * Optional upgrade script to populate the fa_sha1 field
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/**
+ * Maintenance script to populate the fa_sha1 field.
+ *
+ * @ingroup Maintenance
+ * @since 1.21
+ */
+class PopulateFilearchiveSha1 extends LoggedUpdateMaintenance {
+       /**
+        * Set the script description and a default batch size, so that
+        * doDBUpdates() really processes the table in batches.
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Populate the fa_sha1 field from fa_storage_key";
+               $this->setBatchSize( 200 );
+       }
+
+       /**
+        * @return string Key identifying this update (presumably used by
+        *   LoggedUpdateMaintenance to remember that it already ran - confirm there)
+        */
+       protected function getUpdateKey() {
+               return 'populate fa_sha1';
+       }
+
+       /**
+        * @return string Message for the case where the update is skipped
+        */
+       protected function updateSkippedMessage() {
+               return 'fa_sha1 column of filearchive table already populated.';
+       }
+
+       /**
+        * Fill fa_sha1 for every filearchive row that has a storage key but no
+        * hash yet.
+        *
+        * Rows are walked in ascending fa_id order, one batch at a time, so each
+        * row is visited exactly once: a row whose storage key yields no hash is
+        * skipped and can never be selected again by a later batch.
+        *
+        * @return bool Always true
+        */
+       public function doDBUpdates() {
+               $startTime = microtime( true );
+               $dbw = wfGetDB( DB_MASTER );
+               $table = 'filearchive';
+               $this->output( "Populating fa_sha1 field from fa_storage_key\n" );
+               $endId = $dbw->selectField( $table, 'MAX(fa_id)', false, __METHOD__ );
+
+               // Never run with an empty LIMIT, whatever the configured value is
+               $batchSize = max( 1, intval( $this->mBatchSize ) );
+               $lastId = 0;
+               $done = 0;
+
+               do {
+                       $res = $dbw->select(
+                               $table,
+                               array( 'fa_id', 'fa_storage_key' ),
+                               array(
+                                       'fa_sha1' => '',
+                                       'fa_storage_key IS NOT NULL',
+                                       // continue after the last row of the previous batch
+                                       'fa_id > ' . intval( $lastId ),
+                               ),
+                               __METHOD__,
+                               array( 'LIMIT' => $batchSize, 'ORDER BY' => 'fa_id' )
+                       );
+
+                       $fetched = 0;
+                       foreach ( $res as $row ) {
+                               $lastId = $row->fa_id;
+                               $fetched++;
+
+                               $sha1 = LocalRepo::getHashFromKey( $row->fa_storage_key );
+                               if ( !is_string( $sha1 ) || $sha1 === '' ) {
+                                       // no hash can be derived from this key, leave the row alone
+                                       continue;
+                               }
+
+                               $dbw->update( $table,
+                                       array( 'fa_sha1' => $sha1 ),
+                                       array( 'fa_id' => $row->fa_id ),
+                                       __METHOD__
+                               );
+                               $done++;
+                       }
+
+                       if ( $fetched < $batchSize ) {
+                               // short batch: nothing left to select
+                               break;
+                       }
+
+                       // print status and let slaves catch up
+                       $this->output( sprintf(
+                               "id %d done (up to %d), %5.3f%%  \r", $lastId, $endId, $lastId / $endId * 100 ) );
+                       wfWaitForSlaves();
+               } while( true );
+
+               $processingTime = microtime( true ) - $startTime;
+               $this->output( sprintf( "\nDone %d files in %.1f seconds\n", $done, $processingTime ) );
+
+               // All matching rows were visited; report the update as successful
+               return true;
+       }
+}
+
+$maintClass = "PopulateFilearchiveSha1";
+require_once( RUN_MAINTENANCE_IF_MAIN );
index 51115f1..0da3c75 100644 (file)
@@ -945,7 +945,10 @@ CREATE TABLE /*_*/filearchive (
   fa_timestamp binary(14) default '',
 
   -- Visibility of deleted revisions, bitfield
-  fa_deleted tinyint unsigned NOT NULL default 0
+  fa_deleted tinyint unsigned NOT NULL default 0,
+
+  -- sha1 hash of file content
+  fa_sha1 varbinary(32) NOT NULL default ''
 ) /*$wgDBTableOptions*/;
 
 -- pick out by image name
@@ -956,6 +959,8 @@ CREATE INDEX /*i*/fa_storage_group ON /*_*/filearchive (fa_storage_group, fa_sto
 CREATE INDEX /*i*/fa_deleted_timestamp ON /*_*/filearchive (fa_deleted_timestamp);
 -- sort by uploader
 CREATE INDEX /*i*/fa_user_timestamp ON /*_*/filearchive (fa_user_text,fa_timestamp);
+-- find file by sha1, 10 bytes will be enough for hashes to be indexed
+CREATE INDEX /*i*/fa_sha1 ON /*_*/filearchive (fa_sha1(10));
 
 
 --