From 24276faf6835424f27a423778fd0049c7a21f9be Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Sat, 21 Sep 2013 16:08:59 -0300 Subject: [PATCH] Add Special:ListDuplicatedFiles expensive query special page. I saw some comments recently on Commons suggesting that better ways are needed to manage duplicate files (there are tools for checking whether a specific file is a duplicate, but no backlog of outstanding duplicate files). This seems like a fairly easy first step in that direction. I wasn't sure whether this should be an image gallery type query page, or just a list. I think in this case a plain list is more useful. Change-Id: Ibe4b9da71ca6451ec4e6b0050feaf3ca70e1b888 --- includes/AutoLoader.php | 1 + includes/QueryPage.php | 1 + includes/specialpage/SpecialPageFactory.php | 1 + .../specials/SpecialListDuplicatedFiles.php | 112 ++++++++++++++++++ languages/messages/MessagesEn.php | 5 + languages/messages/MessagesQqq.php | 5 + maintenance/language/messages.inc | 6 + 7 files changed, 131 insertions(+) create mode 100644 includes/specials/SpecialListDuplicatedFiles.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 9d764e144e..1a364c97c2 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -953,6 +953,7 @@ $wgAutoloadLocalClasses = array( 'IPBlockForm' => 'includes/specials/SpecialBlock.php', 'LinkSearchPage' => 'includes/specials/SpecialLinkSearch.php', 'ListredirectsPage' => 'includes/specials/SpecialListredirects.php', + 'ListDuplicatedFilesPage' => 'includes/specials/SpecialListDuplicatedFiles.php', 'LoginForm' => 'includes/specials/SpecialUserlogin.php', 'LonelyPagesPage' => 'includes/specials/SpecialLonelypages.php', 'LongPagesPage' => 'includes/specials/SpecialLongpages.php', diff --git a/includes/QueryPage.php b/includes/QueryPage.php index 69629e05f0..29bbd52b78 100644 --- a/includes/QueryPage.php +++ b/includes/QueryPage.php @@ -77,6 +77,7 @@ abstract class QueryPage extends SpecialPage { array( 'DeadendPagesPage', 'Deadendpages' ), 
array( 'DoubleRedirectsPage', 'DoubleRedirects' ), array( 'FileDuplicateSearchPage', 'FileDuplicateSearch' ), + array( 'ListDuplicatedFilesPage', 'ListDuplicatedFiles'), array( 'LinkSearchPage', 'LinkSearch' ), array( 'ListredirectsPage', 'Listredirects' ), array( 'LonelyPagesPage', 'Lonelypages' ), diff --git a/includes/specialpage/SpecialPageFactory.php b/includes/specialpage/SpecialPageFactory.php index c6735e695a..654e7ea69c 100644 --- a/includes/specialpage/SpecialPageFactory.php +++ b/includes/specialpage/SpecialPageFactory.php @@ -117,6 +117,7 @@ class SpecialPageFactory { 'FileDuplicateSearch' => 'FileDuplicateSearchPage', 'Upload' => 'SpecialUpload', 'UploadStash' => 'SpecialUploadStash', + 'ListDuplicatedFiles' => 'ListDuplicatedFilesPage', // Data and tools 'Statistics' => 'SpecialStatistics', diff --git a/includes/specials/SpecialListDuplicatedFiles.php b/includes/specials/SpecialListDuplicatedFiles.php new file mode 100644 index 0000000000..9401fca48b --- /dev/null +++ b/includes/specials/SpecialListDuplicatedFiles.php @@ -0,0 +1,112 @@ + array( 'image' ), + 'fields' => array( + 'namespace' => NS_FILE, + 'title' => 'MIN(img_name)', + 'value' => 'count(*)' + ), + 'options' => array( + 'GROUP BY' => 'img_sha1', + 'HAVING' => 'count(*) > 1', + ), + ); + } + + /** + * Pre-fill the link cache + * + * @param DatabaseBase $db + * @param ResultWrapper $res + */ + function preprocessResults( $db, $res ) { + if ( $res->numRows() > 0 ) { + $linkBatch = new LinkBatch(); + + foreach ( $res as $row ) { + $linkBatch->add( $row->namespace, $row->title ); + } + + $res->seek( 0 ); + $linkBatch->execute(); + } + } + + + /** + * @param Skin $skin + * @param object $result Result row + * @return string + */ + function formatResult( $skin, $result ) { + // Future version might include a list of the first 5 duplicates + // perhaps separated by an "↔". 
+ $image1 = Title::makeTitle( $result->namespace, $result->title ); + $dupeSearch = SpecialPage::getTitleFor( 'FileDuplicateSearch', $image1->getDBKey() ); + + $msg = wfMessage( 'listduplicatedfiles-entry' ) + ->params( $image1->getText() ) + ->numParams( $result->value - 1 ) + ->params( $dupeSearch->getPrefixedDBKey() ); + + return $msg->parse(); + } + + protected function getGroupName() { + return 'media'; + } +} diff --git a/languages/messages/MessagesEn.php b/languages/messages/MessagesEn.php index 222333b2ab..192ef8d6df 100644 --- a/languages/messages/MessagesEn.php +++ b/languages/messages/MessagesEn.php @@ -423,6 +423,7 @@ $specialPageAliases = array( 'Listfiles' => array( 'ListFiles', 'FileList', 'ImageList' ), 'Listgrouprights' => array( 'ListGroupRights', 'UserGroupRights' ), 'Listredirects' => array( 'ListRedirects' ), + 'ListDuplicatedFiles' => array( 'ListDuplicatedFiles', 'ListFileDuplicates' ), 'Listusers' => array( 'ListUsers', 'UserList' ), 'Lockdb' => array( 'LockDB' ), 'Log' => array( 'Log', 'Logs' ), @@ -2641,6 +2642,10 @@ Input: contenttype/subtype, e.g. image/jpeg.', 'listredirects' => 'List of redirects', 'listredirects-summary' => '', # do not translate or duplicate this message to other languages +'listduplicatedfiles' => 'List of files with duplicates', +'listduplicatedfiles-summary' => 'This is a list of files where the most recent version of the file is a duplicate of the most recent version of some other file. 
Only local files are considered.', +'listduplicatedfiles-entry' => '[[:File:$1|$1]] has [[$3|{{PLURAL:$2|a duplicate|$2 duplicates}}]].', + # Unused templates 'unusedtemplates' => 'Unused templates', 'unusedtemplates-summary' => '', # do not translate or duplicate this message to other languages diff --git a/languages/messages/MessagesQqq.php b/languages/messages/MessagesQqq.php index 83383c19f5..520f6c6bf5 100644 --- a/languages/messages/MessagesQqq.php +++ b/languages/messages/MessagesQqq.php @@ -4895,6 +4895,11 @@ See also: # List redirects 'listredirects' => '{{doc-special|ListRedirects}}', +# List duplicates +'listduplicatedfiles' => '{{doc-special|ListDuplicatedFiles}}', +'listduplicatedfiles-summary' => 'Summary at top of Special:ListDuplicatedFiles', +'listduplicatedfiles-entry' => 'A list item on Special:ListDuplicatedFiles. $1 is the file name (no namespace prefix). $2 is the number of duplicates this file has. $3 is the name of the duplicate search page (aka "Special:FileDuplicateSearch/Foo.png" or "Spécial:Recherche_fichier_en_double/Firefox.png")', + # Unused templates 'unusedtemplates' => '{{doc-special|UnusedTemplates}}', 'unusedtemplatestext' => 'Shown on top of [[Special:Unusedtemplates]]', diff --git a/maintenance/language/messages.inc b/maintenance/language/messages.inc index 746c7d7e65..6b5506ef28 100644 --- a/maintenance/language/messages.inc +++ b/maintenance/language/messages.inc @@ -1690,6 +1690,11 @@ $wgMessageStructure = array( 'listredirects', 'listredirects-summary', ), + 'listduplicatedfiles' => array( + 'listduplicatedfiles', + 'listduplicatedfiles-summary', + 'listduplicatedfiles-entry', + ), 'unusedtemplates' => array( 'unusedtemplates', 'unusedtemplates-summary', @@ -4103,6 +4108,7 @@ future releases. 
Also note that since each list value is wrapped in a unique 'filedelete' => 'File deletion', 'mimesearch' => 'MIME search', 'unwatchedpages' => 'Unwatched pages', + 'listduplicatedfiles' => 'List duplicated files special page', 'listredirects' => 'List redirects', 'unusedtemplates' => 'Unused templates', 'randompage' => 'Random page', -- 2.20.1