Add script to clean up badly named image records. Doesn't touch files yet
author Brion Vibber <brion@users.mediawiki.org>
Thu, 13 Jul 2006 21:54:13 +0000 (21:54 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Thu, 13 Jul 2006 21:54:13 +0000 (21:54 +0000)
maintenance/cleanupImages.php [new file with mode: 0644]
maintenance/cleanupTable.inc
maintenance/cleanupTitles.php

diff --git a/maintenance/cleanupImages.php b/maintenance/cleanupImages.php
new file mode 100644 (file)
index 0000000..389ec30
--- /dev/null
@@ -0,0 +1,140 @@
+<?php
+/*
+ * Script to clean up broken, unparseable upload filenames.
+ *
+ * Usage: php cleanupImages.php [--fix]
+ * Options:
+ *   --fix  Actually clean up titles; otherwise just checks for them
+ *
+ * Copyright (C) 2005-2006 Brion Vibber <brion@pobox.com>
+ * http://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Brion Vibber <brion at pobox.com>
+ * @package MediaWiki
+ * @subpackage maintenance
+ */
+
+require_once( 'commandLine.inc' );
+require_once( 'cleanupTable.inc' );
+
+/**
+ * Cleanup pass over the 'image' table: finds rows whose img_name is empty,
+ * illegal, or not in normalized title form, and deletes or renames them.
+ *
+ * Only database rows are modified here; nothing in this class moves or
+ * renames the uploaded files themselves.
+ */
+class ImageCleanup extends TableCleanup {
+       /**
+        * @param bool $dryrun When true, changes are only logged, not applied
+        *                     (passed through to the parent constructor).
+        */
+       function __construct( $dryrun = false ) {
+               parent::__construct( 'image', $dryrun );
+       }
+
+       /**
+        * Check a single image row and fix its name if needed.
+        *
+        * @param object $row Result row; only $row->img_name is read
+        */
+       function processPage( $row ) {
+               global $wgContLang;
+
+               $source = $row->img_name;
+               if( $source == '' ) {
+                       // Ye olde empty rows. Just kill them.
+                       $this->killRow( $source );
+                       return $this->progress( 1 );
+               }
+
+               $cleaned = $source;
+
+               // About half of old bad image names have percent-codes
+               $cleaned = urldecode( $cleaned );
+
+               // Some are old latin-1
+               $cleaned = $wgContLang->checkTitleEncoding( $cleaned );
+
+               // Many of remainder look like non-normalized unicode
+               $cleaned = UtfNormal::cleanUp( $cleaned );
+
+               $title = Title::makeTitleSafe( NS_IMAGE, $cleaned );
+
+               if( is_null( $title ) ) {
+                       $this->log( "page $source ($cleaned) is illegal." );
+                       $safe = $this->buildSafeTitle( $cleaned );
+                       if( $safe === false ) {
+                               // buildSafeTitle() already logged the failure. Renaming
+                               // the row to a bogus false/empty name would only make
+                               // things worse, so leave it alone.
+                               return $this->progress( 0 );
+                       }
+                       $this->pokeFile( $source, $safe );
+                       return $this->progress( 1 );
+               }
+
+               if( $title->getDbKey() !== $source ) {
+                       $munged = $title->getDbKey();
+                       $this->log( "page $source ($munged) doesn't match self." );
+                       $this->pokeFile( $source, $munged );
+                       return $this->progress( 1 );
+               }
+
+               $this->progress( 0 );
+       }
+
+       /**
+        * Delete the image row(s) with the given name, or just log the
+        * intent when in dry-run mode.
+        *
+        * @param string $name img_name value of the row to remove
+        */
+       function killRow( $name ) {
+               if( $this->dryrun ) {
+                       $this->log( "DRY RUN: would delete bogus row '$name'" );
+               } else {
+                       $this->log( "deleting bogus row '$name'" );
+                       $db = wfGetDB( DB_MASTER );
+                       $db->delete( 'image',
+                               array( 'img_name' => $name ),
+                               __METHOD__ );
+               }
+       }
+
+       /**
+        * Rename an image row from $orig to $new. If $new is already taken,
+        * "_1", "_2", ... is inserted before the extension until a free name
+        * is found. In dry-run mode the rename is only logged.
+        *
+        * @param string $orig Current img_name
+        * @param string $new  Desired img_name
+        */
+       function pokeFile( $orig, $new ) {
+               $db = wfGetDB( DB_MASTER );
+               $version = 0;
+               $final = $new;
+
+               while( $db->selectField( 'image', 'img_name',
+                       array( 'img_name' => $final ), __METHOD__ ) ) {
+                       $this->log( "Rename conflicts with '$final'..." );
+                       $version++;
+                       // Always derive the candidate from the base name so the
+                       // suffixes don't pile up (foo_1_2.jpg).
+                       $final = $this->appendTitle( $new, "_$version" );
+               }
+
+               if( $this->dryrun ) {
+                       $this->log( "DRY RUN: would rename '$orig' to '$final'" );
+               } else {
+                       $this->log( "renaming '$orig' to '$final'" );
+                       $db->update( 'image',
+                               array( 'img_name' => $final ),
+                               array( 'img_name' => $orig ),
+                               __METHOD__ );
+               }
+       }
+
+       /**
+        * Insert $suffix before the final extension of $name
+        * ("Foo.jpg" + "_1" => "Foo_1.jpg"). A name without any dot gets the
+        * suffix appended at the end, so the result always differs from $name
+        * for a non-empty suffix.
+        *
+        * @param string $name
+        * @param string $suffix
+        * @return string
+        */
+       function appendTitle( $name, $suffix ) {
+               // Plain string surgery rather than preg_replace(): the suffix is
+               // then never interpreted as replacement syntax.
+               $dot = strrpos( $name, '.' );
+               if( $dot === false ) {
+                       return $name . $suffix;
+               }
+               return substr( $name, 0, $dot ) . $suffix . substr( $name, $dot );
+       }
+
+       /**
+        * Build a legal title key from an illegal name by replacing each
+        * character outside $wgLegalTitleChars with the escape produced by
+        * hexChar().
+        *
+        * @param string $name
+        * @return string|false The safe key, or false if the escaped name still
+        *                      does not round-trip through Title::makeTitleSafe()
+        */
+       function buildSafeTitle( $name ) {
+               global $wgLegalTitleChars;
+
+               // Argument order is ( pattern, callback, subject ).
+               $x = preg_replace_callback(
+                       "/([^$wgLegalTitleChars])/",
+                       array( $this, 'hexChar' ),
+                       $name );
+
+               $test = Title::makeTitleSafe( NS_IMAGE, $x );
+               if( is_null( $test ) || $test->getDbKey() !== $x ) {
+                       $this->log( "Unable to generate safe title from '$name', got '$x'" );
+                       return false;
+               }
+
+               return $x;
+       }
+}
+
+// Run the cleanup under the 'Conversion script' user name.
+$wgUser->setName( 'Conversion script' );
+
+// Without --fix on the command line this is a dry run.
+$fixRequested = isset( $options['fix'] );
+$caps = new ImageCleanup( !$fixRequested );
+$caps->cleanup();
+
+?>
index fd9b5af..265eca9 100644 (file)
@@ -75,6 +75,10 @@ abstract class TableCleanup extends FiveUpgrade {
                $this->log( "Finished $table... $this->updated of $this->processed rows updated" );
                $this->dbr->freeResult( $result );
        }
+
+       /**
+        * Callback for preg_replace_callback(): renders the first byte of the
+        * first capture group as a literal "\xNN" escape with two lowercase
+        * hex digits.
+        *
+        * @param array $matches Regex match array; only $matches[1] is read
+        * @return string
+        */
+       function hexChar( $matches ) {
+               $byte = ord( $matches[1] );
+               return sprintf( "\\x%02x", $byte );
+       }
        
        abstract function processPage( $row );
        
index 6971099..12e07b6 100644 (file)
@@ -132,10 +132,6 @@ class TitleCleanup extends TableCleanup {
                        $linkCache->clear();
                }
        }
-
-       function hexChar( $matches ) {
-               return sprintf( "\\x%02x", ord( $matches[1] ) );
-       }
 }
 
 $wgUser->setName( 'Conversion script' );