From 9dc71928c49fe1fd9d45205dac7e84592f488ce8 Mon Sep 17 00:00:00 2001
From: Brion Vibber
Date: Thu, 13 Jul 2006 21:54:13 +0000
Subject: [PATCH] Add script to clean up badly named image records. Doesn't touch files yet

---
 maintenance/cleanupImages.php | 193 ++++++++++++++++++++++++++++++++++
 maintenance/cleanupTable.inc  |   8 +
 maintenance/cleanupTitles.php |   4 -
 3 files changed, 201 insertions(+), 4 deletions(-)
 create mode 100644 maintenance/cleanupImages.php

diff --git a/maintenance/cleanupImages.php b/maintenance/cleanupImages.php
new file mode 100644
index 0000000000..389ec30a72
--- /dev/null
+++ b/maintenance/cleanupImages.php
@@ -0,0 +1,193 @@
+<?php
+/**
+ * Clean up badly named image records. Only rows in the image table are
+ * touched; the image files themselves are not renamed or deleted.
+ *
+ * Runs as a dry run unless the 'fix' option is set.
+ *
+ * http://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Brion Vibber
+ * @package MediaWiki
+ * @subpackage maintenance
+ */
+
+require_once( 'commandLine.inc' );
+require_once( 'cleanupTable.inc' );
+
+/**
+ * Cleanup pass over the image table: rows with an empty img_name are
+ * deleted; rows whose img_name is illegal or not canonical are renamed.
+ */
+class ImageCleanup extends TableCleanup {
+	/**
+	 * @param bool $dryrun When true, changes are only logged, not made.
+	 */
+	function __construct( $dryrun = false ) {
+		parent::__construct( 'image', $dryrun );
+	}
+
+	/**
+	 * Check one row of the image table and repair its name if needed.
+	 *
+	 * @param object $row Table row; only img_name is read.
+	 */
+	function processPage( $row ) {
+		global $wgContLang;
+
+		$source = $row->img_name;
+		if( $source == '' ) {
+			// Ye olde empty rows. Just kill them.
+			$this->killRow( $source );
+			return $this->progress( 1 );
+		}
+
+		$cleaned = $source;
+
+		// About half of old bad image names have percent-codes
+		$cleaned = urldecode( $cleaned );
+
+		// Some are old latin-1
+		$cleaned = $wgContLang->checkTitleEncoding( $cleaned );
+
+		// Many of remainder look like non-normalized unicode
+		$cleaned = UtfNormal::cleanUp( $cleaned );
+
+		$title = Title::makeTitleSafe( NS_IMAGE, $cleaned );
+
+		if( is_null( $title ) ) {
+			$this->log( "page $source ($cleaned) is illegal." );
+			$safe = $this->buildSafeTitle( $cleaned );
+			if( $safe === false ) {
+				// No usable replacement name: leave the row alone rather
+				// than handing false to pokeFile() as the new name.
+				return $this->progress( 0 );
+			}
+			$this->pokeFile( $source, $safe );
+			return $this->progress( 1 );
+		}
+
+		if( $title->getDbKey() !== $source ) {
+			$munged = $title->getDbKey();
+			$this->log( "page $source ($munged) doesn't match self." );
+			$this->pokeFile( $source, $munged );
+			return $this->progress( 1 );
+		}
+
+		$this->progress( 0 );
+	}
+
+	/**
+	 * Delete the image row(s) named $name, or only log it on a dry run.
+	 *
+	 * @param string $name img_name of the bogus row
+	 */
+	function killRow( $name ) {
+		if( $this->dryrun ) {
+			$this->log( "DRY RUN: would delete bogus row '$name'" );
+		} else {
+			$this->log( "deleting bogus row '$name'" );
+			$db = wfGetDB( DB_MASTER );
+			$db->delete( 'image',
+				array( 'img_name' => $name ),
+				__METHOD__ );
+		}
+	}
+
+	/**
+	 * Rename the image row $orig to $new, or only log it on a dry run.
+	 * If $new is already taken, _1, _2, ... is inserted before the
+	 * extension until an unused name is found.
+	 *
+	 * @param string $orig Current img_name
+	 * @param string $new Desired img_name
+	 */
+	function pokeFile( $orig, $new ) {
+		$db = wfGetDB( DB_MASTER );
+		$version = 0;
+		$final = $new;
+
+		while( $db->selectField( 'image', 'img_name',
+			array( 'img_name' => $final ), __METHOD__ ) ) {
+			$this->log( "Rename conflicts with '$final'..." );
+			$version++;
+			// Always derive the candidate from $new so suffixes don't pile up.
+			$final = $this->appendTitle( $new, "_$version" );
+		}
+
+		if( $this->dryrun ) {
+			$this->log( "DRY RUN: would rename '$orig' to '$final'" );
+		} else {
+			$this->log( "renaming '$orig' to '$final'" );
+			$db->update( 'image',
+				array( 'img_name' => $final ),
+				array( 'img_name' => $orig ),
+				__METHOD__ );
+		}
+	}
+
+	/**
+	 * Insert $suffix in front of the last extension of $name
+	 * ("Foo.jpg", "_1" gives "Foo_1.jpg"); a name without a dot gets
+	 * the suffix appended.
+	 *
+	 * @param string $name
+	 * @param string $suffix
+	 * @return string
+	 */
+	function appendTitle( $name, $suffix ) {
+		$dot = strrpos( $name, '.' );
+		if( $dot === false ) {
+			return $name . $suffix;
+		}
+		return substr( $name, 0, $dot ) . $suffix . substr( $name, $dot );
+	}
+
+	/**
+	 * Build a replacement name by hex-escaping (via hexChar()) every
+	 * character that is not in $wgLegalTitleChars.
+	 *
+	 * @param string $name
+	 * @return string|false The new name, or false if it still does not
+	 *   survive Title::makeTitleSafe() unchanged.
+	 */
+	function buildSafeTitle( $name ) {
+		global $wgLegalTitleChars;
+
+		// preg_replace_callback() takes pattern, callback, subject.
+		$x = preg_replace_callback(
+			"/([^$wgLegalTitleChars])/",
+			array( $this, 'hexChar' ),
+			$name );
+
+		$test = Title::makeTitleSafe( NS_IMAGE, $x );
+		if( is_null( $test ) || $test->getDbKey() !== $x ) {
+			$this->log( "Unable to generate safe title from '$name', got '$x'" );
+			return false;
+		}
+
+		return $x;
+	}
+}
+
+$wgUser->setName( 'Conversion script' );
+// Dry run unless the 'fix' option was given.
+$caps = new ImageCleanup( !isset( $options['fix'] ) );
+$caps->cleanup();
+
+?>
diff --git a/maintenance/cleanupTable.inc b/maintenance/cleanupTable.inc
index fd9b5af834..265eca9768 100644
--- a/maintenance/cleanupTable.inc
+++ b/maintenance/cleanupTable.inc
@@ -75,6 +75,14 @@ abstract class TableCleanup extends FiveUpgrade {
 		$this->log( "Finished $table... $this->updated of $this->processed rows updated" );
 		$this->dbr->freeResult( $result );
 	}
+
+	/**
+	 * Callback for preg_replace_callback(): returns the first byte of
+	 * the captured text ($matches[1]) as a literal \xNN hex escape.
+	 */
+	function hexChar( $matches ) {
+		return sprintf( "\\x%02x", ord( $matches[1] ) );
+	}
 
 	abstract function processPage( $row );
 
diff --git a/maintenance/cleanupTitles.php b/maintenance/cleanupTitles.php
index 6971099b02..12e07b67ad 100644
--- a/maintenance/cleanupTitles.php
+++ b/maintenance/cleanupTitles.php
@@ -132,10 +132,6 @@ class TitleCleanup extends TableCleanup {
 			$linkCache->clear();
 		}
 	}
-
-	function hexChar( $matches ) {
-		return sprintf( "\\x%02x", ord( $matches[1] ) );
-	}
 }
 
 $wgUser->setName( 'Conversion script' );
-- 
2.20.1