* (r56711) Don't use unbufferred queries unless someone is holding a gun to your head, they cause no end of trouble. Use batched queries instead. Should fix the bug reported on bug 20741 comment 13.
* Fixed a bug in cleanupTitles.php which caused it to fail to convert spaces in page_title to underscores, possibly caused by r6355.
* Made cleanupWatchlist.php respect "--fix" as documented
* Made runTable accept a parameter array instead of an ever-growing formal parameter list
* Renamed processPage() to more accurate processRow(). Removed abstract function definition since the name of the function can be overridden.
* Made a few things public instead of protected for easier testing via eval.php
* Fixed missing newlines in progress messages in cleanupCaps.php
if( $wgCapitalLinks )
$this->error( "\$wgCapitalLinks is on -- no need for caps links cleanup.", true );
- $this->runTable( $this->targetTable,
- 'WHERE page_namespace=' . $this->namespace,
- array( &$this, 'processPage' ) );
+ $this->runTable( array(
+ 'table' => 'page',
+ 'conds' => array( 'page_namespace' => $this->namespace ),
+ 'index' => 'page_id',
+ 'callback' => 'processRow' ) );
}
- protected function processPage( $row ) {
+ protected function processRow( $row ) {
global $wgContLang;
$current = Title::makeTitle( $row->page_namespace, $row->page_title );
$upper = $row->page_title;
$lower = $wgContLang->lcfirst( $row->page_title );
if( $upper == $lower ) {
- $this->output( "\"$display\" already lowercase." );
+ $this->output( "\"$display\" already lowercase.\n" );
return $this->progress( 0 );
}
$target = Title::makeTitle( $row->page_namespace, $lower );
$targetDisplay = $target->getPrefixedText();
if( $target->exists() ) {
- $this->output( "\"$display\" skipped; \"$targetDisplay\" already exists" );
+ $this->output( "\"$display\" skipped; \"$targetDisplay\" already exists\n" );
return $this->progress( 0 );
}
if( $this->dryrun ) {
- $this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED" );
+ $this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED\n" );
$ok = true;
} else {
$ok = $current->moveTo( $target, false, 'Converting page titles to lowercase' );
- $this->output( "\"$display\" -> \"$targetDisplay\": $ok" );
+ $this->output( "\"$display\" -> \"$targetDisplay\": $ok\n" );
}
if( $ok === true ) {
$this->progress( 1 );
$talk = $target->getTalkPage();
$row->page_namespace = $talk->getNamespace();
if( $talk->exists() ) {
- return $this->processPage( $row );
+ return $this->processRow( $row );
}
}
} else {
require_once( dirname(__FILE__) . '/cleanupTable.inc' );
class ImageCleanup extends TableCleanup {
- protected $targetTable = 'image';
+ protected $defaultParams = array(
+ 'table' => 'image',
+ 'conds' => array(),
+ 'index' => 'img_name',
+ 'callback' => 'processRow',
+ );
+
public function __construct() {
parent::__construct();
$this->mDescription = "Script to clean up broken, unparseable upload filenames";
}
- protected function processPage( $row ) {
+ protected function processRow( $row ) {
global $wgContLang;
$source = $row->img_name;
require_once( dirname(__FILE__) . '/Maintenance.php' );
-abstract class TableCleanup extends Maintenance {
- protected $targetTable = 'page';
+class TableCleanup extends Maintenance {
+ protected $defaultParams = array(
+ 'table' => 'page',
+ 'conds' => array(),
+ 'index' => 'page_id',
+ 'callback' => 'processRow',
+ );
+
protected $dryrun = false;
protected $maxLag = 10; # if slaves are lagged more than 10 secs, wait
+ public $batchSize = 100;
+ public $reportInterval = 100;
public function __construct() {
parent::__construct();
} else {
$this->output( "Checking and fixing bad titles...\n" );
}
- $this->runTable( $this->targetTable,
- '', //'WHERE page_namespace=0',
- array( $this, 'processPage' ) );
+ $this->runTable( $this->defaultParams );
}
protected function init( $count, $table ) {
protected function progress( $updated ) {
$this->updated += $updated;
$this->processed++;
- if( $this->processed % 100 != 0 ) {
+ if( $this->processed % $this->reportInterval != 0 ) {
return;
}
$portion = $this->processed / $this->count;
flush();
}
- protected function runTable( $table, $where, $callback ) {
+ public function runTable( $params ) {
$dbr = wfGetDB( DB_SLAVE );
-
- $count = $dbr->selectField( $table, 'count(*)', '', __METHOD__ );
+
+ if ( array_diff( array_keys( $params ),
+ array( 'table', 'conds', 'index', 'callback' ) ) )
+ {
+ throw new MWException( __METHOD__.': Missing parameter ' . implode( ', ', $params ) );
+ }
+
+ $table = $params['table'];
+ $count = $dbr->selectField( $table, 'count(*)', $params['conds'], __METHOD__ );
$this->init( $count, $table );
- $this->output( "Processing $table..." );
+ $this->output( "Processing $table...\n" );
+
+
+ $index = (array)$params['index'];
+ $indexConds = array();
+ $options = array(
+ 'ORDER BY' => implode( ',', $index ),
+ 'LIMIT' => $this->batchSize
+ );
+ $callback = array( $this, $params['callback'] );
- // Unbuffered queries, avoids OOM
- $dbr->bufferResults( false );
-
- $tableName = $dbr->tableName( $table );
- $sql = "SELECT * FROM $tableName $where";
- $result = $dbr->query( $sql, __METHOD__ );
+ while ( true ) {
+ $conds = array_merge( $params['conds'], $indexConds );
+ $res = $dbr->select( $table, '*', $conds, __METHOD__, $options );
+ if ( !$res->numRows() ) {
+ // Done
+ break;
+ }
- foreach( $result as $row ) {
- call_user_func( $callback, $row );
+ foreach ( $res as $row ) {
+ call_user_func( $callback, $row );
+ }
+
+ if ( $res->numRows() < $this->batchSize ) {
+ // Done
+ break;
+ }
+
+ // Update the conditions to select the next batch.
+ // Construct a condition string by starting with the least significant part
+ // of the index, and adding more significant parts progressively to the left
+ // of the string.
+ $nextCond = '';
+ foreach ( array_reverse( $index ) as $field ) {
+ $encValue = $dbr->addQuotes( $row->$field );
+ if ( $nextCond === '' ) {
+ $nextCond = "$field > $encValue";
+ } else {
+ $nextCond = "$field > $encValue OR ($field = $encValue AND ($nextCond))";
+ }
+ }
+ $indexConds = array( $nextCond );
}
-
+
$this->output( "Finished $table... $this->updated of $this->processed rows updated\n" );
-
- $result->free();
-
- $dbr->bufferResults( true );
}
protected function hexChar( $matches ) {
return sprintf( "\\x%02x", ord( $matches[1] ) );
}
-
- abstract protected function processPage( $row );
}
+
+class TableCleanupTest extends TableCleanup {
+ function processRow( $row ) {
+ $this->progress( mt_rand( 0, 1 ) );
+ }
+}
+
$this->mDescription = "Script to clean up broken, unparseable titles";
}
- protected function processPage( $row ) {
- $current = Title::makeTitle( $row->page_namespace, $row->page_title );
- $display = $current->getPrefixedText();
-
+ protected function processRow( $row ) {
+ $display = Title::makeName( $row->page_namespace, $row->page_title );
$verified = UtfNormal::cleanUp( $display );
-
$title = Title::newFromText( $verified );
- if( !is_null( $title ) && $title->equals( $current ) && $title->canExist() ) {
+ if( !is_null( $title )
+ && $title->canExist()
+ && $title->getNamespace() == $row->page_namespace
+ && $title->getDBkey() === $row->page_title )
+ {
return $this->progress( 0 ); // all is fine
}
require_once( dirname(__FILE__) . '/cleanupTable.inc' );
class WatchlistCleanup extends TableCleanup {
- protected $targetTable = 'watchlist';
+ protected $defaultParams = array(
+ 'table' => 'watchlist',
+ 'index' => array( 'wl_user', 'wl_namespace', 'wl_title' ),
+ 'conds' => array(),
+ 'callback' => 'processRow'
+ );
+
public function __construct() {
parent::__construct();
$this->mDescription = "Script to remove broken, unparseable titles in the Watchlist";
+ $this->addOption( 'fix', 'Actually remove entries; without will only report.' );
+ }
+
+ function execute() {
+ if ( !$this->hasOption( 'fix' ) ) {
+ $this->output( "Dry run only: use --fix to enable updates\n" );
+ }
+ parent::execute();
}
- protected function processPage( $row ) {
+ protected function processRow( $row ) {
$current = Title::makeTitle( $row->wl_namespace, $row->wl_title );
$display = $current->getPrefixedText();
$verified = UtfNormal::cleanUp( $display );
if( $row->wl_user == 0 || is_null( $title ) || !$title->equals( $current ) ) {
$this->output( "invalid watch by {$row->wl_user} for ({$row->wl_namespace}, \"{$row->wl_title}\")\n" );
- $this->removeWatch( $row );
- return $this->progress( 1 );
+ $updated = $this->removeWatch( $row );
+ $this->progress( $updated );
+ return;
}
$this->progress( 0 );
}
private function removeWatch( $row ) {
- if( !$this->dryrun ) {
+ if( !$this->dryrun && $this->hasOption( 'fix' ) ) {
$dbw = wfGetDB( DB_MASTER );
$dbw->delete( 'watchlist', array(
'wl_user' => $row->wl_user,
'wl_title' => $row->wl_title ),
__METHOD__ );
$this->output( "- removed\n" );
+ return 1;
+ } else {
+ return 0;
}
}
}