From c6fb8af8ef960b2a2d6f714ee7a809ba9eaca785 Mon Sep 17 00:00:00 2001 From: Roan Kattouw Date: Thu, 15 Sep 2011 12:17:44 +0000 Subject: [PATCH] Merge live hacks from r83992 to trunk, after cleaning some things up. * Wait for slaves after every thousand rows rather than after processing every batch. r83992 had 1000 hard-coded, I put it in SYNC_INTERVAL * Set $lb->waitTimeout(100000). I have no idea why, but it was in the live hack. Maybe Tim or Domas could enlighten me * Use a STRAIGHT JOIN for the query on categorylinks and page because MySQL appears to want to join the tables the wrong way around * Use cl_collation='previousValue' rather than cl_collation!='newValue' if possible. This was originally a dirty live hack, but I re-implemented it nicely with a --previous-collation command line option * Print a status update both before and after the SELECT query. This allows the user to notice when the SELECT queries are getting increasingly slower, which is an indication you may want to set --previous-collation --- maintenance/updateCollation.php | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index f5b7d9c533..396d62c968 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -29,7 +29,8 @@ require_once( dirname( __FILE__ ) . '/Maintenance.php' ); class UpdateCollation extends Maintenance { - const BATCH_SIZE = 50; + const BATCH_SIZE = 50; // Number of rows to process in one batch + const SYNC_INTERVAL = 20; // Wait for slaves after this many batches public function __construct() { parent::__construct(); @@ -44,10 +45,17 @@ TEXT; $this->addOption( 'force', 'Run on all rows, even if the collation is ' . 'supposed to be up-to-date.' ); + $this->addOption( 'previous-collation', 'Set the previous value of ' . + '$wgCategoryCollation here to speed up this script, especially if your ' . + 'categorylinks table is large. This will only update rows with that ' . + 'collation, though, so it may miss out-of-date rows with a different, ' . + 'even older collation.', false, true ); } public function syncDBs() { + // TODO: Most of this is duplicated from wfWaitForSlaves(), except for the waitTimeout() call $lb = wfGetLB(); + $lb->waitTimeout(100000); // bug 27975 - Don't try to wait for slaves if there are none // Prevents permission error when getting master position if ( $lb->getServerCount() > 1 ) { @@ -63,14 +71,19 @@ TEXT; $dbw = $this->getDB( DB_MASTER ); $force = $this->getOption( 'force' ); - $options = array( 'LIMIT' => self::BATCH_SIZE ); + $options = array( 'LIMIT' => self::BATCH_SIZE, 'STRAIGHT_JOIN' ); if ( $force ) { $options['ORDER BY'] = 'cl_from, cl_to'; $collationConds = array(); } else { - $collationConds = array( 0 => - 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation ) ); + if ( $this->hasOption( 'previous-collation' ) ) { + $collationConds['cl_collation'] = $this->getOption( 'previous-collation' ); + } else { + $collationConds = array( 0 => + 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation ) + ); + } if ( !$wgMiserMode ) { $count = $dbw->selectField( @@ -89,9 +102,10 @@ TEXT; } $count = 0; + $batchCount = 0; $batchConds = array(); do { - $this->output( 'Processing next ' . self::BATCH_SIZE . ' rows... '); + $this->output( "Selecting next " . self::BATCH_SIZE . " rows..." ); $res = $dbw->select( array( 'categorylinks', 'page' ), array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', @@ -101,6 +115,7 @@ TEXT; __METHOD__, $options ); + $this->output( " processing..." ); $dbw->begin(); foreach ( $res as $row ) { @@ -154,7 +169,11 @@ TEXT; $count += $res->numRows(); $this->output( "$count done.\n" ); - $this->syncDBs(); + if ( ++$batchCount % self::SYNC_INTERVAL == 0 ) { + $this->output( "Waiting for slaves ... " ); + $this->syncDBs(); + $this->output( "done\n" ); + } } while ( $res->numRows() == self::BATCH_SIZE ); } } -- 2.20.1