From ff6fec1e6fa2cd3741125cd3b8538443940cf36b Mon Sep 17 00:00:00 2001 From: Roan Kattouw Date: Tue, 8 Mar 2011 16:47:26 +0000 Subject: [PATCH] Make updateCollation.php a bit less murderous for WMF databases: * Don't run a COUNT(*) query on what's potentially the entire categorylinks table on enwiki (hundreds of millions of rows). Put it in a miser mode check * Wait for DB replication to catch up before processing the next batch. Implemented LoadBalancer::waitForAll() for this purpose, which should behave more nicely than wfWaitForSlaves() --- includes/db/LoadBalancer.php | 12 +++++++++++ maintenance/updateCollation.php | 35 ++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/includes/db/LoadBalancer.php b/includes/db/LoadBalancer.php index 847d8347fe..5f2af19443 100644 --- a/includes/db/LoadBalancer.php +++ b/includes/db/LoadBalancer.php @@ -338,6 +338,18 @@ class LoadBalancer { } wfProfileOut( __METHOD__ ); } + + /** + * Set the master wait position and wait for ALL slaves to catch up to it + */ + public function waitForAll( $pos ) { + wfProfileIn( __METHOD__ ); + $this->mWaitForPos = $pos; + for ( $i = 1; $i < count( $this->mServers ); $i++ ) { + $this->doWait( $i ); + } + wfProfileOut( __METHOD__ ); + } /** * Get any open connection to a given server index, local or foreign diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index 2985addbcf..66315575c5 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -45,9 +45,16 @@ TEXT; $this->addOption( 'force', 'Run on all rows, even if the collation is ' . 'supposed to be up-to-date.' 
); } + + public function syncDBs() { + $lb = wfGetLB(); + $dbw = $lb->getConnection( DB_MASTER ); + $pos = $dbw->getMasterPos(); + $lb->waitForAll( $pos ); + } public function execute() { - global $wgCategoryCollation; + global $wgCategoryCollation, $wgMiserMode; $dbw = wfGetDB( DB_MASTER ); $force = $this->getOption( 'force' ); @@ -55,30 +62,32 @@ TEXT; $options = array( 'LIMIT' => self::BATCH_SIZE ); if ( $force ) { - $collationConds = array(); $options['ORDER BY'] = 'cl_from, cl_to'; } else { $collationConds = array( 0 => 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation ) ); - $count = $dbw->selectField( - 'categorylinks', - 'COUNT(*)', - $collationConds, - __METHOD__ - ); + if ( !$wgMiserMode ) { + $count = $dbw->selectField( + 'categorylinks', + 'COUNT(*)', + $collationConds, + __METHOD__ + ); - if ( $count == 0 ) { - $this->output( "Collations up-to-date.\n" ); - return; + if ( $count == 0 ) { + $this->output( "Collations up-to-date.\n" ); + return; + } + $this->output( "Fixing collation for $count rows.\n" ); } - $this->output( "Fixing collation for $count rows.\n" ); } $count = 0; $row = false; $batchConds = array(); do { + $this->output( 'Processing next ' . self::BATCH_SIZE . ' rows... '); $res = $dbw->select( array( 'categorylinks', 'page' ), array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', @@ -140,6 +149,8 @@ TEXT; $count += $res->numRows(); $this->output( "$count done.\n" ); + + $this->syncDBs(); } while ( $res->numRows() == self::BATCH_SIZE ); } } -- 2.20.1