From 4c8c5c434f1f620e12710d7a81198775e6dd3116 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Tue, 15 Nov 2016 16:11:15 +1100 Subject: [PATCH] Rewrite runBatchedQuery.php This maintenance script was previously introduced as a way to do large UPDATE queries in a replication safe way. However, in modern versions of MySQL, UPDATE...LIMIT is considered a non-replication-safe query, and will emit a warning. So instead, ask the user to provide slightly more structured data about the update query being done, and partition the table based on an index. In the UPDATE queries, specify index ranges instead of using LIMIT. Also add a "db" option, which allows the script to update databases which are not valid wiki names, for example, centralauth. Change-Id: I462bdcb03e107af9db4738895952d5110f0ec4fc --- maintenance/runBatchedQuery.php | 64 ++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/maintenance/runBatchedQuery.php b/maintenance/runBatchedQuery.php index a5e7a2fa88..f8eedb33a0 100644 --- a/maintenance/runBatchedQuery.php +++ b/maintenance/runBatchedQuery.php @@ -34,26 +34,74 @@ class BatchedQueryRunner extends Maintenance { public function __construct() { parent::__construct(); $this->addDescription( - "Run a query repeatedly until it affects 0 rows, and wait for replica DBs in between.\n" . - "NOTE: You need to set a LIMIT clause yourself." ); + "Run an update query on all rows of a table. " . + "Waits for replicas at appropriate intervals." ); + $this->addOption( 'table', 'The table name', true, true ); + $this->addOption( 'set', 'The SET clause', true, true ); + $this->addOption( 'where', 'The WHERE clause', false, true ); + $this->addOption( 'key', 'A column name, the values of which are unique', true, true ); + $this->addOption( 'batch-size', 'The batch size (default 1000)', false, true ); + $this->addOption( 'db', 'The database name, or omit to use the current wiki.', false, true ); } public function execute() { - if ( !$this->hasArg() ) { - $this->error( "No query specified. Specify the query as a command line parameter.", true ); + $table = $this->getOption( 'table' ); + $key = $this->getOption( 'key' ); + $set = $this->getOption( 'set' ); + $where = $this->getOption( 'where', null ); + $where = $where === null ? [] : [ $where ]; + $batchSize = $this->getOption( 'batch-size', 1000 ); + + $dbName = $this->getOption( 'db', null ); + if ( $dbName === null ) { + $dbw = $this->getDB( DB_MASTER ); + } else { + $lbf = MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $lb = $lbf->getMainLB( $dbName ); + $dbw = $lb->getConnection( DB_MASTER, [], $dbName ); } - $query = $this->getArg(); + $selectConds = $where; + $prevEnd = false; + $n = 1; - $dbw = $this->getDB( DB_MASTER ); do { $this->output( "Batch $n: " ); $n++; + + // Note that the update conditions do not rely on atomicity of the + // SELECT query in order to guarantee that all rows are updated. The + // results of the SELECT are merely a partitioning hint. Simultaneous + // updates merely result in the wrong number of rows being updated + // in a batch. + + $res = $dbw->select( $table, $key, $selectConds, __METHOD__, + [ 'ORDER BY' => $key, 'LIMIT' => $batchSize ] ); + if ( $res->numRows() ) { + $res->seek( $res->numRows() - 1 ); + $row = $res->fetchObject(); + $end = $dbw->addQuotes( $row->$key ); + $selectConds = array_merge( $where, [ "$key > $end" ] ); + $updateConds = array_merge( $where, [ "$key <= $end" ] ); + } else { + $updateConds = $where; + } + if ( $prevEnd !== false ) { + $updateConds = array_merge( [ "$key > $prevEnd" ], $updateConds ); + } + + $query = "UPDATE " . $dbw->tableName( $table ) . + " SET " . $set . + " WHERE " . $dbw->makeList( $updateConds, IDatabase::LIST_AND ); + $dbw->query( $query, __METHOD__ ); + + $prevEnd = $end; + $affected = $dbw->affectedRows(); - $this->output( "$affected rows\n" ); + $this->output( "$affected rows affected\n" ); wfWaitForSlaves(); - } while ( $affected > 0 ); + } while ( $res->numRows() ); } public function getDbType() { -- 2.20.1