From ab6568d056f79d83adc894d561622884b199a551 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Mon, 7 Apr 2008 14:05:12 +0000 Subject: [PATCH] Proposal for method to upgrade the logging table at wikimedia --- maintenance/archives/upgradeLogging.php | 181 ++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 maintenance/archives/upgradeLogging.php diff --git a/maintenance/archives/upgradeLogging.php b/maintenance/archives/upgradeLogging.php new file mode 100644 index 0000000000..254a90c190 --- /dev/null +++ b/maintenance/archives/upgradeLogging.php @@ -0,0 +1,181 @@ +dbw = wfGetDB( DB_MASTER ); + $logging = $this->dbw->tableName( 'logging' ); + $logging_1_10 = $this->dbw->tableName( 'logging_1_10' ); + $logging_pre_1_10 = $this->dbw->tableName( 'logging_pre_1_10' ); + + if ( $this->dbw->tableExists( 'logging_pre_1_10' ) && !$this->dbw->tableExists( 'logging' ) ) { + # Fix previous aborted run + echo "Cleaning up from previous aborted run\n"; + $this->dbw->query( "RENAME TABLE $logging_pre_1_10 TO $logging", __METHOD__ ); + } + + if ( $this->dbw->tableExists( 'logging_pre_1_10' ) ) { + echo "This script has already been run to completion\n"; + return; + } + + # Create the target table + if ( !$this->dbw->tableExists( 'logging_1_10' ) ) { + global $wgDBTableOptions; + + $sql = <<dbw->query( $sql, __METHOD__ ); + } + + # Synchronise the tables + echo "Doing initial sync...\n"; + $this->sync( 'logging', 'logging_1_10' ); + echo "Sync done\n\n"; + + # Rename the old table away + echo "Renaming the old table to $logging_pre_1_10\n"; + $this->dbw->query( "RENAME TABLE $logging TO $logging_pre_1_10", __METHOD__ ); + + # Copy remaining old rows + # Done before the new table is active so that $copyPos is accurate + echo "Doing final sync...\n"; + $this->sync( 'logging_pre_1_10', 'logging_1_10' ); + + # Move the new table in + echo "Moving the new table in...\n"; + $this->dbw->query( "RENAME TABLE $logging_1_10 TO $logging", __METHOD__ ); + echo 
"Finished.\n";
	}

	/**
	 * Copy all rows from $srcTable to $dstTable.
	 *
	 * The copy position is the highest log_timestamp already present in the
	 * destination. Each pass first reconciles the rows that share exactly that
	 * timestamp (a previous batch may have stopped part-way through them),
	 * then copies the next batch of strictly newer rows. The loop ends when
	 * the source has no rows newer than the copy position.
	 *
	 * @param string $srcTable Table to read from, as passed to the database select methods
	 * @param string $dstTable Table to write to, as passed to the database insert method
	 */
	function sync( $srcTable, $dstTable ) {
		$batchSize = 1000;
		$minTs = $this->dbw->selectField( $srcTable, 'MIN(log_timestamp)', false, __METHOD__ );
		# The lower bound is only read once, so convert it once
		$minTsUnix = wfTimestamp( TS_UNIX, $minTs );
		$numRowsCopied = 0;

		while ( true ) {
			# Re-read both ends on every pass: the source may still be growing
			$maxTs = $this->dbw->selectField( $srcTable, 'MAX(log_timestamp)', false, __METHOD__ );
			$copyPos = $this->dbw->selectField( $dstTable, 'MAX(log_timestamp)', false, __METHOD__ );

			# Progress = share of the source's time span that lies at or before
			# the copy position.
			if ( $maxTs === null || $maxTs === false ) {
				# Source is empty, there is nothing left to copy
				$percent = 100;
			} elseif ( $copyPos === null || $copyPos === false ) {
				# Destination is empty, nothing has been copied yet
				$percent = 0;
			} else {
				$range = wfTimestamp( TS_UNIX, $maxTs ) - $minTsUnix;
				$done = wfTimestamp( TS_UNIX, $copyPos ) - $minTsUnix;
				# A zero-width range means every source row shares one timestamp
				$percent = $range > 0 ? $done / $range * 100 : 100;
			}
			printf( "%s %.2f%%\n", $copyPos, $percent );

			# Handle all entries with timestamp equal to $copyPos
			if ( $copyPos !== null ) {
				$numRowsCopied += $this->copyExactMatch( $srcTable, $dstTable, $copyPos );
			}

			# Now copy a batch of rows
			if ( $copyPos === null ) {
				$conds = false;
			} else {
				$conds = array( 'log_timestamp > ' . $this->dbw->addQuotes( $copyPos ) );
			}
			$srcRes = $this->dbw->select( $srcTable, '*', $conds, __METHOD__,
				array( 'LIMIT' => $batchSize, 'ORDER BY' => 'log_timestamp' ) );

			if ( !$srcRes->numRows() ) {
				# All done
				break;
			}

			$batch = array();
			foreach ( $srcRes as $srcRow ) {
				$batch[] = (array)$srcRow;
			}
			$this->dbw->insert( $dstTable, $batch, __METHOD__ );
			$numRowsCopied += count( $batch );

			wfWaitForSlaves( 5 );
		}
		echo "Copied $numRowsCopied rows\n";
	}

	/**
	 * Copy the source rows whose log_timestamp equals $copyPos and which are
	 * not yet present in the destination.
	 *
	 * Rows are compared on the source table's columns only, so columns that
	 * exist solely in the destination are ignored. Matching is done by
	 * count: if the source holds N identical rows and the destination holds
	 * M of them, the remaining N - M are inserted. This keeps exact duplicate
	 * rows from being dropped when an earlier batch ended between them.
	 *
	 * @param string $srcTable Table to read from
	 * @param string $dstTable Table to write to
	 * @param string $copyPos Timestamp whose rows should be reconciled
	 * @return int Number of rows inserted into the destination
	 */
	function copyExactMatch( $srcTable, $dstTable, $copyPos ) {
		$numRowsCopied = 0;
		$srcRes = $this->dbw->select( $srcTable, '*', array( 'log_timestamp' => $copyPos ), __METHOD__ );
		$dstRes = $this->dbw->select( $dstTable, '*', array( 'log_timestamp' => $copyPos ), __METHOD__ );

		if ( !$srcRes->numRows() ) {
			return $numRowsCopied;
		}

		# Peek at the first row to learn the source column list, then rewind
		$srcRow = $srcRes->fetchObject();
		$srcFields = array_keys( (array)$srcRow );
		$srcRes->seek( 0 );

		# Count the rows that already exist in the destination, keyed by a
		# hash of their source-visible columns
		$dstRowCounts = array();
		foreach ( $dstRes as $dstRow ) {
			$reducedDstRow = array();
			foreach ( $srcFields as $field ) {
				$reducedDstRow[$field] = $dstRow->$field;
			}
			$hash = md5( serialize( $reducedDstRow ) );
			if ( isset( $dstRowCounts[$hash] ) ) {
				$dstRowCounts[$hash]++;
			} else {
				$dstRowCounts[$hash] = 1;
			}
		}

		# Copy all the source rows that aren't already in the destination
		foreach ( $srcRes as $srcRow ) {
			$hash = md5( serialize( (array)$srcRow ) );
			if ( isset( $dstRowCounts[$hash] ) && $dstRowCounts[$hash] > 0 ) {
				# Pair this source row with one existing destination row
				$dstRowCounts[$hash]--;
				continue;
			}
			$this->dbw->insert( $dstTable, (array)$srcRow, __METHOD__ );
			$numRowsCopied++;
		}
		return $numRowsCopied;
	}
}

$ul = new UpdateLogging;
$ul->execute();

-- 2.20.1