From fb1cf94fb825a394a28ded779de1cc943d847a1c Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Wed, 9 Apr 2008 18:33:48 +0000 Subject: [PATCH] * Use timestamp * Fix generally fucked up batch counting --- maintenance/populateParentId.php | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/maintenance/populateParentId.php b/maintenance/populateParentId.php index 1dfc2298f9..66f30b8ca0 100644 --- a/maintenance/populateParentId.php +++ b/maintenance/populateParentId.php @@ -6,7 +6,7 @@ * and to find new page edits by users. */ -define( 'BATCH_SIZE', 500 ); +define( 'BATCH_SIZE', 200 ); require_once 'commandLine.inc'; @@ -22,30 +22,39 @@ function populate_rev_parent_id( $db ) { echo "Populating rev_parent_id column\n"; $start = $db->selectField( 'revision', 'MIN(rev_id)', false, __FUNCTION__ ); $end = $db->selectField( 'revision', 'MAX(rev_id)', false, __FUNCTION__ ); + # Do remaining chunk + $end += BATCH_SIZE - 1; $blockStart = $start; $blockEnd = $start + BATCH_SIZE - 1; $count = 0; + $changed = 0; while( $blockEnd <= $end ) { + echo "...doing rev_id from $blockStart to $blockEnd\n"; $cond = "rev_id BETWEEN $blockStart AND $blockEnd"; - $res = $db->select( 'revision', array('rev_id','rev_page'), $cond, __FUNCTION__ ); + $res = $db->select( 'revision', + array('rev_id','rev_page','rev_timestamp','rev_parent_id'), + $cond, __FUNCTION__ ); # Go through and update rev_parent_id from these rows. # Assume that the previous revision of the title was # the original previous revision of the title when the # edit was made... foreach( $res as $row ) { $previousID = $db->selectField( 'revision', 'rev_id', - array( 'rev_page' => $row->rev_page, "rev_id < '{$row->rev_id}'" ), + array( 'rev_page' => $row->rev_page, "rev_timestamp < '{$row->rev_timestamp}'" ), __FUNCTION__, - array( 'ORDER BY' => 'rev_id DESC' ) ); + array( 'ORDER BY' => 'rev_timestamp DESC' ) ); + $previousID = intval($previousID); + if( $previousID != $row->rev_parent_id ) + $changed++; # Update the row... $db->update( 'revision', - array( 'rev_parent_id' => intval($previousID) ), + array( 'rev_parent_id' => $previousID ), array( 'rev_id' => $row->rev_id ), __FUNCTION__ ); $count++; } - $blockStart += BATCH_SIZE; - $blockEnd += BATCH_SIZE; + $blockStart += BATCH_SIZE - 1; + $blockEnd += BATCH_SIZE - 1; wfWaitForSlaves( 5 ); } $logged = $db->insert( 'updatelog', @@ -53,7 +62,7 @@ function populate_rev_parent_id( $db ) { __FUNCTION__, 'IGNORE' ); if( $logged ) { - echo "rev_parent_id population complete ... {$count} rows\n"; + echo "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n"; return true; } else { echo "Could not insert rev_parent_id population row.\n"; -- 2.20.1