* Use timestamp
authorAaron Schulz <aaron@users.mediawiki.org>
Wed, 9 Apr 2008 18:33:48 +0000 (18:33 +0000)
committerAaron Schulz <aaron@users.mediawiki.org>
Wed, 9 Apr 2008 18:33:48 +0000 (18:33 +0000)
* Fix generally fucked up batch counting

maintenance/populateParentId.php

index 1dfc229..66f30b8 100644 (file)
@@ -6,7 +6,7 @@
  * and to find new page edits by users.
  */
 
-define( 'BATCH_SIZE', 500 );
+define( 'BATCH_SIZE', 200 );
 
 require_once 'commandLine.inc';
        
@@ -22,30 +22,39 @@ function populate_rev_parent_id( $db ) {
        echo "Populating rev_parent_id column\n";
        $start = $db->selectField( 'revision', 'MIN(rev_id)', false, __FUNCTION__ );
        $end = $db->selectField( 'revision', 'MAX(rev_id)', false, __FUNCTION__ );
+       # Do remaining chunk
+       $end += BATCH_SIZE - 1;
        $blockStart = $start;
        $blockEnd = $start + BATCH_SIZE - 1;
        $count = 0;
+       $changed = 0;
        while( $blockEnd <= $end ) {
+               echo "...doing rev_id from $blockStart to $blockEnd\n";
                $cond = "rev_id BETWEEN $blockStart AND $blockEnd";
-               $res = $db->select( 'revision', array('rev_id','rev_page'), $cond, __FUNCTION__ );
+               $res = $db->select( 'revision', 
+                       array('rev_id','rev_page','rev_timestamp','rev_parent_id'), 
+                       $cond, __FUNCTION__ );
                # Go through and update rev_parent_id from these rows.
                # Assume that the previous revision of the title was
                # the original previous revision of the title when the
                # edit was made...
                foreach( $res as $row ) {
                        $previousID = $db->selectField( 'revision', 'rev_id', 
-                               array( 'rev_page' => $row->rev_page, "rev_id < '{$row->rev_id}'" ), 
+                               array( 'rev_page' => $row->rev_page, "rev_timestamp < '{$row->rev_timestamp}'" ), 
                                __FUNCTION__,
-                               array( 'ORDER BY' => 'rev_id DESC' ) );
+                               array( 'ORDER BY' => 'rev_timestamp DESC' ) );
+                       $previousID = intval($previousID);
+                       if( $previousID != $row->rev_parent_id )
+                               $changed++;
                        # Update the row...
                        $db->update( 'revision',
-                               array( 'rev_parent_id' => intval($previousID) ),
+                               array( 'rev_parent_id' => $previousID ),
                                array( 'rev_id' => $row->rev_id ),
                                __FUNCTION__ );
                        $count++;
                }
-               $blockStart += BATCH_SIZE;
-               $blockEnd += BATCH_SIZE;
+               $blockStart += BATCH_SIZE - 1;
+               $blockEnd += BATCH_SIZE - 1;
                wfWaitForSlaves( 5 );
        }
        $logged = $db->insert( 'updatelog',
@@ -53,7 +62,7 @@ function populate_rev_parent_id( $db ) {
                __FUNCTION__,
                'IGNORE' );
        if( $logged ) {
-               echo "rev_parent_id population complete ... {$count} rows\n";
+               echo "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n";
                return true;
        } else {
                echo "Could not insert rev_parent_id population row.\n";