dd114fdd26748a826f3f757150b7df7ef6ba66b4
[lhc/web/wiklou.git] / maintenance / populateParentId.php
1 <?php
2 /*
3 * Makes the required database updates for rev_parent_id
4 * to be of any use. It can be used for some simple tracking
5 * and to find new page edits by users.
6 */
7
8 require_once( "Maintenance.php" );
9
/**
 * Maintenance script that walks the revision table in rev_id batches and
 * writes a rev_parent_id for every row, choosing as parent the revision of
 * the same page that immediately precedes it (see findParentId()).
 */
class PopulateParentId extends Maintenance {

	// Number of consecutive rev_id values covered by each batch
	const BATCH_SIZE = 200;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates rev_parent_id";
	}

	/**
	 * Populate rev_parent_id for all rows of the revision table and record
	 * completion in the updatelog table.
	 *
	 * @return bool|null True when the updatelog row was inserted, false when
	 *  the insert reported failure; nothing is returned when the revision
	 *  table is empty.
	 */
	public function execute() {
		$db = wfGetDB( DB_MASTER );
		if ( !$db->tableExists( 'revision' ) ) {
			# NOTE(review): the second argument presumably makes error() abort
			# the script; confirm against Maintenance::error().
			$this->error( "revision table does not exist\n", true );
		}
		$this->output( "Populating rev_parent_id column\n" );
		$start = $db->selectField( 'revision', 'MIN(rev_id)', false, __METHOD__ );
		$end = $db->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
		if ( is_null( $start ) || is_null( $end ) ) {
			$this->output( "...revision table seems to be empty.\n" );
			# Nothing to populate, but still mark the update as done
			$db->insert( 'updatelog',
				array( 'ul_key' => 'populate rev_parent_id' ),
				__METHOD__,
				'IGNORE' );
			return;
		}
		# Pad the upper bound so the loop condition below still admits the
		# final, partially filled batch that contains MAX(rev_id)
		$end = intval( $end ) + self::BATCH_SIZE - 1;
		$blockStart = intval( $start );
		$blockEnd = $blockStart + self::BATCH_SIZE - 1;
		$count = 0;
		$changed = 0;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing rev_id from $blockStart to $blockEnd\n" );
			$cond = "rev_id BETWEEN $blockStart AND $blockEnd";
			$res = $db->select( 'revision',
				array( 'rev_id', 'rev_page', 'rev_timestamp', 'rev_parent_id' ),
				$cond, __METHOD__ );
			# Go through and update rev_parent_id from these rows.
			# Assume that the previous revision of the title was
			# the original previous revision of the title when the
			# edit was made...
			foreach ( $res as $row ) {
				$previousID = $this->findParentId( $db, $row );
				if ( $previousID != $row->rev_parent_id ) {
					$changed++;
				}
				# Update the row unconditionally: the loose comparison above
				# treats a NULL rev_parent_id as equal to 0, so such a row is
				# not counted as changed but still gets 0 written here.
				$db->update( 'revision',
					array( 'rev_parent_id' => $previousID ),
					array( 'rev_id' => $row->rev_id ),
					__METHOD__ );
				$count++;
			}
			# BETWEEN is inclusive on both ends and each window spans
			# BATCH_SIZE ids, so advance by the full BATCH_SIZE. Stepping by
			# BATCH_SIZE - 1 would make consecutive windows share one rev_id,
			# processing and counting that revision twice.
			$blockStart += self::BATCH_SIZE;
			$blockEnd += self::BATCH_SIZE;
			# NOTE(review): presumably throttles on replication lag between
			# batches; confirm the meaning of the argument.
			wfWaitForSlaves( 5 );
		}
		$logged = $db->insert( 'updatelog',
			array( 'ul_key' => 'populate rev_parent_id' ),
			__METHOD__,
			'IGNORE' );
		if ( $logged ) {
			$this->output( "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n" );
			return true;
		} else {
			$this->output( "Could not insert rev_parent_id population row.\n" );
			return false;
		}
	}

	/**
	 * Work out which revision should be recorded as the parent of $row.
	 *
	 * @param object $db Database handle as returned by wfGetDB()
	 * @param object $row Revision row providing rev_id, rev_page and rev_timestamp
	 * @return int rev_id of the chosen parent, or 0 when no earlier revision
	 *  of the page was found
	 */
	private function findParentId( $db, $row ) {
		# First, check rows with the same timestamp other than this one
		# with a smaller rev ID. The highest ID "wins". This avoids loops
		# as timestamp can only decrease and never loops with IDs (from parent to parent)
		$previousID = $db->selectField( 'revision', 'rev_id',
			array(
				'rev_page' => $row->rev_page,
				'rev_timestamp' => $row->rev_timestamp,
				"rev_id < " . intval( $row->rev_id )
			),
			__METHOD__,
			array( 'ORDER BY' => 'rev_id DESC' ) );
		# If there are none, check the highest ID with a lower timestamp
		if ( !$previousID ) {
			# Get the highest older timestamp
			$lastTimestamp = $db->selectField( 'revision', 'rev_timestamp',
				array(
					'rev_page' => $row->rev_page,
					"rev_timestamp < " . $db->addQuotes( $row->rev_timestamp )
				),
				__METHOD__,
				array( 'ORDER BY' => 'rev_timestamp DESC' ) );
			# If there is one, let the highest rev ID win
			if ( $lastTimestamp ) {
				$previousID = $db->selectField( 'revision', 'rev_id',
					array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $lastTimestamp ),
					__METHOD__,
					array( 'ORDER BY' => 'rev_id DESC' ) );
			}
		}
		# A failed lookup yields a falsy value; normalise it to 0
		return intval( $previousID );
	}
}
103
// Name the maintenance class declared in this file, then include the file
// that the DO_MAINTENANCE constant points to.
// NOTE(review): presumably that file instantiates $maintClass and calls its
// execute() method; confirm against the Maintenance framework.
$maintClass = 'PopulateParentId';
require_once DO_MAINTENANCE;