call populateRevisionLength.php from updaters.inc; make updaters.inc use populate...
author Ilmari Karonen <vyznev@users.mediawiki.org>
Wed, 3 Mar 2010 21:15:05 +0000 (21:15 +0000)
committer Ilmari Karonen <vyznev@users.mediawiki.org>
Wed, 3 Mar 2010 21:15:05 +0000 (21:15 +0000)
maintenance/populateCategory.inc [deleted file]
maintenance/populateCategory.php
maintenance/populateLogSearch.inc [deleted file]
maintenance/populateParentId.inc [deleted file]
maintenance/populateParentId.php
maintenance/populateRevisionLength.php
maintenance/updaters.inc

diff --git a/maintenance/populateCategory.inc b/maintenance/populateCategory.inc
deleted file mode 100644 (file)
index deca453..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-<?php
-/**
- * @file
- * @ingroup Maintenance
- * @author Simetrical
- */
-
-define( 'REPORTING_INTERVAL', 1000 );
-
-function populateCategory( $begin, $maxlag, $throttle, $force ) {
-       $dbw = wfGetDB( DB_MASTER );
-
-       if( !$force ) {
-               $row = $dbw->selectRow(
-                       'updatelog',
-                       '1',
-                       array( 'ul_key' => 'populate category' ),
-                       __FUNCTION__
-               );
-               if( $row ) {
-                       wfOut( "Category table already populated.  Use php ".
-                       "maintenance/populateCategory.php\n--force from the command line ".
-                       "to override.\n" );
-                       return true;
-               }
-       }
-
-       $maxlag = intval( $maxlag );
-       $throttle = intval( $throttle );
-       $force = (bool)$force;
-       if( $begin !== '' ) {
-               $where = 'cl_to > '.$dbw->addQuotes( $begin );
-       } else {
-               $where = null;
-       }
-       $i = 0;
-
-       while( true ) {
-               # Find which category to update
-               $row = $dbw->selectRow(
-                       'categorylinks',
-                       'cl_to',
-                       $where,
-                       __FUNCTION__,
-                       array(
-                               'ORDER BY' => 'cl_to'
-                       )
-               );
-               if( !$row ) {
-                       # Done, hopefully.
-                       break;
-               }
-               $name = $row->cl_to;
-               $where = 'cl_to > '.$dbw->addQuotes( $name );
-
-               # Use the row to update the category count
-               $cat = Category::newFromName( $name );
-               if( !is_object( $cat ) ) {
-                       wfOut( "The category named $name is not valid?!\n" );
-               } else {
-                       $cat->refreshCounts();
-               }
-
-               ++$i;
-               if( !($i % REPORTING_INTERVAL) ) {
-                       wfOut( "$name\n" );
-                       wfWaitForSlaves( $maxlag );
-               }
-               usleep( $throttle*1000 );
-       }
-
-       if( $dbw->insert(
-                       'updatelog',
-                       array( 'ul_key' => 'populate category' ),
-                       __FUNCTION__,
-                       'IGNORE'
-               )
-       ) {
-               wfOut( "Category population complete.\n" );
-               return true;
-       } else {
-               wfOut( "Could not insert category population row.\n" );
-               return false;
-       }
-}
diff --git a/maintenance/populateCategory.php b/maintenance/populateCategory.php
index bf84bb0..8b1675f 100644 (file)
@@ -31,6 +31,7 @@ added after the software update and so will be populated anyway.
 When the script has finished, it will make a note of this in the database, and
 will not run again without the --force option.
 TEXT;
+#'
                $this->addOption( 'begin', 'Only do categories whose names are alphabetically after the provided name', false, true );
                $this->addOption( 'max-slave-lag', 'If slave lag exceeds this many seconds, wait until it drops before continuing.  Default: 10', false, true );
                $this->addOption( 'throttle', 'Wait this many milliseconds after each category.  Default: 0', false, true );
diff --git a/maintenance/populateLogSearch.inc b/maintenance/populateLogSearch.inc
deleted file mode 100644 (file)
index b5e34fb..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-<?php
-/**
- * Makes the required database updates for log display in Special:RevisionDelete
- *
- * Run via update.php or directly through populateLogSearch.php
- *
- * @file
- * @ingroup Maintenance
- */
-
-define( 'LOG_SEARCH_BATCH_SIZE', 300 );
-
-function migrate_log_params( $db ) {
-       $start = $db->selectField( 'logging', 'MIN(log_id)', false, __FUNCTION__ );
-       if( !$start ) {
-               echo "Nothing to do.\n";
-               return true;
-       }
-       $end = $db->selectField( 'logging', 'MAX(log_id)', false, __FUNCTION__ );
-       
-       # Do remaining chunk
-       $end += LOG_SEARCH_BATCH_SIZE - 1;
-       $blockStart = $start;
-       $blockEnd = $start + LOG_SEARCH_BATCH_SIZE - 1;
-       while( $blockEnd <= $end ) {
-               echo "...doing log_id from $blockStart to $blockEnd\n";
-               $cond = array("log_id BETWEEN $blockStart AND $blockEnd");
-               # Applicable log types
-               $cond['log_type'] = array('delete','suppress');
-               $res = $db->select( 'logging', '*', $cond, __FUNCTION__ );
-               $batch = array();
-               while( $row = $db->fetchObject( $res ) ) {
-                       // RevisionDelete logs - revisions
-                       if( LogEventsList::typeAction( $row, array('delete','suppress'), 'revision' ) ) {
-                               $params = LogPage::extractParams( $row->log_params );
-                               // Param format: <urlparam> <item CSV> [<ofield> <nfield>]
-                               if( count($params) >= 2 ) {
-                                       $field = RevisionDeleter::getRelationType($params[0]);
-                                       // B/C, the params may start with a title key
-                                       if( $field == null ) {
-                                               array_shift($params);
-                                               $field = RevisionDeleter::getRelationType($params[0]);
-                                       }
-                                       if( $field == null ) {
-                                               echo "Invalid param type for $row->log_id\n";
-                                               continue; // skip this row
-                                       }
-                                       $items = explode(',',$params[1]);
-                                       $log = new LogPage( $row->log_type );
-                                       $log->addRelations( $field, $items, $row->log_id );
-                               }
-                       // RevisionDelete logs - log events
-                       } else if( LogEventsList::typeAction( $row, array('delete','suppress'), 'event' ) ) {
-                               $params = LogPage::extractParams( $row->log_params );
-                               // Param format: <item CSV> [<ofield> <nfield>]
-                               if( count($params) >= 1 ) {
-                                       $items = explode(',',$params[0]);
-                                       $log = new LogPage( $row->log_type );
-                                       $log->addRelations( 'log_id', $items, $row->log_id );
-                               }
-                       }
-               }
-               $blockStart += LOG_SEARCH_BATCH_SIZE;
-               $blockEnd += LOG_SEARCH_BATCH_SIZE;
-               wfWaitForSlaves( 5 );
-       }
-       if( $db->insert(
-                       'updatelog',
-                       array( 'ul_key' => 'populate log_search' ),
-                       __FUNCTION__,
-                       'IGNORE'
-               )
-       ) {
-               wfOut( "log_search population complete.\n" );
-               return true;
-       } else {
-               wfOut( "Could not insert log_search population row.\n" );
-               return false;
-       }
-}
diff --git a/maintenance/populateParentId.inc b/maintenance/populateParentId.inc
deleted file mode 100644 (file)
index 7b1ae3e..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-<?php
-
-define( 'BATCH_SIZE', 200 );
-
-function populate_rev_parent_id( $db ) {
-       wfOut( "Populating rev_parent_id column\n" );
-       $start = $db->selectField( 'revision', 'MIN(rev_id)', false, __FUNCTION__ );
-       $end = $db->selectField( 'revision', 'MAX(rev_id)', false, __FUNCTION__ );
-       if( is_null( $start ) || is_null( $end ) ){
-               wfOut( "...revision table seems to be empty.\n" );
-               $db->insert( 'updatelog',
-                       array( 'ul_key' => 'populate rev_parent_id' ),
-                       __FUNCTION__,
-                       'IGNORE' );
-               return;
-       }
-       # Do remaining chunk
-       $end += BATCH_SIZE - 1;
-       $blockStart = intval( $start );
-       $blockEnd = intval( $start ) + BATCH_SIZE - 1;
-       $count = 0;
-       $changed = 0;
-       while( $blockEnd <= $end ) {
-               wfOut( "...doing rev_id from $blockStart to $blockEnd\n" );
-               $cond = "rev_id BETWEEN $blockStart AND $blockEnd";
-               $res = $db->select( 'revision', 
-                       array('rev_id','rev_page','rev_timestamp','rev_parent_id'), 
-                       $cond, __FUNCTION__ );
-               # Go through and update rev_parent_id from these rows.
-               # Assume that the previous revision of the title was
-               # the original previous revision of the title when the
-               # edit was made...
-               foreach( $res as $row ) {
-                       # First, check rows with the same timestamp other than this one
-                       # with a smaller rev ID. The highest ID "wins". This avoids loops
-                       # as timestamp can only decrease and never loops with IDs (from parent to parent)
-                       $previousID = $db->selectField( 'revision', 'rev_id', 
-                               array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $row->rev_timestamp,
-                                       "rev_id < " . intval( $row->rev_id ) ), 
-                               __FUNCTION__,
-                               array( 'ORDER BY' => 'rev_id DESC' ) );
-                       # If there are none, check the the highest ID with a lower timestamp
-                       if( !$previousID ) {
-                               # Get the highest older timestamp
-                               $lastTimestamp = $db->selectField( 'revision', 'rev_timestamp', 
-                                       array( 'rev_page' => $row->rev_page, "rev_timestamp < " . $db->addQuotes( $row->rev_timestamp ) ), 
-                                       __FUNCTION__,
-                                       array( 'ORDER BY' => 'rev_timestamp DESC' ) );
-                               # If there is one, let the highest rev ID win
-                               if( $lastTimestamp ) {
-                                       $previousID = $db->selectField( 'revision', 'rev_id', 
-                                               array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $lastTimestamp ), 
-                                               __FUNCTION__,
-                                               array( 'ORDER BY' => 'rev_id DESC' ) );
-                               }
-                       }
-                       $previousID = intval($previousID);
-                       if( $previousID != $row->rev_parent_id )
-                               $changed++;
-                       # Update the row...
-                       $db->update( 'revision',
-                               array( 'rev_parent_id' => $previousID ),
-                               array( 'rev_id' => $row->rev_id ),
-                               __FUNCTION__ );
-                       $count++;
-               }
-               $blockStart += BATCH_SIZE - 1;
-               $blockEnd += BATCH_SIZE - 1;
-               wfWaitForSlaves( 5 );
-       }
-       $logged = $db->insert( 'updatelog',
-               array( 'ul_key' => 'populate rev_parent_id' ),
-               __FUNCTION__,
-               'IGNORE' );
-       if( $logged ) {
-               wfOut( "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n" );
-               return true;
-       } else {
-               wfOut( "Could not insert rev_parent_id population row.\n" );
-               return false;
-       }
-}
-
diff --git a/maintenance/populateParentId.php b/maintenance/populateParentId.php
index bf81cb6..56e6467 100644 (file)
@@ -48,12 +48,11 @@ class PopulateParentId extends Maintenance {
                        return;
                }
                # Do remaining chunk
-               $end += $this->mBatchSize - 1;
                $blockStart = intval( $start );
                $blockEnd = intval( $start ) + $this->mBatchSize - 1;
                $count = 0;
                $changed = 0;
-               while( $blockEnd <= $end ) {
+               while( $blockStart <= $end ) {
                        $this->output( "...doing rev_id from $blockStart to $blockEnd\n" );
                        $cond = "rev_id BETWEEN $blockStart AND $blockEnd";
                        $res = $db->select( 'revision', 
@@ -97,8 +96,8 @@ class PopulateParentId extends Maintenance {
                                        __METHOD__ );
                                $count++;
                        }
-                       $blockStart += $this->mBatchSize - 1;
-                       $blockEnd += $this->mBatchSize - 1;
+                       $blockStart += $this->mBatchSize;
+                       $blockEnd += $this->mBatchSize;
                        wfWaitForSlaves( 5 );
                }
                $logged = $db->insert( 'updatelog',
diff --git a/maintenance/populateRevisionLength.php b/maintenance/populateRevisionLength.php
index b0c1d04..2ee6779 100644 (file)
@@ -1,8 +1,6 @@
 <?php
 /*
- * Makes the required database updates for rev_len
- * to be of any use. It can be used for some simple tracking
- * and to find new page edits by users.
+ * Populates the rev_len field for old revisions created before MW 1.10.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/maintenance/updaters.inc b/maintenance/updaters.inc
index 775ecdc..1edcd5e 100644 (file)
@@ -170,6 +170,8 @@ $wgUpdates = array(
                // trunk
                array( 'rename_eu_wiki_id' ),
                array( 'do_update_mime_minor_field' ),
+               // Should've done this back in 1.10, but better late than never:
+               array( 'do_populate_rev_len' ),
        ),
 
        'sqlite' => array(
@@ -1257,12 +1259,13 @@ function do_category_population() {
                wfOut( "...category table already populated.\n" );
                return;
        }
-       require_once( 'populateCategory.inc' );
+       require_once( 'populateCategory.php' );
        wfOut( "Populating category table, printing progress markers.  " ).
 "For large databases, you\n".
 "may want to hit Ctrl-C and do this manually with maintenance/\n".
 "populateCategory.php.\n";
-       populateCategory( '', 10, 0, true );
+       $task = new PopulateCategory();
+       $task->execute();
        wfOut( "Done populating category table.\n" );
 }
 
@@ -1271,10 +1274,19 @@ function do_populate_parent_id() {
                wfOut( "...rev_parent_id column already populated.\n" );
                return;
        }
-       require_once( 'populateParentId.inc' );
+       require_once( 'populateParentId.php' );
+       $task = new PopulateParentId();
+       $task->execute();
+}
 
-       global $wgDatabase;
-       populate_rev_parent_id( $wgDatabase );
+function do_populate_rev_len() {
+       if( update_row_exists( 'populate rev_len' ) ) {
+               wfOut( "...rev_len column already populated.\n" );
+               return;
+       }
+       require_once( 'populateRevisionLength.php' );
+       $task = new PopulateRevisionLength();
+       $task->execute();
 }
 
 function sqlite_initial_indexes() {
@@ -1324,12 +1336,13 @@ function do_log_search_population() {
                wfOut( "...log_search table already populated.\n" );
                return;
        }
-       require_once( 'populateLogSearch.inc' );
+       require_once( 'populateLogSearch.php' );
        wfOut(
 "Populating log_search table, printing progress markers. For large\n" .
 "databases, you may want to hit Ctrl-C and do this manually with\n" .
 "maintenance/populateLogSearch.php.\n" );
-       migrate_log_params( $wgDatabase );
+       $task = new PopulateLogSearch();
+       $task->execute();
        wfOut( "Done populating log_search table.\n" );
 }