From 02bad4a06f21d232084678d4b669ab681b5a83ab Mon Sep 17 00:00:00 2001 From: Ilmari Karonen Date: Wed, 3 Mar 2010 21:15:05 +0000 Subject: [PATCH] call populateRevisionLength.php from updaters.inc; make updaters.inc use populate*.php instead of the old populate*.inc scripts for other population tasks; remove the now obsolete populate*.inc files; misc. fixes --- maintenance/populateCategory.inc | 85 -------------------------- maintenance/populateCategory.php | 1 + maintenance/populateLogSearch.inc | 80 ------------------------ maintenance/populateParentId.inc | 83 ------------------------- maintenance/populateParentId.php | 7 +-- maintenance/populateRevisionLength.php | 4 +- maintenance/updaters.inc | 27 +++++--- 7 files changed, 25 insertions(+), 262 deletions(-) delete mode 100644 maintenance/populateCategory.inc delete mode 100644 maintenance/populateLogSearch.inc delete mode 100644 maintenance/populateParentId.inc diff --git a/maintenance/populateCategory.inc b/maintenance/populateCategory.inc deleted file mode 100644 index deca453078..0000000000 --- a/maintenance/populateCategory.inc +++ /dev/null @@ -1,85 +0,0 @@ -selectRow( - 'updatelog', - '1', - array( 'ul_key' => 'populate category' ), - __FUNCTION__ - ); - if( $row ) { - wfOut( "Category table already populated. Use php ". - "maintenance/populateCategory.php\n--force from the command line ". - "to override.\n" ); - return true; - } - } - - $maxlag = intval( $maxlag ); - $throttle = intval( $throttle ); - $force = (bool)$force; - if( $begin !== '' ) { - $where = 'cl_to > '.$dbw->addQuotes( $begin ); - } else { - $where = null; - } - $i = 0; - - while( true ) { - # Find which category to update - $row = $dbw->selectRow( - 'categorylinks', - 'cl_to', - $where, - __FUNCTION__, - array( - 'ORDER BY' => 'cl_to' - ) - ); - if( !$row ) { - # Done, hopefully. - break; - } - $name = $row->cl_to; - $where = 'cl_to > '.$dbw->addQuotes( $name ); - - # Use the row to update the category count - $cat = Category::newFromName( $name ); - if( !is_object( $cat ) ) { - wfOut( "The category named $name is not valid?!\n" ); - } else { - $cat->refreshCounts(); - } - - ++$i; - if( !($i % REPORTING_INTERVAL) ) { - wfOut( "$name\n" ); - wfWaitForSlaves( $maxlag ); - } - usleep( $throttle*1000 ); - } - - if( $dbw->insert( - 'updatelog', - array( 'ul_key' => 'populate category' ), - __FUNCTION__, - 'IGNORE' - ) - ) { - wfOut( "Category population complete.\n" ); - return true; - } else { - wfOut( "Could not insert category population row.\n" ); - return false; - } -} diff --git a/maintenance/populateCategory.php b/maintenance/populateCategory.php index bf84bb0a41..8b1675f00b 100644 --- a/maintenance/populateCategory.php +++ b/maintenance/populateCategory.php @@ -31,6 +31,7 @@ added after the software update and so will be populated anyway. When the script has finished, it will make a note of this in the database, and will not run again without the --force option. TEXT; +#' $this->addOption( 'begin', 'Only do categories whose names are alphabetically after the provided name', false, true ); $this->addOption( 'max-slave-lag', 'If slave lag exceeds this many seconds, wait until it drops before continuing. Default: 10', false, true ); $this->addOption( 'throttle', 'Wait this many milliseconds after each category. Default: 0', false, true ); diff --git a/maintenance/populateLogSearch.inc b/maintenance/populateLogSearch.inc deleted file mode 100644 index b5e34fb78b..0000000000 --- a/maintenance/populateLogSearch.inc +++ /dev/null @@ -1,80 +0,0 @@ -selectField( 'logging', 'MIN(log_id)', false, __FUNCTION__ ); - if( !$start ) { - echo "Nothing to do.\n"; - return true; - } - $end = $db->selectField( 'logging', 'MAX(log_id)', false, __FUNCTION__ ); - - # Do remaining chunk - $end += LOG_SEARCH_BATCH_SIZE - 1; - $blockStart = $start; - $blockEnd = $start + LOG_SEARCH_BATCH_SIZE - 1; - while( $blockEnd <= $end ) { - echo "...doing log_id from $blockStart to $blockEnd\n"; - $cond = array("log_id BETWEEN $blockStart AND $blockEnd"); - # Applicable log types - $cond['log_type'] = array('delete','suppress'); - $res = $db->select( 'logging', '*', $cond, __FUNCTION__ ); - $batch = array(); - while( $row = $db->fetchObject( $res ) ) { - // RevisionDelete logs - revisions - if( LogEventsList::typeAction( $row, array('delete','suppress'), 'revision' ) ) { - $params = LogPage::extractParams( $row->log_params ); - // Param format: [ ] - if( count($params) >= 2 ) { - $field = RevisionDeleter::getRelationType($params[0]); - // B/C, the params may start with a title key - if( $field == null ) { - array_shift($params); - $field = RevisionDeleter::getRelationType($params[0]); - } - if( $field == null ) { - echo "Invalid param type for $row->log_id\n"; - continue; // skip this row - } - $items = explode(',',$params[1]); - $log = new LogPage( $row->log_type ); - $log->addRelations( $field, $items, $row->log_id ); - } - // RevisionDelete logs - log events - } else if( LogEventsList::typeAction( $row, array('delete','suppress'), 'event' ) ) { - $params = LogPage::extractParams( $row->log_params ); - // Param format: [ ] - if( count($params) >= 1 ) { - $items = explode(',',$params[0]); - $log = new LogPage( $row->log_type ); - $log->addRelations( 'log_id', $items, $row->log_id ); - } - } - } - $blockStart += LOG_SEARCH_BATCH_SIZE; - $blockEnd += LOG_SEARCH_BATCH_SIZE; - wfWaitForSlaves( 5 ); - } - if( $db->insert( - 'updatelog', - array( 'ul_key' => 'populate log_search' ), - __FUNCTION__, - 'IGNORE' - ) - ) { - wfOut( "log_search population complete.\n" ); - return true; - } else { - wfOut( "Could not insert log_search population row.\n" ); - return false; - } -} diff --git a/maintenance/populateParentId.inc b/maintenance/populateParentId.inc deleted file mode 100644 index 7b1ae3e8fe..0000000000 --- a/maintenance/populateParentId.inc +++ /dev/null @@ -1,83 +0,0 @@ -selectField( 'revision', 'MIN(rev_id)', false, __FUNCTION__ ); - $end = $db->selectField( 'revision', 'MAX(rev_id)', false, __FUNCTION__ ); - if( is_null( $start ) || is_null( $end ) ){ - wfOut( "...revision table seems to be empty.\n" ); - $db->insert( 'updatelog', - array( 'ul_key' => 'populate rev_parent_id' ), - __FUNCTION__, - 'IGNORE' ); - return; - } - # Do remaining chunk - $end += BATCH_SIZE - 1; - $blockStart = intval( $start ); - $blockEnd = intval( $start ) + BATCH_SIZE - 1; - $count = 0; - $changed = 0; - while( $blockEnd <= $end ) { - wfOut( "...doing rev_id from $blockStart to $blockEnd\n" ); - $cond = "rev_id BETWEEN $blockStart AND $blockEnd"; - $res = $db->select( 'revision', - array('rev_id','rev_page','rev_timestamp','rev_parent_id'), - $cond, __FUNCTION__ ); - # Go through and update rev_parent_id from these rows. - # Assume that the previous revision of the title was - # the original previous revision of the title when the - # edit was made... - foreach( $res as $row ) { - # First, check rows with the same timestamp other than this one - # with a smaller rev ID. The highest ID "wins". This avoids loops - # as timestamp can only decrease and never loops with IDs (from parent to parent) - $previousID = $db->selectField( 'revision', 'rev_id', - array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $row->rev_timestamp, - "rev_id < " . intval( $row->rev_id ) ), - __FUNCTION__, - array( 'ORDER BY' => 'rev_id DESC' ) ); - # If there are none, check the the highest ID with a lower timestamp - if( !$previousID ) { - # Get the highest older timestamp - $lastTimestamp = $db->selectField( 'revision', 'rev_timestamp', - array( 'rev_page' => $row->rev_page, "rev_timestamp < " . $db->addQuotes( $row->rev_timestamp ) ), - __FUNCTION__, - array( 'ORDER BY' => 'rev_timestamp DESC' ) ); - # If there is one, let the highest rev ID win - if( $lastTimestamp ) { - $previousID = $db->selectField( 'revision', 'rev_id', - array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $lastTimestamp ), - __FUNCTION__, - array( 'ORDER BY' => 'rev_id DESC' ) ); - } - } - $previousID = intval($previousID); - if( $previousID != $row->rev_parent_id ) - $changed++; - # Update the row... - $db->update( 'revision', - array( 'rev_parent_id' => $previousID ), - array( 'rev_id' => $row->rev_id ), - __FUNCTION__ ); - $count++; - } - $blockStart += BATCH_SIZE - 1; - $blockEnd += BATCH_SIZE - 1; - wfWaitForSlaves( 5 ); - } - $logged = $db->insert( 'updatelog', - array( 'ul_key' => 'populate rev_parent_id' ), - __FUNCTION__, - 'IGNORE' ); - if( $logged ) { - wfOut( "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n" ); - return true; - } else { - wfOut( "Could not insert rev_parent_id population row.\n" ); - return false; - } -} - diff --git a/maintenance/populateParentId.php b/maintenance/populateParentId.php index bf81cb688d..56e6467cc5 100644 --- a/maintenance/populateParentId.php +++ b/maintenance/populateParentId.php @@ -48,12 +48,11 @@ class PopulateParentId extends Maintenance { return; } # Do remaining chunk - $end += $this->mBatchSize - 1; $blockStart = intval( $start ); $blockEnd = intval( $start ) + $this->mBatchSize - 1; $count = 0; $changed = 0; - while( $blockEnd <= $end ) { + while( $blockStart <= $end ) { $this->output( "...doing rev_id from $blockStart to $blockEnd\n" ); $cond = "rev_id BETWEEN $blockStart AND $blockEnd"; $res = $db->select( 'revision', @@ -97,8 +96,8 @@ class PopulateParentId extends Maintenance { __METHOD__ ); $count++; } - $blockStart += $this->mBatchSize - 1; - $blockEnd += $this->mBatchSize - 1; + $blockStart += $this->mBatchSize; + $blockEnd += $this->mBatchSize; wfWaitForSlaves( 5 ); } $logged = $db->insert( 'updatelog', diff --git a/maintenance/populateRevisionLength.php b/maintenance/populateRevisionLength.php index b0c1d04e3a..2ee67794b6 100644 --- a/maintenance/populateRevisionLength.php +++ b/maintenance/populateRevisionLength.php @@ -1,8 +1,6 @@ array( @@ -1257,12 +1259,13 @@ function do_category_population() { wfOut( "...category table already populated.\n" ); return; } - require_once( 'populateCategory.inc' ); + require_once( 'populateCategory.php' ); wfOut( "Populating category table, printing progress markers. " ). "For large databases, you\n". "may want to hit Ctrl-C and do this manually with maintenance/\n". "populateCategory.php.\n"; - populateCategory( '', 10, 0, true ); + $task = new PopulateCategory(); + $task->execute(); wfOut( "Done populating category table.\n" ); } @@ -1271,10 +1274,19 @@ function do_populate_parent_id() { wfOut( "...rev_parent_id column already populated.\n" ); return; } - require_once( 'populateParentId.inc' ); + require_once( 'populateParentId.php' ); + $task = new PopulateParentId(); + $task->execute(); +} - global $wgDatabase; - populate_rev_parent_id( $wgDatabase ); +function do_populate_rev_len() { + if( update_row_exists( 'populate rev_len' ) ) { + wfOut( "...rev_len column already populated.\n" ); + return; + } + require_once( 'populateRevisionLength.php' ); + $task = new PopulateRevisionLength(); + $task->execute(); } function sqlite_initial_indexes() { @@ -1324,12 +1336,13 @@ function do_log_search_population() { wfOut( "...log_search table already populated.\n" ); return; } - require_once( 'populateLogSearch.inc' ); + require_once( 'populateLogSearch.php' ); wfOut( "Populating log_search table, printing progress markers. For large\n" . "databases, you may want to hit Ctrl-C and do this manually with\n" . "maintenance/populateLogSearch.php.\n" ); - migrate_log_params( $wgDatabase ); + $task = new PopulateLogSearch(); + $task->execute(); wfOut( "Done populating log_search table.\n" ); } -- 2.20.1