From a789d32563bbe018d742d1ce52e17cc1419924b0 Mon Sep 17 00:00:00 2001 From: Jerome Jamnicky Date: Mon, 19 Apr 2004 12:30:24 +0000 Subject: [PATCH] * Add include for AdminSettings.php to allow table creation and deletion. This allows the script to work on my test system at least, but the include should probably be added to commandLine.inc before release. * Use the Database class for operations instead of wfQuery() et al, where appropriate, so that $wgDBadminuser and $wgDBadminpass can be used. * Drop temporary table and/or backup links table if already existing, when appropriate. * Optional progress reporting of links table conversion and reading of the cur table. * Read and write the 2 links tables in chunks of 1000 rows; this is faster than reading and writing rows 1 by 1, and uses much less RAM than reading the entire links table into RAM. For use when running the script on multiple processors and operating on the same DB (e.g. when doing the conversion at Wikipedia): * Options to enable/disable creating the temp table and overwriting the old links table. * $initialRowOffset * $finalRowOffset (not implemented yet) --- maintenance/convertLinks.php | 158 +++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 52 deletions(-) diff --git a/maintenance/convertLinks.php b/maintenance/convertLinks.php index 10374989b4..90e083dc39 100644 --- a/maintenance/convertLinks.php +++ b/maintenance/convertLinks.php @@ -1,29 +1,41 @@ ID) to the new schema (ID->ID) -# This hasn't been tested yet, so expect bugs +# This hasn't been thoroughly tested yet. # The wiki should be put into read-only mode while this script executes include_once( "commandLine.inc" ); - -# Check if it's already done - -$res = wfQuery( "SELECT * FROM links LIMIT 1", DB_WRITE ); -if ( !wfNumRows( $res ) ) { +# the below should probably be moved into commandLine.inc at some point +include_once( "../AdminSettings.php" ); + +$numRows = $tuplesAdded = $numBadLinks = $curRowsRead = 0; #counters etc +$totalTuplesInserted = 0; # total tuples INSERTed into links_temp +$reportCurReadProgress = true; #whether or not to give progress reports while reading IDs from cur table +$curReadReportInterval = 1000; #number of rows between progress reports +$reportLinksConvProgress = true; #whether or not to give progress reports during conversion +$linksConvInsertInterval = 1000; #number of rows between INSERTs and, optionally, reports +$initialRowOffset = 0; +$finalRowOffset = 0; # not used yet; highest row number from links table to process +$createTemporaryTable = 1; # create a temporary table to hold new links table during conv process +$overwriteLinksTable = 0; # overwrite the old links table with the new one + +$res = wfQuery( "SELECT COUNT(*) AS count FROM links", DB_WRITE ); +$row = wfFetchObject($res); +$numRows = $row->count; +wfFreeResult( $res ); + +if ( $numRows == 0 ) { print "No rows to convert. Updating schema...\n"; - createTable(); + createTempTable(); } else { $row = wfFetchObject( $res ); if ( is_numeric( $row->l_from ) ) { print "Schema already converted\n"; exit; } - - # Create a title -> cur_id map - - print "Loading IDs..."; + # Create a title -> cur_id map + print "Loading IDs from cur table..."; wfBufferSQLResults( false ); $res = wfQuery( "SELECT cur_namespace,cur_title,cur_id FROM cur", DB_WRITE ); $ids = array(); @@ -34,57 +46,99 @@ if ( !wfNumRows( $res ) ) { $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title"; } $ids[$title] = $row->cur_id; + $curRowsRead++; + if ($reportCurReadProgress) { + if (($curRowsRead % $curReadReportInterval) == 0) { + print "\t$curRowsRead rows of cur table read.\n"; + } + } } wfFreeResult( $res ); - - print "done\n"; - - # Now, load in all the links and create a links table in RAM - print "Processing links...\n"; - $res = wfQuery( "SELECT * FROM links", DB_WRITE ); - $links = array(); - $numBad = 0; - - while ( $row = wfFetchObject( $res ) ) { - if ( array_key_exists( $row->l_from, $ids ) ) { - $links[$ids[$row->l_from]][$row->l_to] = 1; - } else { - $numBad ++; - } + wfBufferSQLResults( true ); + print "Finished loading IDs.\n"; + if ( $createTemporaryTable ) { + # Create temporary table which will become the new links table. + createTempTable(); } - - print "Done, $numBad invalid titles\n"; - - # Save it to a new table - createTable(); - $sql = "INSERT INTO links_temp(l_from,l_to) VALUES "; - - $first = true; - foreach( $links as $from => $toArray ) { - foreach ( $toArray as $to => $one ) { - if ( $first ) { - $first = false; - } else { - $sql .= ","; + + # Now, step through the links table (in chunks of $linksConvInsertInterval rows), + # convert, and write to the new table. + print "Processing $numRows rows from links table...\n"; + for ($rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval) { + $sqlRead = "SELECT * FROM links LIMIT $linksConvInsertInterval OFFSET $rowOffset"; + $res = wfQuery($sqlRead, DB_READ); + $sqlWrite = array("INSERT IGNORE INTO links_temp(l_from,l_to) VALUES "); + $tuplesAdded = 0; # no tuples added to INSERT yet + while ( $row = wfFetchObject($res) ) { + $fromTitle = $row->l_from; + if ( array_key_exists( $fromTitle, $ids ) ) { # valid title + $from = $ids[$fromTitle]; + $to = $row->l_to; + if ( $tuplesAdded != 0 ) { + $sqlWrite[] = ","; + } + $sqlWrite[] = "($from,$to)"; + $tuplesAdded++; + } else { # invalid title + $numBadLinks++; } - $sql .= "($from,$to)"; + } + wfFreeResult($res); + #print "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n"; + if ( $tuplesAdded != 0 ) { + if ($reportLinksConvProgress) + print "Inserting $tuplesAdded tuples into links_temp..."; + wfQuery( implode("",$sqlWrite) , DB_WRITE ); + $totalTuplesInserted += $tuplesAdded; + if ($reportLinksConvProgress) + print " done. Total $totalTuplesInserted tuples inserted.\n"; } } - - wfQuery( $sql, DB_WRITE ); + print "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n"; } -# Swap in the new table -wfQuery( "RENAME TABLE links TO links_backup, links_temp TO links", DB_WRITE ); - -print "Conversion complete. The old table remains at links_backup, delete at your leisure.\n"; +if ( $overwriteLinksTable ) { + $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); + if (!($dbConn->isOpen())) { + print "Opening connection to database failed.\n"; + exit; + } + # Check for existing links_backup, and delete it if it exists. + print "Dropping backup links table if it exists..."; + $dbConn->query( "DROP TABLE IF EXISTS links_backup", DB_WRITE); + print " done.\n"; + + # Swap in the new table, and move old links table to links_backup + print "Swapping tables 'links' to 'links_backup'; 'links_temp' to 'links'..."; + $dbConn->query( "RENAME TABLE links TO links_backup, links_temp TO links", DB_WRITE ); + print " done.\n"; + + $dbConn->close(); + print "Conversion complete. The old table remains at links_backup;\n"; + print "delete at your leisure.\n"; +} else { + print "Conversion complete.\n"; +} -function createTable() { - wfQuery( "CREATE TABLE links_temp ( +function createTempTable() { + global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname; + $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); + + if (!($dbConn->isOpen())) { + print "Opening connection to database failed.\n"; + exit; + } + + print "Dropping temporary links table if it exists..."; + $dbConn->query( "DROP TABLE IF EXISTS links_temp", DB_WRITE); + print " done.\n"; + + print "Creating temporary links table..."; + $dbConn->query( "CREATE TABLE links_temp ( l_from int(8) unsigned NOT NULL default '0', l_to int(8) unsigned NOT NULL default '0', UNIQUE KEY l_from(l_from,l_to), KEY (l_to))", DB_WRITE); + print " done.\n"; } - -?> +?> \ No newline at end of file -- 2.20.1