From afc688d7e23b83afb6eda9243136b31794ac3ada Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Wed, 7 Jan 2009 03:33:14 +0000 Subject: [PATCH] Pull back r45431 for the moment "Updated deleteLinksFromNonexistent function:" etc There's some funny output with \x08 stuff, and I don't want to fiddle with it just now... --- maintenance/refreshLinks.inc | 92 +++++++++--------------------------- maintenance/refreshLinks.php | 22 ++++----- 2 files changed, 33 insertions(+), 81 deletions(-) diff --git a/maintenance/refreshLinks.inc b/maintenance/refreshLinks.inc index 3408d1b351..036d4109c8 100644 --- a/maintenance/refreshLinks.inc +++ b/maintenance/refreshLinks.inc @@ -136,23 +136,13 @@ function fixLinksFromArticle( $id ) { $dbw->immediateCommit(); } -/* - * Removes non-existing links from pages from pagelinks, imagelinks, - * categorylinks, templatelinks and externallinks tables. - * - * @param $maxLag - * @param $batchSize The size of deletion batches - * - * @author Merlijn van Deen - */ -function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) { +function deleteLinksFromNonexistent( $maxLag = 0 ) { $fname = 'deleteLinksFromNonexistent'; + wfWaitForSlaves( $maxLag ); - + $dbw = wfGetDB( DB_MASTER ); - $dbr = wfGetDB( DB_SLAVE ); - $dbr->bufferResults(false); - + $linksTables = array( 'pagelinks' => 'pl_from', 'imagelinks' => 'il_from', @@ -160,65 +150,27 @@ function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) { 'templatelinks' => 'tl_from', 'externallinks' => 'el_from', ); - - - $readPage = $dbr->tableName( 'page' ); + + $page = $dbw->tableName( 'page' ); + + foreach ( $linksTables as $table => $field ) { - $readLinks = $dbr->tableName( $table ); - - $sql = "SELECT DISTINCT( $field ) FROM $readLinks LEFT JOIN $readPage ON $field=page_id WHERE page_id IS NULL;"; - print "Retrieving illegal entries from $table: \tRUNNING"; - - $results = $dbr->query( $sql, $fname . ':' . $readLinks ); - print "\x08\x08\x08\x08\x08\x08\x08" . $results->numRows() . " illegal " . $field. "s. "; - - if ( $results->numRows() == 0 ) { - print "\n"; - continue; - } - - $counter = 0; - $list = array(); - print "Removing illegal links: 1.."; - foreach( $results as $row ) { - $counter++; - $list[] = $row->$field; - if ( ( $counter % $batchSize ) == 0 ) { - print $counter . ".."; - deleteBatch($dbw, $table, $field, $list); - $list = ''; + if ( !$dbw->ping() ) { + print "DB disconnected, reconnecting..."; + while ( !$dbw->ping() ) { + print "."; + sleep(10); } + print "\n"; } - print $counter . "\n"; - deleteBatch($dbw, $table, $field, $list); - } -} -/* Deletes a batch of items from a table. - * Runs the query: DELETE FROM <$table> WHERE <$field> IN (<$list>) - * - * @param $dbw Database Database object to run the DELETE query on - * @param $table table to work on; will be converted via $dbw->tableName. - * @param $field column to search in - * @param $list values to remove. Array with SQL-safe (!) values. - * - * @author Merlijn van Deen - */ -function deleteBatch($dbw, $table, $field, $list) { - if (count($list) == 0) return; - - $masterLinks = $dbw->tableName( $table ); - $fname = "deleteBatch:masterLinks"; - - if ( !$dbw->ping() ) { - print "\nDB disconnected, reconnecting..."; - while ( !$dbw->ping() ) { - print "."; - sleep(10); - } - print "\n"; - } + $pTable = $dbw->tableName( $table ); + $sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL"; - $sql = "DELETE FROM $masterLinks WHERE $field IN (" . join("," , $list) . ");"; - $dbw->query($sql, $fname); + print "Deleting $table from non-existent articles..."; + $dbw->query( $sql, $fname ); + print " fixed " .$dbw->affectedRows() . " row(s)\n"; + } } + +?> diff --git a/maintenance/refreshLinks.php b/maintenance/refreshLinks.php index 81baa0714b..4893d58019 100644 --- a/maintenance/refreshLinks.php +++ b/maintenance/refreshLinks.php @@ -18,16 +18,14 @@ Usage: [--new-only] [--redirects-only] php refreshLinks.php [] [-e ] [-m ] --old-redirects-only - --help : This help message - --dfn-only : Delete links from nonexistent articles only - --batch-size : The delete batch size when removing links from - nonexistent articles (default 100) - --new-only : Only affect articles with just a single edit - --redirects-only : Only fix redirects, not all links - --old-redirects-only : Only fix redirects with no redirect table entry - -m : Maximum replication lag - : First page id to refresh - -e : Last page id to refresh + --help : This help message + --dfn-only : Delete links from nonexistent articles only + --new-only : Only affect articles with just a single edit + --redirects-only : Only fix redirects, not all links + --old-redirects-only : Only fix redirects with no redirect table entry + -m : Maximum replication lag + : First page id to refresh + -e : Last page id to refresh TEXT; exit(0); @@ -46,8 +44,10 @@ if ( !$options['dfn-only'] ) { } // this bit's bad for replication: disabling temporarily // --brion 2005-07-16 -deleteLinksFromNonexistent($options['m'], $options['batch-size']); +//deleteLinksFromNonexistent(); if ( $options['globals'] ) { print_r( $GLOBALS ); } + + -- 2.20.1