Fixes for r45514 and r45516:
author: Merlijn S. van Deen <valhallasw@users.mediawiki.org>
Tue, 13 Jan 2009 23:58:45 +0000 (23:58 +0000)
committer: Merlijn S. van Deen <valhallasw@users.mediawiki.org>
Tue, 13 Jan 2009 23:58:45 +0000 (23:58 +0000)
* Removed deprecated constructs
* Replaced SQL queries with their functional equivalents
** Removed deleteBatch function: equivalent to $dbw->delete()
* Allow slave servers to catch up before deleting more rows
* Fixed to use a new, unbuffered, slave database connection
* This one should actually work

maintenance/refreshLinks.inc
maintenance/refreshLinks.php

index f38426a..b7d531c 100644 (file)
@@ -146,12 +146,13 @@ function fixLinksFromArticle( $id ) {
  * @author Merlijn van Deen <valhallasw@arctus.nl>
  */
 function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
-       $fname = 'deleteLinksFromNonexistent';
        wfWaitForSlaves( $maxLag );
        
        $dbw = wfGetDB( DB_MASTER );
-       $dbr = wfGetDB( DB_SLAVE );
-       $dbr->bufferResults(false);
+
+       $lb = wfGetLBFactory()->newMainLB();
+       $dbr = $lb->getConnection( DB_SLAVE );
+       $dbr->bufferResults( false );
        
        $linksTables = array( // table name => page_id field
                'pagelinks' => 'pl_from',
@@ -161,65 +162,41 @@ function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
                'externallinks' => 'el_from',
        );
        
-       $readPage = $dbr->tableName( 'page' );
-       
        foreach ( $linksTables as $table => $field ) {
-               $readLinks = $dbr->tableName( $table );
-               
                print "Retrieving illegal entries from $table... ";
                
-               $sql = "SELECT DISTINCT( $field ) FROM $readLinks LEFT JOIN $readPage ON $field=page_id WHERE page_id IS NULL;";
-               $results = $dbr->query( $sql, $fname . ':' . $readLinks );
-               
-               print $results->numRows() . " illegal " . $field. "s. ";
+               // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
+               $results = $dbr->select( array( $table, 'page' ),
+                             $field,
+                             array('page_id' => null ),
+                             __METHOD__,
+                             'DISTINCT',
+                             array( 'page' => array( 'LEFT JOIN', "$field=page_id"))
+               );
                
-               if ( $results->numRows() > 0 ) {
-                       $counter = 0;
-                       $list = array();
-                       print "Removing illegal links: 1..";
+               $counter = 0;
+               $list = array();
+               print "0..";
                
-                       foreach( $results as $row ) {
-                               $counter++;
-                               $list[] = $row->$field;
-                               if ( ( $counter % $batchSize ) == 0 ) {
-                                       print $counter . "..";
-                                       deleteBatch($dbw, $table, $field, $list);
-                                       $list = array();
-                               }
+               foreach( $results as $row ) {
+                       $counter++;
+                       $list[] = $row->$field;
+                       if ( ( $counter % $batchSize ) == 0 ) {
+                               wfWaitForSlaves(5);
+                               $dbw->delete( $table, array( $field => $list ), __METHOD__ );
+                               
+                               print $counter . "..";
+                               $list = array();
                        }
-                       print $counter;
-                       deleteBatch($dbw, $table, $field, $list);
                }
                
-               print "\n";
-       }
-}
-
-/* Deletes a batch of items from a table.
- * Runs the query: DELETE FROM <$table> WHERE <$field> IN (<$list>)
- * 
- * @param $dbw Database Database object to run the DELETE query on
- * @param $table table to work on; will be converted via $dbw->tableName.
- * @param $field column to search in
- * @param $list values to remove. Array with SQL-safe (!) values. 
- *
- * @author Merlijn van Deen <valhallasw@arctus.nl>
- */
-function deleteBatch($dbw, $table, $field, $list) {
-       if (count($list) == 0) return;
-       
-       $masterLinks = $dbw->tableName( $table );
-       $fname = "deleteBatch:masterLinks";     
-       
-       if ( !$dbw->ping() ) {
-               print "\nDB disconnected, reconnecting...";
-               while ( !$dbw->ping() ) {
-                       print ".";
-                       sleep(10);
+               print $counter;
+               if (count($list) > 0) {
+                       $dbw->delete( $table, array( $field => $list ), __METHOD__ );
                }
+               
                print "\n";
        }
-
-       $sql = "DELETE FROM $masterLinks WHERE $field IN (" . join("," , $list) . ");";
-       $dbw->query($sql, $fname);
+       
+       $lb->closeAll();
 }
index fa91160..c766752 100644 (file)
@@ -45,6 +45,10 @@ if ( !$options['dfn-only'] ) {
        refreshLinks( $start, $options['new-only'], $options['m'], $options['e'], $options['redirects-only'], $options['old-redirects-only'] );
 }
 
+if ( !isset( $options['batch-size'] ) ) {
+  $options['batch-size'] = 100;
+}
+
 deleteLinksFromNonexistent($options['m'], $options['batch-size']);
 
 if ( $options['globals'] ) {