refreshLinks.php: Get IDs in batches in deleteLinksFromNonexistent()
author Kevin Israel <pleasestand@live.com>
Tue, 24 Feb 2015 02:16:23 +0000 (21:16 -0500)
committer PleaseStand <pleasestand@live.com>
Fri, 27 Feb 2015 15:16:48 +0000 (15:16 +0000)
... instead of making an unbuffered query, which is discouraged in the
doc comment for DatabaseBase::bufferResults().

Also used NOT IN for the antijoin instead of LEFT JOIN...IS NULL; when
combined with DISTINCT, the latter causes MySQL to use a temporary
table rather than an appropriate index, according to EXPLAIN. (Using
GROUP BY instead of DISTINCT also avoids this problem. I don't know why.)

Bug: T44180
Change-Id: Idca85fac7dd7879f9fbef2712b6aa83343099e02

maintenance/refreshLinks.php

index 0c2f722..7c85a1c 100644 (file)
@@ -262,10 +262,7 @@ class RefreshLinks extends Maintenance {
                wfWaitForSlaves();
 
                $dbw = wfGetDB( DB_MASTER );
-
-               $lb = wfGetLBFactory()->newMainLB();
-               $dbr = $lb->getConnection( DB_SLAVE );
-               $dbr->bufferResults( false );
+               $dbr = wfGetDB( DB_SLAVE );
 
                $linksTables = array( // table name => page_id field
                        'pagelinks' => 'pl_from',
@@ -282,38 +279,35 @@ class RefreshLinks extends Maintenance {
                foreach ( $linksTables as $table => $field ) {
                        $this->output( "Retrieving illegal entries from $table... " );
 
-                       // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
-                       $results = $dbr->select(
-                               array( $table, 'page' ),
-                               $field,
-                               array( 'page_id' => null ),
-                               __METHOD__,
-                               'DISTINCT',
-                               array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
-                       );
-
+                       $start = 0;
                        $counter = 0;
-                       $list = array();
                        $this->output( "0.." );
-                       foreach ( $results as $row ) {
-                               $counter++;
-                               $list[] = $row->$field;
-                               if ( ( $counter % $batchSize ) == 0 ) {
+
+                       do {
+                               $list = $dbr->selectFieldValues(
+                                       $table,
+                                       $field,
+                                       array(
+                                               "$field >= {$dbr->addQuotes( $start )}",
+                                               "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id' )})",
+                                       ),
+                                       __METHOD__,
+                                       array( 'DISTINCT', 'ORDER BY' => $field, 'LIMIT' => $batchSize )
+                               );
+
+                               if ( $list ) {
+                                       $counter += count( $list );
                                        wfWaitForSlaves();
                                        $dbw->delete( $table, array( $field => $list ), __METHOD__ );
-
                                        $this->output( $counter . ".." );
-                                       $list = array();
+                                       $start = $list[count( $list ) - 1] + 1;
                                }
-                       }
-                       $this->output( $counter );
-                       if ( count( $list ) > 0 ) {
-                               $dbw->delete( $table, array( $field => $list ), __METHOD__ );
-                       }
+
+                       } while ( $list );
+
                        $this->output( "\n" );
                        wfWaitForSlaves();
                }
-               $lb->closeAll();
        }
 }