From: Kevin Israel Date: Tue, 24 Feb 2015 02:16:23 +0000 (-0500) Subject: refreshLinks.php: Get IDs in batches in deleteLinksFromNonexistent() X-Git-Tag: 1.31.0-rc.0~12252^2 X-Git-Url: http://git.cyclocoop.org/%24self?a=commitdiff_plain;h=40e300b8273dae00eff38b9c136747b595656017;p=lhc%2Fweb%2Fwiklou.git refreshLinks.php: Get IDs in batches in deleteLinksFromNonexistent() ... instead of making an unbuffered query, which is discouraged in the doc comment for DatabaseBase::bufferResults(). Also used NOT IN for the antijoin instead of LEFT JOIN...IS NULL; when combined with DISTINCT, the latter causes MySQL to use a temporary table rather than an appropriate index, according to EXPLAIN. (Using GROUP BY instead of DISTINCT also avoids this problem. I don't know why.) Bug: T44180 Change-Id: Idca85fac7dd7879f9fbef2712b6aa83343099e02 --- diff --git a/maintenance/refreshLinks.php b/maintenance/refreshLinks.php index 0c2f722c42..7c85a1ccf9 100644 --- a/maintenance/refreshLinks.php +++ b/maintenance/refreshLinks.php @@ -262,10 +262,7 @@ class RefreshLinks extends Maintenance { wfWaitForSlaves(); $dbw = wfGetDB( DB_MASTER ); - - $lb = wfGetLBFactory()->newMainLB(); - $dbr = $lb->getConnection( DB_SLAVE ); - $dbr->bufferResults( false ); + $dbr = wfGetDB( DB_SLAVE ); $linksTables = array( // table name => page_id field 'pagelinks' => 'pl_from', @@ -282,38 +279,35 @@ class RefreshLinks extends Maintenance { foreach ( $linksTables as $table => $field ) { $this->output( "Retrieving illegal entries from $table... " ); - // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL; - $results = $dbr->select( - array( $table, 'page' ), - $field, - array( 'page_id' => null ), - __METHOD__, - 'DISTINCT', - array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) ) - ); - + $start = 0; $counter = 0; - $list = array(); $this->output( "0.." ); - foreach ( $results as $row ) { - $counter++; - $list[] = $row->$field; - if ( ( $counter % $batchSize ) == 0 ) { + + do { + $list = $dbr->selectFieldValues( + $table, + $field, + array( + "$field >= {$dbr->addQuotes( $start )}", + "$field NOT IN ({$dbr->selectSQLText( 'page', 'page_id' )})", + ), + __METHOD__, + array( 'DISTINCT', 'ORDER BY' => $field, 'LIMIT' => $batchSize ) + ); + + if ( $list ) { + $counter += count( $list ); wfWaitForSlaves(); $dbw->delete( $table, array( $field => $list ), __METHOD__ ); - $this->output( $counter . ".." ); - $list = array(); + $start = $list[count( $list ) - 1] + 1; } - } - $this->output( $counter ); - if ( count( $list ) > 0 ) { - $dbw->delete( $table, array( $field => $list ), __METHOD__ ); - } + + } while ( $list ); + $this->output( "\n" ); wfWaitForSlaves(); } - $lb->closeAll(); } }