/**
* Multi dimensions array representing batches. Keys are:
* > (string) links table name
+ * > (int) batch size
* > 'numRows' : Number of rows for this link table
* > 'batches' : array( $start, $end )
*
* @param $startId Integer|false
* @param $endId Integer|false
* @param $max Integer|INF
+ * @param $select string 'all' or 'ids'
* @return ResultWrapper
*/
- protected function queryLinks( $table, $startId, $endId, $max ) {
+ protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
wfProfileIn( __METHOD__ );
$fromField = $this->getPrefix( $table ) . '_from';
if ( $endId ) {
$conds[] = "$fromField <= " . intval( $endId );
}
- $options = array( 'STRAIGHT_JOIN', 'ORDER BY' => $fromField );
+ $options = array( 'ORDER BY' => $fromField );
if ( is_finite( $max ) && $max > 0 ) {
$options['LIMIT'] = $max;
}
- $res = $this->getDB()->select(
- array( $table, 'page' ),
- array( 'page_namespace', 'page_title', 'page_id' ),
- $conds,
- __METHOD__,
- $options
- );
+ if ( $select === 'ids' ) {
+ // Just select from the backlink table and ignore the page JOIN
+ $res = $this->getDB()->select(
+ $table,
+ array( $this->getPrefix( $table ) . '_from AS page_id' ),
+ array_filter( $conds, function( $clause ) { // kind of janky
+ return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
+ } ),
+ __METHOD__,
+ $options
+ );
+ } else {
+ // Select from the backlink table and JOIN with page title information
+ $res = $this->getDB()->select(
+ array( $table, 'page' ),
+ array( 'page_namespace', 'page_title', 'page_id' ),
+ $conds,
+ __METHOD__,
+ array_merge( array( 'STRAIGHT_JOIN' ), $options )
+ );
+ }
if ( !$startId && !$endId && $res->numRows() < $max ) {
// The full results fit within the limit, so cache them
case 'imagelinks':
$conds = array(
'il_to' => $this->title->getDBkey(),
- 'page_id=il_from'
+ "page_id={$prefix}_from"
);
break;
case 'categorylinks':
$conds = array(
'cl_to' => $this->title->getDBkey(),
- 'page_id=cl_from',
+ "page_id={$prefix}_from"
);
break;
default:
* @return integer
*/
public function getNumLinks( $table, $max = INF ) {
- global $wgMemc;
+ global $wgMemc, $wgUpdateRowsPerJob;
// 1) try partition cache ...
if ( isset( $this->partitionCache[$table] ) ) {
}
// 4) fetch from the database ...
- $count = $this->getLinks( $table, false, false, $max )->count();
- if ( $count < $max ) { // full count
- $wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
+ if ( is_infinite( $max ) ) { // no limit at all
+ // Use partition() since it will batch the query and skip the JOIN.
+ // Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
+ $this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
+ return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
+ } else { // probably some sane limit
+ // Fetch the full title info, since the caller will likely need it next
+ $count = $this->getLinks( $table, false, false, $max )->count();
+ if ( $count < $max ) { // full count
+ $wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
+ }
}
return min( $max, $count );
$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
$start = false;
do {
- $res = $this->queryLinks( $table, $start, false, $selectSize );
+ $res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
$partitions = $this->partitionResult( $res, $batchSize, false );
// Merge the link count and range partitions for this chunk
$cacheEntry['numRows'] += $partitions['numRows'];