From: Aaron Schulz Date: Sat, 9 Nov 2013 20:33:59 +0000 (-0800) Subject: BacklinkCache performance tweaks X-Git-Tag: 1.31.0-rc.0~18163^2 X-Git-Url: http://git.cyclocoop.org/%22%2C%20generer_url_ecrire%28?a=commitdiff_plain;h=a9dc4d943e8f7e7824f31dc7fd1270873f8a9fdb;p=lhc%2Fweb%2Fwiklou.git BacklinkCache performance tweaks * In partition(), avoid doing the JOIN since it is not needed and prevents index-only queries. * Made numLinks() wrap partition when no $max is specified. * Also fixed some docs Change-Id: I05a83f71efb4c6e99e40883d7fa53da59184b13c --- diff --git a/includes/cache/BacklinkCache.php b/includes/cache/BacklinkCache.php index 193f20fee8..4775b96a9c 100644 --- a/includes/cache/BacklinkCache.php +++ b/includes/cache/BacklinkCache.php @@ -48,6 +48,7 @@ class BacklinkCache { /** * Multi dimensions array representing batches. Keys are: * > (string) links table name + * > (int) batch size * > 'numRows' : Number of rows for this link table * > 'batches' : array( $start, $end ) * @@ -169,9 +170,10 @@ class BacklinkCache { * @param $startId Integer|false * @param $endId Integer|false * @param $max Integer|INF + * @param $select string 'all' or 'ids' * @return ResultWrapper */ - protected function queryLinks( $table, $startId, $endId, $max ) { + protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) { wfProfileIn( __METHOD__ ); $fromField = $this->getPrefix( $table ) . '_from'; @@ -192,18 +194,32 @@ class BacklinkCache { if ( $endId ) { $conds[] = "$fromField <= " . intval( $endId ); } - $options = array( 'STRAIGHT_JOIN', 'ORDER BY' => $fromField ); + $options = array( 'ORDER BY' => $fromField ); if ( is_finite( $max ) && $max > 0 ) { $options['LIMIT'] = $max; } - $res = $this->getDB()->select( - array( $table, 'page' ), - array( 'page_namespace', 'page_title', 'page_id' ), - $conds, - __METHOD__, - $options - ); + if ( $select === 'ids' ) { + // Just select from the backlink table and ignore the page JOIN + $res = $this->getDB()->select( + $table, + array( $this->getPrefix( $table ) . '_from AS page_id' ), + array_filter( $conds, function( $clause ) { // kind of janky + return !preg_match( '/(\b|=)page_id(\b|=)/', $clause ); + } ), + __METHOD__, + $options + ); + } else { + // Select from the backlink table and JOIN with page title information + $res = $this->getDB()->select( + array( $table, 'page' ), + array( 'page_namespace', 'page_title', 'page_id' ), + $conds, + __METHOD__, + array_merge( array( 'STRAIGHT_JOIN' ), $options ) + ); + } if ( !$startId && !$endId && $res->numRows() < $max ) { // The full results fit within the limit, so cache them @@ -280,13 +296,13 @@ class BacklinkCache { case 'imagelinks': $conds = array( 'il_to' => $this->title->getDBkey(), - 'page_id=il_from' + "page_id={$prefix}_from" ); break; case 'categorylinks': $conds = array( 'cl_to' => $this->title->getDBkey(), - 'page_id=cl_from', + "page_id={$prefix}_from" ); break; default: @@ -316,7 +332,7 @@ class BacklinkCache { * @return integer */ public function getNumLinks( $table, $max = INF ) { - global $wgMemc; + global $wgMemc, $wgUpdateRowsPerJob; // 1) try partition cache ... if ( isset( $this->partitionCache[$table] ) ) { @@ -338,9 +354,17 @@ class BacklinkCache { } // 4) fetch from the database ... - $count = $this->getLinks( $table, false, false, $max )->count(); - if ( $count < $max ) { // full count - $wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY ); + if ( is_infinite( $max ) ) { // no limit at all + // Use partition() since it will batch the query and skip the JOIN. + // Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs. + $this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache + return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows']; + } else { // probably some sane limit + // Fetch the full title info, since the caller will likely need it next + $count = $this->getLinks( $table, false, false, $max )->count(); + if ( $count < $max ) { // full count + $wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY ); + } } return min( $max, $count ); @@ -396,7 +420,7 @@ class BacklinkCache { $selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) ); $start = false; do { - $res = $this->queryLinks( $table, $start, false, $selectSize ); + $res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' ); $partitions = $this->partitionResult( $res, $batchSize, false ); // Merge the link count and range partitions for this chunk $cacheEntry['numRows'] += $partitions['numRows'];