From 59e856e3e64e9fa05c39a6d0bfb5514183c9658b Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Wed, 19 Sep 2018 16:25:45 -0400 Subject: [PATCH] Improve some queries ordering by rev_timestamp with actor migration READ_NEW The revision_actor_temp table has denormalized copies of rev_page and rev_timestamp to support the actor indexes equivalent to `actor_timestamp` and `page_actor_timestamp`. But to get these to be used, we need to have the WHERE and ORDER BY use the denormalized fields instead of the main revision-table fields. We can take generally advantage of the fact that "ORDER BY field" uses aliased field names before the names in the actual table, but WHERE conditions do need to explicitly vary. Bug: T204669 Change-Id: I992aa50f02c35d76e91a5a28e05c870f8a32f858 --- includes/api/ApiQueryAllRevisions.php | 90 +++++++++++++++------- includes/api/ApiQueryRevisions.php | 51 ++++++++---- includes/specials/pagers/ContribsPager.php | 6 ++ includes/user/User.php | 6 +- 4 files changed, 111 insertions(+), 42 deletions(-) diff --git a/includes/api/ApiQueryAllRevisions.php b/includes/api/ApiQueryAllRevisions.php index 6a26eff7f5..922d2c3e25 100644 --- a/includes/api/ApiQueryAllRevisions.php +++ b/includes/api/ApiQueryAllRevisions.php @@ -40,6 +40,8 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase { * @return void */ protected function run( ApiPageSet $resultPageSet = null ) { + global $wgActorTableSchemaMigrationStage; + $db = $this->getDB(); $params = $this->extractRequestParams( false ); $revisionStore = MediaWikiServices::getInstance()->getRevisionStore(); @@ -48,6 +50,19 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase { $this->requireMaxOneParameter( $params, 'user', 'excludeuser' ); + $tsField = 'rev_timestamp'; + $idField = 'rev_id'; + $pageField = 'rev_page'; + if ( $params['user'] !== null && + ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_READ_NEW ) + ) { + // The query is probably best done using the actor_timestamp index on + // revision_actor_temp. Use the denormalized fields from that table. + $tsField = 'revactor_timestamp'; + $idField = 'revactor_rev'; + $pageField = 'revactor_page'; + } + // Namespace check is likely to be desired, but can't be done // efficiently in SQL. $miser_ns = null; @@ -70,34 +85,59 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase { $revQuery = $revisionStore->getQueryInfo( $this->fetchContent ? [ 'page', 'text' ] : [ 'page' ] ); - $this->addTables( $revQuery['tables'] ); - $this->addFields( $revQuery['fields'] ); - $this->addJoinConds( $revQuery['joins'] ); - - // Review this depeneding on the outcome of T113901 - $this->addOption( 'STRAIGHT_JOIN' ); } else { $this->limit = $this->getParameter( 'limit' ) ?: 10; - $this->addTables( 'revision' ); - $this->addFields( [ 'rev_timestamp', 'rev_id' ] ); + $revQuery = [ + 'tables' => [ 'revision' ], + 'fields' => [ 'rev_timestamp', 'rev_id' ], + 'joins' => [], + ]; + if ( $params['generatetitles'] ) { - $this->addFields( [ 'rev_page' ] ); + $revQuery['fields'][] = 'rev_page'; } - if ( $needPageTable ) { - $this->addTables( 'page' ); - $this->addJoinConds( - [ 'page' => [ 'INNER JOIN', [ 'rev_page = page_id' ] ] ] - ); - $this->addFieldsIf( [ 'page_namespace' ], (bool)$miser_ns ); + if ( $params['user'] !== null || $params['excludeuser'] !== null ) { + $actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' ); + $revQuery['tables'] += $actorQuery['tables']; + $revQuery['joins'] += $actorQuery['joins']; + } - // Review this depeneding on the outcome of T113901 - $this->addOption( 'STRAIGHT_JOIN' ); + if ( $needPageTable ) { + $revQuery['tables'][] = 'page'; + $revQuery['joins']['page'] = [ 'INNER JOIN', [ "$pageField = page_id" ] ]; + if ( (bool)$miser_ns ) { + $revQuery['fields'][] = 'page_namespace'; + } } } + // If we're going to be using actor_timestamp, we need to swap the order of `revision` + // and `revision_actor_temp` in the query (for the straight join) and adjust some field aliases. + if ( $idField !== 'rev_id' && isset( $revQuery['tables']['temp_rev_user'] ) ) { + $aliasFields = [ 'rev_id' => $idField, 'rev_timestamp' => $tsField, 'rev_page' => $pageField ]; + $revQuery['fields'] = array_merge( + $aliasFields, + array_diff( $revQuery['fields'], array_keys( $aliasFields ) ) + ); + unset( $revQuery['tables']['temp_rev_user'] ); + $revQuery['tables'] = array_merge( + [ 'temp_rev_user' => 'revision_actor_temp' ], + $revQuery['tables'] + ); + $revQuery['joins']['revision'] = $revQuery['joins']['temp_rev_user']; + unset( $revQuery['joins']['temp_rev_user'] ); + } + + $this->addTables( $revQuery['tables'] ); + $this->addFields( $revQuery['fields'] ); + $this->addJoinConds( $revQuery['joins'] ); + + // Seems to be needed to avoid a planner bug (T113901) + $this->addOption( 'STRAIGHT_JOIN' ); + $dir = $params['dir']; - $this->addTimestampWhereRange( 'rev_timestamp', $dir, $params['start'], $params['end'] ); + $this->addTimestampWhereRange( $tsField, $dir, $params['start'], $params['end'] ); if ( $this->fld_tags ) { $this->addTables( 'tag_summary' ); @@ -110,14 +150,10 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase { if ( $params['user'] !== null ) { $actorQuery = ActorMigration::newMigration() ->getWhere( $db, 'rev_user', User::newFromName( $params['user'], false ) ); - $this->addTables( $actorQuery['tables'] ); - $this->addJoinConds( $actorQuery['joins'] ); $this->addWhere( $actorQuery['conds'] ); } elseif ( $params['excludeuser'] !== null ) { $actorQuery = ActorMigration::newMigration() ->getWhere( $db, 'rev_user', User::newFromName( $params['excludeuser'], false ) ); - $this->addTables( $actorQuery['tables'] ); - $this->addJoinConds( $actorQuery['joins'] ); $this->addWhere( 'NOT(' . $actorQuery['conds'] . ')' ); } @@ -142,17 +178,17 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase { $ts = $db->addQuotes( $db->timestamp( $cont[0] ) ); $rev_id = (int)$cont[1]; $this->dieContinueUsageIf( strval( $rev_id ) !== $cont[1] ); - $this->addWhere( "rev_timestamp $op $ts OR " . - "(rev_timestamp = $ts AND " . - "rev_id $op= $rev_id)" ); + $this->addWhere( "$tsField $op $ts OR " . + "($tsField = $ts AND " . + "$idField $op= $rev_id)" ); } $this->addOption( 'LIMIT', $this->limit + 1 ); $sort = ( $dir == 'newer' ? '' : ' DESC' ); $orderby = []; - // Targeting index rev_timestamp, user_timestamp, or usertext_timestamp - // But 'user' is always constant for the latter two, so it doesn't matter here. + // Targeting index rev_timestamp, user_timestamp, usertext_timestamp, or actor_timestamp. + // But 'user' is always constant for the latter three, so it doesn't matter here. $orderby[] = "rev_timestamp $sort"; $orderby[] = "rev_id $sort"; $this->addOption( 'ORDER BY', $orderby ); diff --git a/includes/api/ApiQueryRevisions.php b/includes/api/ApiQueryRevisions.php index b8a180f2a1..ac7ee0ae9f 100644 --- a/includes/api/ApiQueryRevisions.php +++ b/includes/api/ApiQueryRevisions.php @@ -84,7 +84,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { } protected function run( ApiPageSet $resultPageSet = null ) { - global $wgChangeTagsSchemaMigrationStage; + global $wgActorTableSchemaMigrationStage, $wgChangeTagsSchemaMigrationStage; $params = $this->extractRequestParams( false ); $revisionStore = MediaWikiServices::getInstance()->getRevisionStore(); @@ -133,6 +133,19 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { $db = $this->getDB(); + $idField = 'rev_id'; + $tsField = 'rev_timestamp'; + $pageField = 'rev_page'; + if ( $params['user'] !== null && + ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_READ_NEW ) + ) { + // We're going to want to use the page_actor_timestamp index (on revision_actor_temp) + // so use that table's denormalized fields. + $idField = 'revactor_rev'; + $tsField = 'revactor_timestamp'; + $pageField = 'revactor_page'; + } + if ( $resultPageSet === null ) { $this->parseParameters( $params ); $this->token = $params['token']; @@ -147,6 +160,15 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { $opts[] = 'user'; } $revQuery = $revisionStore->getQueryInfo( $opts ); + + if ( $idField !== 'rev_id' ) { + $aliasFields = [ 'rev_id' => $idField, 'rev_timestamp' => $tsField, 'rev_page' => $pageField ]; + $revQuery['fields'] = array_merge( + $aliasFields, + array_diff( $revQuery['fields'], array_keys( $aliasFields ) ) + ); + } + $this->addTables( $revQuery['tables'] ); $this->addFields( $revQuery['fields'] ); $this->addJoinConds( $revQuery['joins'] ); @@ -157,7 +179,9 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { $this->addJoinConds( [ 'page' => [ 'INNER JOIN', [ 'page_id = rev_page' ] ] ] ); - $this->addFields( [ 'rev_id', 'rev_timestamp', 'rev_page' ] ); + $this->addFields( [ + 'rev_id' => $idField, 'rev_timestamp' => $tsField, 'rev_page' => $pageField + ] ); } if ( $this->fld_tags ) { @@ -207,6 +231,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { if ( $enumRevMode ) { // Indexes targeted: // page_timestamp if we don't have rvuser + // page_actor_timestamp (on revision_actor_temp) if we have rvuser in READ_NEW mode // page_user_timestamp if we have a logged-in rvuser // page_timestamp or usertext_timestamp if we have an IP rvuser @@ -222,9 +247,9 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { $continueTimestamp = $db->addQuotes( $db->timestamp( $cont[0] ) ); $continueId = (int)$cont[1]; $this->dieContinueUsageIf( $continueId != $cont[1] ); - $this->addWhere( "rev_timestamp $op $continueTimestamp OR " . - "(rev_timestamp = $continueTimestamp AND " . - "rev_id $op= $continueId)" + $this->addWhere( "$tsField $op $continueTimestamp OR " . + "($tsField = $continueTimestamp AND " . + "$idField $op= $continueId)" ); } @@ -274,24 +299,24 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { $op = ( $params['dir'] === 'newer' ? '>' : '<' ); $ts = $db->addQuotes( $db->timestampOrNull( $params['start'] ) ); if ( $params['startid'] !== null ) { - $this->addWhere( "rev_timestamp $op $ts OR " - . "rev_timestamp = $ts AND rev_id $op= " . intval( $params['startid'] ) ); + $this->addWhere( "$tsField $op $ts OR " + . "$tsField = $ts AND $idField $op= " . intval( $params['startid'] ) ); } else { - $this->addWhere( "rev_timestamp $op= $ts" ); + $this->addWhere( "$tsField $op= $ts" ); } } if ( $params['end'] !== null ) { $op = ( $params['dir'] === 'newer' ? '<' : '>' ); // Yes, opposite of the above $ts = $db->addQuotes( $db->timestampOrNull( $params['end'] ) ); if ( $params['endid'] !== null ) { - $this->addWhere( "rev_timestamp $op $ts OR " - . "rev_timestamp = $ts AND rev_id $op= " . intval( $params['endid'] ) ); + $this->addWhere( "$tsField $op $ts OR " + . "$tsField = $ts AND $idField $op= " . intval( $params['endid'] ) ); } else { - $this->addWhere( "rev_timestamp $op= $ts" ); + $this->addWhere( "$tsField $op= $ts" ); } } } else { - $this->addTimestampWhereRange( 'rev_timestamp', $params['dir'], + $this->addTimestampWhereRange( $tsField, $params['dir'], $params['start'], $params['end'] ); } @@ -300,7 +325,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { // There is only one ID, use it $ids = array_keys( $pageSet->getGoodTitles() ); - $this->addWhereFld( 'rev_page', reset( $ids ) ); + $this->addWhereFld( $pageField, reset( $ids ) ); if ( $params['user'] !== null ) { $actorQuery = ActorMigration::newMigration() diff --git a/includes/specials/pagers/ContribsPager.php b/includes/specials/pagers/ContribsPager.php index 9900340957..5dec30c8ae 100644 --- a/includes/specials/pagers/ContribsPager.php +++ b/includes/specials/pagers/ContribsPager.php @@ -224,6 +224,12 @@ class ContribsPager extends RangeChronologicalPager { if ( isset( $conds['orconds']['actor'] ) ) { // @todo: This will need changing when revision_comment_temp goes away $queryInfo['options']['USE INDEX']['temp_rev_user'] = 'actor_timestamp'; + // Alias 'rev_timestamp' => 'revactor_timestamp' so "ORDER BY rev_timestamp" is interpreted to + // use revactor_timestamp instead. + $queryInfo['fields'] = array_merge( + array_diff( $queryInfo['fields'], [ 'rev_timestamp' ] ), + [ 'rev_timestamp' => 'revactor_timestamp' ] + ); } else { $queryInfo['options']['USE INDEX']['revision'] = isset( $conds['orconds']['userid'] ) ? 'user_timestamp' : 'usertext_timestamp'; diff --git a/includes/user/User.php b/includes/user/User.php index 9b08737d1f..6066c87944 100644 --- a/includes/user/User.php +++ b/includes/user/User.php @@ -4942,12 +4942,14 @@ class User implements IDBAccessObject, UserIdentity { } $dbr = wfGetDB( DB_REPLICA ); $actorWhere = ActorMigration::newMigration()->getWhere( $dbr, 'rev_user', $this ); + $tsField = isset( $actorWhere['tables']['temp_rev_user'] ) + ? 'revactor_timestamp' : 'rev_timestamp'; $time = $dbr->selectField( [ 'revision' ] + $actorWhere['tables'], - 'rev_timestamp', + $tsField, [ $actorWhere['conds'] ], __METHOD__, - [ 'ORDER BY' => 'rev_timestamp ASC' ], + [ 'ORDER BY' => "$tsField ASC" ], $actorWhere['joins'] ); if ( !$time ) { -- 2.20.1