From c1db9d74430a8038766a62d32e06135aceb05c21 Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Tue, 23 Apr 2019 12:13:08 -0400 Subject: [PATCH] ContribsPager: Fix slow queries When ContribsPager is using an auxiliary table like ip_changes or revision_actor_temp for the main action of the query, we already had code in place to let it use the auxiliary table's denormalized timestamp field for the ordering. What we didn't have was code to let it also use the auxiliary table's denormalized timestamp field for *continuation*. With the schema defined in tables.sql, the simplest thing to do would be to be to add a redundant JOIN condition between rev_timestamp and the denormalized timestamp field which would be enough to allow MySQL/MariaDB to propagate the continuation conditional on rev_timestamp to the denormalized timestamp field. Unfortunately many Wikimedia wikis have rev_timestamp defined differently from table.sql (P8433), and that difference is enough to break that propagation. So we need to take a more difficult route, restructuring the code tell IndexPager to explicitly use the denormalized fields for ordering and continuation. On the plus side, since we're doing that anyway we can get rid of the code mentioned in the first paragraph. Bug: T221380 Change-Id: Iad6c0c2f1ac5e1c610de15fe6e85a637c287bcd8 --- includes/specials/pagers/ContribsPager.php | 131 ++++++++++++------ .../includes/specials/ContribsPagerTest.php | 4 +- 2 files changed, 88 insertions(+), 47 deletions(-) diff --git a/includes/specials/pagers/ContribsPager.php b/includes/specials/pagers/ContribsPager.php index 44ecb6fb03..a187a44489 100644 --- a/includes/specials/pagers/ContribsPager.php +++ b/includes/specials/pagers/ContribsPager.php @@ -103,6 +103,21 @@ class ContribsPager extends RangeChronologicalPager { private $templateParser; public function __construct( IContextSource $context, array $options ) { + // Set ->target and ->contribs before calling parent::__construct() so + // parent can call $this->getIndexField() and get the right result. Set + // the rest too just to keep things simple. + $this->target = $options['target'] ?? ''; + $this->contribs = $options['contribs'] ?? 'users'; + $this->namespace = $options['namespace'] ?? ''; + $this->tagFilter = $options['tagfilter'] ?? false; + $this->nsInvert = $options['nsInvert'] ?? false; + $this->associated = $options['associated'] ?? false; + + $this->deletedOnly = !empty( $options['deletedOnly'] ); + $this->topOnly = !empty( $options['topOnly'] ); + $this->newOnly = !empty( $options['newOnly'] ); + $this->hideMinor = !empty( $options['hideMinor'] ); + parent::__construct( $context ); $msgs = [ @@ -116,18 +131,6 @@ class ContribsPager extends RangeChronologicalPager { $this->messages[$msg] = $this->msg( $msg )->escaped(); } - $this->target = $options['target'] ?? ''; - $this->contribs = $options['contribs'] ?? 'users'; - $this->namespace = $options['namespace'] ?? ''; - $this->tagFilter = $options['tagfilter'] ?? false; - $this->nsInvert = $options['nsInvert'] ?? false; - $this->associated = $options['associated'] ?? false; - - $this->deletedOnly = !empty( $options['deletedOnly'] ); - $this->topOnly = !empty( $options['topOnly'] ); - $this->newOnly = !empty( $options['newOnly'] ); - $this->hideMinor = !empty( $options['hideMinor'] ); - // Date filtering: use timestamp if available $startTimestamp = ''; $endTimestamp = ''; @@ -235,6 +238,35 @@ class ContribsPager extends RangeChronologicalPager { return new FakeResultWrapper( $result ); } + /** + * Return the table targeted for ordering and continuation + * + * See T200259 and T221380. + * + * @warning Keep this in sync with self::getQueryInfo()! + * + * @return string + */ + private function getTargetTable() { + if ( $this->contribs == 'newbie' ) { + return 'revision'; + } + + $user = User::newFromName( $this->target, false ); + $ipRangeConds = $user->isAnon() ? $this->getIpRangeConds( $this->mDb, $this->target ) : null; + if ( $ipRangeConds ) { + return 'ip_changes'; + } else { + $conds = ActorMigration::newMigration()->getWhere( $this->mDb, 'rev_user', $user ); + if ( isset( $conds['orconds']['actor'] ) ) { + // @todo: This will need changing when revision_actor_temp goes away + return 'revision_actor_temp'; + } + } + + return 'revision'; + } + function getQueryInfo() { $revQuery = Revision::getQueryInfo( [ 'page', 'user' ] ); $queryInfo = [ @@ -245,6 +277,8 @@ class ContribsPager extends RangeChronologicalPager { 'join_conds' => $revQuery['joins'], ]; + // WARNING: Keep this in sync with getTargetTable()! + if ( $this->contribs == 'newbie' ) { $max = $this->mDb->selectField( 'user', 'max(user_id)', '', __METHOD__ ); $queryInfo['conds'][] = $revQuery['fields']['rev_user'] . ' >' . (int)( $max - $max / 100 ); @@ -273,22 +307,6 @@ class ContribsPager extends RangeChronologicalPager { $ipRangeConds = $user->isAnon() ? $this->getIpRangeConds( $this->mDb, $this->target ) : null; if ( $ipRangeConds ) { $queryInfo['tables'][] = 'ip_changes'; - /** - * These aliases make `ORDER BY rev_timestamp, rev_id` from {@see getIndexField} and - * {@see getExtraSortFields} use the replicated `ipc_rev_timestamp` and `ipc_rev_id` - * columns from the `ip_changes` table, for more efficient queries. - * @see https://phabricator.wikimedia.org/T200259#4832318 - */ - $queryInfo['fields'] = array_merge( - [ - 'rev_timestamp' => 'ipc_rev_timestamp', - 'rev_id' => 'ipc_rev_id', - ], - array_diff( $queryInfo['fields'], [ - 'rev_timestamp', - 'rev_id', - ] ) - ); $queryInfo['join_conds']['ip_changes'] = [ 'LEFT JOIN', [ 'ipc_rev_id = rev_id' ] ]; @@ -299,15 +317,8 @@ class ContribsPager extends RangeChronologicalPager { $queryInfo['conds'][] = $conds['conds']; // Force the appropriate index to avoid bad query plans (T189026) if ( isset( $conds['orconds']['actor'] ) ) { - // @todo: This will need changing when revision_comment_temp goes away + // @todo: This will need changing when revision_actor_temp goes away $queryInfo['options']['USE INDEX']['temp_rev_user'] = 'actor_timestamp'; - // Alias 'rev_timestamp' => 'revactor_timestamp' and 'rev_id' => 'revactor_rev' so - // "ORDER BY rev_timestamp, rev_id" is interpreted to use denormalized revision_actor_temp - // fields instead. - $queryInfo['fields'] = array_merge( - array_diff( $queryInfo['fields'], [ 'rev_timestamp', 'rev_id' ] ), - [ 'rev_timestamp' => 'revactor_timestamp', 'rev_id' => 'revactor_rev' ] - ); } else { $queryInfo['options']['USE INDEX']['revision'] = isset( $conds['orconds']['userid'] ) ? 'user_timestamp' : 'usertext_timestamp'; @@ -342,10 +353,10 @@ class ContribsPager extends RangeChronologicalPager { ' != ' . Revision::SUPPRESSED_USER; } - // For IPv6, we use ipc_rev_timestamp on ip_changes as the index field, - // which will be referenced when parsing the results of a query. - if ( self::isQueryableRange( $this->target ) ) { - $queryInfo['fields'][] = 'ipc_rev_timestamp'; + // $this->getIndexField() must be in the result rows, as reallyDoQuery() tries to access it. + $indexField = $this->getIndexField(); + if ( $indexField !== 'rev_timestamp' ) { + $queryInfo['fields'][] = $indexField; } ChangeTags::modifyDisplayQuery( @@ -431,8 +442,24 @@ class ContribsPager extends RangeChronologicalPager { * @return string */ public function getIndexField() { - // Note this is run via parent::__construct() *before* $this->target is set! - return 'rev_timestamp'; + // The returned column is used for sorting and continuation, so we need to + // make sure to use the right denormalized column depending on which table is + // being targeted by the query to avoid bad query plans. + // See T200259, T204669, T220991, and T221380. + $target = $this->getTargetTable(); + switch ( $target ) { + case 'revision': + return 'rev_timestamp'; + case 'ip_changes': + return 'ipc_rev_timestamp'; + case 'revision_actor_temp': + return 'revactor_timestamp'; + default: + wfWarn( + __METHOD__ . ": Unknown value '$target' from " . static::class . '::getTargetTable()', 0 + ); + return 'rev_timestamp'; + } } /** @@ -474,8 +501,24 @@ class ContribsPager extends RangeChronologicalPager { * @return string[] */ protected function getExtraSortFields() { - // Note this is run via parent::__construct() *before* $this->target is set! - return [ 'rev_id' ]; + // The returned columns are used for sorting, so we need to make sure + // to use the right denormalized column depending on which table is + // being targeted by the query to avoid bad query plans. + // See T200259, T204669, T220991, and T221380. + $target = $this->getTargetTable(); + switch ( $target ) { + case 'revision': + return [ 'rev_id' ]; + case 'ip_changes': + return [ 'ipc_rev_id' ]; + case 'revision_actor_temp': + return [ 'revactor_rev' ]; + default: + wfWarn( + __METHOD__ . ": Unknown value '$target' from " . static::class . '::getTargetTable()', 0 + ); + return [ 'rev_id' ]; + } } protected function doBatchLookups() { diff --git a/tests/phpunit/includes/specials/ContribsPagerTest.php b/tests/phpunit/includes/specials/ContribsPagerTest.php index dc02922408..e881611dbb 100644 --- a/tests/phpunit/includes/specials/ContribsPagerTest.php +++ b/tests/phpunit/includes/specials/ContribsPagerTest.php @@ -158,9 +158,7 @@ class ContribsPagerTest extends MediaWikiTestCase { $this->assertContains( 'ip_changes', $queryInfo[0] ); $this->assertArrayHasKey( 'ip_changes', $queryInfo[5] ); - $this->assertSame( 'ipc_rev_timestamp', $queryInfo[1]['rev_timestamp'] ); - $this->assertSame( 'ipc_rev_id', $queryInfo[1]['rev_id'] ); - $this->assertSame( [ 'rev_timestamp DESC', 'rev_id DESC' ], $queryInfo[4]['ORDER BY'] ); + $this->assertSame( [ 'ipc_rev_timestamp DESC', 'ipc_rev_id DESC' ], $queryInfo[4]['ORDER BY'] ); } } -- 2.20.1