From 2a04f2dbf9dce34fed578d996f9a780c5633bc0d Mon Sep 17 00:00:00 2001 From: Roan Kattouw Date: Mon, 10 Jul 2017 10:14:57 -0700 Subject: [PATCH] RCFilters: Filter duplicates when filtering for multiple tags When filtering for multiple tags, add DISTINCT to the query. To make this not result in terrible query performance, we also have to add the rc_id to the end of the ORDER BY, and add a GROUP BY equal to the ORDER BY. Make ChangeTags::modifyDisplayQuery() take an array of tags instead of a pipe-separated string. This allows each caller to explicitly opt into supporting multi-tag filters and the conditional GROUP BY mess that that entails. Only support multi-tag filters in SpecialRecentChanges and SpecialRecentchangesLinked. This means we don't have to adapt the queries in HistoryAction, ContribsPager, LogPager and NewPagesPager to deal with a possible DISTINCT modifier. Example query with no tag filters: SELECT rc_id, ... FROM recentchanges ... ORDER BY rc_timestamp DESC Example query with one tag filter: SELECT rc_id, ... FROM recentchanges JOIN change_tag ON ... WHERE ... AND ct_tag='foo' ORDER BY rc_timestamp DESC Example query with two tag filters: SELECT DISTINCT rc_id, ... FROM recentchanges JOIN change_tag ON ... WHERE ... AND ct_tag IN ('foo', 'bar') GROUP BY rc_timestamp, rc_id ORDER BY rc_timestamp DESC, rc_id DESC Bug: T168501 Change-Id: I54a270a563d99b143b55ce83c7d6f70ac4861c64 --- includes/changetags/ChangeTags.php | 20 +++++++++++--- includes/specials/SpecialRecentchanges.php | 20 +++++++++++--- .../specials/SpecialRecentchangeslinked.php | 26 ++++++++++++++----- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/includes/changetags/ChangeTags.php b/includes/changetags/ChangeTags.php index c9b5f96d50..feed7885ee 100644 --- a/includes/changetags/ChangeTags.php +++ b/includes/changetags/ChangeTags.php @@ -643,12 +643,18 @@ class ChangeTags { * Handles selecting tags, and filtering. * Needs $tables to be set up properly, so we can figure out which join conditions to use. * + * WARNING: If $filter_tag contains more than one tag, this function will add DISTINCT, + * which may cause performance problems for your query unless you put the ID field of your + * table at the end of the ORDER BY, and set a GROUP BY equal to the ORDER BY. For example, + * if you had ORDER BY foo_timestamp DESC, you will now need GROUP BY foo_timestamp, foo_id + * ORDER BY foo_timestamp DESC, foo_id DESC. + * * @param string|array $tables Table names, see Database::select * @param string|array $fields Fields used in query, see Database::select * @param string|array $conds Conditions used in query, see Database::select * @param array $join_conds Join conditions, see Database::select * @param array $options Options, see Database::select - * @param bool|string $filter_tag Tag to select on + * @param bool|string|array $filter_tag Tag(s) to select on * * @throws MWException When unable to determine appropriate JOIN condition for tagging */ @@ -660,7 +666,7 @@ class ChangeTags { $filter_tag = $wgRequest->getVal( 'tagfilter' ); } - // Figure out which conditions can be done. + // Figure out which ID field to use if ( in_array( 'recentchanges', $tables ) ) { $join_cond = 'ct_rc_id=rc_id'; } elseif ( in_array( 'logging', $tables ) ) { @@ -683,7 +689,13 @@ class ChangeTags { $tables[] = 'change_tag'; $join_conds['change_tag'] = [ 'INNER JOIN', $join_cond ]; - $conds['ct_tag'] = explode( '|', $filter_tag ); + $conds['ct_tag'] = $filter_tag; + if ( + is_array( $filter_tag ) && count( $filter_tag ) > 1 && + !in_array( 'DISTINCT', $options ) + ) { + $options[] = 'DISTINCT'; + } } } @@ -962,7 +974,7 @@ class ChangeTags { // tags cannot contain commas (used as a delimiter in tag_summary table), // pipe (used as a delimiter between multiple tags in - // modifyDisplayQuery), or slashes (would break tag description messages in + // SpecialRecentchanges and friends), or slashes (would break tag description messages in // MediaWiki namespace) if ( strpos( $tag, ',' ) !== false || strpos( $tag, '|' ) !== false || strpos( $tag, '/' ) !== false ) { diff --git a/includes/specials/SpecialRecentchanges.php b/includes/specials/SpecialRecentchanges.php index bec87c592a..45235aae79 100644 --- a/includes/specials/SpecialRecentchanges.php +++ b/includes/specials/SpecialRecentchanges.php @@ -422,13 +422,14 @@ class SpecialRecentChanges extends ChangesListSpecialPage { $fields[] = 'page_latest'; $join_conds['page'] = [ 'LEFT JOIN', 'rc_cur_id=page_id' ]; + $tagFilter = explode( '|', $opts['tagfilter'] ); ChangeTags::modifyDisplayQuery( $tables, $fields, $conds, $join_conds, $query_options, - $opts['tagfilter'] + $tagFilter ); if ( !$this->runMainQueryHook( $tables, $fields, $conds, $query_options, $join_conds, @@ -441,13 +442,24 @@ class SpecialRecentChanges extends ChangesListSpecialPage { return false; } + $orderByAndLimit = [ + 'ORDER BY' => 'rc_timestamp DESC', + 'LIMIT' => $opts['limit'] + ]; + if ( in_array( 'DISTINCT', $query_options ) ) { + // ChangeTags::modifyDisplayQuery() adds DISTINCT when filtering on multiple tags. + // In order to prevent DISTINCT from causing query performance problems, + // we have to GROUP BY the primary key. This in turn requires us to add + // the primary key to the end of the ORDER BY, and the old ORDER BY to the + // start of the GROUP BY + $orderByAndLimit['ORDER BY'] = 'rc_timestamp DESC, rc_id DESC'; + $orderByAndLimit['GROUP BY'] = 'rc_timestamp, rc_id'; + } // array_merge() is used intentionally here so that hooks can, should // they so desire, override the ORDER BY / LIMIT condition(s); prior to // MediaWiki 1.26 this used to use the plus operator instead, which meant // that extensions weren't able to change these conditions - $query_options = array_merge( [ - 'ORDER BY' => 'rc_timestamp DESC', - 'LIMIT' => $opts['limit'] ], $query_options ); + $query_options = array_merge( $orderByAndLimit, $query_options ); $rows = $dbr->select( $tables, $fields, diff --git a/includes/specials/SpecialRecentchangeslinked.php b/includes/specials/SpecialRecentchangeslinked.php index b3b9210380..80ec2b16cf 100644 --- a/includes/specials/SpecialRecentchangeslinked.php +++ b/includes/specials/SpecialRecentchangeslinked.php @@ -103,15 +103,33 @@ class SpecialRecentChangesLinked extends SpecialRecentChanges { $join_conds['page'] = [ 'LEFT JOIN', 'rc_cur_id=page_id' ]; $select[] = 'page_latest'; } + + $tagFilter = explode( '|', $opts['tagfilter'] ); ChangeTags::modifyDisplayQuery( $tables, $select, $conds, $join_conds, $query_options, - $opts['tagfilter'] + $tagFilter ); + if ( $dbr->unionSupportsOrderAndLimit() ) { + if ( count( $tagFilter ) > 1 ) { + // ChangeTags::modifyDisplayQuery() will have added DISTINCT. + // To prevent this from causing query performance problems, we need to add + // a GROUP BY, and add rc_id to the ORDER BY. + $order = [ + 'GROUP BY' => 'rc_timestamp, rc_id', + 'ORDER BY' => 'rc_timestamp DESC, rc_id DESC' + ]; + } else { + $order = [ 'ORDER BY' => 'rc_timestamp DESC' ]; + } + } else { + $order = []; + } + if ( !$this->runMainQueryHook( $tables, $select, $conds, $query_options, $join_conds, $opts ) ) { @@ -181,12 +199,6 @@ class SpecialRecentChangesLinked extends SpecialRecentChanges { } } - if ( $dbr->unionSupportsOrderAndLimit() ) { - $order = [ 'ORDER BY' => 'rc_timestamp DESC' ]; - } else { - $order = []; - } - $query = $dbr->selectSQLText( array_merge( $tables, [ $link_table ] ), $select, -- 2.20.1