When filtering for multiple tags, add DISTINCT to the query.
To make this not result in terrible query performance, we also
have to add the rc_id to the end of the ORDER BY, and add
a GROUP BY equal to the ORDER BY.
Make ChangeTags::modifyDisplayQuery() take an array of tags
instead of a pipe-separated string. This allows each caller
to explicitly opt into supporting multi-tag filters and the
conditional GROUP BY mess that that entails.
Only support multi-tag filters in SpecialRecentChanges and
SpecialRecentchangesLinked. This means we don't have to adapt
the queries in HistoryAction, ContribsPager, LogPager and
NewPagesPager to deal with a possible DISTINCT modifier.
Example query with no tag filters:
SELECT rc_id, ... FROM recentchanges ... ORDER BY rc_timestamp DESC
Example query with one tag filter:
SELECT rc_id, ... FROM recentchanges JOIN change_tag ON ...
WHERE ... AND ct_tag='foo' ORDER BY rc_timestamp DESC
Example query with two tag filters:
SELECT DISTINCT rc_id, ... FROM recentchanges JOIN change_tag ON ...
WHERE ... AND ct_tag IN ('foo', 'bar') GROUP BY rc_timestamp, rc_id
ORDER BY rc_timestamp DESC, rc_id DESC
Bug: T168501
Change-Id: I54a270a563d99b143b55ce83c7d6f70ac4861c64
* Handles selecting tags, and filtering.
* Needs $tables to be set up properly, so we can figure out which join conditions to use.
*
+ * WARNING: If $filter_tag contains more than one tag, this function will add DISTINCT,
+ * which may cause performance problems for your query unless you put the ID field of your
+ * table at the end of the ORDER BY, and set a GROUP BY equal to the ORDER BY. For example,
+ * if you had ORDER BY foo_timestamp DESC, you will now need GROUP BY foo_timestamp, foo_id
+ * ORDER BY foo_timestamp DESC, foo_id DESC.
+ *
* @param string|array $tables Table names, see Database::select
* @param string|array $fields Fields used in query, see Database::select
* @param string|array $conds Conditions used in query, see Database::select
* @param array $join_conds Join conditions, see Database::select
* @param array $options Options, see Database::select
- * @param bool|string $filter_tag Tag to select on
+ * @param bool|string|array $filter_tag Tag(s) to select on
*
* @throws MWException When unable to determine appropriate JOIN condition for tagging
*/
$filter_tag = $wgRequest->getVal( 'tagfilter' );
}
- // Figure out which conditions can be done.
+ // Figure out which ID field to use
if ( in_array( 'recentchanges', $tables ) ) {
$join_cond = 'ct_rc_id=rc_id';
} elseif ( in_array( 'logging', $tables ) ) {
$tables[] = 'change_tag';
$join_conds['change_tag'] = [ 'INNER JOIN', $join_cond ];
- $conds['ct_tag'] = explode( '|', $filter_tag );
+ $conds['ct_tag'] = $filter_tag;
+ if (
+ is_array( $filter_tag ) && count( $filter_tag ) > 1 &&
+ !in_array( 'DISTINCT', $options )
+ ) {
+ $options[] = 'DISTINCT';
+ }
}
}
// tags cannot contain commas (used as a delimiter in tag_summary table),
// pipe (used as a delimiter between multiple tags in
- // modifyDisplayQuery), or slashes (would break tag description messages in
+ // SpecialRecentchanges and friends), or slashes (would break tag description messages in
// MediaWiki namespace)
if ( strpos( $tag, ',' ) !== false || strpos( $tag, '|' ) !== false
|| strpos( $tag, '/' ) !== false ) {
$fields[] = 'page_latest';
$join_conds['page'] = [ 'LEFT JOIN', 'rc_cur_id=page_id' ];
+ $tagFilter = explode( '|', $opts['tagfilter'] );
ChangeTags::modifyDisplayQuery(
$tables,
$fields,
$conds,
$join_conds,
$query_options,
- $opts['tagfilter']
+ $tagFilter
);
if ( !$this->runMainQueryHook( $tables, $fields, $conds, $query_options, $join_conds,
return false;
}
+ $orderByAndLimit = [
+ 'ORDER BY' => 'rc_timestamp DESC',
+ 'LIMIT' => $opts['limit']
+ ];
+ if ( in_array( 'DISTINCT', $query_options ) ) {
+ // ChangeTags::modifyDisplayQuery() adds DISTINCT when filtering on multiple tags.
+ // In order to prevent DISTINCT from causing query performance problems,
+ // we have to GROUP BY the primary key. This in turn requires us to add
+ // the primary key to the end of the ORDER BY, and the old ORDER BY to the
+ // start of the GROUP BY
+ $orderByAndLimit['ORDER BY'] = 'rc_timestamp DESC, rc_id DESC';
+ $orderByAndLimit['GROUP BY'] = 'rc_timestamp, rc_id';
+ }
// array_merge() is used intentionally here so that hooks can, should
// they so desire, override the ORDER BY / LIMIT condition(s); prior to
// MediaWiki 1.26 this used to use the plus operator instead, which meant
// that extensions weren't able to change these conditions
- $query_options = array_merge( [
- 'ORDER BY' => 'rc_timestamp DESC',
- 'LIMIT' => $opts['limit'] ], $query_options );
+ $query_options = array_merge( $orderByAndLimit, $query_options );
$rows = $dbr->select(
$tables,
$fields,
$join_conds['page'] = [ 'LEFT JOIN', 'rc_cur_id=page_id' ];
$select[] = 'page_latest';
}
+
+ $tagFilter = explode( '|', $opts['tagfilter'] );
ChangeTags::modifyDisplayQuery(
$tables,
$select,
$conds,
$join_conds,
$query_options,
- $opts['tagfilter']
+ $tagFilter
);
+ if ( $dbr->unionSupportsOrderAndLimit() ) {
+ if ( count( $tagFilter ) > 1 ) {
+ // ChangeTags::modifyDisplayQuery() will have added DISTINCT.
+ // To prevent this from causing query performance problems, we need to add
+ // a GROUP BY, and add rc_id to the ORDER BY.
+ $order = [
+ 'GROUP BY' => 'rc_timestamp, rc_id',
+ 'ORDER BY' => 'rc_timestamp DESC, rc_id DESC'
+ ];
+ } else {
+ $order = [ 'ORDER BY' => 'rc_timestamp DESC' ];
+ }
+ } else {
+ $order = [];
+ }
+
if ( !$this->runMainQueryHook( $tables, $select, $conds, $query_options, $join_conds,
$opts )
) {
}
}
- if ( $dbr->unionSupportsOrderAndLimit() ) {
- $order = [ 'ORDER BY' => 'rc_timestamp DESC' ];
- } else {
- $order = [];
- }
-
$query = $dbr->selectSQLText(
array_merge( $tables, [ $link_table ] ),
$select,