Add Database::unionConditionPermutations()
authorBrad Jorsch <bjorsch@wikimedia.org>
Fri, 16 Jun 2017 17:32:03 +0000 (13:32 -0400)
committerAnomie <bjorsch@wikimedia.org>
Fri, 16 Jun 2017 19:39:05 +0000 (19:39 +0000)
Constructs a query for the union of permutations of a set of fields, for
use in situations where the database otherwise makes poor plans due to
inability to use indexes effectively (e.g. T149077 and T168010).

Change-Id: I20980dcada664486c09198b8c45896620bd83e81

includes/libs/rdbms/database/DBConnRef.php
includes/libs/rdbms/database/Database.php
includes/libs/rdbms/database/IDatabase.php
tests/phpunit/includes/db/DatabaseSQLTest.php
tests/phpunit/includes/db/DatabaseTestHelper.php

index fb4122d..eb0e954 100644 (file)
@@ -424,6 +424,13 @@ class DBConnRef implements IDatabase {
                return $this->__call( __FUNCTION__, func_get_args() );
        }
 
+       public function unionConditionPermutations(
+               $table, $vars, array $permute_conds, $extra_conds = '', $fname = __METHOD__,
+               $options = [], $join_conds = []
+       ) {
+               return $this->__call( __FUNCTION__, func_get_args() );
+       }
+
        public function conditional( $cond, $trueVal, $falseVal ) {
                return $this->__call( __FUNCTION__, func_get_args() );
        }
index 9e91592..76bf002 100644 (file)
@@ -2470,6 +2470,77 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware
                return '(' . implode( $glue, $sqls ) . ')';
        }
 
+       public function unionConditionPermutations(
+               $table, $vars, array $permute_conds, $extra_conds = '', $fname = __METHOD__,
+               $options = [], $join_conds = []
+       ) {
+               // First, build the Cartesian product of $permute_conds
+               $conds = [ [] ];
+               foreach ( $permute_conds as $field => $values ) {
+                       if ( !$values ) {
+                               // Skip empty $values
+                               continue;
+                       }
+                       $values = array_unique( $values ); // For sanity
+                       $newConds = [];
+                       foreach ( $conds as $cond ) {
+                               foreach ( $values as $value ) {
+                                       $cond[$field] = $value;
+                                       $newConds[] = $cond; // Arrays are by-value, not by-reference, so this works
+                               }
+                       }
+                       $conds = $newConds;
+               }
+
+               $extra_conds = $extra_conds === '' ? [] : (array)$extra_conds;
+
+               // If there's just one condition and no subordering, hand off to
+               // selectSQLText directly.
+               if ( count( $conds ) === 1 &&
+                       ( !isset( $options['INNER ORDER BY'] ) || !$this->unionSupportsOrderAndLimit() )
+               ) {
+                       return $this->selectSQLText(
+                               $table, $vars, $conds[0] + $extra_conds, $fname, $options, $join_conds
+                       );
+               }
+
+               // Otherwise, we need to pull out the order and limit to apply after
+               // the union. Then build the SQL queries for each set of conditions in
+               // $conds. Then union them together (using UNION ALL, because the
+               // product *should* already be distinct).
+               $orderBy = $this->makeOrderBy( $options );
+               $limit = isset( $options['LIMIT'] ) ? $options['LIMIT'] : null;
+               $offset = isset( $options['OFFSET'] ) ? $options['OFFSET'] : false;
+               $all = empty( $options['NOTALL'] ) && !in_array( 'NOTALL', $options );
+               if ( !$this->unionSupportsOrderAndLimit() ) {
+                       unset( $options['ORDER BY'], $options['LIMIT'], $options['OFFSET'] );
+               } else {
+                       if ( array_key_exists( 'INNER ORDER BY', $options ) ) {
+                               $options['ORDER BY'] = $options['INNER ORDER BY'];
+                       }
+                       if ( $limit !== null && is_numeric( $offset ) && $offset != 0 ) {
+                               // We need to increase the limit by the offset rather than
+                               // using the offset directly, otherwise it'll skip incorrectly
+                               // in the subqueries.
+                               $options['LIMIT'] = $limit + $offset;
+                               unset( $options['OFFSET'] );
+                       }
+               }
+
+               $sqls = [];
+               foreach ( $conds as $cond ) {
+                       $sqls[] = $this->selectSQLText(
+                               $table, $vars, $cond + $extra_conds, $fname, $options, $join_conds
+                       );
+               }
+               $sql = $this->unionQueries( $sqls, $all ) . $orderBy;
+               if ( $limit !== null ) {
+                       $sql = $this->limitResult( $sql, $limit, $offset );
+               }
+
+               return $sql;
+       }
+
        public function conditional( $cond, $trueVal, $falseVal ) {
                if ( is_array( $cond ) ) {
                        $cond = $this->makeList( $cond, self::LIST_AND );
index 7c6413c..0698dd8 100644 (file)
@@ -1276,6 +1276,37 @@ interface IDatabase {
         */
        public function unionQueries( $sqls, $all );
 
+       /**
+        * Construct a UNION query for permutations of conditions
+        *
+        * Databases sometimes have trouble with queries that have multiple values
+        * for multiple condition parameters combined with limits and ordering.
+        * This method constructs queries for the Cartesian product of the
+        * conditions and unions them all together.
+        *
+        * @see IDatabase::select()
+        * @since 1.30
+        * @param string|array $table Table name
+        * @param string|array $vars Field names
+        * @param array $permute_conds Conditions for the Cartesian product. Keys
+        *  are field names, values are arrays of the possible values for that
+        *  field.
+        * @param string|array $extra_conds Additional conditions to include in the
+        *  query.
+        * @param string $fname Caller function name
+        * @param string|array $options Query options. In addition to the options
+        *  recognized by IDatabase::select(), the following may be used:
+        *   - NOTALL: Set to use UNION instead of UNION ALL.
+        *   - INNER ORDER BY: If specified and supported, subqueries will use this
+        *     instead of ORDER BY.
+        * @param string|array $join_conds Join conditions
+        * @return string SQL query string.
+        */
+       public function unionConditionPermutations(
+               $table, $vars, array $permute_conds, $extra_conds = '', $fname = __METHOD__,
+               $options = [], $join_conds = []
+       );
+
        /**
         * Returns an SQL expression for a simple conditional. This doesn't need
         * to be overridden unless CASE isn't supported in your DBMS.
index 206655c..2b587db 100644 (file)
@@ -750,6 +750,175 @@ class DatabaseSQLTest extends MediaWikiTestCase {
                ];
        }
 
+       /**
+        * @dataProvider provideUnionConditionPermutations
+        * @covers Database::unionConditionPermutations
+        */
+       public function testUnionConditionPermutations( $params, $expect ) {
+               if ( isset( $params['unionSupportsOrderAndLimit'] ) ) {
+                       $this->database->setUnionSupportsOrderAndLimit( $params['unionSupportsOrderAndLimit'] );
+               }
+
+               $sql = trim( $this->database->unionConditionPermutations(
+                       $params['table'],
+                       $params['vars'],
+                       $params['permute_conds'],
+                       isset( $params['extra_conds'] ) ? $params['extra_conds'] : '',
+                       'FNAME',
+                       isset( $params['options'] ) ? $params['options'] : [],
+                       isset( $params['join_conds'] ) ? $params['join_conds'] : []
+               ) );
+               $this->assertEquals( $expect, $sql );
+       }
+
+       public static function provideUnionConditionPermutations() {
+               return [
+                       // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
+                       [
+                               [
+                                       'table' => [ 'table1', 'table2' ],
+                                       'vars' => [ 'field1', 'alias' => 'field2' ],
+                                       'permute_conds' => [
+                                               'field3' => [ 1, 2, 3 ],
+                                               'duplicates' => [ 4, 5, 4 ],
+                                               'empty' => [],
+                                               'single' => [ 0 ],
+                                       ],
+                                       'extra_conds' => 'table2.bar > 23',
+                                       'options' => [
+                                               'ORDER BY' => [ 'field1', 'alias' ],
+                                               'INNER ORDER BY' => [ 'field1', 'field2' ],
+                                               'LIMIT' => 100,
+                                       ],
+                                       'join_conds' => [
+                                               'table2' => [ 'JOIN', 'table1.foo_id = table2.foo_id' ],
+                                       ],
+                               ],
+                               "(SELECT  field1,field2 AS alias  FROM table1 JOIN table2 ON ((table1.foo_id = table2.foo_id))   WHERE field3 = '1' AND duplicates = '4' AND single = '0' AND (table2.bar > 23)  ORDER BY field1,field2 LIMIT 100  ) UNION ALL " .
+                               "(SELECT  field1,field2 AS alias  FROM table1 JOIN table2 ON ((table1.foo_id = table2.foo_id))   WHERE field3 = '1' AND duplicates = '5' AND single = '0' AND (table2.bar > 23)  ORDER BY field1,field2 LIMIT 100  ) UNION ALL " .
+                               "(SELECT  field1,field2 AS alias  FROM table1 JOIN table2 ON ((table1.foo_id = table2.foo_id))   WHERE field3 = '2' AND duplicates = '4' AND single = '0' AND (table2.bar > 23)  ORDER BY field1,field2 LIMIT 100  ) UNION ALL " .
+                               "(SELECT  field1,field2 AS alias  FROM table1 JOIN table2 ON ((table1.foo_id = table2.foo_id))   WHERE field3 = '2' AND duplicates = '5' AND single = '0' AND (table2.bar > 23)  ORDER BY field1,field2 LIMIT 100  ) UNION ALL " .
+                               "(SELECT  field1,field2 AS alias  FROM table1 JOIN table2 ON ((table1.foo_id = table2.foo_id))   WHERE field3 = '3' AND duplicates = '4' AND single = '0' AND (table2.bar > 23)  ORDER BY field1,field2 LIMIT 100  ) UNION ALL " .
+                               "(SELECT  field1,field2 AS alias  FROM table1 JOIN table2 ON ((table1.foo_id = table2.foo_id))   WHERE field3 = '3' AND duplicates = '5' AND single = '0' AND (table2.bar > 23)  ORDER BY field1,field2 LIMIT 100  ) " .
+                               "ORDER BY field1,alias LIMIT 100"
+                       ],
+                       [
+                               [
+                                       'table' => 'foo',
+                                       'vars' => [ 'foo_id' ],
+                                       'permute_conds' => [
+                                               'bar' => [ 1, 2, 3 ],
+                                       ],
+                                       'extra_conds' => [ 'baz' => null ],
+                                       'options' => [
+                                               'NOTALL',
+                                               'ORDER BY' => [ 'foo_id' ],
+                                               'LIMIT' => 25,
+                                       ],
+                               ],
+                               "(SELECT  foo_id  FROM foo    WHERE bar = '1' AND baz IS NULL  ORDER BY foo_id LIMIT 25  ) UNION " .
+                               "(SELECT  foo_id  FROM foo    WHERE bar = '2' AND baz IS NULL  ORDER BY foo_id LIMIT 25  ) UNION " .
+                               "(SELECT  foo_id  FROM foo    WHERE bar = '3' AND baz IS NULL  ORDER BY foo_id LIMIT 25  ) " .
+                               "ORDER BY foo_id LIMIT 25"
+                       ],
+                       [
+                               [
+                                       'table' => 'foo',
+                                       'vars' => [ 'foo_id' ],
+                                       'permute_conds' => [
+                                               'bar' => [ 1, 2, 3 ],
+                                       ],
+                                       'extra_conds' => [ 'baz' => null ],
+                                       'options' => [
+                                               'NOTALL' => true,
+                                               'ORDER BY' => [ 'foo_id' ],
+                                               'LIMIT' => 25,
+                                       ],
+                                       'unionSupportsOrderAndLimit' => false,
+                               ],
+                               "(SELECT  foo_id  FROM foo    WHERE bar = '1' AND baz IS NULL  ) UNION " .
+                               "(SELECT  foo_id  FROM foo    WHERE bar = '2' AND baz IS NULL  ) UNION " .
+                               "(SELECT  foo_id  FROM foo    WHERE bar = '3' AND baz IS NULL  ) " .
+                               "ORDER BY foo_id LIMIT 25"
+                       ],
+                       [
+                               [
+                                       'table' => 'foo',
+                                       'vars' => [ 'foo_id' ],
+                                       'permute_conds' => [],
+                                       'extra_conds' => [ 'baz' => null ],
+                                       'options' => [
+                                               'ORDER BY' => [ 'foo_id' ],
+                                               'LIMIT' => 25,
+                                       ],
+                               ],
+                               "SELECT  foo_id  FROM foo    WHERE baz IS NULL  ORDER BY foo_id LIMIT 25"
+                       ],
+                       [
+                               [
+                                       'table' => 'foo',
+                                       'vars' => [ 'foo_id' ],
+                                       'permute_conds' => [
+                                               'bar' => [],
+                                       ],
+                                       'extra_conds' => [ 'baz' => null ],
+                                       'options' => [
+                                               'ORDER BY' => [ 'foo_id' ],
+                                               'LIMIT' => 25,
+                                       ],
+                               ],
+                               "SELECT  foo_id  FROM foo    WHERE baz IS NULL  ORDER BY foo_id LIMIT 25"
+                       ],
+                       [
+                               [
+                                       'table' => 'foo',
+                                       'vars' => [ 'foo_id' ],
+                                       'permute_conds' => [
+                                               'bar' => [ 1 ],
+                                       ],
+                                       'options' => [
+                                               'ORDER BY' => [ 'foo_id' ],
+                                               'LIMIT' => 25,
+                                               'OFFSET' => 150,
+                                       ],
+                               ],
+                               "SELECT  foo_id  FROM foo    WHERE bar = '1'  ORDER BY foo_id LIMIT 150,25"
+                       ],
+                       [
+                               [
+                                       'table' => 'foo',
+                                       'vars' => [ 'foo_id' ],
+                                       'permute_conds' => [],
+                                       'extra_conds' => [ 'baz' => null ],
+                                       'options' => [
+                                               'ORDER BY' => [ 'foo_id' ],
+                                               'LIMIT' => 25,
+                                               'OFFSET' => 150,
+                                               'INNER ORDER BY' => [ 'bar_id' ],
+                                       ],
+                               ],
+                               "(SELECT  foo_id  FROM foo    WHERE baz IS NULL  ORDER BY bar_id LIMIT 175  ) ORDER BY foo_id LIMIT 150,25"
+                       ],
+                       [
+                               [
+                                       'table' => 'foo',
+                                       'vars' => [ 'foo_id' ],
+                                       'permute_conds' => [],
+                                       'extra_conds' => [ 'baz' => null ],
+                                       'options' => [
+                                               'ORDER BY' => [ 'foo_id' ],
+                                               'LIMIT' => 25,
+                                               'OFFSET' => 150,
+                                               'INNER ORDER BY' => [ 'bar_id' ],
+                                       ],
+                                       'unionSupportsOrderAndLimit' => false,
+                               ],
+                               "SELECT  foo_id  FROM foo    WHERE baz IS NULL  ORDER BY foo_id LIMIT 150,25"
+                       ],
+                       // @codingStandardsIgnoreEnd
+               ];
+       }
+
        /**
         * @covers Database::commit
         */
index ce05e33..9ed8f15 100644 (file)
@@ -31,6 +31,11 @@ class DatabaseTestHelper extends Database {
         */
        protected $tablesExists;
 
+       /**
+        * Value to return from unionSupportsOrderAndLimit()
+        */
+       protected $unionSupportsOrderAndLimit = true;
+
        public function __construct( $testName, array $opts = [] ) {
                $this->testName = $testName;
 
@@ -203,4 +208,12 @@ class DatabaseTestHelper extends Database {
 
                return new FakeResultWrapper( $res );
        }
+
+       public function unionSupportsOrderAndLimit() {
+               return $this->unionSupportsOrderAndLimit;
+       }
+
+       public function setUnionSupportsOrderAndLimit( $v ) {
+               $this->unionSupportsOrderAndLimit = (bool)$v;
+       }
 }