Skip to content

Commit 64bb06a

Browse files
committed
opt: add exploration rule to eliminate Project inside GroupBy
This commit updates the exploration rule EliminateIndexJoinInsideGroupBy and renames it to EliminateIndexJoinOrProjectInsideGroupBy. The rule now removes either an IndexJoin or Project operator if it can be proven that the removal does not affect the output of the parent grouping operator. Removal of a Project is needed in cases where the partial index predicate constrains some columns to be constant, and therefore provides those columns as constant projections. If the projected columns are not actually needed by the GroupBy, however, the Project is not necessary and interferes with other rules matching, such as SplitGroupByScanIntoUnionScans. Informs #65473 Release note (performance improvement): Improved the efficiency of validation for some partial unique indexes in REGIONAL BY ROW tables by improving the query plan to use all streaming operations.
1 parent 19fee46 commit 64bb06a

6 files changed

Lines changed: 420 additions & 256 deletions

File tree

pkg/sql/opt/memo/testdata/stats_quality/tpch/q20

Lines changed: 78 additions & 111 deletions
Large diffs are not rendered by default.

pkg/sql/opt/xform/groupby_funcs.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,3 +266,16 @@ func (c *CustomFuncs) GroupingColumns(private *memo.GroupingPrivate) opt.ColSet
266266
func (c *CustomFuncs) GroupingOrdering(private *memo.GroupingPrivate) props.OrderingChoice {
267267
return private.Ordering
268268
}
269+
270+
// MakeGroupingPrivate constructs a new GroupingPrivate using the given
271+
// grouping columns, OrderingChoice, NullsAreDistinct bool, and ErrorOnDup text.
272+
func (c *CustomFuncs) MakeGroupingPrivate(
273+
groupingCols opt.ColSet, ordering props.OrderingChoice, nullsAreDistinct bool, errorText string,
274+
) *memo.GroupingPrivate {
275+
return &memo.GroupingPrivate{
276+
GroupingCols: groupingCols,
277+
Ordering: ordering,
278+
NullsAreDistinct: nullsAreDistinct,
279+
ErrorOnDup: errorText,
280+
}
281+
}

pkg/sql/opt/xform/rules/groupby.opt

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -212,17 +212,17 @@
212212
=>
213213
((OpName) (Select $unionScans $filters) $aggs $private)
214214

215-
# EliminateIndexJoinInsideGroupBy removes an IndexJoin operator if it can be
216-
# proven that the removal does not affect the output of the parent grouping
217-
# operator. This is the case if:
215+
# EliminateIndexJoinOrProjectInsideGroupBy removes an IndexJoin or Project
216+
# operator if it can be proven that the removal does not affect the output of
217+
# the parent grouping operator. This is the case if:
218218
#
219-
# 1. Only columns from the index join's input are being used by the grouping
220-
# operator.
219+
# 1. Only columns from the index join/project's input are being used by the
220+
# grouping operator.
221221
#
222222
# 2. The OrderingChoice of the grouping operator can be expressed with only
223-
# columns from the index join's input. Or in other words, at least one column
224-
# in every ordering group is one of the output columns from the index join's
225-
# input.
223+
# columns from the index join/project's input. Or in other words, at least
224+
# one column in every ordering group is one of the output columns from the
225+
# index join/project's input.
226226
#
227227
# This rule is useful when using partial indexes. When generating partial index
228228
# scans, expressions can be removed from filters because they exactly match
@@ -270,22 +270,35 @@
270270
# └── scan t@secondary,partial
271271
# └── columns: i:1 rowid:4!null
272272
#
273-
[EliminateIndexJoinInsideGroupBy, Explore]
273+
# A Project is created in cases where the partial index predicate constrains
274+
# some columns to be constant, and therefore provides those columns as constant
275+
# projections instead of using an IndexJoin. The Project can be eliminated for
276+
# the same reasons as the IndexJoin.
277+
[EliminateIndexJoinOrProjectInsideGroupBy, Explore]
274278
(GroupBy | DistinctOn | EnsureUpsertDistinctOn
275-
(IndexJoin $input:*)
279+
(IndexJoin | Project $input:*)
276280
$aggs:*
277281
$private:* &
278282
(OrderingCanProjectCols
279-
(GroupingOrdering $private)
283+
$ordering:(GroupingOrdering $private)
280284
$inputCols:(OutputCols $input)
281285
) &
282286
(ColsAreSubset
283287
(UnionCols
284-
(GroupingColumns $private)
288+
$groupingCols:(GroupingColumns $private)
285289
(AggregationOuterCols $aggs)
286290
)
287291
$inputCols
288292
)
289293
)
290294
=>
291-
((OpName) $input $aggs $private)
295+
((OpName)
296+
$input
297+
$aggs
298+
(MakeGroupingPrivate
299+
$groupingCols
300+
(PruneOrdering $ordering $inputCols)
301+
(NullsAreDistinct $private)
302+
(ErrorOnDup $private)
303+
)
304+
)

pkg/sql/opt/xform/testdata/external/tpch

Lines changed: 43 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2186,59 +2186,50 @@ sort
21862186
│ │ ├── grouping columns: ps_suppkey:17!null
21872187
│ │ ├── immutable
21882188
│ │ ├── key: (17)
2189-
│ │ └── project
2190-
│ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null
2189+
│ │ └── inner-join (lookup part)
2190+
│ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null p_partkey:23!null p_name:24!null sum:52!null
2191+
│ │ ├── key columns: [16] = [23]
2192+
│ │ ├── lookup columns are key
21912193
│ │ ├── immutable
2192-
│ │ ├── key: (16,17)
2193-
│ │ └── project
2194-
│ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null p_partkey:23!null
2195-
│ │ ├── immutable
2196-
│ │ ├── key: (17,23)
2197-
│ │ ├── fd: (16)==(23), (23)==(16)
2198-
│ │ └── inner-join (lookup part)
2199-
│ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null p_partkey:23!null p_name:24!null sum:52!null
2200-
│ │ ├── key columns: [16] = [23]
2201-
│ │ ├── lookup columns are key
2202-
│ │ ├── immutable
2203-
│ │ ├── key: (17,23)
2204-
│ │ ├── fd: (16,17)-->(18,52), (23)-->(24), (16)==(23), (23)==(16)
2205-
│ │ ├── select
2206-
│ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null sum:52!null
2207-
│ │ │ ├── immutable
2208-
│ │ │ ├── key: (16,17)
2209-
│ │ │ ├── fd: (16,17)-->(18,52)
2210-
│ │ │ ├── group-by
2211-
│ │ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null sum:52!null
2212-
│ │ │ │ ├── grouping columns: ps_partkey:16!null ps_suppkey:17!null
2213-
│ │ │ │ ├── key: (16,17)
2214-
│ │ │ │ ├── fd: (16,17)-->(18,52)
2215-
│ │ │ │ ├── inner-join (hash)
2216-
│ │ │ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null l_partkey:35!null l_suppkey:36!null l_quantity:38!null l_shipdate:44!null
2217-
│ │ │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
2218-
│ │ │ │ │ ├── fd: (16,17)-->(18), (16)==(35), (35)==(16), (17)==(36), (36)==(17)
2219-
│ │ │ │ │ ├── index-join lineitem
2220-
│ │ │ │ │ │ ├── columns: l_partkey:35!null l_suppkey:36!null l_quantity:38!null l_shipdate:44!null
2221-
│ │ │ │ │ │ └── scan lineitem@l_sd
2222-
│ │ │ │ │ │ ├── columns: l_orderkey:34!null l_linenumber:37!null l_shipdate:44!null
2223-
│ │ │ │ │ │ ├── constraint: /44/34/37: [/'1994-01-01' - /'1994-12-31']
2224-
│ │ │ │ │ │ ├── key: (34,37)
2225-
│ │ │ │ │ │ └── fd: (34,37)-->(44)
2226-
│ │ │ │ │ ├── scan partsupp
2227-
│ │ │ │ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null
2228-
│ │ │ │ │ │ ├── key: (16,17)
2229-
│ │ │ │ │ │ └── fd: (16,17)-->(18)
2230-
│ │ │ │ │ └── filters
2231-
│ │ │ │ │ ├── l_partkey:35 = ps_partkey:16 [outer=(16,35), constraints=(/16: (/NULL - ]; /35: (/NULL - ]), fd=(16)==(35), (35)==(16)]
2232-
│ │ │ │ │ └── l_suppkey:36 = ps_suppkey:17 [outer=(17,36), constraints=(/17: (/NULL - ]; /36: (/NULL - ]), fd=(17)==(36), (36)==(17)]
2233-
│ │ │ │ └── aggregations
2234-
│ │ │ │ ├── sum [as=sum:52, outer=(38)]
2235-
│ │ │ │ │ └── l_quantity:38
2236-
│ │ │ │ └── const-agg [as=ps_availqty:18, outer=(18)]
2237-
│ │ │ │ └── ps_availqty:18
2238-
│ │ │ └── filters
2239-
│ │ │ └── ps_availqty:18 > (sum:52 * 0.5) [outer=(18,52), immutable, constraints=(/18: (/NULL - ])]
2240-
│ │ └── filters
2241-
│ │ └── p_name:24 LIKE 'forest%' [outer=(24), constraints=(/24: [/'forest' - /'foresu'); tight)]
2194+
│ │ ├── key: (17,23)
2195+
│ │ ├── fd: (16,17)-->(18,52), (23)-->(24), (16)==(23), (23)==(16)
2196+
│ │ ├── select
2197+
│ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null sum:52!null
2198+
│ │ │ ├── immutable
2199+
│ │ │ ├── key: (16,17)
2200+
│ │ │ ├── fd: (16,17)-->(18,52)
2201+
│ │ │ ├── group-by
2202+
│ │ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null sum:52!null
2203+
│ │ │ │ ├── grouping columns: ps_partkey:16!null ps_suppkey:17!null
2204+
│ │ │ │ ├── key: (16,17)
2205+
│ │ │ │ ├── fd: (16,17)-->(18,52)
2206+
│ │ │ │ ├── inner-join (hash)
2207+
│ │ │ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null l_partkey:35!null l_suppkey:36!null l_quantity:38!null l_shipdate:44!null
2208+
│ │ │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
2209+
│ │ │ │ │ ├── fd: (16,17)-->(18), (16)==(35), (35)==(16), (17)==(36), (36)==(17)
2210+
│ │ │ │ │ ├── index-join lineitem
2211+
│ │ │ │ │ │ ├── columns: l_partkey:35!null l_suppkey:36!null l_quantity:38!null l_shipdate:44!null
2212+
│ │ │ │ │ │ └── scan lineitem@l_sd
2213+
│ │ │ │ │ │ ├── columns: l_orderkey:34!null l_linenumber:37!null l_shipdate:44!null
2214+
│ │ │ │ │ │ ├── constraint: /44/34/37: [/'1994-01-01' - /'1994-12-31']
2215+
│ │ │ │ │ │ ├── key: (34,37)
2216+
│ │ │ │ │ │ └── fd: (34,37)-->(44)
2217+
│ │ │ │ │ ├── scan partsupp
2218+
│ │ │ │ │ │ ├── columns: ps_partkey:16!null ps_suppkey:17!null ps_availqty:18!null
2219+
│ │ │ │ │ │ ├── key: (16,17)
2220+
│ │ │ │ │ │ └── fd: (16,17)-->(18)
2221+
│ │ │ │ │ └── filters
2222+
│ │ │ │ │ ├── l_partkey:35 = ps_partkey:16 [outer=(16,35), constraints=(/16: (/NULL - ]; /35: (/NULL - ]), fd=(16)==(35), (35)==(16)]
2223+
│ │ │ │ │ └── l_suppkey:36 = ps_suppkey:17 [outer=(17,36), constraints=(/17: (/NULL - ]; /36: (/NULL - ]), fd=(17)==(36), (36)==(17)]
2224+
│ │ │ │ └── aggregations
2225+
│ │ │ │ ├── sum [as=sum:52, outer=(38)]
2226+
│ │ │ │ │ └── l_quantity:38
2227+
│ │ │ │ └── const-agg [as=ps_availqty:18, outer=(18)]
2228+
│ │ │ │ └── ps_availqty:18
2229+
│ │ │ └── filters
2230+
│ │ │ └── ps_availqty:18 > (sum:52 * 0.5) [outer=(18,52), immutable, constraints=(/18: (/NULL - ])]
2231+
│ │ └── filters
2232+
│ │ └── p_name:24 LIKE 'forest%' [outer=(24), constraints=(/24: [/'forest' - /'foresu'); tight)]
22422233
│ └── filters (true)
22432234
└── filters
22442235
└── n_name:11 = 'CANADA' [outer=(11), constraints=(/11: [/'CANADA' - /'CANADA']; tight), fd=()-->(11)]

0 commit comments

Comments
 (0)