From d6e3f3f630d0c7d33227027a4bfb4704fa431f3d Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Thu, 16 May 2024 11:06:12 +0800 Subject: [PATCH] [AQUMV] Support DISTINCT clause on origin query. SELECT DISTINCT clause references are processed in target list, open it on origin query. DISTINCT in aggregation and Group By DISTINCT are already supported, add cases to verify that. create incremental materialized view mv as select c1 as mc1, c2 as mc2, c3 as mc3, c4 as mc4 from t1 where c1 > 90; Origin querys: select DISTINCT c2 from t1 where c1 > 90; select count(DISTINCT c2) from t1 where c1 > 90; select c1, c2, c3, sum(c4) from t1 where c1 > 90 group by DISTINCT rollup(c1, c2), rollup(c1, c3); Could be rewritten to: select DISTINCT mc2 from mv; select count(DISTINCT mc2) from mv; select mc1, mc2, mc3, sum(mc4) from mv group by DISTINCT rollup(mc1, mc2), rollup(mc1, mc3); Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/optimizer/README.cbdb.aqumv | 2 +- src/backend/optimizer/plan/aqumv.c | 5 +- src/test/regress/expected/aqumv.out | 218 ++++++++++++++++++++++++ src/test/regress/sql/aqumv.sql | 56 ++++++ 4 files changed, 277 insertions(+), 4 deletions(-) diff --git a/src/backend/optimizer/README.cbdb.aqumv b/src/backend/optimizer/README.cbdb.aqumv index c18d295983a..a0a1c349fa7 100644 --- a/src/backend/optimizer/README.cbdb.aqumv +++ b/src/backend/optimizer/README.cbdb.aqumv @@ -227,7 +227,7 @@ Below are not supported now: Group By/Grouping Sets/Rollup/Cube (on view_query) Window Functions CTE - Distinct + Distinct (on view_query) Distinct On UNION/INTERSECT/EXCEPT FOR UPDATE, FOR NO KEY UPDATE, FOR SHARE, FOR KEY SHARE diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c index f8249a38115..0ea441dfd52 100644 --- a/src/backend/optimizer/plan/aqumv.c +++ b/src/backend/optimizer/plan/aqumv.c @@ -115,7 +115,6 @@ answer_query_using_materialized_views(PlannerInfo *root, /* Group By without agg could be possible though IMMV doesn't support it yet. */ bool can_not_use_mv = (parse->commandType != CMD_SELECT) || (parse->rowMarks != NIL) || - (parse->distinctClause != NIL) || (parse->scatterClause != NIL) || (parse->cteList != NIL) || (parse->setOperations != NULL) || @@ -345,6 +344,7 @@ answer_query_using_materialized_views(PlannerInfo *root, viewQuery->groupClause = parse->groupClause; viewQuery->groupingSets = parse->groupingSets; viewQuery->sortClause = parse->sortClause; + viewQuery->distinctClause = parse->distinctClause; /* * AQUMV @@ -419,13 +419,12 @@ answer_query_using_materialized_views(PlannerInfo *root, /* * Update pathkeys which may be changed by qp_callback. * Set belows if corresponding feature is supported. - * distinct_pathkey * window_pathkeys */ root->group_pathkeys = subroot->group_pathkeys; root->sort_pathkeys = subroot->sort_pathkeys; root->query_pathkeys = subroot->query_pathkeys; - + root->distinct_pathkeys = subroot->distinct_pathkeys; /* * AQUMV_FIXME_MVP * Use new query's ecs. diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out index dd50fcdbf06..3fee06cd8aa 100644 --- a/src/test/regress/expected/aqumv.out +++ b/src/test/regress/expected/aqumv.out @@ -1867,6 +1867,224 @@ select c1, sum(c3) as sum_c3 from aqumv_t5 where c1 > 90 group by c1 order by su 91 | 188 (10 rows) +abort; +-- Test DISTINCT +begin; +create table aqumv_t6(c1 int, c2 int, c3 int, c4 int) distributed by (c1); +insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i; +insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i; +insert into aqumv_t6 values (91, NULL, 97, 98); +analyze aqumv_t6; +create incremental materialized view aqumv_mvt6_0 as + select c1 as mc1, c2 as mc2 + from aqumv_t6 where c1 > 90; +analyze aqumv_mvt6_0; +-- DISTINCT +\pset null NULL +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select distinct c2, c1 from aqumv_t6 where c1 > 90; + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c2, c1 + -> HashAggregate + Output: c2, c1 + Group Key: aqumv_t6.c2, aqumv_t6.c1 + -> Seq Scan on public.aqumv_t6 + Output: c1, c2, c3, c4 + Filter: (aqumv_t6.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1; + c2 | c1 +------+----- + 92 | 91 + 93 | 92 + 94 | 93 + 95 | 94 + 96 | 95 + 97 | 96 + 98 | 97 + 99 | 98 + 100 | 99 + 101 | 100 + NULL | 91 +(11 rows) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select distinct c2, c1 from aqumv_t6 where c1 > 90; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc2, mc1 + -> HashAggregate + Output: mc2, mc1 + Group Key: aqumv_mvt6_0.mc2, aqumv_mvt6_0.mc1 + -> Seq Scan on public.aqumv_mvt6_0 + Output: mc1, mc2 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(9 rows) + +select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1; + c2 | c1 +------+----- + 92 | 91 + 93 | 92 + 94 | 93 + 95 | 94 + 96 | 95 + 97 | 96 + 98 | 97 + 99 | 98 + 100 | 99 + 101 | 100 + NULL | 91 +(11 rows) + +-- Agg DISTINCT +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; + QUERY PLAN +----------------------------------------------------------------------------------- + Aggregate + Output: count(c1), count(DISTINCT c1), sum(c2), sum(DISTINCT c2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c2 + -> Seq Scan on public.aqumv_t6 + Output: c1, c2 + Filter: (aqumv_t6.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(9 rows) + +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; + count_c1 | count_distinct_c1 | sum_c2 | sum_distinct_c2 +----------+-------------------+--------+----------------- + 21 | 10 | 1930 | 965 +(1 row) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; + QUERY PLAN +---------------------------------------------------------------------------------- + Aggregate + Output: count(mc1), count(DISTINCT mc1), sum(mc2), sum(DISTINCT mc2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc2 + -> Seq Scan on public.aqumv_mvt6_0 + Output: mc1, mc2 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(8 rows) + +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; + count_c1 | count_distinct_c1 | sum_c2 | sum_distinct_c2 +----------+-------------------+--------+----------------- + 21 | 10 | 1930 | 965 +(1 row) + +-- Group DISTINCT +create incremental materialized view aqumv_mvt6_1 as + select c3 as mc3, c4 as mc4, c1 as mc1, c2 as mc2 + from aqumv_t6 where c1 > 97; +analyze aqumv_mvt6_1; +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); + QUERY PLAN +----------------------------------------------------------------------------------- + MixedAggregate + Output: c1, c2, c3, sum(c4) + Hash Key: aqumv_t6.c1, aqumv_t6.c3 + Group Key: aqumv_t6.c1, aqumv_t6.c2, aqumv_t6.c3 + Group Key: aqumv_t6.c1, aqumv_t6.c2 + Group Key: aqumv_t6.c1 + Group Key: () + -> Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c2, c3, c4 + Merge Key: c1, c2, c3 + -> Sort + Output: c1, c2, c3, c4 + Sort Key: aqumv_t6.c1, aqumv_t6.c2, aqumv_t6.c3 + -> Seq Scan on public.aqumv_t6 + Output: c1, c2, c3, c4 + Filter: (aqumv_t6.c1 > 97) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(18 rows) + +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); + c1 | c2 | c3 | sum +------+------+------+----- + 98 | 99 | 100 | 202 + 98 | 99 | NULL | 202 + 98 | NULL | NULL | 202 + 99 | 100 | 101 | 204 + 99 | 100 | NULL | 204 + 99 | NULL | NULL | 204 + 100 | 101 | 102 | 206 + 100 | 101 | NULL | 206 + 100 | NULL | NULL | 206 + NULL | NULL | NULL | 612 + 100 | NULL | 102 | 206 + 99 | NULL | 101 | 204 + 98 | NULL | 100 | 202 +(13 rows) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); + QUERY PLAN +---------------------------------------------------------------------------------- + MixedAggregate + Output: mc1, mc2, mc3, sum(mc4) + Hash Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc3 + Group Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc2, aqumv_mvt6_1.mc3 + Group Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc2 + Group Key: aqumv_mvt6_1.mc1 + Group Key: () + -> Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc2, mc3, mc4 + Merge Key: mc1, mc2, mc3 + -> Sort + Output: mc1, mc2, mc3, mc4 + Sort Key: aqumv_mvt6_1.mc1, aqumv_mvt6_1.mc2, aqumv_mvt6_1.mc3 + -> Seq Scan on public.aqumv_mvt6_1 + Output: mc1, mc2, mc3, mc4 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(17 rows) + +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); + c1 | c2 | c3 | sum +------+------+------+----- + 98 | 99 | 100 | 202 + 98 | 99 | NULL | 202 + 98 | NULL | NULL | 202 + 99 | 100 | 101 | 204 + 99 | 100 | NULL | 204 + 99 | NULL | NULL | 204 + 100 | 101 | 102 | 206 + 100 | 101 | NULL | 206 + 100 | NULL | NULL | 206 + NULL | NULL | NULL | 612 + 100 | NULL | 102 | 206 + 99 | NULL | 101 | 204 + 98 | NULL | 100 | 202 +(13 rows) + +\pset null '' abort; reset optimizer; reset enable_answer_query_using_materialized_views; diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql index 772a1358b36..cc60dcb6819 100644 --- a/src/test/regress/sql/aqumv.sql +++ b/src/test/regress/sql/aqumv.sql @@ -462,6 +462,62 @@ select c1, sum(c3) as sum_c3 from aqumv_t5 where c1 > 90 group by c1 order by su select c1, sum(c3) as sum_c3 from aqumv_t5 where c1 > 90 group by c1 order by sum_c3 asc; abort; +-- Test DISTINCT +begin; +create table aqumv_t6(c1 int, c2 int, c3 int, c4 int) distributed by (c1); +insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i; +insert into aqumv_t6 select i, i+1, i+2, i+3 from generate_series(1, 100) i; +insert into aqumv_t6 values (91, NULL, 97, 98); +analyze aqumv_t6; + +create incremental materialized view aqumv_mvt6_0 as + select c1 as mc1, c2 as mc2 + from aqumv_t6 where c1 > 90; +analyze aqumv_mvt6_0; + +-- DISTINCT +\pset null NULL +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select distinct c2, c1 from aqumv_t6 where c1 > 90; +select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select distinct c2, c1 from aqumv_t6 where c1 > 90; +select distinct c2, c1 from aqumv_t6 where c1 > 90 order by c2, c1; + +-- Agg DISTINCT +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; +select count(c1) as count_c1, count(distinct c1) as count_distinct_c1, +sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > 90; + +-- Group DISTINCT +create incremental materialized view aqumv_mvt6_1 as + select c3 as mc3, c4 as mc4, c1 as mc1, c2 as mc2 + from aqumv_t6 where c1 > 97; +analyze aqumv_mvt6_1; +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); +select c1, c2, c3, sum(c4) from aqumv_t6 where c1 > 97 group by distinct rollup(c1, c2), rollup(c1, c3); + +\pset null '' +abort; + + reset optimizer; reset enable_answer_query_using_materialized_views; drop table aqumv_t1 cascade;