diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysical.cpp b/src/backend/gporca/libgpopt/src/operators/CPhysical.cpp index 362c72e9dd3..142dfb75d27 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPhysical.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPhysical.cpp @@ -396,7 +396,7 @@ CPhysical::PdsRequireSingletonOrReplicated(CMemoryPool *mp, CDistributionSpec *pdsRequired, ULONG child_index, ULONG ulOptReq) { - GPOS_ASSERT(2 > ulOptReq); + GPOS_ASSERT(3 > ulOptReq); // if expression has to execute on a single host then we need a gather motion if (exprhdl.NeedsSingletonExecution()) diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp b/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp index ecc9743b3e0..8b72a96b19d 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp @@ -45,13 +45,32 @@ CPhysicalSerialUnionAll::CPhysicalSerialUnionAll( CColRef2dArray *pdrgpdrgpcrInput) : CPhysicalUnionAll(mp, pdrgpcrOutput, pdrgpdrgpcrInput) { - // UnionAll creates two distribution requests to enforce distribution of its children: - // (1) (Hashed, Hashed): used to pass hashed distribution (requested from above) - // to child operators and match request Exactly - // (2) (ANY, matching_distr): used to request ANY distribution from outer child, and - // match its response on the distribution requested from inner child + // UnionAll creates 3 distribution requests to enforce + // distribution of its children: + // + // Request 1: HASH + // Pass hashed distribution (requested from above) to child + // operators and match request exactly + // + // Request 2: NON-SINGLETON, matching_dist + // Request NON-SINGLETON from the outer child, and match the + // requests on the rest of the children based on what dist spec the outer + // child NOW delivers (derived from property plan). 
Note, the + // NON-SINGLETON that we request from the outer child is not + // satisfiable by REPLICATED. + // + // Request 3: ANY, matching_dist + // Request ANY distribution from the outer child, and match the + // requests on the rest of the children based on what dist spec the outer + // child delivers. Note, no enforcement should ever be applied to + // the outer child, because ANY is satisfiable by all specs. + // + // If request 1 falls through, request 3 serves as the + // backup request. Duplicate requests would eventually be + // deduplicated. + + SetDistrRequests(3 /*ulDistrReq*/); - SetDistrRequests(2 /*ulDistrReq*/); GPOS_ASSERT(0 < UlDistrRequests()); } @@ -73,8 +92,9 @@ CPhysicalSerialUnionAll::PdsRequired( { GPOS_ASSERT(nullptr != PdrgpdrgpcrInput()); GPOS_ASSERT(child_index < PdrgpdrgpcrInput()->Size()); - GPOS_ASSERT(2 > ulOptReq); + GPOS_ASSERT(3 > ulOptReq); + // First check if we have to request SINGLETON or REPLICATED CDistributionSpec *pds = PdsRequireSingletonOrReplicated( mp, exprhdl, pdsRequired, child_index, ulOptReq); if (nullptr != pds) @@ -82,6 +102,8 @@ CPhysicalSerialUnionAll::PdsRequired( return pds; } + // Request 1: HASH + // This request applies to all union all children if (0 == ulOptReq && CDistributionSpec::EdtHashed == pdsRequired->Edt()) { // attempt passing requested hashed distribution to children @@ -95,8 +117,18 @@ CPhysicalSerialUnionAll::PdsRequired( if (0 == child_index) { - // otherwise, ANY distribution is requested from outer child - return GPOS_NEW(mp) CDistributionSpecAny(this->Eopid()); + if (1 == ulOptReq) + { + // Request 2: NON-SINGLETON from outer child + return GPOS_NEW(mp) + CDistributionSpecNonSingleton(false /*fAllowReplicated*/); + } + else + { + // Request 3: ANY from outer child + return GPOS_NEW(mp) CDistributionSpecAny(this->Eopid()); + } + } // inspect distribution delivered by outer child diff --git a/src/test/regress/expected/groupingsets_optimizer.out 
b/src/test/regress/expected/groupingsets_optimizer.out index 3e7a6b2afdd..bd72cd0b09d 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -907,8 +907,8 @@ select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 or explain (costs off) select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; - QUERY PLAN ----------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: (NULL::integer) -> Sort @@ -916,24 +916,23 @@ explain (costs off) -> Sequence -> Shared Scan (share slice:id 1:0) -> Seq Scan on gstest2 - -> Redistribute Motion 1:3 (slice2) - -> Append + -> Append + -> Redistribute Motion 1:3 (slice2) -> Result Filter: ((NULL::integer) IS DISTINCT FROM 1) -> Finalize Aggregate -> Gather Motion 3:1 (slice3; segments: 3) -> Partial Aggregate -> Shared Scan (share slice:id 3:0) - -> Gather Motion 3:1 (slice4; segments: 3) - -> GroupAggregate - Group Key: share0_ref3.a - -> Sort - Sort Key: share0_ref3.a - -> Result - Filter: (share0_ref3.a IS DISTINCT FROM 1) - -> Shared Scan (share slice:id 4:0) + -> GroupAggregate + Group Key: share0_ref3.a + -> Sort + Sort Key: share0_ref3.a + -> Result + Filter: (share0_ref3.a IS DISTINCT FROM 1) + -> Shared Scan (share slice:id 1:0) Optimizer: Pivotal Optimizer (GPORCA) -(24 rows) +(23 rows) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; diff --git a/src/test/regress/expected/olap_window_seq_optimizer.out b/src/test/regress/expected/olap_window_seq_optimizer.out index 5ea54e2de3e..b72fc326edb 100644 --- a/src/test/regress/expected/olap_window_seq_optimizer.out +++ b/src/test/regress/expected/olap_window_seq_optimizer.out @@ -8352,23 +8352,23 @@ select k 
from ( select k from (select row_number() over() as k from window_preds (23 rows) explain insert into window_preds select k from ( select k from (select row_number() over() as k from window_preds) f union all select 1::bigint as k from window_preds) as t where k = 1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------- Insert on window_preds (cost=0.00..862.04 rows=1 width=4) - -> Result (cost=0.00..0.00 rows=0 width=0) - -> Redistribute Motion 1:3 (slice1; segments: 1) (cost=0.00..862.00 rows=2 width=16) - Hash Key: (int4((row_number() OVER (?)))) - -> Result (cost=0.00..862.00 rows=2 width=16) - -> Append (cost=0.00..862.00 rows=2 width=8) - -> Result (cost=0.00..431.00 rows=1 width=8) - Filter: ((row_number() OVER (?)) = 1) - -> WindowAgg (cost=0.00..431.00 rows=1 width=8) - -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) - -> Seq Scan on window_preds window_preds_1 (cost=0.00..431.00 rows=1 width=1) - -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Result (cost=0.00..862.00 rows=2 width=16) + -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=12) + Hash Key: (((row_number() OVER (?)))::integer) + -> Result (cost=0.00..862.00 rows=1 width=12) + -> Append (cost=0.00..862.00 rows=1 width=8) + -> Redistribute Motion 1:3 (slice2; segments: 1) (cost=0.00..431.00 rows=1 width=8) -> Result (cost=0.00..431.00 rows=1 width=8) - Filter: (('1'::bigint) = 1) - -> Seq Scan on window_preds window_preds_2 (cost=0.00..431.00 rows=1 width=1) + Filter: ((row_number() OVER (?)) = 1) + -> WindowAgg (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on window_preds window_preds_1 
(cost=0.00..431.00 rows=1 width=1) + -> Result (cost=0.00..431.00 rows=1 width=8) + Filter: (('1'::bigint) = 1) + -> Seq Scan on window_preds window_preds_2 (cost=0.00..431.00 rows=1 width=1) Optimizer: Pivotal Optimizer (GPORCA) (16 rows) diff --git a/src/test/regress/expected/qp_union_intersect_optimizer.out b/src/test/regress/expected/qp_union_intersect_optimizer.out index 27068ae62b3..b8b1fdd440f 100644 --- a/src/test/regress/expected/qp_union_intersect_optimizer.out +++ b/src/test/regress/expected/qp_union_intersect_optimizer.out @@ -1858,15 +1858,16 @@ explain (costs off) select * from t_test_append_rep union all select * from t_test_append_hash; - QUERY PLAN ---------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) -> Append - -> Seq Scan on t_test_append_rep - -> Broadcast Motion 3:1 (slice2; segments: 3) - -> Seq Scan on t_test_append_hash + -> Result + One-Time Filter: (gp_execution_segment() = 2) + -> Seq Scan on t_test_append_rep + -> Seq Scan on t_test_append_hash Optimizer: Pivotal Optimizer (GPORCA) -(6 rows) +(7 rows) select * from t_test_append_rep union all @@ -1935,15 +1936,16 @@ insert into t1 select i, i+1 from generate_series(6, 9) i; insert into rt1 select i, i+1 from generate_series(1, 5) i; set local gp_enable_direct_dispatch = on; explain(costs off) select * from rt1 union all select * from t1; - QUERY PLAN ---------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) -> Append - -> Seq Scan on rt1 - -> Broadcast Motion 3:1 (slice2; segments: 3) - -> Seq Scan on t1 + -> Result + One-Time Filter: (gp_execution_segment() = 0) + -> Seq Scan on rt1 + -> Seq Scan on t1 Optimizer: Pivotal Optimizer (GPORCA) -(6 rows) +(7 rows) select 
* from rt1 union all select * from t1; a | b diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index 4991b985676..5f3854b66ba 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -918,15 +918,17 @@ explain (costs off) insert into t_replicate_volatile select nextval('seq_for_ins (5 rows) explain (costs off) select a from t_replicate_volatile union all select * from nextval('seq_for_insert_replicated_table'); - QUERY PLAN ----------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) -> Append - -> Seq Scan on t_replicate_volatile - -> Broadcast Motion 1:1 (slice2) + -> Result + One-Time Filter: (gp_execution_segment() = 2) + -> Seq Scan on t_replicate_volatile + -> Redistribute Motion 1:3 (slice2) -> Function Scan on nextval Optimizer: Pivotal Optimizer (GPORCA) -(6 rows) +(8 rows) -- CTAS explain (costs off) create table rpt_ctas as select random() from generate_series(1, 10) distributed replicated; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 36e39aeb6a7..48511a17296 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -2911,7 +2911,7 @@ where dt < '2010-01-01'::date; -> Nested Loop Output: ((SubPlan 1)), extra_flow_dist1.a, extra_flow_dist1.b Join Filter: true - -> Broadcast Motion 3:3 (slice6; segments: 3) + -> Broadcast Motion 3:3 (slice7; segments: 3) Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b @@ -2930,9 +2930,9 @@ where dt < '2010-01-01'::date; -> Sort Output: ((SubPlan 1)) Sort Key: ((SubPlan 1)) - -> Redistribute Motion 1:3 (slice3) - Output: ((SubPlan 1)) - 
-> Append + -> Append + -> Redistribute Motion 1:3 (slice3) + Output: ((SubPlan 1)) -> Result Output: ((SubPlan 1)) Filter: (((SubPlan 1)) < '01-01-2010'::date) @@ -2950,6 +2950,8 @@ where dt < '2010-01-01'::date; Output: extra_flow_dist.b, extra_flow_dist.c -> Seq Scan on subselect_gp.extra_flow_dist Output: extra_flow_dist.b, extra_flow_dist.c + -> Redistribute Motion 1:3 (slice5) + Output: ((SubPlan 2)) -> Result Output: ((SubPlan 2)) Filter: (((SubPlan 2)) < '01-01-2010'::date) @@ -2963,12 +2965,12 @@ where dt < '2010-01-01'::date; Filter: (extra_flow_dist_1.b = max(1)) -> Materialize Output: extra_flow_dist_1.b, extra_flow_dist_1.c - -> Gather Motion 3:1 (slice5; segments: 3) + -> Gather Motion 3:1 (slice6; segments: 3) Output: extra_flow_dist_1.b, extra_flow_dist_1.c -> Seq Scan on subselect_gp.extra_flow_dist extra_flow_dist_1 Output: extra_flow_dist_1.b, extra_flow_dist_1.c Optimizer: Pivotal Optimizer (GPORCA) -(62 rows) +(64 rows) -- Check DISTINCT ON clause and ORDER BY clause in SubLink, See https://github.com/greenplum-db/gpdb/issues/12656. 
-- For EXISTS SubLink, we don’t need to care about the data deduplication problem, we can delete DISTINCT ON clause and diff --git a/src/test/regress/output/external_table_union_all_optimizer.source b/src/test/regress/output/external_table_union_all_optimizer.source index 893a41a7707..61ec2235742 100644 --- a/src/test/regress/output/external_table_union_all_optimizer.source +++ b/src/test/regress/output/external_table_union_all_optimizer.source @@ -31,15 +31,16 @@ SELECT A FROM multilocation_external_table UNION ALL SELECT A FROM simple_replic (4 rows) EXPLAIN SELECT A FROM simple_replicated_table UNION ALL SELECT A FROM multilocation_external_table; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..100000000000919.44 rows=1000001 width=4) - -> Append (cost=0.00..100000000000874.72 rows=3000003 width=4) - -> Seq Scan on simple_replicated_table (cost=0.00..431.00 rows=3 width=4) - -> Broadcast Motion 3:1 (slice2; segments: 3) (cost=0.00..100000000000439.72 rows=3000000 width=4) - -> Foreign Scan on multilocation_external_table (cost=0.00..437.23 rows=333334 width=4) + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..886.95 rows=1000001 width=4) + -> Append (cost=0.00..872.05 rows=333334 width=4) + -> Result (cost=0.00..431.00 rows=1 width=4) + One-Time Filter: (gp_execution_segment() = 0) + -> Seq Scan on simple_replicated_table (cost=0.00..431.00 rows=1 width=4) + -> Foreign Scan on multilocation_external_table (cost=0.00..437.23 rows=333334 width=4) Optimizer: Pivotal Optimizer (GPORCA) -(6 rows) +(7 rows) SELECT A FROM simple_replicated_table UNION ALL SELECT A FROM multilocation_external_table; a @@ -51,15 +52,15 @@ SELECT A FROM simple_replicated_table UNION ALL SELECT A FROM multilocation_exte (4 rows) EXPLAIN SELECT 
A FROM simple_replicated_table UNION ALL SELECT a FROM multilocation_external_table UNION ALL SELECT A FROM simple_distributed_table; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..100000000001350.44 rows=1000002 width=4) - -> Append (cost=0.00..100000000001305.72 rows=3000006 width=4) - -> Seq Scan on simple_replicated_table (cost=0.00..431.00 rows=3 width=4) - -> Broadcast Motion 3:1 (slice2; segments: 3) (cost=0.00..100000000000439.72 rows=3000000 width=4) - -> Foreign Scan on multilocation_external_table (cost=0.00..437.23 rows=333334 width=4) - -> Broadcast Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=3 width=4) - -> Seq Scan on simple_distributed_table (cost=0.00..431.00 rows=1 width=4) + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1317.95 rows=1000002 width=4) + -> Append (cost=0.00..1303.05 rows=333334 width=4) + -> Result (cost=0.00..431.00 rows=1 width=4) + One-Time Filter: (gp_execution_segment() = 1) + -> Seq Scan on simple_replicated_table (cost=0.00..431.00 rows=1 width=4) + -> Foreign Scan on multilocation_external_table (cost=0.00..437.23 rows=333334 width=4) + -> Seq Scan on simple_distributed_table (cost=0.00..431.00 rows=1 width=4) Optimizer: Pivotal Optimizer (GPORCA) (8 rows)