From 6913f4e610d9555c413e791a5ffbbd464ed299ef Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Wed, 26 Jul 2023 11:05:36 +0800 Subject: [PATCH] Let Replicated locus join with others. Replicated locus could happen besides we Broadcast a node. Ex: Writeable CTE on replicated table join with others. See issue: https://github.com/greenplum-db/gpdb/issues/15860. The principle to decide join locus of Replicated with others is: the slice does the writeable operation on replicated table must be executed on all segments. Replicated locus join SegmentGeneral locus. If either is not ok to replicate(outer join) or the num segments of SegmentGeneral is less than Replicated, Gather them to SingleQE. We need a Explicit Gather Motion to operate on replicated tables and Gather SegmentGeneral too, avoiding to be elided to Entry locus. Else return Replicated locus. Replicated locus join Partitioned locus. If Replicated is not ok to replicate, gather them to SingleQE. If num segments of Partitioned(M) is not equal to Replicated(N), redistribute Paritioned(M) to Paritioned(N). Else return Partitioned locus. Replicated locus join General locus. If either of Replicated and General is not ok to replicate, gather them to SingleQE. Else return Replicated locus. Replicated locus join SingleQE locus. Bring Replicated(Explicit Gather Motion) to SingleQE for join. Replicated locus join Entry locus. Bring Replicated(Explicit Gather Motion) to SingleQE for join. We should return SingleQE here to guarantee not to be elided to Entry loucs early. Let cdbpathlocus_join() do it after Motion added. Replicated locus join OuterQuery locus. Make Replicated to OuterQuery locus may be wrong. OuterQuery will be finally be Broadcast or Gathered to single. If it's Gathered, we will insert/update/delete only on one segment for a raplicated table, that's not right. It may also be wrong if Broadcast, we don't have such a Motion for that.And it causes an error. Left a FXIME for this case. Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/cdb/cdbpath.c | 127 +++++++-- src/backend/cdb/cdbpathlocus.c | 40 ++- src/backend/optimizer/util/pathnode.c | 17 -- src/test/regress/expected/rpt_joins.out | 360 +++++++++++++++++++++++- src/test/regress/sql/rpt_joins.sql | 80 +++++- 5 files changed, 571 insertions(+), 53 deletions(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index d507613a419..4143f73d2af 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -1407,21 +1407,6 @@ cdbpath_motion_for_join(PlannerInfo *root, } } - /* - * Locus type Replicated can only be generated by join operation. - * And in the function cdbpathlocus_join there is a rule: - * join => any locus type - * Proof by contradiction, it shows that when code arrives here, - * is is impossible that any of the two input paths' locus - * is Replicated. So we add two asserts here. - */ - Assert(!CdbPathLocus_IsReplicated(outer.locus)); - Assert(!CdbPathLocus_IsReplicated(inner.locus)); - - if (CdbPathLocus_IsReplicated(outer.locus) || - CdbPathLocus_IsReplicated(inner.locus)) - goto fail; - outer.has_wts = cdbpath_contains_wts(outer.path); inner.has_wts = cdbpath_contains_wts(inner.path); @@ -1566,6 +1551,21 @@ cdbpath_motion_for_join(PlannerInfo *root, outer.bytes = outer.path->rows * outer.path->pathtarget->width; inner.bytes = inner.path->rows * inner.path->pathtarget->width; + if (join_quals_contain_outer_references || + CdbPathLocus_IsOuterQuery(outer.locus) || + CdbPathLocus_IsOuterQuery(inner.locus)) + { + /* + * CBDB_FIXME: Consider Replicated locus. + * Replicated join OuterQuery, make Replicated to OuterQuery locus may be wrong. + * OuterQuery will be finally be Broadcast or Gathered. + * If it's Gathered, we will insert/update/delete only on one segment for a replicated table, that's not right. + * Ex: insert into a replicated table join with OuterQuery subslect. + */ + if (CdbPathLocus_IsReplicated(outer.locus) || CdbPathLocus_IsReplicated(inner.locus)) + goto fail; + } + if (join_quals_contain_outer_references) { if (CdbPathLocus_IsOuterQuery(outer.locus) && @@ -1593,6 +1593,98 @@ cdbpath_motion_for_join(PlannerInfo *root, else outer.move_to = inner.locus; } + else if (CdbPathLocus_IsReplicated(outer.locus) || + CdbPathLocus_IsReplicated(inner.locus)) + { + /* + * Replicated locus could happen here before we add Motion for join. + * Ex: insert/update/delete a replicated table with returning and join with others. + * We must broadcast to all segments for replicated table, so the upper node have + * the Replicated locus. + */ + + /* + * CBDB only allow to modify one CTE now limited by gramma, but in case that there + * are multiple references for writeable CTE. We couldn't handle that now. + */ + if ((CdbPathLocus_IsReplicated(outer.locus) && CdbPathLocus_IsReplicated(inner.locus))) + goto fail; + + CdbpathMfjRel *replicated = &outer; + CdbpathMfjRel *other = &inner; + if (CdbPathLocus_IsReplicated(inner.locus)) + { + replicated = &inner; + other = &outer; + } + + if (CdbPathLocus_IsSegmentGeneral(other->locus)) + { + /* + * If it's not ok to replicate(outer join) or the numsegments of SegmentGeneral is less than Replicated, gather them to SingleQE. + * Don't worry about operation on all segments for replicated table, there will be a Explicit Gather Motion to guarantee that. + */ + if(!replicated->ok_to_replicate || + !other->ok_to_replicate || + (CdbPathLocus_NumSegments(other->locus) < CdbPathLocus_NumSegments(replicated->locus))) + { + CdbPathLocus_MakeSingleQE(&replicated->move_to, CdbPathLocus_NumSegments(replicated->locus)); + CdbPathLocus_MakeSingleQE(&other->move_to, CdbPathLocus_NumSegments(other->locus)); + } + else + return cdbpathlocus_join(jointype, replicated->locus, other->locus); + } + else if (CdbPathLocus_IsGeneral(other->locus)) + { + /* + * Quite similar to SegementGeneral and we don't need to care about num segments. + * And we must Gather segment to that as SingleQE to Entry Motion may be elided, see changes in cdbpathlocus_join. + */ + if(!replicated->ok_to_replicate || !other->ok_to_replicate) + { + CdbPathLocus_MakeSingleQE(&replicated->move_to, CdbPathLocus_NumSegments(replicated->locus)); + CdbPathLocus_MakeSingleQE(&other->move_to, CdbPathLocus_NumSegments(replicated->locus)); + } + else + return cdbpathlocus_join(jointype, replicated->locus, other->locus); + } + else if (CdbPathLocus_IsSingleQE(other->locus) || CdbPathLocus_IsEntry(other->locus)) + { + /* + * Bring to SingleQE and we should guarantee not to be elided to Entry early. + * Let cdbpathlocus_join() do it after Motion added. + */ + CdbPathLocus_MakeSingleQE(&replicated->move_to, CdbPathLocus_NumSegments(replicated->locus)); + } + else if (CdbPathLocus_IsPartitioned(other->locus)) + { + /* + * Hashed, Strewn, HashedOJ are similar. + * Redistribute Partition to the num segments of Replicated if num segments are not matched. + */ + if (!replicated->ok_to_replicate) + { + CdbPathLocus_MakeSingleQE(&replicated->move_to, CdbPathLocus_NumSegments(replicated->locus)); + CdbPathLocus_MakeSingleQE(&other->move_to, CdbPathLocus_NumSegments(other->locus)); + } + else if (CdbPathLocus_NumSegments(other->locus) != CdbPathLocus_NumSegments(replicated->locus)) + { + CdbPathLocus_MakeHashed(&other->move_to, other->locus.distkey, + CdbPathLocus_NumSegments(replicated->locus), 0); + } + else + { + /* Compatible! */ + return other->locus; + } + } + else + { + Assert(false); + /* Shouldn't get here */ + goto fail; + } + } else if (CdbPathLocus_IsGeneral(outer.locus) || CdbPathLocus_IsGeneral(inner.locus)) { @@ -2675,7 +2767,7 @@ create_split_update_path(PlannerInfo *root, Index rti, GpPolicy *policy, Path *s * turn_volatile_seggen_to_singleqe * * This function is the key tool to build correct plan - * for general or segmentgeneral locus paths that contain + * for general, segmentgeneral, replicated locus paths that contain * volatile functions. * * If we find such a pattern: @@ -2695,8 +2787,9 @@ turn_volatile_seggen_to_singleqe(PlannerInfo *root, Path *path, Node *node) { if ((CdbPathLocus_IsSegmentGeneral(path->locus) || CdbPathLocus_IsGeneral(path->locus) || + CdbPathLocus_IsReplicated(path->locus) || CdbPathLocus_IsSegmentGeneralWorkers(path->locus)) && - (contain_volatile_functions(node) || IsA(path, LimitPath))) + (contain_volatile_functions(node) || IsA(path, LimitPath))) { CdbPathLocus singleQE; Path *mpath; diff --git a/src/backend/cdb/cdbpathlocus.c b/src/backend/cdb/cdbpathlocus.c index 4cfe272f798..509bfe0f95c 100644 --- a/src/backend/cdb/cdbpathlocus.c +++ b/src/backend/cdb/cdbpathlocus.c @@ -774,6 +774,18 @@ cdbpathlocus_join(JoinType jointype, CdbPathLocus a, CdbPathLocus b) return resultlocus; } + /* + * Could get here if Replicated join Entry and we Gather Replicated to SingleQE + * with cte1 as (insert into rpt_table values (1, 2) returning *) + * select * from cte1 join gp_segment_configuration g on g.dbid = cte1.c1; + * We return SingleQE to ensure not to be elided Motion. + */ + if ((CdbPathLocus_IsSingleQE(a) && CdbPathLocus_IsEntry(b)) || + (CdbPathLocus_IsSingleQE(b) && CdbPathLocus_IsEntry(a))) + { + return CdbPathLocus_IsSingleQE(a) ? a : b; + } + if (CdbPathLocus_IsGeneral(a)) return b; @@ -781,39 +793,37 @@ cdbpathlocus_join(JoinType jointype, CdbPathLocus a, CdbPathLocus b) return a; /* - * If one rel is replicated, result stays with the other rel, + * If one rel is SegmentGeneral, result stays with the other rel, * but need to ensure the result is on the common segments. + * NB: the code check SegmentGeneral and Replicated is quite similar, + * but we have to put check-segmentgeneral first here. Consider one + * is SegmentGeneral and the other is Replicated, only by this order + * we can be sure that this function return a locus of Replicated, + * else if we return SegmentGeneral, plan will allocate gangs on only + * one segment, which will insert/update/delelte rows on that segment + * for a replicated tables. */ - if (CdbPathLocus_IsReplicated(a)) + if (CdbPathLocus_IsSegmentGeneral(a)) { b.numsegments = CdbPathLocus_CommonSegments(a, b); return b; } - if (CdbPathLocus_IsReplicated(b)) + if (CdbPathLocus_IsSegmentGeneral(b)) { a.numsegments = CdbPathLocus_CommonSegments(a, b); return a; } /* - * If one rel is segmentgeneral, result stays with the other rel, + * If one rel is replicated, result stays with the other rel, * but need to ensure the result is on the common segments. - * - * NB: the code check SegmentGeneral and replicated is quite similar, - * but we have to put check-segmentgeneral below. Consider one - * is segmentgeneral and the other is replicated, only by this order - * we can be sure that this function never return a locus of - * Replicated. - * update a replicated table join with a partitioned locus table will - * reach here. */ - - if (CdbPathLocus_IsSegmentGeneral(a)) + if (CdbPathLocus_IsReplicated(a)) { b.numsegments = CdbPathLocus_CommonSegments(a, b); return b; } - if (CdbPathLocus_IsSegmentGeneral(b)) + if (CdbPathLocus_IsReplicated(b)) { a.numsegments = CdbPathLocus_CommonSegments(a, b); return a; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 9c50c24c121..918c554579e 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -6004,23 +6004,6 @@ adjust_modifytable_subpath(PlannerInfo *root, CmdType operation, * currently, because a ModifyTable node can only be at the top of the * plan, it won't make any difference to the overall plan. * - * GPDB_96_MERGE_FIXME: it might with e.g. a INSERT RETURNING in a CTE - * I tried here, the locus setting is quite simple, but failed if it's not - * in a CTE and the locus is General. Haven't figured out how to create - * flow in that case. - * Example: - * CREATE TABLE cte_returning_locus(c1 int) DISTRIBUTED BY (c1); - * COPY cte_returning_locus FROM PROGRAM 'seq 1 100'; - * EXPLAIN WITH aa AS ( - * INSERT INTO cte_returning_locus SELECT generate_series(3,300) RETURNING c1 - * ) - * SELECT count(*) FROM aa,cte_returning_locus WHERE aa.c1 = cte_returning_locus.c1; - * - * The returning doesn't need a motion to be hash joined, works fine. But - * without the WITH, what is the proper flow? FLOW_SINGLETON returns - * nothing, FLOW_PARTITIONED without hashExprs(General locus has no - * distkeys) returns duplication. - * * GPDB_90_MERGE_FIXME: I've hacked a basic implementation of the above for * the case where all the subplans are POLICYTYPE_ENTRY, but it seems like * there should be a more general way to do this. diff --git a/src/test/regress/expected/rpt_joins.out b/src/test/regress/expected/rpt_joins.out index 3011990efa9..9a96e8132c0 100644 --- a/src/test/regress/expected/rpt_joins.out +++ b/src/test/regress/expected/rpt_joins.out @@ -2,7 +2,7 @@ -- Tests for joins between replicated tables -- create schema rpt_joins; -set search_path to rpt_joins; +set search_path to rpt_joins, public; -- -- Test JOIN clauses, bellow tests are copy from tests for partitioned table -- @@ -2155,10 +2155,366 @@ select max(c1) from pg_class left join t_5628 on true; 2 (1 row) +-- +-- Writeable CTE on replicated table join with other tables. +-- See issue https://github.com/greenplum-db/gpdb/issues/15860 +-- +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table rpt_issue_15860_2_segments(c1 int, c2 int) distributed replicated; +create table hash_issue_15860_2_segments(c1 int, c2 int) distributed by (c1); +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +create table rpt_issue_15860 (c1 int, c2 int) distributed replicated; +create table rpt2_issue_15860 (c1 int, c2 int) distributed replicated; +create table hash_issue_15860(c1 int, c2 int) distributed by (c1); +create table strewn_issue_15860(c1 int, c2 int) distributed randomly; +insert into rpt2_issue_15860 values (1, 2), (2, 3); +insert into rpt_issue_15860_2_segments values (1, 2), (2, 3); +analyze rpt_issue_15860; +analyze rpt2_issue_15860; +analyze hash_issue_15860; +analyze strewn_issue_15860; +analyze rpt_issue_15860_2_segments; +-- Replicated join SegmentGeneral. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join rpt2_issue_15860 using(c1); + QUERY PLAN +--------------------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (rpt2_issue_15860.c1 = rpt_issue_15860.c1) + -> Seq Scan on rpt2_issue_15860 + -> Hash + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +-- Replicated join SegmentGeneral, Replicated is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join rpt2_issue_15860 using(c1); + QUERY PLAN +--------------------------------------------------------------- + Hash Right Join + Hash Cond: (rpt2_issue_15860.c1 = rpt_issue_15860.c1) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on rpt2_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(9 rows) + +-- Replicated join SegmentGeneral, SegmentGeneral is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join rpt2_issue_15860 using(c1); + QUERY PLAN +--------------------------------------------------------------- + Hash Left Join + Hash Cond: (rpt2_issue_15860.c1 = cte1.c1) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on rpt2_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Subquery Scan on cte1 + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(10 rows) + +-- Replicated join SegmentGeneral, both are not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join rpt2_issue_15860 using(c1); + QUERY PLAN +--------------------------------------------------------------- + Hash Full Join + Hash Cond: (rpt2_issue_15860.c1 = rpt_issue_15860.c1) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on rpt2_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(9 rows) + +-- Replicated join SegmentGeneral, num segments are not matched. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join rpt_issue_15860_2_segments using(c1); + QUERY PLAN +------------------------------------------------------------------- + Hash Join + Hash Cond: (rpt_issue_15860_2_segments.c1 = rpt_issue_15860.c1) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on rpt_issue_15860_2_segments + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(9 rows) + +-- Replicated join General. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join generate_series(1, 5) i on i= cte1.c1 ; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (i.i = rpt_issue_15860.c1) + -> Function Scan on generate_series i + -> Hash + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +-- Replicated join General, Replicated is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join generate_series(1, 5) i on i= cte1.c1 ; + QUERY PLAN +--------------------------------------------------------------- + Hash Right Join + Hash Cond: (i.i = rpt_issue_15860.c1) + -> Function Scan on generate_series i + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +-- Replicated join General, General is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join generate_series(1, 5) i on i= cte1.c1 ; + QUERY PLAN +--------------------------------------------------------------- + Hash Left Join + Hash Cond: (i.i = rpt_issue_15860.c1) + -> Function Scan on generate_series i + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +-- Replicated join General, both are not not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join generate_series(1, 5) i on i= cte1.c1 ; + QUERY PLAN +--------------------------------------------------------------- + Hash Full Join + Hash Cond: (i.i = rpt_issue_15860.c1) + -> Function Scan on generate_series i + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +-- Replicate join SingleQE. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; + QUERY PLAN +--------------------------------------------------------------- + Hash Join + Hash Cond: ((count(*)) = rpt_issue_15860.c1) + -> Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(10 rows) + +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; + QUERY PLAN +--------------------------------------------------------------- + Hash Right Join + Hash Cond: ((count(*)) = rpt_issue_15860.c1) + -> Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(10 rows) + +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; + QUERY PLAN +--------------------------------------------------------------- + Hash Left Join + Hash Cond: ((count(*)) = rpt_issue_15860.c1) + -> Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(10 rows) + +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; + QUERY PLAN +--------------------------------------------------------------- + Hash Full Join + Hash Cond: ((count(*)) = rpt_issue_15860.c1) + -> Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(10 rows) + +-- Replicate join Entry. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join gp_segment_configuration g on g.dbid = cte1.c1; + QUERY PLAN +--------------------------------------------------------------- + Hash Join + Hash Cond: (g.dbid = rpt_issue_15860.c1) + -> Seq Scan on gp_segment_configuration g + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +-- +-- Begin of Replicated join Partitioned. +-- +-- Replicated join Hashed. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join hash_issue_15860 using(c1); + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (hash_issue_15860.c1 = rpt_issue_15860.c1) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join hash_issue_15860 using(c1); + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (hash_issue_15860.c1 = cte1.c1) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Subquery Scan on cte1 + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(9 rows) + +-- Replicated join Hashed, Replicated is not ok to replicate +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join hash_issue_15860 using(c1); + QUERY PLAN +--------------------------------------------------------------- + Hash Right Join + Hash Cond: (hash_issue_15860.c1 = rpt_issue_15860.c1) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(9 rows) + +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join hash_issue_15860 using(c1); + QUERY PLAN +--------------------------------------------------------------- + Hash Full Join + Hash Cond: (hash_issue_15860.c1 = rpt_issue_15860.c1) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_issue_15860 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(9 rows) + +-- Replicated join Hashed, num segments are not match. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join hash_issue_15860_2_segments using(c1); + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (hash_issue_15860_2_segments.c1 = rpt_issue_15860.c1) + -> Redistribute Motion 2:3 (slice2; segments: 2) + Hash Key: hash_issue_15860_2_segments.c1 + -> Seq Scan on hash_issue_15860_2_segments + -> Hash + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(10 rows) + +-- Replicated join Strewn = Strewn. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join strewn_issue_15860 using(c1); + QUERY PLAN +----------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (strewn_issue_15860.c1 = rpt_issue_15860.c1) + -> Seq Scan on strewn_issue_15860 + -> Hash + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(8 rows) + +-- Replicated join HashedOJ = HashedOJ +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join (select * from hash_issue_15860 a full join hash_issue_15860 b using(c1)) c using(c1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((COALESCE(a.c1, b.c1)) = rpt_issue_15860.c1) + -> Hash Full Join + Hash Cond: (a.c1 = b.c1) + -> Seq Scan on hash_issue_15860 a + -> Hash + -> Seq Scan on hash_issue_15860 b + -> Hash + -> Insert on rpt_issue_15860 + -> Result + Optimizer: Postgres query optimizer +(12 rows) + +-- +-- End of Replicated join Partitioned. +-- +-- CBDB_FIXME: How to derive a plan? +-- Replicates join OuterQuery +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select ( select foo.c1 from (select * from strewn_issue_15860) foo join cte1 using(c2) where foo.c1 = hash_issue_15860.c1) from hash_issue_15860; +ERROR: could not devise a query plan for the given query (pathnode.c:275) drop schema rpt_joins cascade; -NOTICE: drop cascades to 5 other objects +NOTICE: drop cascades to 11 other objects DETAIL: drop cascades to table j1_tbl drop cascades to table j2_tbl drop cascades to table rpt_joins.t1 drop cascades to table rpt_joins.t2 drop cascades to table rpt_joins.t3 +drop cascades to table rpt_issue_15860_2_segments +drop cascades to table hash_issue_15860_2_segments +drop cascades to table rpt_issue_15860 +drop cascades to table rpt2_issue_15860 +drop cascades to table hash_issue_15860 +drop cascades to table strewn_issue_15860 diff --git a/src/test/regress/sql/rpt_joins.sql b/src/test/regress/sql/rpt_joins.sql index 8d9b587cc4e..de2c31dbe86 100644 --- a/src/test/regress/sql/rpt_joins.sql +++ b/src/test/regress/sql/rpt_joins.sql @@ -1,9 +1,11 @@ -- -- Tests for joins between replicated tables -- +-- start_ignore +create extension if not exists gp_debug_numsegments; +-- end_ignore create schema rpt_joins; -set search_path to rpt_joins; - +set search_path to rpt_joins, public; -- -- Test JOIN clauses, bellow tests are copy from tests for partitioned table -- @@ -452,4 +454,78 @@ set enable_bitmapscan to off; explain (costs off) select max(c1) from pg_class left join t_5628 on true; select max(c1) from pg_class left join t_5628 on true; + +-- +-- Writeable CTE on replicated table join with other tables. +-- See issue https://github.com/greenplum-db/gpdb/issues/15860 +-- +select gp_debug_set_create_table_default_numsegments(2); +create table rpt_issue_15860_2_segments(c1 int, c2 int) distributed replicated; +create table hash_issue_15860_2_segments(c1 int, c2 int) distributed by (c1); +select gp_debug_reset_create_table_default_numsegments(); +create table rpt_issue_15860 (c1 int, c2 int) distributed replicated; +create table rpt2_issue_15860 (c1 int, c2 int) distributed replicated; +create table hash_issue_15860(c1 int, c2 int) distributed by (c1); +create table strewn_issue_15860(c1 int, c2 int) distributed randomly; + +insert into rpt2_issue_15860 values (1, 2), (2, 3); +insert into rpt_issue_15860_2_segments values (1, 2), (2, 3); +analyze rpt_issue_15860; +analyze rpt2_issue_15860; +analyze hash_issue_15860; +analyze strewn_issue_15860; +analyze rpt_issue_15860_2_segments; + +-- Replicated join SegmentGeneral. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join rpt2_issue_15860 using(c1); +-- Replicated join SegmentGeneral, Replicated is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join rpt2_issue_15860 using(c1); +-- Replicated join SegmentGeneral, SegmentGeneral is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join rpt2_issue_15860 using(c1); +-- Replicated join SegmentGeneral, both are not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join rpt2_issue_15860 using(c1); +-- Replicated join SegmentGeneral, num segments are not matched. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join rpt_issue_15860_2_segments using(c1); + +-- Replicated join General. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join generate_series(1, 5) i on i= cte1.c1 ; +-- Replicated join General, Replicated is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join generate_series(1, 5) i on i= cte1.c1 ; +-- Replicated join General, General is not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join generate_series(1, 5) i on i= cte1.c1 ; +-- Replicated join General, both are not not ok to replicate. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join generate_series(1, 5) i on i= cte1.c1 ; + +-- Replicate join SingleQE. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join (select count(*) as c from hash_issue_15860) a on a.c = cte1.c1; + +-- Replicate join Entry. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join gp_segment_configuration g on g.dbid = cte1.c1; + +-- +-- Begin of Replicated join Partitioned. +-- +-- Replicated join Hashed. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join hash_issue_15860 using(c1); +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 right join hash_issue_15860 using(c1); +-- Replicated join Hashed, Replicated is not ok to replicate +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 left join hash_issue_15860 using(c1); +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 full join hash_issue_15860 using(c1); +-- Replicated join Hashed, num segments are not match. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join hash_issue_15860_2_segments using(c1); +-- Replicated join Strewn = Strewn. +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join strewn_issue_15860 using(c1); +-- Replicated join HashedOJ = HashedOJ +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select * from cte1 join (select * from hash_issue_15860 a full join hash_issue_15860 b using(c1)) c using(c1); +-- +-- End of Replicated join Partitioned. +-- + +-- CBDB_FIXME: How to derive a plan? +-- Replicates join OuterQuery +explain(costs off) with cte1 as (insert into rpt_issue_15860 values (1, 2) returning *) select ( select foo.c1 from (select * from strewn_issue_15860) foo join cte1 using(c2) where foo.c1 = hash_issue_15860.c1) from hash_issue_15860; + drop schema rpt_joins cascade;