Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 58 additions & 2 deletions src/backend/cdb/cdbsubselect.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ typedef struct NonNullableVarsContext
{
Query *query; /* Query in question. */
List *nonNullableVars; /* Known non-nullable vars */
List *nullableVars; /* Identify vars could be nullable, must be eliminated from nonNullableVars */
} NonNullableVarsContext;


Expand Down Expand Up @@ -1039,6 +1040,7 @@ cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context)

c1.query = context->query;
c1.nonNullableVars = NIL;
c1.nullableVars = NIL;
ListCell *lc = NULL;
int orArgNum = 0;

Expand All @@ -1050,21 +1052,64 @@ cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context)

c2.query = context->query;
c2.nonNullableVars = NIL;
expression_tree_walker(orArg, cdb_find_nonnullable_vars_walker, &c2);
c2.nullableVars = NIL;

/*
* If we are NullTest, expression_tree_walker will iterate the tree using
* (NullTest*)Node->arg.
* Ex, a qual like: c1 is null or c1 > 0
* NullTest [nulltesttype=IS_NULL argisrow=false location=102]
* [arg] Var [varno=1 varattno=1 vartype=23 varnoold=1 varoattno=1]
* Recursive cdb_find_nonnullable_vars_walker will first check the node->arg Var and
* insert into nonNullableVars.
* That's incorrect for NullTest type IS_NULL.
* And the var under IS_NOT_NULL NullTest should be nullable.
* We should record that nullable var inside an OR expression.
*
* See issue https://github.com/greenplum-db/gpdb/issues/15662.
*/
if (IsA(orArg, NullTest))
{
NullTest *nexpr = (NullTest *)orArg;
if (nexpr->nulltesttype != IS_NOT_NULL && IsA(nexpr->arg, Var))
{
Var* nullableVar = (Var *)(nexpr->arg);
c1.nullableVars = list_append_unique_ptr(c1.nullableVars, nullableVar);
}
}

/*
* In case there is NullTest under recursive OR expression
* CBDB_FIXME: this is more strict.
*/
if (IsA(orArg, BoolExpr) && (((BoolExpr *) orArg)->boolop == OR_EXPR))
{
cdb_find_nonnullable_vars_walker(orArg, context);
}
else
expression_tree_walker(orArg, cdb_find_nonnullable_vars_walker, &c2);

if (orArgNum == 0)
{
Assert(c1.nonNullableVars == NIL);
c1.nonNullableVars = c2.nonNullableVars;
if (c1.nullableVars == NIL)
c1.nullableVars = c2.nullableVars;
}
else
{
c1.nonNullableVars = list_intersection(c1.nonNullableVars, c2.nonNullableVars);
c1.nullableVars = list_append_unique(c1.nullableVars, c2.nullableVars);
}
/*
* If nullable var was found here, eliminate it to keep nonNullableVars compact.
*/
c1.nonNullableVars = list_difference(c1.nonNullableVars, c1.nullableVars);
orArgNum++;
}

context->nonNullableVars = list_concat_unique(context->nonNullableVars, c1.nonNullableVars);
context->nullableVars = list_append_unique(context->nullableVars, c1.nullableVars);
return false;
}

Expand All @@ -1081,6 +1126,11 @@ cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context)

if (expr->nulltesttype != IS_NOT_NULL)
{
if (IsA(expr->arg, Var))
{
Var* nullableVar = (Var *)(expr->arg);
context->nullableVars = list_append_unique_ptr(context->nullableVars, nullableVar);
}
return false;
}

Expand Down Expand Up @@ -1323,11 +1373,17 @@ is_exprs_nullable(Node *exprs, Query *query)
NonNullableVarsContext context;
context.query = query;
context.nonNullableVars = NIL;
context.nullableVars = NIL;

/* Find nullable vars in the jointree */
(void) expression_tree_walker((Node *) query->jointree,
cdb_find_nonnullable_vars_walker, &context);

/*
* Eliminate potential nullable vars.
*/
context.nonNullableVars = list_difference(context.nonNullableVars, context.nullableVars);

return is_exprs_nullable_internal(exprs, context.nonNullableVars);
}

Expand Down Expand Up @@ -1399,7 +1455,7 @@ is_exprs_nullable_internal(Node *exprs, List *nonnullable_vars)
* select c1 from t1 where c1 NOT IN (select c2 from t2);
* (to)
* select c1 from t1 left anti semi join (select 0 as zero, c2 from t2) foo
* ON (c1 = c2) IS NOT FALSE where zero is NULL;
* ON (c1 != c2) IS NOT FALSE where zero is NULL;
*
* The pseudoconstant column zero is needed to correctly pipe in the NULLs
* from the subselect upstream.
Expand Down
65 changes: 65 additions & 0 deletions src/test/regress/expected/notin.out
Original file line number Diff line number Diff line change
Expand Up @@ -1436,6 +1436,71 @@ select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is
---+---
(0 rows)

--
-- Test left anti semi (not-in) join
-- With is null expression inside an OR expression.
--
begin;
create table t1_lasj(c1 int) distributed by (c1);
create table t2_lasj_has_null(c1n int) distributed by (c1n);
insert into t1_lasj values (generate_series (1,10));
insert into t2_lasj_has_null values (1), (2), (3), (null), (5), (6), (7);
analyze t1_lasj;
analyze t2_lasj_has_null;
-- null test
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null) and c1 is not null;
c1
----
(0 rows)

-- null test under OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n > 0) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n > 0 or c1n is null) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n > 1) or (c1n > 0 or c1n is null));
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null and c1n > 1) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or c1n > 1) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null) and c1 is not null;
c1
----
(0 rows)

-- null test under recursive OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n != 0 and c1n > 1) or (c1n > 0 or c1n is null)) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or true) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where 2 > 1 or c1n is null or c1n is null or true) and c1 is not null;
c1
----
(0 rows)

abort;
reset search_path;
drop schema notin cascade;
NOTICE: drop cascades to 18 other objects
Expand Down
65 changes: 65 additions & 0 deletions src/test/regress/expected/notin_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -1480,6 +1480,71 @@ select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is
---+---
(0 rows)

--
-- Test left anti semi (not-in) join
-- With is null expression inside an OR expression.
--
begin;
create table t1_lasj(c1 int) distributed by (c1);
create table t2_lasj_has_null(c1n int) distributed by (c1n);
insert into t1_lasj values (generate_series (1,10));
insert into t2_lasj_has_null values (1), (2), (3), (null), (5), (6), (7);
analyze t1_lasj;
analyze t2_lasj_has_null;
-- null test
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null) and c1 is not null;
c1
----
(0 rows)

-- null test under OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n > 0) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n > 0 or c1n is null) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n > 1) or (c1n > 0 or c1n is null));
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null and c1n > 1) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or c1n > 1) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null) and c1 is not null;
c1
----
(0 rows)

-- null test under recursive OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n != 0 and c1n > 1) or (c1n > 0 or c1n is null)) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or true) and c1 is not null;
c1
----
(0 rows)

select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where 2 > 1 or c1n is null or c1n is null or true) and c1 is not null;
c1
----
(0 rows)

abort;
reset search_path;
drop schema notin cascade;
NOTICE: drop cascades to 18 other objects
Expand Down
25 changes: 25 additions & 0 deletions src/test/regress/sql/notin.sql
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,31 @@ explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is not null;
select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is not null;
--
-- Test left anti semi (not-in) join
-- With is null expression inside an OR expression.
--
begin;
create table t1_lasj(c1 int) distributed by (c1);
create table t2_lasj_has_null(c1n int) distributed by (c1n);
insert into t1_lasj values (generate_series (1,10));
insert into t2_lasj_has_null values (1), (2), (3), (null), (5), (6), (7);
analyze t1_lasj;
analyze t2_lasj_has_null;
-- null test
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null) and c1 is not null;
-- null test under OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n > 0) and c1 is not null;
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n > 0 or c1n is null) and c1 is not null;
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n > 1) or (c1n > 0 or c1n is null));
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null and c1n > 1) and c1 is not null;
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or c1n > 1) and c1 is not null;
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null) and c1 is not null;
-- null test under recursive OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n != 0 and c1n > 1) or (c1n > 0 or c1n is null)) and c1 is not null;
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or true) and c1 is not null;
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where 2 > 1 or c1n is null or c1n is null or true) and c1 is not null;
abort;

reset search_path;
drop schema notin cascade;