diff --git a/src/backend/cdb/cdbsubselect.c b/src/backend/cdb/cdbsubselect.c index 39d70062c08..17ce5f82372 100644 --- a/src/backend/cdb/cdbsubselect.c +++ b/src/backend/cdb/cdbsubselect.c @@ -48,6 +48,7 @@ typedef struct NonNullableVarsContext { Query *query; /* Query in question. */ List *nonNullableVars; /* Known non-nullable vars */ + List *nullableVars; /* Identify vars could be nullable, must be eliminated from nonNullableVars */ } NonNullableVarsContext; @@ -1039,6 +1040,7 @@ cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context) c1.query = context->query; c1.nonNullableVars = NIL; + c1.nullableVars = NIL; ListCell *lc = NULL; int orArgNum = 0; @@ -1050,21 +1052,64 @@ cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context) c2.query = context->query; c2.nonNullableVars = NIL; - expression_tree_walker(orArg, cdb_find_nonnullable_vars_walker, &c2); + c2.nullableVars = NIL; + + /* + * If we are NullTest, expression_tree_walker will iterate the tree using + * (NullTest*)Node->arg. + * Ex, a qual like: c1 is null or c1 > 0 + * NullTest [nulltesttype=IS_NULL argisrow=false location=102] + * [arg] Var [varno=1 varattno=1 vartype=23 varnoold=1 varoattno=1] + * Recursive cdb_find_nonnullable_vars_walker will first check the node->arg Var and + * insert into nonNullableVars. + * That's incorrect for NullTest type IS_NULL. + * And the var under IS_NOT_NULL NullTest should be nullable. + * We should record that nullable var inside an OR expression. + * + * See issue https://github.com/greenplum-db/gpdb/issues/15662. + */ + if (IsA(orArg, NullTest)) + { + NullTest *nexpr = (NullTest *)orArg; + if (nexpr->nulltesttype != IS_NOT_NULL && IsA(nexpr->arg, Var)) + { + Var* nullableVar = (Var *)(nexpr->arg); + c1.nullableVars = list_append_unique_ptr(c1.nullableVars, nullableVar); + } + } + + /* + * In case there is NullTest under recursive OR expression + * CBDB_FIXME: this is more strict. + */ + if (IsA(orArg, BoolExpr) && (((BoolExpr *) orArg)->boolop == OR_EXPR)) + { + cdb_find_nonnullable_vars_walker(orArg, context); + } + else + expression_tree_walker(orArg, cdb_find_nonnullable_vars_walker, &c2); if (orArgNum == 0) { Assert(c1.nonNullableVars == NIL); c1.nonNullableVars = c2.nonNullableVars; + if (c1.nullableVars == NIL) + c1.nullableVars = c2.nullableVars; } else { c1.nonNullableVars = list_intersection(c1.nonNullableVars, c2.nonNullableVars); + c1.nullableVars = list_append_unique(c1.nullableVars, c2.nullableVars); } + /* + * If nullable var was found here, eliminate it to keep nonNullableVars compact. + */ + c1.nonNullableVars = list_difference(c1.nonNullableVars, c1.nullableVars); orArgNum++; } context->nonNullableVars = list_concat_unique(context->nonNullableVars, c1.nonNullableVars); + context->nullableVars = list_append_unique(context->nullableVars, c1.nullableVars); return false; } @@ -1081,6 +1126,11 @@ cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context) if (expr->nulltesttype != IS_NOT_NULL) { + if (IsA(expr->arg, Var)) + { + Var* nullableVar = (Var *)(expr->arg); + context->nullableVars = list_append_unique_ptr(context->nullableVars, nullableVar); + } return false; } @@ -1323,11 +1373,17 @@ is_exprs_nullable(Node *exprs, Query *query) NonNullableVarsContext context; context.query = query; context.nonNullableVars = NIL; + context.nullableVars = NIL; /* Find nullable vars in the jointree */ (void) expression_tree_walker((Node *) query->jointree, cdb_find_nonnullable_vars_walker, &context); + /* + * Eliminate potential nullable vars. + */ + context.nonNullableVars = list_difference(context.nonNullableVars, context.nullableVars); + return is_exprs_nullable_internal(exprs, context.nonNullableVars); } @@ -1399,7 +1455,7 @@ is_exprs_nullable_internal(Node *exprs, List *nonnullable_vars) * select c1 from t1 where c1 NOT IN (select c2 from t2); * (to) * select c1 from t1 left anti semi join (select 0 as zero, c2 from t2) foo - * ON (c1 = c2) IS NOT FALSE where zero is NULL; + * ON (c1 != c2) IS NOT FALSE where zero is NULL; * * The pseudoconstant column zero is needed to correctly pipe in the NULLs * from the subselect upstream. diff --git a/src/test/regress/expected/notin.out b/src/test/regress/expected/notin.out index d4b58dd1bfc..97ce4dce01e 100644 --- a/src/test/regress/expected/notin.out +++ b/src/test/regress/expected/notin.out @@ -1436,6 +1436,71 @@ select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is ---+--- (0 rows) +-- +-- Test left anti semi (not-in) join +-- With is null expression inside an OR expression. +-- +begin; +create table t1_lasj(c1 int) distributed by (c1); +create table t2_lasj_has_null(c1n int) distributed by (c1n); +insert into t1_lasj values (generate_series (1,10)); +insert into t2_lasj_has_null values (1), (2), (3), (null), (5), (6), (7); +analyze t1_lasj; +analyze t2_lasj_has_null; +-- null test +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null) and c1 is not null; + c1 +---- +(0 rows) + +-- null test under OR expression +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n > 0) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n > 0 or c1n is null) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n > 1) or (c1n > 0 or c1n is null)); + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null and c1n > 1) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or c1n > 1) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null) and c1 is not null; + c1 +---- +(0 rows) + +-- null test under recursive OR expression +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n != 0 and c1n > 1) or (c1n > 0 or c1n is null)) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or true) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where 2 > 1 or c1n is null or c1n is null or true) and c1 is not null; + c1 +---- +(0 rows) + +abort; reset search_path; drop schema notin cascade; NOTICE: drop cascades to 18 other objects diff --git a/src/test/regress/expected/notin_optimizer.out b/src/test/regress/expected/notin_optimizer.out index 1ad1e29427e..0b3d6ab3d5e 100644 --- a/src/test/regress/expected/notin_optimizer.out +++ b/src/test/regress/expected/notin_optimizer.out @@ -1480,6 +1480,71 @@ select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is ---+--- (0 rows) +-- +-- Test left anti semi (not-in) join +-- With is null expression inside an OR expression. +-- +begin; +create table t1_lasj(c1 int) distributed by (c1); +create table t2_lasj_has_null(c1n int) distributed by (c1n); +insert into t1_lasj values (generate_series (1,10)); +insert into t2_lasj_has_null values (1), (2), (3), (null), (5), (6), (7); +analyze t1_lasj; +analyze t2_lasj_has_null; +-- null test +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null) and c1 is not null; + c1 +---- +(0 rows) + +-- null test under OR expression +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n > 0) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n > 0 or c1n is null) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n > 1) or (c1n > 0 or c1n is null)); + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null and c1n > 1) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or c1n > 1) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null) and c1 is not null; + c1 +---- +(0 rows) + +-- null test under recursive OR expression +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n != 0 and c1n > 1) or (c1n > 0 or c1n is null)) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or true) and c1 is not null; + c1 +---- +(0 rows) + +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where 2 > 1 or c1n is null or c1n is null or true) and c1 is not null; + c1 +---- +(0 rows) + +abort; reset search_path; drop schema notin cascade; NOTICE: drop cascades to 18 other objects diff --git a/src/test/regress/sql/notin.sql b/src/test/regress/sql/notin.sql index 317aa52fd3f..73d86ad4808 100644 --- a/src/test/regress/sql/notin.sql +++ b/src/test/regress/sql/notin.sql @@ -474,6 +474,31 @@ explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930); select * from t1_12930 where (a, b) not in (select a, b from t2_12930); explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is not null; select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is not null; +-- +-- Test left anti semi (not-in) join +-- With is null expression inside an OR expression. +-- +begin; +create table t1_lasj(c1 int) distributed by (c1); +create table t2_lasj_has_null(c1n int) distributed by (c1n); +insert into t1_lasj values (generate_series (1,10)); +insert into t2_lasj_has_null values (1), (2), (3), (null), (5), (6), (7); +analyze t1_lasj; +analyze t2_lasj_has_null; +-- null test +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null) and c1 is not null; +-- null test under OR expression +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n > 0) and c1 is not null; +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n > 0 or c1n is null) and c1 is not null; +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n > 1) or (c1n > 0 or c1n is null)); +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null and c1n > 1) and c1 is not null; +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or c1n > 1) and c1 is not null; +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null) and c1 is not null; +-- null test under recursive OR expression +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n != 0 and c1n > 1) or (c1n > 0 or c1n is null)) and c1 is not null; +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or true) and c1 is not null; +select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where 2 > 1 or c1n is null or c1n is null or true) and c1 is not null; +abort; reset search_path; drop schema notin cascade;