From bf779186482875222d0f13aa9749e6ea13626016 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Thu, 20 Mar 2025 09:55:25 -0700 Subject: [PATCH 01/10] Add query rewrite for boolean queries with must_not range queries Signed-off-by: Peter Alfonsi --- .../index/query/BoolQueryBuilder.java | 52 ++++++++++++- .../index/query/RangeQueryBuilder.java | 40 ++++++++++ .../index/query/BoolQueryBuilderTests.java | 75 +++++++++++++++++++ 3 files changed, 166 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index 58009f055650b..2d0a8c597cf1e 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -48,10 +48,13 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.function.Consumer; import java.util.stream.Stream; @@ -393,9 +396,13 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws return any.get(); } + changed |= rewriteMustNotRangeClausesToShould(newBuilder); + if (changed) { newBuilder.adjustPureNegative = adjustPureNegative; - newBuilder.minimumShouldMatch = minimumShouldMatch; + if (minimumShouldMatch != null) { + newBuilder.minimumShouldMatch = minimumShouldMatch; + } newBuilder.boost(boost()); newBuilder.queryName(queryName()); return newBuilder; @@ -460,4 +467,47 @@ public void visit(QueryBuilderVisitor visitor) { } + private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder) { + // If there is a range query on a given field in a must_not clause, it's more performant to execute it as + // multiple should clauses representing everything outside the target range. + + boolean changed = false; + // For now, only handle the case where there's exactly 1 range query for this field. + Map fieldCounts = new HashMap<>(); + Set rangeQueries = new HashSet<>(); + for (QueryBuilder clause : mustNotClauses) { + if (clause instanceof RangeQueryBuilder rq) { + fieldCounts.merge(rq.fieldName(), 1, Integer::sum); + rangeQueries.add(rq); + } + } + + for (RangeQueryBuilder rq : rangeQueries) { + String fieldName = rq.fieldName(); + if (fieldCounts.getOrDefault(fieldName, 0) == 1) { + List complement = rq.getComplement(); + if (complement != null) { + BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder(); + nestedBoolQuery.minimumShouldMatch(1); + for (RangeQueryBuilder complementComponent : complement) { + nestedBoolQuery.should(complementComponent); + } + newBuilder.must(nestedBoolQuery); + newBuilder.mustNotClauses.remove(rq); + changed = true; + } + } + } + + if (minimumShouldMatch == null && changed) { + if ((!shouldClauses.isEmpty()) && mustClauses.isEmpty() && filterClauses.isEmpty()) { + // If there were originally should clauses and no must/filter clauses, null minimumShouldMatch is set to a default of 1 + // within Lucene. + // But if there was originally a must or filter clause, the default is 0. + // If we added a must clause due to this rewrite, we should respect what the original default would have been. + newBuilder.minimumShouldMatch(1); + } + } + return changed; + } } diff --git a/server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java index fdbef2c732361..1ccae5cf37479 100644 --- a/server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java @@ -51,6 +51,8 @@ import java.io.IOException; import java.time.DateTimeException; import java.time.ZoneId; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; /** @@ -542,4 +544,42 @@ protected boolean doEquals(RangeQueryBuilder other) { && Objects.equals(includeUpper, other.includeUpper) && Objects.equals(format, other.format); } + + /** + * Returns a list of RangeQueryBuilder whose elements, when combined, form the complement of this range query. + * May be null. + * @return the complement + */ + public List getComplement() { + if (relation != null && relation != ShapeRelation.INTERSECTS) { + return null; + } + List complement = new ArrayList<>(); + if (from != null) { + RangeQueryBuilder belowRange = new RangeQueryBuilder(fieldName); + belowRange.to(from); + belowRange.includeUpper(!includeLower); + complement.add(belowRange); + } + + if (to != null) { + RangeQueryBuilder aboveRange = new RangeQueryBuilder(fieldName); + aboveRange.from(to); + aboveRange.includeLower(!includeUpper); + complement.add(aboveRange); + } + + if (format != null) { + for (RangeQueryBuilder rq : complement) { + rq.format(format); + } + } + if (timeZone != null) { + for (RangeQueryBuilder rq : complement) { + rq.timeZone = timeZone; + } + } + + return complement; + } } diff --git a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java index f3de666c52932..3bfc24ba5a450 100644 --- a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java @@ -497,4 +497,79 @@ public void testVisit() { assertEquals(0, set.size()); } + + public void testOneMustNotRangeRewritten() throws Exception { + int from = 10; + int to = 20; + for (boolean includeLower : new boolean[] { true, false }) { + for (boolean includeUpper : new boolean[] { true, false }) { + BoolQueryBuilder qb = new BoolQueryBuilder(); + QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, to, includeLower, includeUpper); + qb.mustNot(rq); + + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + assertFalse(rewritten.mustNot().contains(rq)); + + QueryBuilder expectedLowerQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, !includeLower); + QueryBuilder expectedUpperQuery = getRangeQueryBuilder(INT_FIELD_NAME, to, null, !includeUpper, true); + assertEquals(1, rewritten.must().size()); + + BoolQueryBuilder nestedBoolQuery = (BoolQueryBuilder) rewritten.must().get(0); + assertEquals(2, nestedBoolQuery.should().size()); + assertEquals("1", nestedBoolQuery.minimumShouldMatch()); + assertTrue(nestedBoolQuery.should().contains(expectedLowerQuery)); + assertTrue(nestedBoolQuery.should().contains(expectedUpperQuery)); + } + } + } + + public void testOneSingleEndedMustNotRangeRewritten() throws Exception { + // Test a must_not range query with only one endpoint is rewritten correctly + int from = 10; + BoolQueryBuilder qb = new BoolQueryBuilder(); + QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, null, false, false); + qb.mustNot(rq); + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + assertFalse(rewritten.mustNot().contains(rq)); + + QueryBuilder expectedQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, true); + assertEquals(1, rewritten.must().size()); + BoolQueryBuilder nestedBoolQuery = (BoolQueryBuilder) rewritten.must().get(0); + assertEquals(1, nestedBoolQuery.should().size()); + assertTrue(nestedBoolQuery.should().contains(expectedQuery)); + assertEquals("1", nestedBoolQuery.minimumShouldMatch()); + } + + public void testMultipleMustNotRangesNotRewritten() throws Exception { + BoolQueryBuilder qb = new BoolQueryBuilder(); + // Test a field with two ranges is not rewritten + QueryBuilder rq1of2 = new RangeQueryBuilder(INT_RANGE_FIELD_NAME).gt(10).lt(20); + QueryBuilder rq2of2 = new RangeQueryBuilder(INT_RANGE_FIELD_NAME).gt(30).lt(40); + qb.mustNot(rq1of2); + qb.mustNot(rq2of2); + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + + assertTrue(rewritten.mustNot().contains(rq1of2)); + assertTrue(rewritten.mustNot().contains(rq2of2)); + assertEquals(0, rewritten.should().size()); + } + + private QueryBuilder getRangeQueryBuilder(String fieldName, Integer lower, Integer upper, boolean includeLower, boolean includeUpper) { + RangeQueryBuilder rq = new RangeQueryBuilder(fieldName); + if (lower != null) { + if (includeLower) { + rq.gte(lower); + } else { + rq.gt(lower); + } + } + if (upper != null) { + if (includeUpper) { + rq.lte(upper); + } else { + rq.lt(upper); + } + } + return rq; + } } From 6b67b4aefa7b6034f29f65f92d1494ecc4aa9099 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Thu, 20 Mar 2025 09:58:35 -0700 Subject: [PATCH 02/10] add ITs Signed-off-by: Peter Alfonsi --- .../search/query/BooleanQueryIT.java | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java new file mode 100644 index 0000000000000..9b781e2be8070 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java @@ -0,0 +1,165 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.opensearch.common.settings.Settings; +import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase; + +import java.util.Arrays; +import java.util.Collection; + +import static org.opensearch.index.query.QueryBuilders.boolQuery; +import static org.opensearch.index.query.QueryBuilders.rangeQuery; +import static org.opensearch.index.query.QueryBuilders.termQuery; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; + +public class BooleanQueryIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase { + + public BooleanQueryIT(Settings staticSettings) { + super(staticSettings); + } + + @ParametersFactory + public static Collection parameters() { + return Arrays.asList( + new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, + new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + ); + } + + public void testMustNotRangeRewrite() throws Exception { + String intField = "int_field"; + String termField1 = "term_field_1"; + String termField2 = "term_field_2"; + // If we don't explicitly set this mapping, term_field_2 is not correctly mapped as a keyword field and queries don't work as + // expected + createIndex( + "test", + Settings.EMPTY, + "{\"properties\":{\"int_field\":{\"type\": \"integer\"},\"term_field_1\":{\"type\": \"keyword\"},\"term_field_2\":{\"type\": \"keyword\"}}}}" + ); + int numDocs = 100; + + for (int i = 0; i < numDocs; i++) { + String termValue1 = "odd"; + String termValue2 = "A"; + if (i % 2 == 0) { + termValue1 = "even"; + } + if (i >= numDocs / 2) { + termValue2 = "B"; + } + client().prepareIndex("test") + .setId(Integer.toString(i)) + .setSource(intField, i, termField1, termValue1, termField2, termValue2) + .get(); + } + ensureGreen(); + waitForRelocation(); + forceMerge(); + refresh(); + + int lt = 80; + int gte = 20; + int expectedHitCount = numDocs - (lt - gte); + + assertHitCount( + client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(), + expectedHitCount + ); + + assertHitCount( + client().prepareSearch() + .setQuery(boolQuery().must(termQuery(termField1, "even")).mustNot(rangeQuery(intField).lt(lt).gte(gte))) + .get(), + expectedHitCount / 2 + ); + + // Test with preexisting should fields, to ensure the original default minimumShouldMatch is respected + // once we add a must clause during rewrite, even if minimumShouldMatch is not explicitly set + assertHitCount( + client().prepareSearch().setQuery(boolQuery().should(termQuery(termField1, "even")).should(termQuery(termField2, "B"))).get(), + 3 * numDocs / 4 + ); + assertHitCount( + client().prepareSearch() + .setQuery( + boolQuery().should(termQuery(termField1, "even")) + .should(termQuery(termField2, "A")) + .mustNot(rangeQuery(intField).lt(lt).gte(gte)) + ) + .get(), + 3 * expectedHitCount / 4 + ); + + assertHitCount(client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocs * 2))).get(), 0); + } + + public void testMustNotDateRangeRewrite() throws Exception { + int minDay = 10; + int maxDay = 31; + String dateField = "date_field"; + createIndex("test"); + for (int day = minDay; day <= maxDay; day++) { + client().prepareIndex("test").setSource(dateField, getDate(day, 0)).get(); + } + ensureGreen(); + waitForRelocation(); + forceMerge(); + refresh(); + + int minExcludedDay = 15; + int maxExcludedDay = 25; + int expectedHitCount = maxDay + 1 - minDay - (maxExcludedDay - minExcludedDay + 1); + + assertHitCount( + client().prepareSearch("test") + .setQuery(boolQuery().mustNot(rangeQuery(dateField).gte(getDate(minExcludedDay, 0)).lte(getDate(maxExcludedDay, 0)))) + .get(), + expectedHitCount + ); + } + + public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception { + // Index a document for each hour of 1/1 in UTC. Then, do a boolean query excluding 1/1 for UTC+3. + // If the time zone is respected by the rewrite, we should see 3 matching documents, + // as there will be 3 hours that are 1/1 in UTC but not UTC+3. + + String dateField = "date_field"; + createIndex("test"); + String format = "strict_date_hour_minute_second_fraction"; + + for (int hour = 0; hour < 24; hour++) { + client().prepareIndex("test").setSource(dateField, getDate(1, hour)).get(); + } + ensureGreen(); + waitForRelocation(); + forceMerge(); + refresh(); + + int zoneOffset = 3; + assertHitCount( + client().prepareSearch("test") + .setQuery( + boolQuery().mustNot( + rangeQuery(dateField).gte(getDate(1, 0)).lte(getDate(1, 23)).timeZone("+" + zoneOffset).format(format) + ) + ) + .get(), + zoneOffset + ); + } + + private String getDate(int day, int hour) { + return "2016-01-" + String.format("%02d", day) + "T" + String.format("%02d", hour) + ":00:00.000"; + } +} From d9eee10fe9c3e443761fbc49175a1fd9e4d7a10f Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 21 Mar 2025 13:52:57 -0700 Subject: [PATCH 03/10] changelog Signed-off-by: Peter Alfonsi --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd8d2c427b29..773015ba56c26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Rule Based Auto-tagging] Add rule schema for auto tagging ([#17238](https://github.com/opensearch-project/OpenSearch/pull/17238)) - Renaming the node role search to warm ([#17573](https://github.com/opensearch-project/OpenSearch/pull/17573)) - Introduce a new search node role to hold search only shards ([#17620](https://github.com/opensearch-project/OpenSearch/pull/17620)) -- Fix systemd integTest on deb regarding path ownership check ([#17641](https://github.com/opensearch-project/OpenSearch/pull/17641)) +- Fix systemd integTest on deb regarding path ownership check ([#17641](https://github.com/opensearch-project/OpenSearch/pull/17641)) - Add dfs transformation function in XContentMapValues ([#17612](https://github.com/opensearch-project/OpenSearch/pull/17612)) +- Add BooleanQuery rewrite for must_not RangeQuery clauses ([#17655](https://github.com/opensearch-project/OpenSearch/pull/17655)) ### Changed - Migrate BC libs to their FIPS counterparts ([#14912](https://github.com/opensearch-project/OpenSearch/pull/14912)) From 25367bb6fedbeae5e6b6ed1ac049ee9842abe3b6 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 21 Mar 2025 14:46:06 -0700 Subject: [PATCH 04/10] Fix forbiddenApis Signed-off-by: Peter Alfonsi --- .../org/opensearch/search/query/BooleanQueryIT.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java index 9b781e2be8070..9647cde4fe5e5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java @@ -159,7 +159,16 @@ public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception ); } + private String padZeros(int value, int length) { + // String.format() not allowed + String ret = Integer.toString(value); + if (ret.length() < length) { + ret = "0".repeat(length - ret.length()) + ret; + } + return ret; + } + private String getDate(int day, int hour) { - return "2016-01-" + String.format("%02d", day) + "T" + String.format("%02d", hour) + ":00:00.000"; + return "2016-01-" + padZeros(day, 2) + "T" + padZeros(hour, 2) + ":00:00.000"; } } From 24a65614b4d07e0a21a6d525e81b3e31d5506a16 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Thu, 27 Mar 2025 12:58:01 -0700 Subject: [PATCH 05/10] Adds case for fields that have != 1 values per doc Signed-off-by: Peter Alfonsi --- .../search/query/BooleanQueryIT.java | 85 +++++++++++++++++++ .../index/query/BoolQueryBuilder.java | 64 +++++++++++--- 2 files changed, 138 insertions(+), 11 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java index 9647cde4fe5e5..25b0d1b9e7245 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java @@ -17,6 +17,7 @@ import java.util.Collection; import static org.opensearch.index.query.QueryBuilders.boolQuery; +import static org.opensearch.index.query.QueryBuilders.matchAllQuery; import static org.opensearch.index.query.QueryBuilders.rangeQuery; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; @@ -159,6 +160,90 @@ public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception ); } + public void testMustNotRangeRewriteWithMissingValues() throws Exception { + // This test passes because the rewrite is skipped when not all docs have exactly 1 value + String intField = "int_field"; + String termField = "term_field"; + createIndex("test"); + int numDocs = 100; + int numDocsMissingIntValues = 20; + + for (int i = 0; i < numDocs; i++) { + String termValue = "odd"; + if (i % 2 == 0) { + termValue = "even"; + } + + if (i >= numDocsMissingIntValues) { + client().prepareIndex("test") + .setId(Integer.toString(i)) + .setSource(intField, i, termField, termValue) + .get(); + } else { + client().prepareIndex("test") + .setId(Integer.toString(i)) + .setSource(termField, termValue) + .get(); + } + } + ensureGreen(); + waitForRelocation(); + forceMerge(); + refresh(); + + // Search excluding range 30 to 50 + + int lt = 50; + int gte = 30; + int expectedHitCount = numDocs - (lt - gte); // Expected hit count includes documents missing this value + + assertHitCount( + client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(), + expectedHitCount + ); + } + + public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception { + // This test passes because the rewrite is skipped when not all docs have exactly 1 value + String intField = "int_field"; + createIndex("test"); + int numDocs = 100; + int numDocsWithExtraValue = 20; + int extraValue = -1; + + for (int i = 0; i < numDocs; i++) { + if (i >= numDocsWithExtraValue) { + client().prepareIndex("test") + .setId(Integer.toString(i)) + .setSource(intField, new int[]{extraValue, i}) + .get(); + } else { + client().prepareIndex("test") + .setId(Integer.toString(i)) + .setSource(intField, i) + .get(); + } + } + ensureGreen(); + waitForRelocation(); + forceMerge(); + refresh(); + + // Range queries will match if ANY of the doc's values are within the range. + // So if we exclude the range 0 to 20, we shouldn't see any of those documents returned, + // even though they also have a value -1 which is not excluded. + + int expectedHitCount = numDocs - numDocsWithExtraValue; + assertHitCount( + client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocsWithExtraValue).gte(0))).get(), + expectedHitCount + ); + assertHitCount( + client().prepareSearch().setQuery(matchAllQuery()).get(), + numDocs + ); + } + private String padZeros(int value, int length) { // String.format() not allowed String ret = Integer.toString(value); diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index 2d0a8c597cf1e..fc8b2e1b113ac 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -32,9 +32,13 @@ package org.opensearch.index.query; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.opensearch.common.lucene.search.Queries; @@ -45,6 +49,7 @@ import org.opensearch.core.xcontent.ObjectParser; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.search.internal.ContextIndexSearcher; import java.io.IOException; import java.util.ArrayList; @@ -396,7 +401,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws return any.get(); } - changed |= rewriteMustNotRangeClausesToShould(newBuilder); + changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext); if (changed) { newBuilder.adjustPureNegative = adjustPureNegative; @@ -467,10 +472,16 @@ public void visit(QueryBuilderVisitor visitor) { } - private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder) { + private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) { // If there is a range query on a given field in a must_not clause, it's more performant to execute it as // multiple should clauses representing everything outside the target range. + // First check if we can get the individual LeafContexts. If we can't, we can't proceed with the rewrite, since we can't confirm every doc has exactly 1 value for this field. + List leafReaderContexts = getLeafReaderContexts(queryRewriteContext); + if (leafReaderContexts == null || leafReaderContexts.isEmpty()) { + return false; + } + boolean changed = false; // For now, only handle the case where there's exactly 1 range query for this field. Map fieldCounts = new HashMap<>(); @@ -485,16 +496,19 @@ private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder) for (RangeQueryBuilder rq : rangeQueries) { String fieldName = rq.fieldName(); if (fieldCounts.getOrDefault(fieldName, 0) == 1) { - List complement = rq.getComplement(); - if (complement != null) { - BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder(); - nestedBoolQuery.minimumShouldMatch(1); - for (RangeQueryBuilder complementComponent : complement) { - nestedBoolQuery.should(complementComponent); + // Check that all docs on this field have exactly 1 value, otherwise we can't perform this rewrite + if (checkAllDocsHaveOneValue(leafReaderContexts, fieldName)) { + List complement = rq.getComplement(); + if (complement != null) { + BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder(); + nestedBoolQuery.minimumShouldMatch(1); + for (RangeQueryBuilder complementComponent : complement) { + nestedBoolQuery.should(complementComponent); + } + newBuilder.must(nestedBoolQuery); + newBuilder.mustNotClauses.remove(rq); + changed = true; } - newBuilder.must(nestedBoolQuery); - newBuilder.mustNotClauses.remove(rq); - changed = true; } } } @@ -510,4 +524,32 @@ private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder) } return changed; } + + private List getLeafReaderContexts(QueryRewriteContext queryRewriteContext) { + if (queryRewriteContext == null) return null; + QueryShardContext shardContext = queryRewriteContext.convertToShardContext(); + if (shardContext == null) return null; + IndexSearcher indexSearcher = shardContext.searcher(); + if ((indexSearcher instanceof ContextIndexSearcher cis)) { + return cis.getLeafContexts(); + } + return null; + } + + private boolean checkAllDocsHaveOneValue(List contexts, String fieldName) { + for (LeafReaderContext lrc : contexts) { + PointValues values; + try { + LeafReader reader = lrc.reader(); + values = reader.getPointValues(fieldName); // TODO: Is this an expensive operation?? I don't think it is since PointRangeQuery does it a few times... but not sure. + if (!(values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size())) { + return false; + } + } catch (IOException e) { + // If we can't get PointValues to check on the number of values per doc, assume the query is ineligible + return false; + } + } + return true; + } } From bb3243d1e4468ecae98c874fd4570c7f6e303faf Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Thu, 17 Apr 2025 13:28:59 -0700 Subject: [PATCH 06/10] fix UTs Signed-off-by: Peter Alfonsi --- .../search/query/BooleanQueryIT.java | 25 +---- .../index/query/BoolQueryBuilder.java | 7 +- .../index/query/BoolQueryBuilderTests.java | 101 +++++++++++++++++- 3 files changed, 105 insertions(+), 28 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java index 25b0d1b9e7245..b7a83ade882dc 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java @@ -175,15 +175,9 @@ public void testMustNotRangeRewriteWithMissingValues() throws Exception { } if (i >= numDocsMissingIntValues) { - client().prepareIndex("test") - .setId(Integer.toString(i)) - .setSource(intField, i, termField, termValue) - .get(); + client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i, termField, termValue).get(); } else { - client().prepareIndex("test") - .setId(Integer.toString(i)) - .setSource(termField, termValue) - .get(); + client().prepareIndex("test").setId(Integer.toString(i)).setSource(termField, termValue).get(); } } ensureGreen(); @@ -213,15 +207,9 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception { for (int i = 0; i < numDocs; i++) { if (i >= numDocsWithExtraValue) { - client().prepareIndex("test") - .setId(Integer.toString(i)) - .setSource(intField, new int[]{extraValue, i}) - .get(); + client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, new int[] { extraValue, i }).get(); } else { - client().prepareIndex("test") - .setId(Integer.toString(i)) - .setSource(intField, i) - .get(); + client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get(); } } ensureGreen(); @@ -238,10 +226,7 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception { client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocsWithExtraValue).gte(0))).get(), expectedHitCount ); - assertHitCount( - client().prepareSearch().setQuery(matchAllQuery()).get(), - numDocs - ); + assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs); } private String padZeros(int value, int length) { diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index fc8b2e1b113ac..e5ed2fb38e1c9 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -476,7 +476,8 @@ private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder, // If there is a range query on a given field in a must_not clause, it's more performant to execute it as // multiple should clauses representing everything outside the target range. - // First check if we can get the individual LeafContexts. If we can't, we can't proceed with the rewrite, since we can't confirm every doc has exactly 1 value for this field. + // First check if we can get the individual LeafContexts. If we can't, we can't proceed with the rewrite, since we can't confirm + // every doc has exactly 1 value for this field. List leafReaderContexts = getLeafReaderContexts(queryRewriteContext); if (leafReaderContexts == null || leafReaderContexts.isEmpty()) { return false; @@ -541,8 +542,8 @@ private boolean checkAllDocsHaveOneValue(List contexts, Strin PointValues values; try { LeafReader reader = lrc.reader(); - values = reader.getPointValues(fieldName); // TODO: Is this an expensive operation?? I don't think it is since PointRangeQuery does it a few times... but not sure. - if (!(values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size())) { + values = reader.getPointValues(fieldName); + if (values == null || !(values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size())) { return false; } } catch (IOException e) { diff --git a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java index a15208fe73e96..f08b7786c22af 100644 --- a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java @@ -32,11 +32,19 @@ package org.opensearch.index.query; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.opensearch.common.util.io.IOUtils; import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; @@ -44,6 +52,8 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.approximate.ApproximateMatchAllQuery; import org.opensearch.search.approximate.ApproximateScoreQuery; +import org.opensearch.search.internal.ContextIndexSearcher; +import org.opensearch.search.internal.SearchContext; import org.opensearch.test.AbstractQueryTestCase; import org.hamcrest.Matchers; @@ -62,6 +72,7 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; +import static org.mockito.Mockito.mock; public class BoolQueryBuilderTests extends AbstractQueryTestCase { @Override @@ -508,13 +519,19 @@ public void testVisit() { public void testOneMustNotRangeRewritten() throws Exception { int from = 10; int to = 20; + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); + addDocument(w, INT_FIELD_NAME, 1); + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = getIndexSearcher(reader); + for (boolean includeLower : new boolean[] { true, false }) { for (boolean includeUpper : new boolean[] { true, false }) { BoolQueryBuilder qb = new BoolQueryBuilder(); QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, to, includeLower, includeUpper); qb.mustNot(rq); - BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext(searcher)); assertFalse(rewritten.mustNot().contains(rq)); QueryBuilder expectedLowerQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, !includeLower); @@ -528,15 +545,22 @@ public void testOneMustNotRangeRewritten() throws Exception { assertTrue(nestedBoolQuery.should().contains(expectedUpperQuery)); } } + IOUtils.close(w, reader, dir); } public void testOneSingleEndedMustNotRangeRewritten() throws Exception { // Test a must_not range query with only one endpoint is rewritten correctly int from = 10; + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); + addDocument(w, INT_FIELD_NAME, 1); + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = getIndexSearcher(reader); + BoolQueryBuilder qb = new BoolQueryBuilder(); QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, null, false, false); qb.mustNot(rq); - BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext(searcher)); assertFalse(rewritten.mustNot().contains(rq)); QueryBuilder expectedQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, true); @@ -545,20 +569,65 @@ public void testOneSingleEndedMustNotRangeRewritten() throws Exception { assertEquals(1, nestedBoolQuery.should().size()); assertTrue(nestedBoolQuery.should().contains(expectedQuery)); assertEquals("1", nestedBoolQuery.minimumShouldMatch()); + + IOUtils.close(w, reader, dir); } public void testMultipleMustNotRangesNotRewritten() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); + addDocument(w, INT_FIELD_NAME, 1); + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = getIndexSearcher(reader); + BoolQueryBuilder qb = new BoolQueryBuilder(); // Test a field with two ranges is not rewritten - QueryBuilder rq1of2 = new RangeQueryBuilder(INT_RANGE_FIELD_NAME).gt(10).lt(20); - QueryBuilder rq2of2 = new RangeQueryBuilder(INT_RANGE_FIELD_NAME).gt(30).lt(40); + QueryBuilder rq1of2 = new RangeQueryBuilder(INT_FIELD_NAME).gt(10).lt(20); + QueryBuilder rq2of2 = new RangeQueryBuilder(INT_FIELD_NAME).gt(30).lt(40); qb.mustNot(rq1of2); qb.mustNot(rq2of2); - BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext(searcher)); assertTrue(rewritten.mustNot().contains(rq1of2)); assertTrue(rewritten.mustNot().contains(rq2of2)); assertEquals(0, rewritten.should().size()); + + IOUtils.close(w, reader, dir); + } + + public void testMustNotRewriteDisabledWithoutLeafReaders() throws Exception { + // If we don't have access the LeafReaderContexts, don't perform the must_not rewrite + int from = 10; + int to = 20; + + BoolQueryBuilder qb = new BoolQueryBuilder(); + QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, to, true, true); + qb.mustNot(rq); + + // Context has no searcher available --> no leaf readers available + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + assertTrue(rewritten.mustNot().contains(rq)); + } + + public void testMustNotRewriteDisabledWithoutExactlyOneValuePerDoc() throws Exception { + // If the PointValues returned don't show exactly 1 value per doc, don't perform the must_not rewrite + int from = 10; + int to = 20; + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); + addDocument(w, INT_FIELD_NAME, 1, 2, 3); // This doc will have 3 values, so the rewrite shouldn't happen + addDocument(w, INT_FIELD_NAME, 1); + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = getIndexSearcher(reader); + + BoolQueryBuilder qb = new BoolQueryBuilder(); + QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, to, true, true); + qb.mustNot(rq); + + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext(searcher)); + assertTrue(rewritten.mustNot().contains(rq)); + + IOUtils.close(w, reader, dir); } private QueryBuilder getRangeQueryBuilder(String fieldName, Integer lower, Integer upper, boolean includeLower, boolean includeUpper) { @@ -579,4 +648,26 @@ private QueryBuilder getRangeQueryBuilder(String fieldName, Integer lower, Integ } return rq; } + + private void addDocument(IndexWriter w, String fieldName, int... values) throws Exception { + Document d = new Document(); + for (int value : values) { + d.add(new IntPoint(fieldName, value)); + } + w.addDocument(d); + w.commit(); + } + + private IndexSearcher getIndexSearcher(DirectoryReader reader) throws Exception { + SearchContext searchContext = mock(SearchContext.class); + return new ContextIndexSearcher( + reader, + IndexSearcher.getDefaultSimilarity(), + IndexSearcher.getDefaultQueryCache(), + IndexSearcher.getDefaultQueryCachingPolicy(), + true, + null, + searchContext + ); + } } From fc49ddd528ba787a35468a0f60e9885c8bad9a75 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Thu, 17 Apr 2025 13:53:51 -0700 Subject: [PATCH 07/10] fix >1 value IT Signed-off-by: Peter Alfonsi --- .../java/org/opensearch/search/query/BooleanQueryIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java index b7a83ade882dc..ff55889e041d5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java @@ -206,7 +206,7 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception { int extraValue = -1; for (int i = 0; i < numDocs; i++) { - if (i >= numDocsWithExtraValue) { + if (i < numDocsWithExtraValue) { client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, new int[] { extraValue, i }).get(); } else { client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get(); From 4b00d3603e8f79ecdc367a2b79d48b3bbd4b2539 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 19 May 2025 12:45:45 -0700 Subject: [PATCH 08/10] Address LeafReaderContext comment Signed-off-by: Peter Alfonsi --- .../java/org/opensearch/index/query/BoolQueryBuilder.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index e5ed2fb38e1c9..3db071e6cc9de 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -531,10 +531,8 @@ private List getLeafReaderContexts(QueryRewriteContext queryR QueryShardContext shardContext = queryRewriteContext.convertToShardContext(); if (shardContext == null) return null; IndexSearcher indexSearcher = shardContext.searcher(); - if ((indexSearcher instanceof ContextIndexSearcher cis)) { - return cis.getLeafContexts(); - } - return null; + if (indexSearcher == null) return null; + return indexSearcher.getIndexReader().leaves(); } private boolean checkAllDocsHaveOneValue(List contexts, String fieldName) { From c273159c3cd922c63dd19c274086c17f6bb1be92 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 19 May 2025 13:23:54 -0700 Subject: [PATCH 09/10] spotless apply Signed-off-by: Peter Alfonsi --- .../main/java/org/opensearch/index/query/BoolQueryBuilder.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index 3db071e6cc9de..85b555cc612ab 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -49,7 +49,6 @@ import org.opensearch.core.xcontent.ObjectParser; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.search.internal.ContextIndexSearcher; import java.io.IOException; import java.util.ArrayList; From 58d9a45ed6557ce90450a8bb911d3b0d514d1ca9 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Tue, 27 May 2025 10:11:10 -0700 Subject: [PATCH 10/10] rerun gradle Signed-off-by: Peter Alfonsi