Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Use QueryCoordinatorContext for the rewrite in validate API. ([#18272](https://github.com/opensearch-project/OpenSearch/pull/18272))
- Upgrade crypto kms plugin dependencies for AWS SDK v2.x. ([#18268](https://github.com/opensearch-project/OpenSearch/pull/18268))
- Add support for `matched_fields` with the unified highlighter ([#18164](https://github.com/opensearch-project/OpenSearch/issues/18164))
- Add BooleanQuery rewrite for must_not RangeQuery clauses ([#17655](https://github.com/opensearch-project/OpenSearch/pull/17655))
- [repository-s3] Add support for SSE-KMS and S3 bucket owner verification ([#18312](https://github.com/opensearch-project/OpenSearch/pull/18312))
- Added File Cache Stats - Involves Block level as well as full file level stats ([#17538](https://github.com/opensearch-project/OpenSearch/issues/17479))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.query;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.opensearch.common.settings.Settings;
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;

import java.util.Arrays;
import java.util.Collection;

import static org.opensearch.index.query.QueryBuilders.boolQuery;
import static org.opensearch.index.query.QueryBuilders.matchAllQuery;
import static org.opensearch.index.query.QueryBuilders.rangeQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;

public class BooleanQueryIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {

public BooleanQueryIT(Settings staticSettings) {
super(staticSettings);
}

@ParametersFactory
public static Collection<Object[]> parameters() {
return Arrays.asList(
new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() },
new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() }
);
}

public void testMustNotRangeRewrite() throws Exception {
String intField = "int_field";
String termField1 = "term_field_1";
String termField2 = "term_field_2";
// If we don't explicitly set this mapping, term_field_2 is not correctly mapped as a keyword field and queries don't work as
// expected
createIndex(
"test",
Settings.EMPTY,
"{\"properties\":{\"int_field\":{\"type\": \"integer\"},\"term_field_1\":{\"type\": \"keyword\"},\"term_field_2\":{\"type\": \"keyword\"}}}}"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The json mapping passed here seems incorrect. The json string has an extra closing braces.

);
int numDocs = 100;

for (int i = 0; i < numDocs; i++) {
String termValue1 = "odd";
String termValue2 = "A";
if (i % 2 == 0) {
termValue1 = "even";
}
if (i >= numDocs / 2) {
termValue2 = "B";
}
client().prepareIndex("test")
.setId(Integer.toString(i))
.setSource(intField, i, termField1, termValue1, termField2, termValue2)
.get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int lt = 80;
int gte = 20;
int expectedHitCount = numDocs - (lt - gte);

assertHitCount(
client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(),
expectedHitCount
);

assertHitCount(
client().prepareSearch()
.setQuery(boolQuery().must(termQuery(termField1, "even")).mustNot(rangeQuery(intField).lt(lt).gte(gte)))
.get(),
expectedHitCount / 2
);

// Test with preexisting should fields, to ensure the original default minimumShouldMatch is respected
// once we add a must clause during rewrite, even if minimumShouldMatch is not explicitly set
assertHitCount(
client().prepareSearch().setQuery(boolQuery().should(termQuery(termField1, "even")).should(termQuery(termField2, "B"))).get(),
3 * numDocs / 4
);
assertHitCount(
client().prepareSearch()
.setQuery(
boolQuery().should(termQuery(termField1, "even"))
.should(termQuery(termField2, "A"))
.mustNot(rangeQuery(intField).lt(lt).gte(gte))
)
.get(),
3 * expectedHitCount / 4
);

assertHitCount(client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocs * 2))).get(), 0);
}

public void testMustNotDateRangeRewrite() throws Exception {
int minDay = 10;
int maxDay = 31;
String dateField = "date_field";
createIndex("test");
for (int day = minDay; day <= maxDay; day++) {
client().prepareIndex("test").setSource(dateField, getDate(day, 0)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int minExcludedDay = 15;
int maxExcludedDay = 25;
int expectedHitCount = maxDay + 1 - minDay - (maxExcludedDay - minExcludedDay + 1);

assertHitCount(
client().prepareSearch("test")
.setQuery(boolQuery().mustNot(rangeQuery(dateField).gte(getDate(minExcludedDay, 0)).lte(getDate(maxExcludedDay, 0))))
.get(),
expectedHitCount
);
}

public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception {
// Index a document for each hour of 1/1 in UTC. Then, do a boolean query excluding 1/1 for UTC+3.
// If the time zone is respected by the rewrite, we should see 3 matching documents,
// as there will be 3 hours that are 1/1 in UTC but not UTC+3.

String dateField = "date_field";
createIndex("test");
String format = "strict_date_hour_minute_second_fraction";

for (int hour = 0; hour < 24; hour++) {
client().prepareIndex("test").setSource(dateField, getDate(1, hour)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int zoneOffset = 3;
assertHitCount(
client().prepareSearch("test")
.setQuery(
boolQuery().mustNot(
rangeQuery(dateField).gte(getDate(1, 0)).lte(getDate(1, 23)).timeZone("+" + zoneOffset).format(format)
)
)
.get(),
zoneOffset
);
}

public void testMustNotRangeRewriteWithMissingValues() throws Exception {
// This test passes because the rewrite is skipped when not all docs have exactly 1 value
String intField = "int_field";
String termField = "term_field";
createIndex("test");
int numDocs = 100;
int numDocsMissingIntValues = 20;

for (int i = 0; i < numDocs; i++) {
String termValue = "odd";
if (i % 2 == 0) {
termValue = "even";
}

if (i >= numDocsMissingIntValues) {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i, termField, termValue).get();
} else {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(termField, termValue).get();
}
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

// Search excluding range 30 to 50

int lt = 50;
int gte = 30;
int expectedHitCount = numDocs - (lt - gte); // Expected hit count includes documents missing this value

assertHitCount(
client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(),
expectedHitCount
);
}

public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception {
// This test passes because the rewrite is skipped when not all docs have exactly 1 value
String intField = "int_field";
createIndex("test");
int numDocs = 100;
int numDocsWithExtraValue = 20;
int extraValue = -1;

for (int i = 0; i < numDocs; i++) {
if (i < numDocsWithExtraValue) {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, new int[] { extraValue, i }).get();
} else {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get();
}
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

// Range queries will match if ANY of the doc's values are within the range.
// So if we exclude the range 0 to 20, we shouldn't see any of those documents returned,
// even though they also have a value -1 which is not excluded.

int expectedHitCount = numDocs - numDocsWithExtraValue;
assertHitCount(
client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocsWithExtraValue).gte(0))).get(),
expectedHitCount
);
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs);
}

private String padZeros(int value, int length) {
// String.format() not allowed
String ret = Integer.toString(value);
if (ret.length() < length) {
ret = "0".repeat(length - ret.length()) + ret;
}
return ret;
}

private String getDate(int day, int hour) {
return "2016-01-" + padZeros(day, 2) + "T" + padZeros(hour, 2) + ":00:00.000";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,13 @@

package org.opensearch.index.query;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.opensearch.common.lucene.search.Queries;
Expand All @@ -48,10 +52,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Stream;

Expand Down Expand Up @@ -393,9 +400,13 @@
return any.get();
}

changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext);

if (changed) {
newBuilder.adjustPureNegative = adjustPureNegative;
newBuilder.minimumShouldMatch = minimumShouldMatch;
if (minimumShouldMatch != null) {
newBuilder.minimumShouldMatch = minimumShouldMatch;
}
newBuilder.boost(boost());
newBuilder.queryName(queryName());
return newBuilder;
Expand Down Expand Up @@ -460,4 +471,83 @@

}

/**
 * Rewrites eligible must_not range clauses into a nested must clause whose should
 * sub-clauses cover the complement of the excluded range, which is more performant
 * to execute than a negated range.
 *
 * @param newBuilder the builder being assembled by the enclosing rewrite; mutated in place
 * @param queryRewriteContext rewrite context, used to reach the shard's leaf readers
 * @return true if at least one must_not clause was rewritten
 */
private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) {
    // If there is a range query on a given field in a must_not clause, it's more performant to execute it as
    // multiple should clauses representing everything outside the target range.

    // First check if we can get the individual LeafContexts. If we can't, we can't proceed with the rewrite, since we can't confirm
    // every doc has exactly 1 value for this field.
    List<LeafReaderContext> leafReaderContexts = getLeafReaderContexts(queryRewriteContext);
    if (leafReaderContexts == null || leafReaderContexts.isEmpty()) {
        return false;
    }

    boolean changed = false;
    // For now, only handle the case where there's exactly 1 range query for this field.
    Map<String, Integer> fieldCounts = new HashMap<>();
    Set<RangeQueryBuilder> rangeQueries = new HashSet<>();
    for (QueryBuilder clause : mustNotClauses) {
        if (clause instanceof RangeQueryBuilder rq) {
            fieldCounts.merge(rq.fieldName(), 1, Integer::sum);
            rangeQueries.add(rq);
        }
    }

    for (RangeQueryBuilder rq : rangeQueries) {
        String fieldName = rq.fieldName();
        // Skip fields with more than one must_not range clause; the complements could overlap.
        if (fieldCounts.getOrDefault(fieldName, 0) == 1) {
            // Check that all docs on this field have exactly 1 value, otherwise we can't perform this rewrite
            if (checkAllDocsHaveOneValue(leafReaderContexts, fieldName)) {
                // getComplement() may return null when the complement cannot be represented; leave the clause alone then.
                List<RangeQueryBuilder> complement = rq.getComplement();
                if (complement != null) {
                    // Wrap the complement ranges in their own bool query so minimumShouldMatch=1
                    // applies only to them, not to the outer query's should clauses.
                    BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder();
                    nestedBoolQuery.minimumShouldMatch(1);
                    for (RangeQueryBuilder complementComponent : complement) {
                        nestedBoolQuery.should(complementComponent);
                    }
                    newBuilder.must(nestedBoolQuery);
                    newBuilder.mustNotClauses.remove(rq);
                    changed = true;
                }
            }
        }
    }

    if (minimumShouldMatch == null && changed) {
        if ((!shouldClauses.isEmpty()) && mustClauses.isEmpty() && filterClauses.isEmpty()) {
            // If there were originally should clauses and no must/filter clauses, null minimumShouldMatch is set to a default of 1
            // within Lucene.
            // But if there was originally a must or filter clause, the default is 0.
            // If we added a must clause due to this rewrite, we should respect what the original default would have been.
            newBuilder.minimumShouldMatch(1);
        }
    }
    return changed;
}

/**
 * Resolves the chain rewrite context -> shard context -> searcher -> leaf contexts.
 * Returns null when any link in the chain is unavailable, signalling that the
 * segments cannot be inspected from this rewrite context.
 */
private List<LeafReaderContext> getLeafReaderContexts(QueryRewriteContext queryRewriteContext) {
    if (queryRewriteContext != null) {
        QueryShardContext shardContext = queryRewriteContext.convertToShardContext();
        if (shardContext != null) {
            IndexSearcher searcher = shardContext.searcher();
            if (searcher != null) {
                return searcher.getIndexReader().leaves();
            }
        }
    }
    return null;
}

private boolean checkAllDocsHaveOneValue(List<LeafReaderContext> contexts, String fieldName) {
for (LeafReaderContext lrc : contexts) {
PointValues values;
try {
LeafReader reader = lrc.reader();
values = reader.getPointValues(fieldName);
if (values == null || !(values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size())) {
return false;
}
} catch (IOException e) {

Check warning on line 546 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L546

Added line #L546 was not covered by tests
// If we can't get PointValues to check on the number of values per doc, assume the query is ineligible
return false;

Check warning on line 548 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L548

Added line #L548 was not covered by tests
}
}
return true;
}
}
Loading
Loading