Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Use QueryCoordinatorContext for the rewrite in validate API. ([#18272](https://github.com/opensearch-project/OpenSearch/pull/18272))
- Upgrade crypto kms plugin dependencies for AWS SDK v2.x. ([#18268](https://github.com/opensearch-project/OpenSearch/pull/18268))
- Add support for `matched_fields` with the unified highlighter ([#18164](https://github.com/opensearch-project/OpenSearch/issues/18164))
- Add BooleanQuery rewrite for must_not RangeQuery clauses ([#17655](https://github.com/opensearch-project/OpenSearch/pull/17655))
- [repository-s3] Add support for SSE-KMS and S3 bucket owner verification ([#18312](https://github.com/opensearch-project/OpenSearch/pull/18312))
- Added File Cache Stats - Involves Block level as well as full file level stats ([#17538](https://github.com/opensearch-project/OpenSearch/issues/17479))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.query;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.opensearch.common.settings.Settings;
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;

import java.util.Arrays;
import java.util.Collection;

import static org.opensearch.index.query.QueryBuilders.boolQuery;
import static org.opensearch.index.query.QueryBuilders.matchAllQuery;
import static org.opensearch.index.query.QueryBuilders.rangeQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;

public class BooleanQueryIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {

public BooleanQueryIT(Settings staticSettings) {
super(staticSettings);
}

@ParametersFactory
public static Collection<Object[]> parameters() {
return Arrays.asList(
new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() },
new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() }
);
}

public void testMustNotRangeRewrite() throws Exception {
String intField = "int_field";
String termField1 = "term_field_1";
String termField2 = "term_field_2";
// If we don't explicitly set this mapping, term_field_2 is not correctly mapped as a keyword field and queries don't work as
// expected
createIndex(
"test",
Settings.EMPTY,
"{\"properties\":{\"int_field\":{\"type\": \"integer\"},\"term_field_1\":{\"type\": \"keyword\"},\"term_field_2\":{\"type\": \"keyword\"}}}}"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The json mapping passed here seems incorrect. The json string has an extra closing braces.

);
int numDocs = 100;

for (int i = 0; i < numDocs; i++) {
String termValue1 = "odd";
String termValue2 = "A";
if (i % 2 == 0) {
termValue1 = "even";
}
if (i >= numDocs / 2) {
termValue2 = "B";
}
client().prepareIndex("test")
.setId(Integer.toString(i))
.setSource(intField, i, termField1, termValue1, termField2, termValue2)
.get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int lt = 80;
int gte = 20;
int expectedHitCount = numDocs - (lt - gte);

assertHitCount(
client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(),
expectedHitCount
);

assertHitCount(
client().prepareSearch()
.setQuery(boolQuery().must(termQuery(termField1, "even")).mustNot(rangeQuery(intField).lt(lt).gte(gte)))
.get(),
expectedHitCount / 2
);

// Test with preexisting should fields, to ensure the original default minimumShouldMatch is respected
// once we add a must clause during rewrite, even if minimumShouldMatch is not explicitly set
assertHitCount(
client().prepareSearch().setQuery(boolQuery().should(termQuery(termField1, "even")).should(termQuery(termField2, "B"))).get(),
3 * numDocs / 4
);
assertHitCount(
client().prepareSearch()
.setQuery(
boolQuery().should(termQuery(termField1, "even"))
.should(termQuery(termField2, "A"))
.mustNot(rangeQuery(intField).lt(lt).gte(gte))
)
.get(),
3 * expectedHitCount / 4
);

assertHitCount(client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocs * 2))).get(), 0);
}

public void testMustNotDateRangeRewrite() throws Exception {
int minDay = 10;
int maxDay = 31;
String dateField = "date_field";
createIndex("test");
for (int day = minDay; day <= maxDay; day++) {
client().prepareIndex("test").setSource(dateField, getDate(day, 0)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int minExcludedDay = 15;
int maxExcludedDay = 25;
int expectedHitCount = maxDay + 1 - minDay - (maxExcludedDay - minExcludedDay + 1);

assertHitCount(
client().prepareSearch("test")
.setQuery(boolQuery().mustNot(rangeQuery(dateField).gte(getDate(minExcludedDay, 0)).lte(getDate(maxExcludedDay, 0))))
.get(),
expectedHitCount
);
}

public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception {
// Index a document for each hour of 1/1 in UTC. Then, do a boolean query excluding 1/1 for UTC+3.
// If the time zone is respected by the rewrite, we should see 3 matching documents,
// as there will be 3 hours that are 1/1 in UTC but not UTC+3.

String dateField = "date_field";
createIndex("test");
String format = "strict_date_hour_minute_second_fraction";

for (int hour = 0; hour < 24; hour++) {
client().prepareIndex("test").setSource(dateField, getDate(1, hour)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int zoneOffset = 3;
assertHitCount(
client().prepareSearch("test")
.setQuery(
boolQuery().mustNot(
rangeQuery(dateField).gte(getDate(1, 0)).lte(getDate(1, 23)).timeZone("+" + zoneOffset).format(format)
)
)
.get(),
zoneOffset
);
}

public void testMustNotRangeRewriteWithMissingValues() throws Exception {
// This test passes because the rewrite is skipped when not all docs have exactly 1 value
String intField = "int_field";
String termField = "term_field";
createIndex("test");
int numDocs = 100;
int numDocsMissingIntValues = 20;

for (int i = 0; i < numDocs; i++) {
String termValue = "odd";
if (i % 2 == 0) {
termValue = "even";
}

if (i >= numDocsMissingIntValues) {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i, termField, termValue).get();
} else {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(termField, termValue).get();
}
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

// Search excluding range 30 to 50

int lt = 50;
int gte = 30;
int expectedHitCount = numDocs - (lt - gte); // Expected hit count includes documents missing this value

assertHitCount(
client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(),
expectedHitCount
);
}

public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception {
// This test passes because the rewrite is skipped when not all docs have exactly 1 value
String intField = "int_field";
createIndex("test");
int numDocs = 100;
int numDocsWithExtraValue = 20;
int extraValue = -1;

for (int i = 0; i < numDocs; i++) {
if (i < numDocsWithExtraValue) {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, new int[] { extraValue, i }).get();
} else {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get();
}
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

// Range queries will match if ANY of the doc's values are within the range.
// So if we exclude the range 0 to 20, we shouldn't see any of those documents returned,
// even though they also have a value -1 which is not excluded.

int expectedHitCount = numDocs - numDocsWithExtraValue;
assertHitCount(
client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocsWithExtraValue).gte(0))).get(),
expectedHitCount
);
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs);
}

private String padZeros(int value, int length) {
// String.format() not allowed
String ret = Integer.toString(value);
if (ret.length() < length) {
ret = "0".repeat(length - ret.length()) + ret;
}
return ret;
}

private String getDate(int day, int hour) {
return "2016-01-" + padZeros(day, 2) + "T" + padZeros(hour, 2) + ":00:00.000";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,13 @@

package org.opensearch.index.query;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.opensearch.common.lucene.search.Queries;
Expand All @@ -48,10 +52,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Stream;

Expand Down Expand Up @@ -393,9 +400,13 @@
return any.get();
}

changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext);

if (changed) {
newBuilder.adjustPureNegative = adjustPureNegative;
newBuilder.minimumShouldMatch = minimumShouldMatch;
if (minimumShouldMatch != null) {
newBuilder.minimumShouldMatch = minimumShouldMatch;
}
newBuilder.boost(boost());
newBuilder.queryName(queryName());
return newBuilder;
Expand Down Expand Up @@ -460,4 +471,83 @@

}

/**
 * Rewrites eligible must_not range clauses into a nested must clause whose should
 * sub-clauses cover the complement of the excluded range, which is more performant
 * to execute than a negated range.
 *
 * @param newBuilder the builder being assembled by the enclosing rewrite; mutated in place
 * @param queryRewriteContext rewrite context, used to reach the shard's leaf readers
 * @return true if at least one must_not clause was rewritten
 */
private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) {
    // If there is a range query on a given field in a must_not clause, it's more performant to execute it as
    // multiple should clauses representing everything outside the target range.

    // First check if we can get the individual LeafContexts. If we can't, we can't proceed with the rewrite, since we can't confirm
    // every doc has exactly 1 value for this field.
    List<LeafReaderContext> leafReaderContexts = getLeafReaderContexts(queryRewriteContext);
    if (leafReaderContexts == null || leafReaderContexts.isEmpty()) {
        return false;
    }

    boolean changed = false;
    // For now, only handle the case where there's exactly 1 range query for this field.
    Map<String, Integer> fieldCounts = new HashMap<>();
    Set<RangeQueryBuilder> rangeQueries = new HashSet<>();
    for (QueryBuilder clause : mustNotClauses) {
        if (clause instanceof RangeQueryBuilder rq) {
            fieldCounts.merge(rq.fieldName(), 1, Integer::sum);
            rangeQueries.add(rq);
        }
    }

    for (RangeQueryBuilder rq : rangeQueries) {
        String fieldName = rq.fieldName();
        // Skip fields with more than one must_not range clause; the complements could overlap.
        if (fieldCounts.getOrDefault(fieldName, 0) == 1) {
            // Check that all docs on this field have exactly 1 value, otherwise we can't perform this rewrite
            if (checkAllDocsHaveOneValue(leafReaderContexts, fieldName)) {
                // getComplement() may return null when the complement cannot be represented; leave the clause alone then.
                List<RangeQueryBuilder> complement = rq.getComplement();
                if (complement != null) {
                    // Wrap the complement ranges in their own bool query so minimumShouldMatch=1
                    // applies only to them, not to the outer query's should clauses.
                    BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder();
                    nestedBoolQuery.minimumShouldMatch(1);
                    for (RangeQueryBuilder complementComponent : complement) {
                        nestedBoolQuery.should(complementComponent);
                    }
                    newBuilder.must(nestedBoolQuery);
                    newBuilder.mustNotClauses.remove(rq);
                    changed = true;
                }
            }
        }
    }

    if (minimumShouldMatch == null && changed) {
        if ((!shouldClauses.isEmpty()) && mustClauses.isEmpty() && filterClauses.isEmpty()) {
            // If there were originally should clauses and no must/filter clauses, null minimumShouldMatch is set to a default of 1
            // within Lucene.
            // But if there was originally a must or filter clause, the default is 0.
            // If we added a must clause due to this rewrite, we should respect what the original default would have been.
            newBuilder.minimumShouldMatch(1);
        }
    }
    return changed;
}

/**
 * Resolves the chain rewrite context -> shard context -> searcher -> leaf contexts.
 * Returns null when any link in the chain is unavailable, signalling that the
 * segments cannot be inspected from this rewrite context.
 */
private List<LeafReaderContext> getLeafReaderContexts(QueryRewriteContext queryRewriteContext) {
    if (queryRewriteContext != null) {
        QueryShardContext shardContext = queryRewriteContext.convertToShardContext();
        if (shardContext != null) {
            IndexSearcher searcher = shardContext.searcher();
            if (searcher != null) {
                return searcher.getIndexReader().leaves();
            }
        }
    }
    return null;
}

private boolean checkAllDocsHaveOneValue(List<LeafReaderContext> contexts, String fieldName) {
for (LeafReaderContext lrc : contexts) {
PointValues values;
try {
LeafReader reader = lrc.reader();
values = reader.getPointValues(fieldName);
if (values == null || !(values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size())) {
return false;
}
} catch (IOException e) {

Check warning on line 546 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L546

Added line #L546 was not covered by tests
// If we can't get PointValues to check on the number of values per doc, assume the query is ineligible
return false;

Check warning on line 548 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L548

Added line #L548 was not covered by tests
}
}
return true;
}
}
Loading
Loading