diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fa5652ccc7eb..48dd5a6760e6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added - Expand fetch phase profiling to support inner hits and top hits aggregation phases ([##18936](https://github.com/opensearch-project/OpenSearch/pull/18936)) - Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920)) +- Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060)) - The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065)) - Add a toBuilder method in EngineConfig to support easy modification of configs([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054)) - Add StoreFactory plugin interface for custom Store implementations([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091)) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 8aec7386fcf81..3f954cd9f9c37 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -804,6 +804,8 @@ public void apply(Settings value, Settings current, Settings previous) { BlobStoreRepository.SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD, SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING, + SearchService.QUERY_REWRITING_ENABLED_SETTING, + SearchService.QUERY_REWRITING_TERMS_THRESHOLD_SETTING, // Composite index settings CompositeIndexSettings.STAR_TREE_INDEX_ENABLED_SETTING, diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index 46c5a40457ce7..a2f71a7064903 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -49,8 +49,6 @@ import org.opensearch.core.xcontent.ObjectParser; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import java.io.IOException; import java.util.ArrayList; @@ -402,9 +400,6 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws return any.get(); } - changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext); - changed |= rewriteMustClausesToFilter(newBuilder, queryRewriteContext); - if (changed) { newBuilder.adjustPureNegative = adjustPureNegative; if (minimumShouldMatch != null) { @@ -559,53 +554,4 @@ private boolean checkAllDocsHaveOneValue(List contexts, Strin } return true; } - - private boolean rewriteMustClausesToFilter(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) { - // If we have must clauses which return the same score for all matching documents, like numeric term queries or ranges, - // moving them from must clauses to filter clauses improves performance in some cases. - // This works because it can let Lucene use MaxScoreCache to skip non-competitive docs. - boolean changed = false; - Set mustClausesToMove = new HashSet<>(); - - QueryShardContext shardContext; - if (queryRewriteContext == null) { - shardContext = null; - } else { - shardContext = queryRewriteContext.convertToShardContext(); // can still be null - } - - for (QueryBuilder clause : mustClauses) { - if (isClauseIrrelevantToScoring(clause, shardContext)) { - mustClausesToMove.add(clause); - changed = true; - } - } - - newBuilder.mustClauses.removeAll(mustClausesToMove); - newBuilder.filterClauses.addAll(mustClausesToMove); - return changed; - } - - private boolean isClauseIrrelevantToScoring(QueryBuilder clause, QueryShardContext context) { - // This is an incomplete list of clauses this might apply for; it can be expanded in future. - - // If a clause is purely numeric, for example a date range, its score is unimportant as - // it'll be the same for all returned docs - if (clause instanceof RangeQueryBuilder) return true; - if (clause instanceof GeoBoundingBoxQueryBuilder) return true; - - // Further optimizations depend on knowing whether the field is numeric. - // QueryBuilder.doRewrite() is called several times in the search flow, and the shard context telling us this - // is only available the last time, when it's called from SearchService.executeQueryPhase(). - // Skip moving these clauses if we don't have the shard context. - if (context == null) return false; - if (!(clause instanceof WithFieldName wfn)) return false; - MappedFieldType fieldType = context.fieldMapper(wfn.fieldName()); - if (!(fieldType instanceof NumberFieldMapper.NumberFieldType)) return false; - - if (clause instanceof MatchQueryBuilder) return true; - if (clause instanceof TermQueryBuilder) return true; - if (clause instanceof TermsQueryBuilder) return true; - return false; - } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index beecab73ffeab..c6fe57188eff1 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -136,6 +136,7 @@ import org.opensearch.search.profile.Profilers; import org.opensearch.search.profile.SearchProfileShardResults; import org.opensearch.search.query.QueryPhase; +import org.opensearch.search.query.QueryRewriterRegistry; import org.opensearch.search.query.QuerySearchRequest; import org.opensearch.search.query.QuerySearchResult; import org.opensearch.search.query.ScrollQuerySearchResult; @@ -276,6 +277,27 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv Property.Deprecated ); + public static final Setting QUERY_REWRITING_ENABLED_SETTING = Setting.boolSetting( + "search.query_rewriting.enabled", + true, + Property.Dynamic, + Property.NodeScope + ); + + /** + * Controls the threshold for the number of term queries on the same field that triggers + * the TermsMergingRewriter to combine them into a single terms query. For example, + * if set to 16 (default), when 16 or more term queries target the same field within + * a boolean clause, they will be merged into a single terms query for better performance. + */ + public static final Setting QUERY_REWRITING_TERMS_THRESHOLD_SETTING = Setting.intSetting( + "search.query_rewriting.terms_threshold", + 16, + 2, // minimum value + Property.Dynamic, + Property.NodeScope + ); + // Allow concurrent segment search for all requests public static final String CONCURRENT_SEGMENT_SEARCH_MODE_ALL = "all"; @@ -507,6 +529,10 @@ public SearchService( this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories; this.pluginProfilers = pluginProfilers; + + // Initialize QueryRewriterRegistry with cluster settings so TermsMergingRewriter + // can register its settings update consumer + QueryRewriterRegistry.INSTANCE.initialize(settings, clusterService.getClusterSettings()); } private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) { @@ -1488,8 +1514,13 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc context.size(source.size()); Map innerHitBuilders = new HashMap<>(); if (source.query() != null) { - InnerHitContextBuilder.extractInnerHits(source.query(), innerHitBuilders); - context.parsedQuery(queryShardContext.toQuery(source.query())); + QueryBuilder query = source.query(); + + // Apply query rewriting optimizations + query = QueryRewriterRegistry.INSTANCE.rewrite(query, queryShardContext); + + InnerHitContextBuilder.extractInnerHits(query, innerHitBuilders); + context.parsedQuery(queryShardContext.toQuery(query)); } if (source.postFilter() != null) { InnerHitContextBuilder.extractInnerHits(source.postFilter(), innerHitBuilders); diff --git a/server/src/main/java/org/opensearch/search/query/QueryRewriter.java b/server/src/main/java/org/opensearch/search/query/QueryRewriter.java new file mode 100644 index 0000000000000..32854c5881d61 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/QueryRewriter.java @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryShardContext; + +/** + * Interface for query rewriting implementations that optimize query structure + * before conversion to Lucene queries. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface QueryRewriter { + + /** + * Rewrites the given query builder to a more optimal form. + * + * @param query The query to rewrite + * @param context The search execution context + * @return The rewritten query (may be the same instance if no rewrite needed) + */ + QueryBuilder rewrite(QueryBuilder query, QueryShardContext context); + + /** + * Returns the priority of this rewriter. Lower values execute first. + * This allows control over rewrite ordering when multiple rewriters + * may interact. + * + * @return The priority value + */ + default int priority() { + return 1000; + } + + /** + * Returns the name of this rewriter for debugging and profiling. + * + * @return The rewriter name + */ + String name(); +} diff --git a/server/src/main/java/org/opensearch/search/query/QueryRewriterRegistry.java b/server/src/main/java/org/opensearch/search/query/QueryRewriterRegistry.java new file mode 100644 index 0000000000000..7de5a5c8e554e --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/QueryRewriterRegistry.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.SearchService; +import org.opensearch.search.query.rewriters.BooleanFlatteningRewriter; +import org.opensearch.search.query.rewriters.MatchAllRemovalRewriter; +import org.opensearch.search.query.rewriters.MustNotToShouldRewriter; +import org.opensearch.search.query.rewriters.MustToFilterRewriter; +import org.opensearch.search.query.rewriters.TermsMergingRewriter; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; + +/** + * Registry for query rewriters + * + * @opensearch.internal + */ +public final class QueryRewriterRegistry { + + private static final Logger logger = LogManager.getLogger(QueryRewriterRegistry.class); + + public static final QueryRewriterRegistry INSTANCE = new QueryRewriterRegistry(); + + /** + * Default rewriters. + * CopyOnWriteArrayList is used for thread-safety during registration. + */ + private final CopyOnWriteArrayList rewriters; + + /** + * Whether query rewriting is enabled. + */ + private volatile boolean enabled; + + private QueryRewriterRegistry() { + this.rewriters = new CopyOnWriteArrayList<>(); + + // Register default rewriters using singletons + registerRewriter(BooleanFlatteningRewriter.INSTANCE); + registerRewriter(MustToFilterRewriter.INSTANCE); + registerRewriter(MustNotToShouldRewriter.INSTANCE); + registerRewriter(MatchAllRemovalRewriter.INSTANCE); + registerRewriter(TermsMergingRewriter.INSTANCE); + } + + /** + * Register a custom query rewriter. + * + * @param rewriter The rewriter to register + */ + public void registerRewriter(QueryRewriter rewriter) { + if (rewriter != null) { + rewriters.add(rewriter); + logger.info("Registered query rewriter: {}", rewriter.name()); + } + } + + /** + * Initialize the registry with cluster settings. + * This must be called once during system startup to properly configure + * the TermsMergingRewriter with settings and update consumers. + * + * @param settings Initial cluster settings + * @param clusterSettings Cluster settings for registering update consumers + */ + public void initialize(Settings settings, ClusterSettings clusterSettings) { + TermsMergingRewriter.INSTANCE.initialize(settings, clusterSettings); + this.enabled = SearchService.QUERY_REWRITING_ENABLED_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer( + SearchService.QUERY_REWRITING_ENABLED_SETTING, + (Boolean enabled) -> this.enabled = enabled + ); + } + + public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) { + if (!enabled || query == null) { + return query; + } + + List sortedRewriters = new ArrayList<>(rewriters); + sortedRewriters.sort(Comparator.comparingInt(QueryRewriter::priority)); + + QueryBuilder current = query; + for (QueryRewriter rewriter : sortedRewriters) { + try { + QueryBuilder rewritten = rewriter.rewrite(current, context); + if (rewritten != current) { + current = rewritten; + } + } catch (Exception e) { + logger.warn("Query rewriter {} failed: {}", rewriter.name(), e.getMessage()); + } + } + + return current; + } +} diff --git a/server/src/main/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriter.java b/server/src/main/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriter.java new file mode 100644 index 0000000000000..5fe316e9a1c1a --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriter.java @@ -0,0 +1,239 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.query.QueryRewriter; + +import java.util.ArrayList; +import java.util.List; + +/** + * Rewrites nested boolean queries to flatten unnecessary nesting levels. + * For example: + *
+ * {"bool": {"filter": [{"bool": {"filter": [{"term": {"field": "value"}}]}}]}}
+ * 
+ * becomes: + *
+ * {"bool": {"filter": [{"term": {"field": "value"}}]}}
+ * 
+ * + * Note: While Lucene's BooleanQuery does flatten pure disjunctions (SHOULD-only clauses) + * for WAND optimization, it does NOT flatten other nested structures like filter-in-filter + * or must-in-must. This rewriter handles those additional patterns that are common in + * user-generated and template-based queries but not optimized by Lucene. + * + * @opensearch.internal + */ +public class BooleanFlatteningRewriter implements QueryRewriter { + + public static final BooleanFlatteningRewriter INSTANCE = new BooleanFlatteningRewriter(); + + private BooleanFlatteningRewriter() { + // Singleton + } + + @Override + public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) { + if (!(query instanceof BoolQueryBuilder)) { + return query; + } + + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; + + // First check if flattening is needed + if (!needsFlattening(boolQuery)) { + return query; + } + + return flattenBoolQuery(boolQuery); + } + + private boolean needsFlattening(BoolQueryBuilder boolQuery) { + // Check all clause types for nested bool queries that can be flattened + if (hasFlattenableBool(boolQuery.must(), ClauseType.MUST) + || hasFlattenableBool(boolQuery.filter(), ClauseType.FILTER) + || hasFlattenableBool(boolQuery.should(), ClauseType.SHOULD) + || hasFlattenableBool(boolQuery.mustNot(), ClauseType.MUST_NOT)) { + return true; + } + + // Check if any nested bool queries need flattening + return hasNestedBoolThatNeedsFlattening(boolQuery); + } + + private boolean hasFlattenableBool(List clauses, ClauseType parentType) { + for (QueryBuilder clause : clauses) { + if (clause instanceof BoolQueryBuilder) { + BoolQueryBuilder nestedBool = (BoolQueryBuilder) clause; + // Can flatten if nested bool only has one type of clause matching parent + if (canFlatten(nestedBool, parentType)) { + return true; + } + } + } + return false; + } + + private boolean hasNestedBoolThatNeedsFlattening(BoolQueryBuilder boolQuery) { + for (QueryBuilder clause : boolQuery.must()) { + if (clause instanceof BoolQueryBuilder && needsFlattening((BoolQueryBuilder) clause)) { + return true; + } + } + for (QueryBuilder clause : boolQuery.filter()) { + if (clause instanceof BoolQueryBuilder && needsFlattening((BoolQueryBuilder) clause)) { + return true; + } + } + for (QueryBuilder clause : boolQuery.should()) { + if (clause instanceof BoolQueryBuilder && needsFlattening((BoolQueryBuilder) clause)) { + return true; + } + } + for (QueryBuilder clause : boolQuery.mustNot()) { + if (clause instanceof BoolQueryBuilder && needsFlattening((BoolQueryBuilder) clause)) { + return true; + } + } + return false; + } + + private BoolQueryBuilder flattenBoolQuery(BoolQueryBuilder original) { + BoolQueryBuilder flattened = new BoolQueryBuilder(); + + flattened.boost(original.boost()); + flattened.queryName(original.queryName()); + flattened.minimumShouldMatch(original.minimumShouldMatch()); + flattened.adjustPureNegative(original.adjustPureNegative()); + + flattenClauses(original.must(), flattened, ClauseType.MUST); + flattenClauses(original.filter(), flattened, ClauseType.FILTER); + flattenClauses(original.should(), flattened, ClauseType.SHOULD); + flattenClauses(original.mustNot(), flattened, ClauseType.MUST_NOT); + + return flattened; + } + + private void flattenClauses(List clauses, BoolQueryBuilder target, ClauseType clauseType) { + for (QueryBuilder clause : clauses) { + if (clause instanceof BoolQueryBuilder) { + BoolQueryBuilder nestedBool = (BoolQueryBuilder) clause; + + if (canFlatten(nestedBool, clauseType)) { + // Flatten the nested bool query by extracting its clauses + List nestedClauses = getClausesForType(nestedBool, clauseType); + for (QueryBuilder nestedClause : nestedClauses) { + // Recursively flatten if needed + if (nestedClause instanceof BoolQueryBuilder) { + nestedClause = flattenBoolQuery((BoolQueryBuilder) nestedClause); + } + addClauseBasedOnType(target, nestedClause, clauseType); + } + } else { + // Can't flatten this bool, but recursively flatten its contents + BoolQueryBuilder flattenedNested = flattenBoolQuery(nestedBool); + addClauseBasedOnType(target, flattenedNested, clauseType); + } + } else { + // Non-boolean clause, add as-is + addClauseBasedOnType(target, clause, clauseType); + } + } + } + + private boolean canFlatten(BoolQueryBuilder nestedBool, ClauseType parentType) { + // Can only flatten if: + // 1. The nested bool has the same properties as default (boost=1, no queryName, etc.) + // 2. The nested bool only has clauses of the same type as the parent + + if (nestedBool.boost() != 1.0f || nestedBool.queryName() != null) { + return false; + } + + // Check if only has clauses matching parent type + switch (parentType) { + case MUST: + return !nestedBool.must().isEmpty() + && nestedBool.filter().isEmpty() + && nestedBool.should().isEmpty() + && nestedBool.mustNot().isEmpty(); + case FILTER: + return nestedBool.must().isEmpty() + && !nestedBool.filter().isEmpty() + && nestedBool.should().isEmpty() + && nestedBool.mustNot().isEmpty(); + case SHOULD: + return nestedBool.must().isEmpty() + && nestedBool.filter().isEmpty() + && !nestedBool.should().isEmpty() + && nestedBool.mustNot().isEmpty() + && nestedBool.minimumShouldMatch() == null; + case MUST_NOT: + return nestedBool.must().isEmpty() + && nestedBool.filter().isEmpty() + && nestedBool.should().isEmpty() + && !nestedBool.mustNot().isEmpty(); + default: + return false; + } + } + + private List getClausesForType(BoolQueryBuilder bool, ClauseType type) { + switch (type) { + case MUST: + return bool.must(); + case FILTER: + return bool.filter(); + case SHOULD: + return bool.should(); + case MUST_NOT: + return bool.mustNot(); + default: + return new ArrayList<>(); + } + } + + private void addClauseBasedOnType(BoolQueryBuilder target, QueryBuilder clause, ClauseType type) { + switch (type) { + case MUST: + target.must(clause); + break; + case FILTER: + target.filter(clause); + break; + case SHOULD: + target.should(clause); + break; + case MUST_NOT: + target.mustNot(clause); + break; + } + } + + @Override + public int priority() { + return 100; + } + + @Override + public String name() { + return "boolean_flattening"; + } + + private enum ClauseType { + MUST, + FILTER, + SHOULD, + MUST_NOT + } +} diff --git a/server/src/main/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriter.java b/server/src/main/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriter.java new file mode 100644 index 0000000000000..39d2257e483bc --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriter.java @@ -0,0 +1,239 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchAllQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.query.QueryRewriter; + +import java.util.List; + +/** + * Removes unnecessary match_all queries from boolean contexts where they have no effect. + * + * @opensearch.internal + */ +public class MatchAllRemovalRewriter implements QueryRewriter { + + public static final MatchAllRemovalRewriter INSTANCE = new MatchAllRemovalRewriter(); + + private MatchAllRemovalRewriter() { + // Singleton + } + + @Override + public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) { + if (query instanceof BoolQueryBuilder) { + return rewriteBoolQuery((BoolQueryBuilder) query); + } + return query; + } + + private QueryBuilder rewriteBoolQuery(BoolQueryBuilder original) { + // Special case: bool query with only match_all queries and no should/mustNot + if (original.should().isEmpty() && original.mustNot().isEmpty()) { + boolean onlyMatchAll = true; + int matchAllCount = 0; + int matchAllInMust = 0; + + for (QueryBuilder q : original.must()) { + if (q instanceof MatchAllQueryBuilder) { + matchAllCount++; + matchAllInMust++; + } else { + // Don't treat constant score queries or any other queries as match_all + onlyMatchAll = false; + break; + } + } + + if (onlyMatchAll) { + for (QueryBuilder q : original.filter()) { + if (q instanceof MatchAllQueryBuilder) { + matchAllCount++; + } else { + onlyMatchAll = false; + break; + } + } + } + + // Only convert to single match_all if there are no must clauses + // (to preserve scoring) or if there's only one match_all total + if (onlyMatchAll && matchAllCount > 0 && (matchAllInMust == 0 || matchAllCount == 1)) { + // Convert to single match_all, preserving boost + MatchAllQueryBuilder matchAll = new MatchAllQueryBuilder(); + if (original.boost() != 1.0f) { + matchAll.boost(original.boost()); + } + return matchAll; + } + } + + // Check if we need rewriting + boolean needsRewrite = shouldRewrite(original); + + if (!needsRewrite) { + return original; + } + + // Clone the query structure + BoolQueryBuilder rewritten = new BoolQueryBuilder(); + rewritten.boost(original.boost()); + rewritten.queryName(original.queryName()); + rewritten.minimumShouldMatch(original.minimumShouldMatch()); + rewritten.adjustPureNegative(original.adjustPureNegative()); + + // Process each clause type with different match_all removal logic: + // - must: Remove match_all only if other queries exist (preserves scoring semantics) + // - filter: Always remove match_all (it's redundant in non-scoring context) + // - should: Keep match_all (changes OR semantics if removed) + // - mustNot: Keep match_all (excluding all docs is meaningful) + processClausesWithContext(original.must(), rewritten::must, true, original, true); + processClauses(original.filter(), rewritten::filter, true, original); + processClauses(original.should(), rewritten::should, false, original); + processClauses(original.mustNot(), rewritten::mustNot, false, original); + + return rewritten; + } + + private boolean shouldRewrite(BoolQueryBuilder bool) { + // Check if any must/filter has match_all + if (hasMatchAll(bool.must()) || hasMatchAll(bool.filter())) { + return true; + } + + // Check nested bool queries + return hasNestedBoolThatNeedsRewrite(bool); + } + + private boolean hasMatchAll(List clauses) { + for (QueryBuilder q : clauses) { + if (q instanceof MatchAllQueryBuilder) { + return true; + } + } + return false; + } + + private boolean hasNestedBoolThatNeedsRewrite(BoolQueryBuilder bool) { + for (QueryBuilder q : bool.must()) { + if (q instanceof BoolQueryBuilder && shouldRewrite((BoolQueryBuilder) q)) { + return true; + } + } + for (QueryBuilder q : bool.filter()) { + if (q instanceof BoolQueryBuilder && shouldRewrite((BoolQueryBuilder) q)) { + return true; + } + } + for (QueryBuilder q : bool.should()) { + if (q instanceof BoolQueryBuilder && shouldRewrite((BoolQueryBuilder) q)) { + return true; + } + } + for (QueryBuilder q : bool.mustNot()) { + if (q instanceof BoolQueryBuilder && shouldRewrite((BoolQueryBuilder) q)) { + return true; + } + } + return false; + } + + private void processClausesWithContext( + List clauses, + ClauseAdder adder, + boolean removeMatchAll, + BoolQueryBuilder original, + boolean isMustClause + ) { + if (!removeMatchAll) { + processClauses(clauses, adder, false, original); + return; + } + + // For must clauses, only remove match_all if there are other non-match_all queries + if (isMustClause) { + boolean hasNonMatchAll = clauses.stream().anyMatch(q -> !(q instanceof MatchAllQueryBuilder)); + + // Also check if we're in a scoring context (no filter/should/mustNot clauses) + boolean isScoringContext = original.filter().isEmpty() && original.should().isEmpty() && original.mustNot().isEmpty(); + + if (!hasNonMatchAll || isScoringContext) { + // All queries are match_all or we're in a scoring context, don't remove any to preserve scoring + processClauses(clauses, adder, false, original); + return; + } + } + + // Otherwise, use normal processing + processClauses(clauses, adder, removeMatchAll, original); + } + + private void processClauses(List clauses, ClauseAdder adder, boolean removeMatchAll, BoolQueryBuilder original) { + if (!removeMatchAll) { + // For should/mustNot, don't remove match_all + for (QueryBuilder clause : clauses) { + if (clause instanceof BoolQueryBuilder) { + adder.addClause(rewriteBoolQuery((BoolQueryBuilder) clause)); + } else { + adder.addClause(clause); + } + } + return; + } + + // For must/filter, remove match_all if: + // 1. There are other non-match_all clauses in the same list OR + // 2. There are clauses in other lists (must, filter, should, mustNot) + boolean hasOtherClauses = hasNonMatchAllInSameList(clauses) || hasClausesInOtherLists(original); + + for (QueryBuilder clause : clauses) { + if (clause instanceof BoolQueryBuilder) { + adder.addClause(rewriteBoolQuery((BoolQueryBuilder) clause)); + } else if (clause instanceof MatchAllQueryBuilder && hasOtherClauses) { + // Skip match_all + continue; + } else { + adder.addClause(clause); + } + } + } + + private boolean hasNonMatchAllInSameList(List clauses) { + for (QueryBuilder q : clauses) { + if (!(q instanceof MatchAllQueryBuilder)) { + return true; + } + } + return false; + } + + private boolean hasClausesInOtherLists(BoolQueryBuilder bool) { + // Check if there are any clauses in any list + return !bool.must().isEmpty() || !bool.filter().isEmpty() || !bool.should().isEmpty() || !bool.mustNot().isEmpty(); + } + + @Override + public int priority() { + return 300; + } + + @Override + public String name() { + return "match_all_removal"; + } + + @FunctionalInterface + private interface ClauseAdder { + void addClause(QueryBuilder clause); + } +} diff --git a/server/src/main/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriter.java b/server/src/main/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriter.java new file mode 100644 index 0000000000000..ddcbea48b4b12 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriter.java @@ -0,0 +1,251 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ComplementAwareQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.WithFieldName; +import org.opensearch.search.query.QueryRewriter; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Rewrites must_not clauses to should clauses when possible. + * This improves performance by transforming negative queries into positive ones. + * + * For example: + *
+ * {"bool": {"must_not": [{"range": {"age": {"gte": 18, "lte": 65}}}]}}
+ * 
+ * becomes: + *
+ * {"bool": {"must": [{"bool": {"should": [
+ *   {"range": {"age": {"lt": 18}}},
+ *   {"range": {"age": {"gt": 65}}}
+ * ], "minimum_should_match": 1}}]}}
+ * 
+ * + * This optimization applies to: + * - RangeQueryBuilder + * - TermQueryBuilder (on numeric fields) + * - TermsQueryBuilder (on numeric fields) + * - MatchQueryBuilder (on numeric fields) + * + * @opensearch.internal + */ +public class MustNotToShouldRewriter implements QueryRewriter { + + public static final MustNotToShouldRewriter INSTANCE = new MustNotToShouldRewriter(); + + private MustNotToShouldRewriter() { + // Singleton + } + + @Override + public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) { + if (!(query instanceof BoolQueryBuilder)) { + return query; + } + + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; + + // We need LeafReaderContexts to verify single-valued fields (only for must_not rewriting) + List leafReaderContexts = null; + List mustNotClausesToRewrite = new ArrayList<>(); + + // Only process must_not clauses if they exist + if (!boolQuery.mustNot().isEmpty()) { + leafReaderContexts = getLeafReaderContexts(context); + if (leafReaderContexts != null && !leafReaderContexts.isEmpty()) { + Map fieldCounts = new HashMap<>(); + + // Find complement-aware queries that can be rewritten + for (QueryBuilder clause : boolQuery.mustNot()) { + if (clause instanceof ComplementAwareQueryBuilder && clause instanceof WithFieldName) { + WithFieldName wfn = (WithFieldName) clause; + fieldCounts.merge(wfn.fieldName(), 1, Integer::sum); + } + } + + // For now, only handle the case where there's exactly 1 complement-aware query per field + for (QueryBuilder clause : boolQuery.mustNot()) { + if (clause instanceof ComplementAwareQueryBuilder && clause instanceof WithFieldName) { + WithFieldName wfn = (WithFieldName) clause; + String fieldName = wfn.fieldName(); + + if (fieldCounts.getOrDefault(fieldName, 0) == 1) { + // Check that all docs on this field have exactly 1 value + if (checkAllDocsHaveOneValue(leafReaderContexts, fieldName)) { + mustNotClausesToRewrite.add(clause); + } + } + } + } + } + } + + // Create a new BoolQueryBuilder with rewritten clauses + BoolQueryBuilder rewritten = new BoolQueryBuilder(); + + // Copy all properties + rewritten.boost(boolQuery.boost()); + rewritten.queryName(boolQuery.queryName()); + rewritten.minimumShouldMatch(boolQuery.minimumShouldMatch()); + rewritten.adjustPureNegative(boolQuery.adjustPureNegative()); + + // Copy must clauses (rewrite nested queries first) + for (QueryBuilder mustClause : boolQuery.must()) { + rewritten.must(rewrite(mustClause, context)); + } + + // Copy filter clauses (rewrite nested queries first) + for (QueryBuilder filterClause : boolQuery.filter()) { + rewritten.filter(rewrite(filterClause, context)); + } + + // Copy should clauses (rewrite nested queries first) + for (QueryBuilder shouldClause : boolQuery.should()) { + rewritten.should(rewrite(shouldClause, context)); + } + + // Process must_not clauses + boolean changed = false; + for (QueryBuilder mustNotClause : boolQuery.mustNot()) { + if (mustNotClausesToRewrite.contains(mustNotClause)) { + // Rewrite this clause + ComplementAwareQueryBuilder caq = (ComplementAwareQueryBuilder) mustNotClause; + List complement = caq.getComplement(context); + + if (complement != null && !complement.isEmpty()) { + BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder(); + nestedBoolQuery.minimumShouldMatch(1); + for (QueryBuilder complementComponent : complement) { + nestedBoolQuery.should(complementComponent); + } + rewritten.must(nestedBoolQuery); + changed = true; + } else { + // If complement couldn't be determined, keep original + rewritten.mustNot(mustNotClause); + } + } else { + // Keep clauses we're not rewriting + rewritten.mustNot(rewrite(mustNotClause, context)); + } + } + + // Handle minimumShouldMatch adjustment + if (changed && boolQuery.minimumShouldMatch() == null) { + if (!boolQuery.should().isEmpty() && boolQuery.must().isEmpty() && boolQuery.filter().isEmpty()) { + // If there were originally should clauses and no must/filter clauses, + // null minimumShouldMatch defaults to 1 in Lucene. + // But if there was originally a must or filter clause, the default is 0. + // If we added a must clause due to this rewrite, we should respect the original default. + rewritten.minimumShouldMatch(1); + } + } + + // Check if any nested queries were rewritten + boolean nestedQueriesChanged = false; + for (QueryBuilder mustClause : boolQuery.must()) { + if (mustClause instanceof BoolQueryBuilder && rewritten.must().contains(mustClause) == false) { + nestedQueriesChanged = true; + break; + } + } + if (!nestedQueriesChanged) { + for (QueryBuilder filterClause : boolQuery.filter()) { + if (filterClause instanceof BoolQueryBuilder && rewritten.filter().contains(filterClause) == false) { + nestedQueriesChanged = true; + break; + } + } + } + if (!nestedQueriesChanged) { + for (QueryBuilder shouldClause : boolQuery.should()) { + if (shouldClause instanceof BoolQueryBuilder && rewritten.should().contains(shouldClause) == false) { + nestedQueriesChanged = true; + break; + } + } + } + if (!nestedQueriesChanged) { + for (QueryBuilder mustNotClause : boolQuery.mustNot()) { + if (mustNotClause instanceof BoolQueryBuilder && rewritten.mustNot().contains(mustNotClause) == false) { + nestedQueriesChanged = true; + break; + } + } + } + + return (changed || nestedQueriesChanged) ? rewritten : query; + } + + private List getLeafReaderContexts(QueryShardContext context) { + if (context == null) { + return null; + } + try { + return context.getIndexReader().leaves(); + } catch (Exception e) { + return null; + } + } + + private boolean checkAllDocsHaveOneValue(List leafReaderContexts, String fieldName) { + try { + for (LeafReaderContext leafReaderContext : leafReaderContexts) { + PointValues pointValues = leafReaderContext.reader().getPointValues(fieldName); + if (pointValues != null) { + int docCount = pointValues.getDocCount(); + long valueCount = pointValues.size(); + // Check if all documents have exactly one value + if (docCount != valueCount) { + return false; + } + // Also check if all documents in the segment have a value for this field + // If some documents are missing the field, we can't do the optimization + // because the semantics change (missing values won't match positive queries) + int maxDoc = leafReaderContext.reader().maxDoc(); + if (docCount != maxDoc) { + return false; + } + } else { + // If there are no point values but there are documents, some docs are missing the field + if (leafReaderContext.reader().maxDoc() > 0) { + return false; + } + } + } + return true; + } catch (IOException e) { + return false; + } + } + + @Override + public int priority() { + // Run after boolean flattening (100) and must-to-filter (150) + // but before terms merging (200) + return 175; + } + + @Override + public String name() { + return "must_not_to_should"; + } +} diff --git a/server/src/main/java/org/opensearch/search/query/rewriters/MustToFilterRewriter.java b/server/src/main/java/org/opensearch/search/query/rewriters/MustToFilterRewriter.java new file mode 100644 index 0000000000000..4960302d0c812 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/rewriters/MustToFilterRewriter.java @@ -0,0 +1,177 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.GeoBoundingBoxQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.index.query.WithFieldName; +import org.opensearch.search.query.QueryRewriter; + +import java.util.ArrayList; +import java.util.List; + +/** + * Rewrites must clauses to filter clauses when they don't affect scoring. + * This improves performance by avoiding unnecessary scoring calculations. + * + * For example: + *
+ * {"bool": {"must": [
+ *   {"range": {"date": {"gte": "2024-01-01"}}},
+ *   {"term": {"status": "active"}}
+ * ]}}
+ * 
+ * becomes: + *
+ * {"bool": {
+ *   "filter": [{"range": {"date": {"gte": "2024-01-01"}}}],
+ *   "must": [{"term": {"status": "active"}}]
+ * }}
+ * 
+ * + * @opensearch.internal + */ +public class MustToFilterRewriter implements QueryRewriter { + + public static final MustToFilterRewriter INSTANCE = new MustToFilterRewriter(); + + private MustToFilterRewriter() { + // Singleton + } + + @Override + public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) { + if (!(query instanceof BoolQueryBuilder)) { + return query; + } + + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; + + // If there are no must clauses, nothing to rewrite + if (boolQuery.must().isEmpty()) { + return query; + } + + // First, rewrite all clauses recursively + List rewrittenMustClauses = new ArrayList<>(); + List mustClausesToMove = new ArrayList<>(); + + for (QueryBuilder clause : boolQuery.must()) { + QueryBuilder rewrittenClause = rewriteIfNeeded(clause, context); + rewrittenMustClauses.add(rewrittenClause); + + if (isClauseIrrelevantToScoring(rewrittenClause, context)) { + mustClausesToMove.add(rewrittenClause); + } + } + + // Check if anything changed - either clauses to move or nested rewrites + boolean hasChanges = !mustClausesToMove.isEmpty(); + for (int i = 0; i < boolQuery.must().size(); i++) { + if (boolQuery.must().get(i) != rewrittenMustClauses.get(i)) { + hasChanges = true; + break; + } + } + + if (!hasChanges) { + return query; + } + + // Create a new BoolQueryBuilder with moved clauses + BoolQueryBuilder rewritten = new BoolQueryBuilder(); + + // Copy all properties + rewritten.boost(boolQuery.boost()); + rewritten.queryName(boolQuery.queryName()); + rewritten.minimumShouldMatch(boolQuery.minimumShouldMatch()); + rewritten.adjustPureNegative(boolQuery.adjustPureNegative()); + + // Copy must clauses except the ones we're moving + for (QueryBuilder rewrittenClause : rewrittenMustClauses) { + if (!mustClausesToMove.contains(rewrittenClause)) { + rewritten.must(rewrittenClause); + } + } + + // Add the moved clauses to filter + for (QueryBuilder movedClause : mustClausesToMove) { + rewritten.filter(movedClause); + } + + // Copy existing filter clauses + for (QueryBuilder filterClause : boolQuery.filter()) { + rewritten.filter(rewriteIfNeeded(filterClause, context)); + } + + // Copy should and mustNot clauses + for (QueryBuilder shouldClause : boolQuery.should()) { + rewritten.should(rewriteIfNeeded(shouldClause, context)); + } + for (QueryBuilder mustNotClause : boolQuery.mustNot()) { + rewritten.mustNot(rewriteIfNeeded(mustNotClause, context)); + } + + return rewritten; + } + + private QueryBuilder rewriteIfNeeded(QueryBuilder query, QueryShardContext context) { + // Recursively rewrite nested boolean queries + if (query instanceof BoolQueryBuilder) { + return rewrite(query, context); + } + return query; + } + + private boolean isClauseIrrelevantToScoring(QueryBuilder clause, QueryShardContext context) { + // This is an incomplete list of clauses this might apply for; it can be expanded in future. + + // If a clause is purely numeric, for example a date range, its score is unimportant as + // it'll be the same for all returned docs + if (clause instanceof RangeQueryBuilder) return true; + if (clause instanceof GeoBoundingBoxQueryBuilder) return true; + + // Further optimizations depend on knowing whether the field is numeric. + // Skip moving these clauses if we don't have the shard context. + if (context == null) return false; + + if (!(clause instanceof WithFieldName)) return false; + + WithFieldName wfn = (WithFieldName) clause; + MappedFieldType fieldType = context.fieldMapper(wfn.fieldName()); + + if (!(fieldType instanceof NumberFieldMapper.NumberFieldType)) return false; + + // Numeric field queries have constant scores + if (clause instanceof MatchQueryBuilder) return true; + if (clause instanceof TermQueryBuilder) return true; + if (clause instanceof TermsQueryBuilder) return true; + + return false; + } + + @Override + public int priority() { + // Run after boolean flattening (100) but before terms merging (200) + return 150; + } + + @Override + public String name() { + return "must_to_filter"; + } +} diff --git a/server/src/main/java/org/opensearch/search/query/rewriters/TermsMergingRewriter.java b/server/src/main/java/org/opensearch/search/query/rewriters/TermsMergingRewriter.java new file mode 100644 index 0000000000000..9c4d863602091 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/rewriters/TermsMergingRewriter.java @@ -0,0 +1,314 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.search.SearchService; +import org.opensearch.search.query.QueryRewriter; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Rewrites multiple term queries on the same field into a single terms query. + * For example: + *
+ * {"bool": {"filter": [
+ *   {"term": {"status": "active"}},
+ *   {"term": {"status": "pending"}},
+ *   {"term": {"category": "A"}}
+ * ]}}
+ * 
+ * becomes: + *
+ * {"bool": {"filter": [
+ *   {"terms": {"status": ["active", "pending"]}},
+ *   {"term": {"category": "A"}}
+ * ]}}
+ * 
+ * + * Note: Terms are only merged when there are enough terms to benefit from + * the terms query's bit set optimization (default threshold: 16 terms). + * This avoids performance regressions for small numbers of terms where + * individual term queries may perform better. + * + * @opensearch.internal + */ +public class TermsMergingRewriter implements QueryRewriter { + + public static final TermsMergingRewriter INSTANCE = new TermsMergingRewriter(); + + /** + * Default minimum number of terms to merge. Below this threshold, individual + * term queries may perform better than a terms query. + * Based on Lucene's TermInSetQuery optimization characteristics. + */ + private static final int DEFAULT_MINIMUM_TERMS_TO_MERGE = 16; + + /** + * The minimum number of terms to merge. + */ + private volatile int minimumTermsToMerge = DEFAULT_MINIMUM_TERMS_TO_MERGE; + + /** + * Creates a new rewriter. + */ + private TermsMergingRewriter() { + // Singleton + } + + /** + * Initialize this rewriter with cluster settings. + * This registers an update consumer to keep the threshold in sync with the cluster setting. + * + * @param settings Initial settings + * @param clusterSettings Cluster settings to register update consumer + */ + public void initialize(Settings settings, ClusterSettings clusterSettings) { + this.minimumTermsToMerge = SearchService.QUERY_REWRITING_TERMS_THRESHOLD_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer( + SearchService.QUERY_REWRITING_TERMS_THRESHOLD_SETTING, + threshold -> this.minimumTermsToMerge = threshold + ); + } + + @Override + public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) { + if (!(query instanceof BoolQueryBuilder)) { + return query; + } + + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; + + // First check if merging is needed + if (!needsMerging(boolQuery)) { + return query; + } + + BoolQueryBuilder rewritten = new BoolQueryBuilder(); + + rewritten.boost(boolQuery.boost()); + rewritten.queryName(boolQuery.queryName()); + rewritten.minimumShouldMatch(boolQuery.minimumShouldMatch()); + rewritten.adjustPureNegative(boolQuery.adjustPureNegative()); + + // Only merge terms in contexts where it's semantically safe + rewriteClausesNoMerge(boolQuery.must(), rewritten::must); // Don't merge in must + rewriteClauses(boolQuery.filter(), rewritten::filter); // Safe to merge + rewriteClauses(boolQuery.should(), rewritten::should); // Safe to merge + rewriteClausesNoMerge(boolQuery.mustNot(), rewritten::mustNot); // Don't merge in mustNot + + return rewritten; + } + + private boolean needsMerging(BoolQueryBuilder boolQuery) { + // Check filter and should clauses for mergeable terms + if (hasMergeableTerms(boolQuery.filter()) || hasMergeableTerms(boolQuery.should())) { + return true; + } + + // Check nested bool queries + return hasNestedBoolThatNeedsMerging(boolQuery); + } + + private boolean hasMergeableTerms(List clauses) { + Map> fieldBoosts = new HashMap<>(); + + for (QueryBuilder clause : clauses) { + if (clause instanceof TermQueryBuilder) { + TermQueryBuilder termQuery = (TermQueryBuilder) clause; + String field = termQuery.fieldName(); + float boost = termQuery.boost(); + + fieldBoosts.computeIfAbsent(field, k -> new ArrayList<>()).add(boost); + + List boosts = fieldBoosts.get(field); + if (boosts.size() >= minimumTermsToMerge) { + // Check if all boosts are the same + float firstBoost = boosts.get(0); + boolean sameBoost = boosts.stream().allMatch(b -> b == firstBoost); + if (sameBoost) { + return true; + } + } + } else if (clause instanceof TermsQueryBuilder) { + // Check if there are enough term queries that can be merged with this terms query + TermsQueryBuilder termsQuery = (TermsQueryBuilder) clause; + String field = termsQuery.fieldName(); + int additionalTerms = 0; + + for (QueryBuilder other : clauses) { + if (other != clause && other instanceof TermQueryBuilder) { + TermQueryBuilder termQuery = (TermQueryBuilder) other; + if (field.equals(termQuery.fieldName()) && termsQuery.boost() == termQuery.boost()) { + additionalTerms++; + } + } + } + + // Only worth merging if the combined size would meet the threshold + if (termsQuery.values().size() + additionalTerms >= minimumTermsToMerge) { + return true; + } + } + } + + return false; + } + + private boolean hasNestedBoolThatNeedsMerging(BoolQueryBuilder boolQuery) { + for (QueryBuilder clause : boolQuery.must()) { + if (clause instanceof BoolQueryBuilder && needsMerging((BoolQueryBuilder) clause)) { + return true; + } + } + for (QueryBuilder clause : boolQuery.filter()) { + if (clause instanceof BoolQueryBuilder && needsMerging((BoolQueryBuilder) clause)) { + return true; + } + } + for (QueryBuilder clause : boolQuery.should()) { + if (clause instanceof BoolQueryBuilder && needsMerging((BoolQueryBuilder) clause)) { + return true; + } + } + for (QueryBuilder clause : boolQuery.mustNot()) { + if (clause instanceof BoolQueryBuilder && needsMerging((BoolQueryBuilder) clause)) { + return true; + } + } + return false; + } + + private void rewriteClauses(List clauses, ClauseAdder adder) { + Map termsMap = new HashMap<>(); + List nonTermClauses = new ArrayList<>(); + + // Group term queries by field + for (QueryBuilder clause : clauses) { + if (clause instanceof TermQueryBuilder) { + TermQueryBuilder termQuery = (TermQueryBuilder) clause; + String field = termQuery.fieldName(); + float boost = termQuery.boost(); + + TermsInfo info = termsMap.get(field); + if (info != null && info.boost != boost) { + // Different boost, can't merge - add as single term + nonTermClauses.add(clause); + } else { + termsMap.computeIfAbsent(field, k -> new TermsInfo(boost)).addValue(termQuery.value()); + } + } else if (clause instanceof TermsQueryBuilder) { + // Existing terms query - add to it + TermsQueryBuilder termsQuery = (TermsQueryBuilder) clause; + String field = termsQuery.fieldName(); + float boost = termsQuery.boost(); + + TermsInfo info = termsMap.get(field); + if (info != null && info.boost != boost) { + // Different boost, can't merge + nonTermClauses.add(clause); + } else { + info = termsMap.computeIfAbsent(field, k -> new TermsInfo(boost)); + for (Object value : termsQuery.values()) { + info.addValue(value); + } + } + } else if (clause instanceof BoolQueryBuilder) { + // Recursively rewrite nested bool queries + nonTermClauses.add(rewrite(clause, null)); + } else { + nonTermClauses.add(clause); + } + } + + // Create terms queries for fields with multiple values + for (Map.Entry entry : termsMap.entrySet()) { + String field = entry.getKey(); + TermsInfo info = entry.getValue(); + + if (info.values.size() == 1) { + // Single value, keep as term query + TermQueryBuilder termQuery = new TermQueryBuilder(field, info.values.get(0)); + if (info.boost != 1.0f) { + termQuery.boost(info.boost); + } + adder.addClause(termQuery); + } else if (info.values.size() >= minimumTermsToMerge) { + // Many values, merge into terms query for better performance + TermsQueryBuilder termsQuery = new TermsQueryBuilder(field, info.values); + if (info.boost != 1.0f) { + termsQuery.boost(info.boost); + } + adder.addClause(termsQuery); + } else { + // Few values, keep as individual term queries for better performance + for (Object value : info.values) { + TermQueryBuilder termQuery = new TermQueryBuilder(field, value); + if (info.boost != 1.0f) { + termQuery.boost(info.boost); + } + adder.addClause(termQuery); + } + } + } + + // Add non-term clauses + for (QueryBuilder clause : nonTermClauses) { + adder.addClause(clause); + } + } + + private void rewriteClausesNoMerge(List clauses, ClauseAdder adder) { + for (QueryBuilder clause : clauses) { + if (clause instanceof BoolQueryBuilder) { + // Recursively rewrite nested bool queries + adder.addClause(rewrite(clause, null)); + } else { + adder.addClause(clause); + } + } + } + + @Override + public int priority() { + return 200; + } + + @Override + public String name() { + return "terms_merging"; + } + + @FunctionalInterface + private interface ClauseAdder { + void addClause(QueryBuilder clause); + } + + private static class TermsInfo { + final float boost; + final List values = new ArrayList<>(); + + TermsInfo(float boost) { + this.boost = boost; + } + + void addValue(Object value) { + values.add(value); + } + } +} diff --git a/server/src/main/java/org/opensearch/search/query/rewriters/package-info.java b/server/src/main/java/org/opensearch/search/query/rewriters/package-info.java new file mode 100644 index 0000000000000..167d97e097185 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/rewriters/package-info.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Query rewriting optimizations for improving search performance. + * + *

This package contains various query rewriters that transform queries + * into more efficient forms while maintaining semantic equivalence. + * + *

The rewriters include: + *

    + *
  • {@link org.opensearch.search.query.rewriters.BooleanFlatteningRewriter} - + * Flattens nested boolean queries with single clauses
  • + *
  • {@link org.opensearch.search.query.rewriters.MatchAllRemovalRewriter} - + * Removes redundant match_all queries from boolean clauses
  • + *
  • {@link org.opensearch.search.query.rewriters.TermsMergingRewriter} - + * Merges multiple term queries on the same field into a single terms query
  • + *
  • {@link org.opensearch.search.query.rewriters.MustNotToShouldRewriter} - + * Transforms must_not queries to should queries for better performance
  • + *
  • {@link org.opensearch.search.query.rewriters.MustToFilterRewriter} - + * Moves scoring-irrelevant queries from must to filter clauses
  • + *
+ * + * @opensearch.internal + */ +package org.opensearch.search.query.rewriters; diff --git a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java index cb83b8e1986b9..85e1d0f00c661 100644 --- a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java @@ -73,7 +73,6 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; public class BoolQueryBuilderTests extends AbstractQueryTestCase { @Override @@ -517,63 +516,6 @@ public void testVisit() { } - public void testOneMustNotRangeRewritten() throws Exception { - int from = 10; - int to = 20; - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); - addDocument(w, INT_FIELD_NAME, 1); - DirectoryReader reader = DirectoryReader.open(w); - IndexSearcher searcher = getIndexSearcher(reader); - - for (boolean includeLower : new boolean[] { true, false }) { - for (boolean includeUpper : new boolean[] { true, false }) { - BoolQueryBuilder qb = new BoolQueryBuilder(); - QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, to, includeLower, includeUpper); - qb.mustNot(rq); - - BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext(searcher)); - assertFalse(rewritten.mustNot().contains(rq)); - - QueryBuilder expectedLowerQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, !includeLower); - QueryBuilder expectedUpperQuery = getRangeQueryBuilder(INT_FIELD_NAME, to, null, !includeUpper, true); - assertEquals(1, rewritten.must().size()); - - BoolQueryBuilder nestedBoolQuery = (BoolQueryBuilder) rewritten.must().get(0); - assertEquals(2, nestedBoolQuery.should().size()); - assertEquals("1", nestedBoolQuery.minimumShouldMatch()); - assertTrue(nestedBoolQuery.should().contains(expectedLowerQuery)); - assertTrue(nestedBoolQuery.should().contains(expectedUpperQuery)); - } - } - IOUtils.close(w, reader, dir); - } - - public void testOneSingleEndedMustNotRangeRewritten() throws Exception { - // Test a must_not range query with only one endpoint is rewritten correctly - int from = 10; - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); - addDocument(w, INT_FIELD_NAME, 1); - DirectoryReader reader = DirectoryReader.open(w); - IndexSearcher searcher = getIndexSearcher(reader); - - BoolQueryBuilder qb = new BoolQueryBuilder(); - QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, null, false, false); - qb.mustNot(rq); - BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext(searcher)); - assertFalse(rewritten.mustNot().contains(rq)); - - QueryBuilder expectedQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, true); - assertEquals(1, rewritten.must().size()); - BoolQueryBuilder nestedBoolQuery = (BoolQueryBuilder) rewritten.must().get(0); - assertEquals(1, nestedBoolQuery.should().size()); - assertTrue(nestedBoolQuery.should().contains(expectedQuery)); - assertEquals("1", nestedBoolQuery.minimumShouldMatch()); - - IOUtils.close(w, reader, dir); - } - public void testMultipleComplementAwareOnSameFieldNotRewritten() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); @@ -641,100 +583,6 @@ public void testMustNotRewriteDisabledWithoutExactlyOneValuePerDoc() throws Exce IOUtils.close(w, reader, dir); } - public void testOneMustNotNumericMatchQueryRewritten() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new StandardAnalyzer())); - addDocument(w, INT_FIELD_NAME, 1); - DirectoryReader reader = DirectoryReader.open(w); - IndexSearcher searcher = getIndexSearcher(reader); - - BoolQueryBuilder qb = new BoolQueryBuilder(); - int excludedValue = 200; - QueryBuilder matchQuery = new MatchQueryBuilder(INT_FIELD_NAME, excludedValue); - qb.mustNot(matchQuery); - - BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext(searcher)); - assertFalse(rewritten.mustNot().contains(matchQuery)); - - QueryBuilder expectedLowerQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, excludedValue, true, false); - QueryBuilder expectedUpperQuery = getRangeQueryBuilder(INT_FIELD_NAME, excludedValue, null, false, true); - assertEquals(1, rewritten.must().size()); - - BoolQueryBuilder nestedBoolQuery = (BoolQueryBuilder) rewritten.must().get(0); - assertEquals(2, nestedBoolQuery.should().size()); - assertEquals("1", nestedBoolQuery.minimumShouldMatch()); - assertTrue(nestedBoolQuery.should().contains(expectedLowerQuery)); - assertTrue(nestedBoolQuery.should().contains(expectedUpperQuery)); - - // When the QueryShardContext is null, we should not rewrite any match queries as we can't confirm if they're on numeric fields. - QueryRewriteContext nullContext = mock(QueryRewriteContext.class); - when(nullContext.convertToShardContext()).thenReturn(null); - BoolQueryBuilder rewrittenNoContext = (BoolQueryBuilder) Rewriteable.rewrite(qb, nullContext); - assertTrue(rewrittenNoContext.mustNot().contains(matchQuery)); - assertTrue(rewrittenNoContext.should().isEmpty()); - - IOUtils.close(w, reader, dir); - } - - public void testMustClausesRewritten() throws Exception { - BoolQueryBuilder qb = new BoolQueryBuilder(); - - // Should be moved - QueryBuilder intTermQuery = new TermQueryBuilder(INT_FIELD_NAME, 200); - QueryBuilder rangeQuery = new RangeQueryBuilder(INT_FIELD_NAME).gt(10).lt(20); - // Should be moved to filter clause, the boost applies equally to all matched docs - QueryBuilder rangeQueryWithBoost = new RangeQueryBuilder(DATE_FIELD_NAME).gt(10).lt(20).boost(2); - QueryBuilder intTermsQuery = new TermsQueryBuilder(INT_FIELD_NAME, new int[] { 1, 4, 100 }); - QueryBuilder boundingBoxQuery = new GeoBoundingBoxQueryBuilder(GEO_POINT_FIELD_NAME); - QueryBuilder doubleMatchQuery = new MatchQueryBuilder(DOUBLE_FIELD_NAME, 5.5); - - // Should not be moved - QueryBuilder textTermQuery = new TermQueryBuilder(TEXT_FIELD_NAME, "bar"); - QueryBuilder textTermsQuery = new TermsQueryBuilder(TEXT_FIELD_NAME, "foo", "bar"); - QueryBuilder textMatchQuery = new MatchQueryBuilder(TEXT_FIELD_NAME, "baz"); - - qb.must(intTermQuery); - qb.must(rangeQuery); - qb.must(rangeQueryWithBoost); - qb.must(intTermsQuery); - qb.must(boundingBoxQuery); - qb.must(doubleMatchQuery); - - qb.must(textTermQuery); - qb.must(textTermsQuery); - qb.must(textMatchQuery); - - BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); - for (QueryBuilder clause : List.of( - intTermQuery, - rangeQuery, - rangeQueryWithBoost, - intTermsQuery, - boundingBoxQuery, - doubleMatchQuery - )) { - assertFalse(rewritten.must().contains(clause)); - assertTrue(rewritten.filter().contains(clause)); - } - for (QueryBuilder clause : List.of(textTermQuery, textTermsQuery, textMatchQuery)) { - assertTrue(rewritten.must().contains(clause)); - assertFalse(rewritten.filter().contains(clause)); - } - - // If we have null QueryShardContext, match/term/terms queries should not be moved as we can't determine if they're numeric. - QueryRewriteContext nullContext = mock(QueryRewriteContext.class); - when(nullContext.convertToShardContext()).thenReturn(null); - rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, nullContext); - for (QueryBuilder clause : List.of(rangeQuery, rangeQueryWithBoost, boundingBoxQuery)) { - assertFalse(rewritten.must().contains(clause)); - assertTrue(rewritten.filter().contains(clause)); - } - for (QueryBuilder clause : List.of(textTermQuery, textTermsQuery, textMatchQuery, intTermQuery, intTermsQuery, doubleMatchQuery)) { - assertTrue(rewritten.must().contains(clause)); - assertFalse(rewritten.filter().contains(clause)); - } - } - private QueryBuilder getRangeQueryBuilder(String fieldName, Integer lower, Integer upper, boolean includeLower, boolean includeUpper) { RangeQueryBuilder rq = new RangeQueryBuilder(fieldName); if (lower != null) { diff --git a/server/src/test/java/org/opensearch/search/query/QueryRewriterRegistryTests.java b/server/src/test/java/org/opensearch/search/query/QueryRewriterRegistryTests.java new file mode 100644 index 0000000000000..64cb2ff5efb27 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/QueryRewriterRegistryTests.java @@ -0,0 +1,328 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchAllQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.SearchService; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; + +public class QueryRewriterRegistryTests extends OpenSearchTestCase { + + private final QueryShardContext context = mock(QueryShardContext.class); + + @Override + public void setUp() throws Exception { + super.setUp(); + // Initialize registry with default settings + Settings settings = Settings.builder() + .put(SearchService.QUERY_REWRITING_ENABLED_SETTING.getKey(), true) + .put(SearchService.QUERY_REWRITING_TERMS_THRESHOLD_SETTING.getKey(), 16) + .build(); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + QueryRewriterRegistry.INSTANCE.initialize(settings, clusterSettings); + } + + public void testCompleteRewritingPipeline() { + // Test that all rewriters work together correctly + QueryBuilder nestedBool = QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .must(QueryBuilders.termQuery("status", "active")) + .must(QueryBuilders.termQuery("status", "pending")); + + QueryBuilder query = QueryBuilders.boolQuery() + .must(nestedBool) + .filter(QueryBuilders.matchAllQuery()) + .filter(QueryBuilders.termQuery("type", "product")) + .filter(QueryBuilders.termQuery("type", "service")); + + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have: + // - Flattened nested boolean + // - Terms in must clauses are NOT merged (semantically different) + // - Removed match_all queries + assertThat(rewrittenBool.must().size(), equalTo(2)); // two term queries for status + assertThat(rewrittenBool.must().get(0), instanceOf(TermQueryBuilder.class)); + assertThat(rewrittenBool.must().get(1), instanceOf(TermQueryBuilder.class)); + + assertThat(rewrittenBool.filter().size(), equalTo(2)); // two term queries for type (below threshold) + assertThat(rewrittenBool.filter().get(0), instanceOf(TermQueryBuilder.class)); + assertThat(rewrittenBool.filter().get(1), instanceOf(TermQueryBuilder.class)); + } + + public void testDisabledRewriting() { + // Test disabled rewriting via settings + Settings settings = Settings.builder().put(SearchService.QUERY_REWRITING_ENABLED_SETTING.getKey(), false).build(); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + // Initialize with disabled setting + QueryRewriterRegistry.INSTANCE.initialize(settings, clusterSettings); + + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .filter(QueryBuilders.termQuery("field", "value")); + + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertSame(query, rewritten); + + // Enable via settings update + clusterSettings.applySettings(Settings.builder().put(SearchService.QUERY_REWRITING_ENABLED_SETTING.getKey(), true).build()); + + // Now it should rewrite + QueryBuilder rewritten2 = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertNotSame(query, rewritten2); + } + + public void testNullQuery() { + // Null query should return null + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(null, context); + assertNull(rewritten); + } + + public void testRewriterPriorityOrder() { + // Test that rewriters are applied in correct order + // Create a query that will be affected by multiple rewriters + QueryBuilder deeplyNested = QueryBuilders.boolQuery() + .must( + QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .must(QueryBuilders.termQuery("field", "value1")) + .must(QueryBuilders.termQuery("field", "value2")) + ); + + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(deeplyNested, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should be flattened first, match_all kept in must (scoring context), but terms NOT merged in must context + assertThat(rewrittenBool.must().size(), equalTo(3)); // match_all + 2 term queries + // Check that we have one match_all and two term queries (order may vary) + long matchAllCount = rewrittenBool.must().stream().filter(q -> q instanceof MatchAllQueryBuilder).count(); + long termCount = rewrittenBool.must().stream().filter(q -> q instanceof TermQueryBuilder).count(); + assertThat(matchAllCount, equalTo(1L)); + assertThat(termCount, equalTo(2L)); + } + + public void testComplexRealWorldQuery() { + // Test a complex real-world-like query + QueryBuilder query = QueryBuilders.boolQuery() + .must( + QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .filter(QueryBuilders.termQuery("category", "electronics")) + .filter(QueryBuilders.termQuery("category", "computers")) + ) + .filter( + QueryBuilders.boolQuery() + .should(QueryBuilders.termQuery("brand", "apple")) + .should(QueryBuilders.termQuery("brand", "dell")) + .should(QueryBuilders.termQuery("brand", "hp")) + ) + .must(QueryBuilders.rangeQuery("price").gte(500).lte(2000)) + .mustNot(QueryBuilders.termQuery("status", "discontinued")); + + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // After rewriting: + // - The nested bool in must clause should be flattened + // - match_all should be removed + // - term queries should be merged into terms query + // - The filter bool with brand terms should be preserved + // - The range query should be moved from must to filter by MustToFilterRewriter + + // Check must clauses (should have terms query for category only - range moved to filter) + assertThat(rewrittenBool.must().size(), equalTo(1)); + + // Check filter clauses (should have the brand bool query AND the range query) + assertThat(rewrittenBool.filter().size(), equalTo(2)); + // One should be the brand bool query + boolean hasBoolFilter = false; + boolean hasRangeFilter = false; + for (QueryBuilder filter : rewrittenBool.filter()) { + if (filter instanceof BoolQueryBuilder) { + hasBoolFilter = true; + } else if (filter instanceof RangeQueryBuilder) { + hasRangeFilter = true; + } + } + assertTrue(hasBoolFilter); + assertTrue(hasRangeFilter); + + // Must not should be preserved + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + } + + public void testPerformanceMetrics() { + // Test that we log performance metrics in debug mode + // This is more of a sanity check that the timing code doesn't throw exceptions + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("field1", "value1")) + .must(QueryBuilders.termQuery("field1", "value2")) + .filter(QueryBuilders.matchAllQuery()); + + // Should not throw any exceptions + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertNotNull(rewritten); + } + + public void testRewriterErrorHandling() { + // Test that if a rewriter throws an exception, others still run + // This is handled internally by QueryRewriterRegistry + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("field", "value")) + .filter(QueryBuilders.matchAllQuery()); + + // Even if one rewriter fails, others should still be applied + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertNotNull(rewritten); + } + + public void testVeryComplexMixedQuery() { + // Test a very complex query with all optimizations applicable + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .must( + QueryBuilders.boolQuery() + .must( + QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("status", "active")) + .filter(QueryBuilders.termQuery("status", "pending")) + .filter(QueryBuilders.termQuery("status", "approved")) + ) + .must( + QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .filter(QueryBuilders.termQuery("type", "A")) + .filter(QueryBuilders.termQuery("type", "B")) + ) + ) + .filter(QueryBuilders.matchAllQuery()) + .filter( + QueryBuilders.boolQuery() + .should(QueryBuilders.termQuery("priority", "high")) + .should(QueryBuilders.termQuery("priority", "medium")) + .should(QueryBuilders.termQuery("priority", "low")) + ) + .should(QueryBuilders.termQuery("category", "urgent")) + .should(QueryBuilders.termQuery("category", "important")) + .mustNot(QueryBuilders.boolQuery().must(QueryBuilders.termQuery("archived", "true"))) + .minimumShouldMatch(1); + + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder result = (BoolQueryBuilder) rewritten; + + // Check that minimum should match is preserved + assertThat(result.minimumShouldMatch(), equalTo("1")); + + // Verify optimizations were applied + assertNotSame(query, rewritten); + + // Should have flattened structure and merged terms + assertTrue(result.must().size() >= 1); + assertTrue(result.filter().size() >= 1); + } + + public void testCustomRewriterRegistration() { + // Create a custom rewriter for testing + QueryRewriter customRewriter = new QueryRewriter() { + @Override + public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) { + if (query instanceof TermQueryBuilder) { + TermQueryBuilder termQuery = (TermQueryBuilder) query; + if ("test_field".equals(termQuery.fieldName()) && "test_value".equals(termQuery.value())) { + // Replace with a different query + return QueryBuilders.termQuery("custom_field", "custom_value"); + } + } else if (query instanceof BoolQueryBuilder) { + // Recursively apply to nested queries + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; + BoolQueryBuilder rewritten = new BoolQueryBuilder(); + + // Copy settings + rewritten.boost(boolQuery.boost()); + rewritten.queryName(boolQuery.queryName()); + rewritten.minimumShouldMatch(boolQuery.minimumShouldMatch()); + rewritten.adjustPureNegative(boolQuery.adjustPureNegative()); + + // Recursively rewrite clauses + boolean changed = false; + for (QueryBuilder must : boolQuery.must()) { + QueryBuilder rewrittenClause = rewrite(must, context); + rewritten.must(rewrittenClause); + if (rewrittenClause != must) changed = true; + } + for (QueryBuilder filter : boolQuery.filter()) { + QueryBuilder rewrittenClause = rewrite(filter, context); + rewritten.filter(rewrittenClause); + if (rewrittenClause != filter) changed = true; + } + for (QueryBuilder should : boolQuery.should()) { + QueryBuilder rewrittenClause = rewrite(should, context); + rewritten.should(rewrittenClause); + if (rewrittenClause != should) changed = true; + } + for (QueryBuilder mustNot : boolQuery.mustNot()) { + QueryBuilder rewrittenClause = rewrite(mustNot, context); + rewritten.mustNot(rewrittenClause); + if (rewrittenClause != mustNot) changed = true; + } + + return changed ? rewritten : query; + } + return query; + } + + @Override + public int priority() { + return 1000; // High priority to ensure it runs last + } + + @Override + public String name() { + return "test_custom_rewriter"; + } + }; + + // Register the custom rewriter + QueryRewriterRegistry.INSTANCE.registerRewriter(customRewriter); + + // Test that it's applied + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("test_field", "test_value")) + .filter(QueryBuilders.termQuery("other_field", "other_value")); + + QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // The custom rewriter should have replaced the term query + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.must().get(0), instanceOf(TermQueryBuilder.class)); + TermQueryBuilder mustTerm = (TermQueryBuilder) rewrittenBool.must().get(0); + assertThat(mustTerm.fieldName(), equalTo("custom_field")); + assertThat(mustTerm.value(), equalTo("custom_value")); + } +} diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriterTests.java new file mode 100644 index 0000000000000..317d753493cf1 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriterTests.java @@ -0,0 +1,182 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; + +public class BooleanFlatteningRewriterTests extends OpenSearchTestCase { + + private final BooleanFlatteningRewriter rewriter = BooleanFlatteningRewriter.INSTANCE; + private final QueryShardContext context = mock(QueryShardContext.class); + + public void testSimpleBooleanQuery() { + // Simple boolean query should not be modified + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("field1", "value1")) + .filter(QueryBuilders.termQuery("field2", "value2")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testNestedBooleanFlattening() { + // Nested boolean query with single must clause should be flattened + QueryBuilder nestedBool = QueryBuilders.boolQuery().must(QueryBuilders.termQuery("field1", "value1")); + + QueryBuilder query = QueryBuilders.boolQuery().must(nestedBool).filter(QueryBuilders.termQuery("field2", "value2")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // The nested bool should be flattened + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.must().get(0), instanceOf(QueryBuilders.termQuery("field1", "value1").getClass())); + assertThat(rewrittenBool.filter().size(), equalTo(1)); + } + + public void testMultipleNestedBooleansFlattening() { + // Multiple nested boolean queries should all be flattened + QueryBuilder nested1 = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("field1", "value1")) + .must(QueryBuilders.termQuery("field2", "value2")); + + QueryBuilder nested2 = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("field3", "value3")); + + QueryBuilder query = QueryBuilders.boolQuery().must(nested1).filter(nested2); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // All nested clauses should be flattened + assertThat(rewrittenBool.must().size(), equalTo(2)); + assertThat(rewrittenBool.filter().size(), equalTo(1)); + } + + public void testShouldClauseFlattening() { + // Should clauses should also be flattened + QueryBuilder nestedShould = QueryBuilders.boolQuery() + .should(QueryBuilders.termQuery("field1", "value1")) + .should(QueryBuilders.termQuery("field2", "value2")); + + QueryBuilder query = QueryBuilders.boolQuery().should(nestedShould).must(QueryBuilders.termQuery("field3", "value3")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should clauses should be flattened + assertThat(rewrittenBool.should().size(), equalTo(2)); + assertThat(rewrittenBool.must().size(), equalTo(1)); + } + + public void testMustNotClauseNoFlattening() { + // Must_not clauses should NOT be flattened to preserve semantics + QueryBuilder nestedMustNot = QueryBuilders.boolQuery().must(QueryBuilders.termQuery("field1", "value1")); + + QueryBuilder query = QueryBuilders.boolQuery().mustNot(nestedMustNot).must(QueryBuilders.termQuery("field2", "value2")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Must_not should not be flattened + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().get(0), instanceOf(BoolQueryBuilder.class)); + } + + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/18906") + public void testDeepNesting() { + // TODO: This test expects complete flattening of deeply nested bool queries + // where intermediate bool wrappers are removed entirely. Our current implementation + // only flattens by merging same-type clauses but preserves the bool structure. + // This would require a different optimization strategy. + + // Deep nesting should be flattened at all levels + QueryBuilder deepNested = QueryBuilders.boolQuery() + .must(QueryBuilders.boolQuery().must(QueryBuilders.boolQuery().must(QueryBuilders.termQuery("field1", "value1")))); + + QueryBuilder rewritten = rewriter.rewrite(deepNested, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should be flattened to single level bool with term query + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.must().get(0), instanceOf(TermQueryBuilder.class)); + + // Verify the term query details + TermQueryBuilder termQuery = (TermQueryBuilder) rewrittenBool.must().get(0); + assertThat(termQuery.fieldName(), equalTo("field1")); + assertThat(termQuery.value(), equalTo("value1")); + } + + public void testMixedClauseTypes() { + // Mixed clause types with different minimumShouldMatch settings + QueryBuilder nested = QueryBuilders.boolQuery() + .should(QueryBuilders.termQuery("field1", "value1")) + .should(QueryBuilders.termQuery("field2", "value2")) + .minimumShouldMatch(1); + + QueryBuilder query = QueryBuilders.boolQuery().must(nested).minimumShouldMatch(2); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // Should not flatten due to different minimumShouldMatch + } + + public void testEmptyBooleanQuery() { + // Empty boolean query should not cause issues + QueryBuilder query = QueryBuilders.boolQuery(); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testNonBooleanQuery() { + // Non-boolean queries should be returned as-is + QueryBuilder query = QueryBuilders.termQuery("field", "value"); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testVeryDeepNesting() { + // Test with 10 levels of nesting + QueryBuilder innermost = QueryBuilders.termQuery("field", "value"); + for (int i = 0; i < 10; i++) { + innermost = QueryBuilders.boolQuery().must(innermost); + } + + QueryBuilder rewritten = rewriter.rewrite(innermost, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + + // Should be flattened significantly + BoolQueryBuilder result = (BoolQueryBuilder) rewritten; + assertThat(result.must().size(), equalTo(1)); + } + + public void testQueryNamePreservation() { + // Ensure query names are preserved during flattening + QueryBuilder query = QueryBuilders.boolQuery() + .queryName("outer") + .must(QueryBuilders.boolQuery().queryName("inner").must(QueryBuilders.termQuery("field", "value"))); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + BoolQueryBuilder result = (BoolQueryBuilder) rewritten; + assertThat(result.queryName(), equalTo("outer")); + } +} diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriterTests.java new file mode 100644 index 0000000000000..6f2c6cf93133d --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriterTests.java @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; + +public class MatchAllRemovalRewriterTests extends OpenSearchTestCase { + + private final MatchAllRemovalRewriter rewriter = MatchAllRemovalRewriter.INSTANCE; + private final QueryShardContext context = mock(QueryShardContext.class); + + public void testRemoveMatchAllFromMust() { + // match_all in must clause should NOT be removed in scoring context + QueryBuilder query = QueryBuilders.boolQuery().must(QueryBuilders.matchAllQuery()).must(QueryBuilders.termQuery("field", "value")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // match_all should be kept in scoring context + assertThat(rewrittenBool.must().size(), equalTo(2)); + } + + public void testRemoveMatchAllFromFilter() { + // match_all in filter clause should be removed + QueryBuilder query = QueryBuilders.boolQuery() + .filter(QueryBuilders.matchAllQuery()) + .filter(QueryBuilders.rangeQuery("price").gt(100)) + .must(QueryBuilders.termQuery("category", "electronics")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // match_all should be removed from filter + assertThat(rewrittenBool.filter().size(), equalTo(1)); + assertThat(rewrittenBool.must().size(), equalTo(1)); + } + + public void testKeepMatchAllInShould() { + // match_all in should clause should be kept + QueryBuilder query = QueryBuilders.boolQuery() + .should(QueryBuilders.matchAllQuery()) + .should(QueryBuilders.termQuery("field", "value")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes for should clause + } + + public void testKeepMatchAllInMustNot() { + // match_all in must_not clause should be kept (it's meaningful) + QueryBuilder query = QueryBuilders.boolQuery() + .mustNot(QueryBuilders.matchAllQuery()) + .must(QueryBuilders.termQuery("field", "value")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes for must_not clause + } + + public void testOnlyMatchAllQuery() { + // Boolean query with only match_all should be simplified to match_all + QueryBuilder query = QueryBuilders.boolQuery().must(QueryBuilders.matchAllQuery()); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(QueryBuilders.matchAllQuery().getClass())); + } + + public void testMultipleMatchAllQueries() { + // Multiple match_all queries should all be removed + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .must(QueryBuilders.matchAllQuery()) + .filter(QueryBuilders.matchAllQuery()) + .must(QueryBuilders.termQuery("field", "value")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // With filter clause present, this is not a pure scoring context + // Since there are non-match_all queries in must, match_all should be removed + assertThat(rewrittenBool.must().size(), equalTo(1)); // only term query remains + assertThat(rewrittenBool.filter().size(), equalTo(0)); + } + + public void testNestedBooleanWithMatchAll() { + // Nested boolean queries should also have match_all removed + QueryBuilder nested = QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .must(QueryBuilders.termQuery("field1", "value1")); + + QueryBuilder query = QueryBuilders.boolQuery().must(nested).filter(QueryBuilders.termQuery("field2", "value2")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Nested bool keeps match_all in scoring context + BoolQueryBuilder nestedRewritten = (BoolQueryBuilder) rewrittenBool.must().get(0); + assertThat(nestedRewritten.must().size(), equalTo(2)); // match_all + term + } + + public void testEmptyBoolAfterRemoval() { + // Bool with only match_all in must/filter - keeps match_all in must in scoring context + QueryBuilder query = QueryBuilders.boolQuery().must(QueryBuilders.matchAllQuery()).filter(QueryBuilders.matchAllQuery()); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // match_all in must is kept in scoring context, match_all in filter is removed + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.filter().size(), equalTo(0)); + } + + public void testBoolWithOnlyMustNotAfterRemoval() { + // Bool with only must_not after removal should not be converted to match_all + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .mustNot(QueryBuilders.termQuery("status", "deleted")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // must clause keeps match_all in scoring context, must_not preserved + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + } + + public void testNonBooleanQuery() { + // Non-boolean queries should be returned as-is + QueryBuilder query = QueryBuilders.matchAllQuery(); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testEmptyBooleanQuery() { + // Empty boolean query should not be converted + QueryBuilder query = QueryBuilders.boolQuery(); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testBoostPreservation() { + // When converting bool with only match_all to match_all, preserve boost + QueryBuilder query = QueryBuilders.boolQuery().must(QueryBuilders.matchAllQuery()).boost(2.0f); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(QueryBuilders.matchAllQuery().getClass())); + assertThat(rewritten.boost(), equalTo(2.0f)); + } +} diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriterTests.java new file mode 100644 index 0000000000000..4fd21d9ad601e --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriterTests.java @@ -0,0 +1,284 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class MustNotToShouldRewriterTests extends OpenSearchTestCase { + + private final MustNotToShouldRewriter rewriter = MustNotToShouldRewriter.INSTANCE; + private QueryShardContext context; + private Directory directory; + private IndexReader reader; + + @Override + public void setUp() throws Exception { + super.setUp(); + context = mock(QueryShardContext.class); + + // Create an index with single-valued numeric fields + directory = newDirectory(); + IndexWriterConfig config = new IndexWriterConfig(Lucene.STANDARD_ANALYZER); + IndexWriter writer = new IndexWriter(directory, config); + + // Add some documents with single-valued numeric fields + for (int i = 0; i < 100; i++) { + Document doc = new Document(); + doc.add(new IntPoint("age", i)); + doc.add(new IntPoint("status", i % 10)); + writer.addDocument(doc); + } + + writer.close(); + reader = DirectoryReader.open(directory); + when(context.getIndexReader()).thenReturn(reader); + + // Setup numeric field types + NumberFieldMapper.NumberFieldType intFieldType = mock(NumberFieldMapper.NumberFieldType.class); + when(intFieldType.numberType()).thenReturn(NumberFieldMapper.NumberType.INTEGER); + // Make parse return the input value as a Number + when(intFieldType.parse(any())).thenAnswer(invocation -> { + Object arg = invocation.getArgument(0); + if (arg instanceof Number) { + return (Number) arg; + } + return Integer.parseInt(arg.toString()); + }); + when(context.fieldMapper("age")).thenReturn(intFieldType); + when(context.fieldMapper("status")).thenReturn(intFieldType); + when(context.fieldMapper("price")).thenReturn(intFieldType); + + // Setup non-numeric field types + MappedFieldType textFieldType = mock(MappedFieldType.class); + when(context.fieldMapper("name")).thenReturn(textFieldType); + when(context.fieldMapper("description")).thenReturn(textFieldType); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + reader.close(); + directory.close(); + } + + public void testRangeQueryRewritten() { + // Test that must_not range query is rewritten to should clauses + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("type", "product")) + .mustNot(QueryBuilders.rangeQuery("age").gte(18).lte(65)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have the original term query plus a new bool query from the rewrite + assertThat(rewrittenBool.must().size(), equalTo(2)); + + // The must_not clause should be removed + assertThat(rewrittenBool.mustNot().size(), equalTo(0)); + + // Find the nested bool query + BoolQueryBuilder nestedBool = null; + for (QueryBuilder must : rewrittenBool.must()) { + if (must instanceof BoolQueryBuilder) { + nestedBool = (BoolQueryBuilder) must; + break; + } + } + + assertNotNull(nestedBool); + assertThat(nestedBool.should().size(), equalTo(2)); // Two range queries for complement + assertThat(nestedBool.minimumShouldMatch(), equalTo("1")); + } + + public void testNumericTermQueryRewritten() { + // Test that must_not term query on numeric field is rewritten + QueryBuilder query = QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery("status", 5)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have a new bool query from the rewrite + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().size(), equalTo(0)); + + BoolQueryBuilder nestedBool = (BoolQueryBuilder) rewrittenBool.must().get(0); + assertThat(nestedBool.should().size(), equalTo(2)); // Two range queries for complement + assertThat(nestedBool.minimumShouldMatch(), equalTo("1")); + } + + public void testNumericTermsQueryRewritten() { + // Test that must_not terms query on numeric field is rewritten + QueryBuilder query = QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery("status", new Object[] { 1, 2, 3 })); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have a new bool query from the rewrite + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().size(), equalTo(0)); + } + + public void testNumericMatchQueryRewritten() { + // Test that must_not match query on numeric field is rewritten + QueryBuilder query = QueryBuilders.boolQuery().mustNot(QueryBuilders.matchQuery("age", 25)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have a new bool query from the rewrite + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().size(), equalTo(0)); + } + + public void testTextFieldNotRewritten() { + // Test that must_not queries on text fields are not rewritten + QueryBuilder query = QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termQuery("name", "test")) + .mustNot(QueryBuilders.matchQuery("description", "product")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes + } + + public void testMultipleQueriesOnSameFieldNotRewritten() { + // Test that multiple must_not queries on the same field are not rewritten + QueryBuilder query = QueryBuilders.boolQuery() + .mustNot(QueryBuilders.rangeQuery("age").gte(18)) + .mustNot(QueryBuilders.rangeQuery("age").lte(65)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes + } + + public void testMinimumShouldMatchHandling() { + // Test that minimumShouldMatch is properly handled + QueryBuilder query = QueryBuilders.boolQuery() + .should(QueryBuilders.termQuery("category", "A")) + .should(QueryBuilders.termQuery("category", "B")) + .mustNot(QueryBuilders.rangeQuery("age").gte(18)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Since we added a must clause, minimumShouldMatch should be set to 1 + assertThat(rewrittenBool.minimumShouldMatch(), equalTo("1")); + } + + public void testExistingMustClausesPreserved() { + // Test that existing must/filter/should clauses are preserved + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("type", "product")) + .filter(QueryBuilders.rangeQuery("price").gte(100)) + .should(QueryBuilders.termQuery("featured", true)) + .mustNot(QueryBuilders.rangeQuery("age").gte(18)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Original clauses should be preserved + assertThat(rewrittenBool.must().size(), equalTo(2)); // Original + rewritten + assertThat(rewrittenBool.filter().size(), equalTo(1)); + assertThat(rewrittenBool.should().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().size(), equalTo(0)); + } + + public void testNestedBooleanQueriesRewritten() { + // Test that nested boolean queries are also rewritten + QueryBuilder nested = QueryBuilders.boolQuery().mustNot(QueryBuilders.rangeQuery("age").gte(18)); + + QueryBuilder query = QueryBuilders.boolQuery().must(nested); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // The nested bool should be rewritten + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.must().get(0), instanceOf(BoolQueryBuilder.class)); + + BoolQueryBuilder innerBool = (BoolQueryBuilder) rewrittenBool.must().get(0); + assertThat(innerBool.must().size(), equalTo(1)); // The rewritten clause + assertThat(innerBool.mustNot().size(), equalTo(0)); + } + + public void testNoMustNotClausesNoChanges() { + // Query without must_not clauses should not be changed + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("type", "product")) + .filter(QueryBuilders.rangeQuery("price").gte(100)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testNonBoolQueryUnchanged() { + // Non-bool queries should not be changed + QueryBuilder query = QueryBuilders.termQuery("field", "value"); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testNullContextNoRewrite() { + // With null context, no rewriting should happen + QueryBuilder query = QueryBuilders.boolQuery().mustNot(QueryBuilders.rangeQuery("age").gte(18)); + + QueryBuilder rewritten = rewriter.rewrite(query, null); + assertSame(query, rewritten); + } + + public void testRewriterPriority() { + // Verify rewriter has correct priority + assertThat(rewriter.priority(), equalTo(175)); + assertThat(rewriter.name(), equalTo("must_not_to_should")); + } + + public void testBoolQueryPropertiesPreserved() { + // All bool query properties should be preserved + QueryBuilder query = QueryBuilders.boolQuery() + .mustNot(QueryBuilders.rangeQuery("age").gte(18)) + .boost(2.0f) + .queryName("my_query") + .adjustPureNegative(false); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Properties should be preserved + assertThat(rewrittenBool.boost(), equalTo(2.0f)); + assertThat(rewrittenBool.queryName(), equalTo("my_query")); + assertThat(rewrittenBool.adjustPureNegative(), equalTo(false)); + } +} diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/MustToFilterRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/MustToFilterRewriterTests.java new file mode 100644 index 0000000000000..35e289813acff --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/rewriters/MustToFilterRewriterTests.java @@ -0,0 +1,309 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class MustToFilterRewriterTests extends OpenSearchTestCase { + + private final MustToFilterRewriter rewriter = MustToFilterRewriter.INSTANCE; + private QueryShardContext context; + + @Override + public void setUp() throws Exception { + super.setUp(); + context = mock(QueryShardContext.class); + + // Setup numeric field types + NumberFieldMapper.NumberFieldType intFieldType = mock(NumberFieldMapper.NumberFieldType.class); + when(context.fieldMapper("age")).thenReturn(intFieldType); + when(context.fieldMapper("price")).thenReturn(intFieldType); + when(context.fieldMapper("count")).thenReturn(intFieldType); + when(context.fieldMapper("user_id")).thenReturn(intFieldType); + + // Setup non-numeric field types + MappedFieldType textFieldType = mock(MappedFieldType.class); + when(context.fieldMapper("name")).thenReturn(textFieldType); + when(context.fieldMapper("description")).thenReturn(textFieldType); + when(context.fieldMapper("category")).thenReturn(textFieldType); + when(context.fieldMapper("status_code")).thenReturn(textFieldType); + when(context.fieldMapper("active")).thenReturn(textFieldType); + } + + public void testRangeQueryMovedToFilter() { + // Range queries should always be moved to filter + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("price").gte(100).lte(500)) + .must(QueryBuilders.termQuery("category", "electronics")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Range query should be moved to filter + assertThat(rewrittenBool.filter().size(), equalTo(1)); + assertThat(rewrittenBool.filter().get(0), instanceOf(RangeQueryBuilder.class)); + + // Term query on text field should remain in must + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.must().get(0), instanceOf(TermQueryBuilder.class)); + } + + public void testNumericTermQueryMovedToFilter() { + // Term queries on numeric fields should be moved to filter + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("age", 25)) + .must(QueryBuilders.termQuery("name", "John")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Numeric term query should be moved to filter + assertThat(rewrittenBool.filter().size(), equalTo(1)); + TermQueryBuilder filterClause = (TermQueryBuilder) rewrittenBool.filter().get(0); + assertThat(filterClause.fieldName(), equalTo("age")); + + // Text term query should remain in must + assertThat(rewrittenBool.must().size(), equalTo(1)); + TermQueryBuilder mustClause = (TermQueryBuilder) rewrittenBool.must().get(0); + assertThat(mustClause.fieldName(), equalTo("name")); + } + + public void testNumericTermsQueryMovedToFilter() { + // Terms queries on numeric fields should be moved to filter + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termsQuery("count", new Object[] { 1, 2, 3 })) + .must(QueryBuilders.termsQuery("category", "A", "B", "C")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Numeric terms query should be moved to filter + assertThat(rewrittenBool.filter().size(), equalTo(1)); + TermsQueryBuilder filterClause = (TermsQueryBuilder) rewrittenBool.filter().get(0); + assertThat(filterClause.fieldName(), equalTo("count")); + + // Text terms query should remain in must + assertThat(rewrittenBool.must().size(), equalTo(1)); + TermsQueryBuilder mustClause = (TermsQueryBuilder) rewrittenBool.must().get(0); + assertThat(mustClause.fieldName(), equalTo("category")); + } + + public void testNumericMatchQueryMovedToFilter() { + // Match queries on numeric fields should be moved to filter + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.matchQuery("price", 99.99)) + .must(QueryBuilders.matchQuery("description", "high quality")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Numeric match query should be moved to filter + assertThat(rewrittenBool.filter().size(), equalTo(1)); + MatchQueryBuilder filterClause = (MatchQueryBuilder) rewrittenBool.filter().get(0); + assertThat(filterClause.fieldName(), equalTo("price")); + + // Text match query should remain in must + assertThat(rewrittenBool.must().size(), equalTo(1)); + MatchQueryBuilder mustClause = (MatchQueryBuilder) rewrittenBool.must().get(0); + assertThat(mustClause.fieldName(), equalTo("description")); + } + + public void testExistingFilterClausesPreserved() { + // Existing filter clauses should be preserved + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("price").gte(100)) + .filter(QueryBuilders.termQuery("status", "active")) + .filter(QueryBuilders.existsQuery("description")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have 3 filter clauses: moved range query + 2 existing + assertThat(rewrittenBool.filter().size(), equalTo(3)); + assertThat(rewrittenBool.must().size(), equalTo(0)); + } + + public void testShouldAndMustNotClausesUnchanged() { + // Should and must_not clauses should not be affected + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("price").gte(100)) + .should(QueryBuilders.termQuery("featured", true)) + .mustNot(QueryBuilders.termQuery("deleted", true)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Range query moved to filter + assertThat(rewrittenBool.filter().size(), equalTo(1)); + assertThat(rewrittenBool.must().size(), equalTo(0)); + + // Should and must_not unchanged + assertThat(rewrittenBool.should().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + } + + public void testNestedBooleanQueriesRewritten() { + // Nested boolean queries should also be rewritten + QueryBuilder nested = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("age").gte(18)) + .must(QueryBuilders.termQuery("active", true)); + + QueryBuilder query = QueryBuilders.boolQuery().must(nested).must(QueryBuilders.matchQuery("name", "test")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // The nested bool should be rewritten + assertThat(rewrittenBool.must().size(), equalTo(2)); + + // Find the nested bool query + BoolQueryBuilder nestedRewritten = null; + for (QueryBuilder clause : rewrittenBool.must()) { + if (clause instanceof BoolQueryBuilder) { + nestedRewritten = (BoolQueryBuilder) clause; + break; + } + } + + assertNotNull(nestedRewritten); + // The range query in the nested bool should be moved to filter + assertThat(nestedRewritten.filter().size(), equalTo(1)); + assertThat(nestedRewritten.filter().get(0), instanceOf(RangeQueryBuilder.class)); + // The term query should remain in must + assertThat(nestedRewritten.must().size(), equalTo(1)); + assertThat(nestedRewritten.must().get(0), instanceOf(TermQueryBuilder.class)); + } + + public void testBoolQueryPropertiesPreserved() { + // All bool query properties should be preserved + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("price").gte(100)) + .must(QueryBuilders.termQuery("category", "electronics")) + .boost(2.0f) + .queryName("my_query") + .minimumShouldMatch(2) + .adjustPureNegative(false); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Properties should be preserved + assertThat(rewrittenBool.boost(), equalTo(2.0f)); + assertThat(rewrittenBool.queryName(), equalTo("my_query")); + assertThat(rewrittenBool.minimumShouldMatch(), equalTo("2")); + assertThat(rewrittenBool.adjustPureNegative(), equalTo(false)); + } + + public void testNoMustClausesNoChanges() { + // Query without must clauses should not be changed + QueryBuilder query = QueryBuilders.boolQuery() + .filter(QueryBuilders.rangeQuery("price").gte(100)) + .should(QueryBuilders.termQuery("featured", true)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testNonBoolQueryUnchanged() { + // Non-bool queries should not be changed + QueryBuilder query = QueryBuilders.termQuery("field", "value"); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testNullContextStillMovesRangeQueries() { + // With null context, range queries should still be moved + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("price").gte(100)) + .must(QueryBuilders.termQuery("age", 25)); + + QueryBuilder rewritten = rewriter.rewrite(query, null); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Range query should be moved to filter even without context + assertThat(rewrittenBool.filter().size(), equalTo(1)); + assertThat(rewrittenBool.filter().get(0), instanceOf(RangeQueryBuilder.class)); + + // Term query stays in must (can't determine if numeric without context) + assertThat(rewrittenBool.must().size(), equalTo(1)); + assertThat(rewrittenBool.must().get(0), instanceOf(TermQueryBuilder.class)); + } + + public void testAllMustClausesMovedToFilter() { + // If all must clauses can be moved, they should all go to filter + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("price").gte(100)) + .must(QueryBuilders.rangeQuery("age").gte(18)) + .must(QueryBuilders.termQuery("count", 5)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // All clauses should be in filter + assertThat(rewrittenBool.filter().size(), equalTo(3)); + assertThat(rewrittenBool.must().size(), equalTo(0)); + } + + public void testComplexMixedQuery() { + // Complex query with mix of movable and non-movable clauses + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("created_date").gte("2024-01-01")) + .must(QueryBuilders.termQuery("user_id", 12345)) + .must(QueryBuilders.matchQuery("title", "opensearch")) + .must(QueryBuilders.termsQuery("status_code", new Object[] { 200, 201, 204 })) + .filter(QueryBuilders.existsQuery("description")) + .should(QueryBuilders.matchQuery("tags", "important")) + .mustNot(QueryBuilders.termQuery("deleted", true)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Range and numeric queries moved to filter + assertThat(rewrittenBool.filter().size(), equalTo(3)); // range + exists + user_id (numeric) + + // Text queries remain in must + assertThat(rewrittenBool.must().size(), equalTo(2)); // match title + terms status_code (text field) + + // Should and must_not unchanged + assertThat(rewrittenBool.should().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + } + + public void testRewriterPriority() { + // Verify rewriter has correct priority + assertThat(rewriter.priority(), equalTo(150)); + assertThat(rewriter.name(), equalTo("must_to_filter")); + } +} diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/TermsMergingRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/TermsMergingRewriterTests.java new file mode 100644 index 0000000000000..085f2c72c67c9 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/rewriters/TermsMergingRewriterTests.java @@ -0,0 +1,292 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query.rewriters; + +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; + +public class TermsMergingRewriterTests extends OpenSearchTestCase { + + private final TermsMergingRewriter rewriter = TermsMergingRewriter.INSTANCE; + private final QueryShardContext context = mock(QueryShardContext.class); + + public void testSimpleTermMergingBelowThreshold() { + // Few term queries on same field should NOT be merged (below threshold) + QueryBuilder query = QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("status", "active")) + .filter(QueryBuilders.termQuery("status", "pending")) + .filter(QueryBuilders.termQuery("status", "approved")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes expected + } + + public void testTermMergingAboveThreshold() { + // Many term queries on same field should be merged (above threshold of 16) + BoolQueryBuilder query = QueryBuilders.boolQuery(); + // Add 20 term queries for the same field + for (int i = 0; i < 20; i++) { + query.filter(QueryBuilders.termQuery("category", "cat_" + i)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have one terms query instead of 20 term queries + assertThat(rewrittenBool.filter().size(), equalTo(1)); + assertThat(rewrittenBool.filter().get(0), instanceOf(TermsQueryBuilder.class)); + + TermsQueryBuilder termsQuery = (TermsQueryBuilder) rewrittenBool.filter().get(0); + assertThat(termsQuery.fieldName(), equalTo("category")); + assertThat(termsQuery.values().size(), equalTo(20)); + } + + public void testMustClauseNoMerging() { + // Term queries in must clauses should NOT be merged (different semantics) + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("category", "electronics")) + .must(QueryBuilders.termQuery("category", "computers")) + .must(QueryBuilders.rangeQuery("price").gt(100)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have 3 queries: NO merging in must clause + assertThat(rewrittenBool.must().size(), equalTo(3)); + } + + public void testShouldClauseMergingBelowThreshold() { + // Should clauses with few terms should NOT be merged + QueryBuilder query = QueryBuilders.boolQuery() + .should(QueryBuilders.termQuery("color", "red")) + .should(QueryBuilders.termQuery("color", "blue")) + .should(QueryBuilders.termQuery("size", "large")) + .should(QueryBuilders.termQuery("size", "medium")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes expected + } + + public void testShouldClauseMergingAboveThreshold() { + // Should clauses with many terms should be merged + BoolQueryBuilder query = QueryBuilders.boolQuery(); + + // Add 20 color terms + for (int i = 0; i < 20; i++) { + query.should(QueryBuilders.termQuery("color", "color_" + i)); + } + + // Add 18 size terms + for (int i = 0; i < 18; i++) { + query.should(QueryBuilders.termQuery("size", "size_" + i)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have 2 terms queries: one for color, one for size + assertThat(rewrittenBool.should().size(), equalTo(2)); + assertThat(rewrittenBool.should().get(0), instanceOf(TermsQueryBuilder.class)); + assertThat(rewrittenBool.should().get(1), instanceOf(TermsQueryBuilder.class)); + } + + public void testMixedFieldsNoMerging() { + // Term queries on different fields should not be merged + QueryBuilder query = QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("field1", "value1")) + .filter(QueryBuilders.termQuery("field2", "value2")) + .filter(QueryBuilders.termQuery("field3", "value3")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes expected + } + + public void testExistingTermsQueryExpansionBelowThreshold() { + // Existing terms query with few additional terms should NOT be expanded (below threshold) + QueryBuilder query = QueryBuilders.boolQuery() + .filter(QueryBuilders.termsQuery("status", "active", "pending")) + .filter(QueryBuilders.termQuery("status", "approved")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes expected + } + + public void testExistingTermsQueryExpansionAboveThreshold() { + // Existing terms query should be expanded when total terms exceed threshold + String[] initialTerms = new String[14]; + for (int i = 0; i < 14; i++) { + initialTerms[i] = "status_" + i; + } + + BoolQueryBuilder query = QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery("status", initialTerms)); + + // Add 5 more term queries to exceed threshold + for (int i = 14; i < 19; i++) { + query.filter(QueryBuilders.termQuery("status", "status_" + i)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Should have one terms query with all values + assertThat(rewrittenBool.filter().size(), equalTo(1)); + TermsQueryBuilder termsQuery = (TermsQueryBuilder) rewrittenBool.filter().get(0); + assertThat(termsQuery.values().size(), equalTo(19)); + } + + public void testSingleTermQuery() { + // Single term query should not be converted to terms query + QueryBuilder query = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("field", "value")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testMustNotClauseNoMerging() { + // Must_not clauses should not be merged + QueryBuilder query = QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termQuery("status", "deleted")) + .mustNot(QueryBuilders.termQuery("status", "archived")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes to must_not + } + + public void testNestedBooleanQuery() { + // Should handle nested boolean queries with many terms + BoolQueryBuilder nested = QueryBuilders.boolQuery(); + // Add 20 term queries to exceed threshold + for (int i = 0; i < 20; i++) { + nested.filter(QueryBuilders.termQuery("status", "status_" + i)); + } + + QueryBuilder query = QueryBuilders.boolQuery().must(nested).filter(QueryBuilders.termQuery("type", "product")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Nested bool should also be rewritten + assertThat(rewrittenBool.must().size(), equalTo(1)); + BoolQueryBuilder nestedRewritten = (BoolQueryBuilder) rewrittenBool.must().get(0); + assertThat(nestedRewritten.filter().size(), equalTo(1)); + assertThat(nestedRewritten.filter().get(0), instanceOf(TermsQueryBuilder.class)); + + TermsQueryBuilder termsQuery = (TermsQueryBuilder) nestedRewritten.filter().get(0); + assertThat(termsQuery.values().size(), equalTo(20)); + } + + public void testEmptyBooleanQuery() { + // Empty boolean query should not cause issues + QueryBuilder query = QueryBuilders.boolQuery(); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testNonBooleanQuery() { + // Non-boolean queries should be returned as-is + QueryBuilder query = QueryBuilders.termQuery("field", "value"); + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); + } + + public void testBoostPreservation() { + // Boost values should be preserved when merging many terms + BoolQueryBuilder query = QueryBuilders.boolQuery(); + + // Add 20 terms with same boost + for (int i = 0; i < 20; i++) { + query.filter(QueryBuilders.termQuery("status", "status_" + i).boost(2.0f)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + assertThat(rewrittenBool.filter().size(), equalTo(1)); + TermsQueryBuilder termsQuery = (TermsQueryBuilder) rewrittenBool.filter().get(0); + assertThat(termsQuery.boost(), equalTo(2.0f)); + assertThat(termsQuery.values().size(), equalTo(20)); + } + + public void testMixedBoostNoMerging() { + // Different boost values should prevent merging + QueryBuilder query = QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("status", "active").boost(1.0f)) + .filter(QueryBuilders.termQuery("status", "pending").boost(2.0f)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes due to different boosts + } + + public void testLargeTermsMerging() { + // Test merging a large number of term queries + BoolQueryBuilder query = QueryBuilders.boolQuery(); + for (int i = 0; i < 50; i++) { + query.filter(QueryBuilders.termQuery("field", "value" + i)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + BoolQueryBuilder result = (BoolQueryBuilder) rewritten; + + assertThat(result.filter().size(), equalTo(1)); + assertThat(result.filter().get(0), instanceOf(TermsQueryBuilder.class)); + TermsQueryBuilder terms = (TermsQueryBuilder) result.filter().get(0); + assertThat(terms.values().size(), equalTo(50)); + } + + public void testMixedTermsAndTermQueriesBelowThreshold() { + // Mix of existing terms queries and term queries with few values + QueryBuilder query = QueryBuilders.boolQuery() + .filter(QueryBuilders.termsQuery("field", "v1", "v2")) + .filter(QueryBuilders.termQuery("field", "v3")) + .filter(QueryBuilders.termsQuery("field", "v4", "v5")) + .filter(QueryBuilders.termQuery("field", "v6")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertSame(query, rewritten); // No changes expected (total 6 values < 16) + } + + public void testMixedTermsAndTermQueriesAboveThreshold() { + // Mix of existing terms queries and term queries with many values + String[] initialValues = new String[10]; + for (int i = 0; i < 10; i++) { + initialValues[i] = "v" + i; + } + + BoolQueryBuilder query = QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery("field", initialValues)); + + // Add more term queries to exceed threshold + for (int i = 10; i < 20; i++) { + query.filter(QueryBuilders.termQuery("field", "v" + i)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + BoolQueryBuilder result = (BoolQueryBuilder) rewritten; + + // Should merge all into a single terms query + assertThat(result.filter().size(), equalTo(1)); + assertThat(result.filter().get(0), instanceOf(TermsQueryBuilder.class)); + TermsQueryBuilder merged = (TermsQueryBuilder) result.filter().get(0); + assertThat(merged.values().size(), equalTo(20)); + } + +}