Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
3ee4663
Added basic bool approx class
sawansri Jul 3, 2025
e0621f3
Ensure setcontext is called on query
sawansri Jul 11, 2025
d2f7fe3
implement createWeight in ApproximateBooleanQuery
sawansri Jul 17, 2025
f58229a
Create basic createWeight to bulkScorer outline
sawansri Jul 17, 2025
30013e4
custom weight implementation using default bulkscorer
sawansri Jul 17, 2025
cc01570
updated canApproximate
sawansri Jul 18, 2025
e2f69ba
created basic skeletons of custom classes
sawansri Jul 18, 2025
7b8c505
implemented BKDState in ResumableDISI
sawansri Jul 18, 2025
05cb03c
working ResumableDISI integration into ApproximatePointRangeQuery
sawansri Jul 19, 2025
a139c44
Working multi clause boolean approximation (finished at the airport 😃)
sawansri Jul 19, 2025
6f27785
changed implementation to only create ResumableDISIs for approximated…
sawansri Jul 26, 2025
35efcde
implemented truly resumable scoring
sawansri Jul 26, 2025
d91f825
enabled resumableDISI expansions for multiple clauses
sawansri Jul 26, 2025
b27ebc4
potential fix for missing docs
sawansri Jul 28, 2025
972ce14
more like Lucene's ConjunctionDISI
sawansri Jul 28, 2025
f6b9acc
Make BKDState a ScorerSupplier instance variable
sawansri Jul 28, 2025
2f44e3a
use Lucene's ConjunctionDISI
sawansri Jul 29, 2025
349a9f2
update state management in ApproximatePointRangeQuery
sawansri Jul 29, 2025
6d51f1e
working ApproximatePointRangeQuery
sawansri Jul 31, 2025
bb5dffb
correct state saving logic in intersectLeft
sawansri Jul 31, 2025
29a6090
before iterative tree traversal for resumability
sawansri Aug 7, 2025
69c5b4c
iterative bkd traversal
sawansri Aug 8, 2025
51389fd
more updates
sawansri Aug 9, 2025
58c139d
Added early termination in bulk scorer
sawansri Aug 9, 2025
b2bf378
added new iterative approach + debugging
sawansri Aug 12, 2025
5d98426
added bulk window scoring approach
sawansri Aug 12, 2025
17daf2d
cleaned up code and fixed single clause bool
sawansri Aug 12, 2025
29f9505
add proper bounds and license header
sawansri Aug 13, 2025
bf9f58d
fixed failing highlighter test + nested bool check
sawansri Aug 14, 2025
345a6ad
add unit tests
sawansri Aug 14, 2025
c162ddf
fix more tests
sawansri Aug 14, 2025
b7b924d
partially working integ tests
sawansri Aug 18, 2025
9f2a326
fixed duplicate doc collecting + more integ tests
sawansri Aug 22, 2025
511603f
added more tests
sawansri Aug 22, 2025
75facd3
corrected canApproximate tests
sawansri Aug 22, 2025
90a4126
added proper isTopLevel + desc sort
sawansri Aug 22, 2025
3898863
gradle check
sawansri Aug 22, 2025
7185528
rename vars + update changelog
sawansri Nov 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Implement GRPC Search params `Highlight`and `Sort` ([#19868](https://github.com/opensearch-project/OpenSearch/pull/19868))
- Implement GRPC ConstantScoreQuery, FuzzyQuery, MatchBoolPrefixQuery, MatchPhrasePrefix, PrefixQuery, MatchQuery ([#19854](https://github.com/opensearch-project/OpenSearch/pull/19854))
- Add async periodic flush task support for pull-based ingestion ([#19878](https://github.com/opensearch-project/OpenSearch/pull/19878))
- Multifold Improvement in Multi-Clause Boolean Query, Window Scoring Approach ([#19046](https://github.com/opensearch-project/OpenSearch/pull/19046))


### Changed
- Faster `terms` query creation for `keyword` field with index and docValues enabled ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
import org.opensearch.core.xcontent.ObjectParser;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.search.approximate.ApproximateBooleanQuery;
import org.opensearch.search.approximate.ApproximateScoreQuery;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -335,7 +337,18 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
}

Query query = Queries.applyMinimumShouldMatch(booleanQuery, minimumShouldMatch);
return adjustPureNegative ? fixNegativeQueryIfNeeded(query) : query;

if (adjustPureNegative) {
query = fixNegativeQueryIfNeeded(query);
}

// limit approximate query construction since several mappers (prefixQuery) expect a BooleanQuery not ApproximateBooleanQuery
if (query instanceof BooleanQuery boolQuery
&& (boolQuery.getClauses(Occur.FILTER).size() == boolQuery.clauses().size() || boolQuery.clauses().size() == 1)) {
return new ApproximateScoreQuery(query, new ApproximateBooleanQuery(boolQuery));
}

return query;
}

private static void addBooleanClauses(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.approximate;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.opensearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * An approximate-able version of {@link BooleanQuery}. For single clause boolean queries,
 * it unwraps the query into the singular clause and ensures approximation is applied.
 * Multi-clause queries are handled by a custom weight and are only considered approximable
 * when every clause is a {@code FILTER} clause (see {@link #canApproximate(SearchContext)}).
 */
public class ApproximateBooleanQuery extends ApproximateQuery {
    /** The wrapped boolean query. Public for existing callers; prefer {@link #getBooleanQuery()}. */
    public final BooleanQuery boolQuery;
    /** Forwarded to {@code ApproximateBooleanScorerSupplier}; presumably the hit budget -- confirm against that class. */
    private final int size;
    /** Clauses of {@link #boolQuery}, captured once at construction. */
    private final List<BooleanClause> clauses;
    private boolean isTopLevel = true; // Default to true, set to false when nested in boolean query

    /**
     * Creates an approximate boolean query using {@link SearchContext#DEFAULT_TRACK_TOTAL_HITS_UP_TO} as the size.
     *
     * @param boolQuery the boolean query to wrap
     */
    public ApproximateBooleanQuery(BooleanQuery boolQuery) {
        this(boolQuery, SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO);
    }

    /**
     * @param boolQuery the boolean query to wrap
     * @param size      value handed to the multi-clause scorer supplier
     */
    protected ApproximateBooleanQuery(BooleanQuery boolQuery, int size) {
        this.boolQuery = boolQuery;
        this.size = size;
        this.clauses = boolQuery.clauses();
    }

    /** @return the wrapped boolean query */
    public BooleanQuery getBooleanQuery() {
        return boolQuery;
    }

    /** @return {@code true} unless this query has been marked as nested via {@link #setTopLevel(boolean)} */
    public boolean isTopLevel() {
        return this.isTopLevel;
    }

    /**
     * Marks whether this query is the top-level query. A query that is not top level is never approximated.
     *
     * @param isTopLevel {@code false} when this query is nested inside another boolean query
     */
    public void setTopLevel(boolean isTopLevel) {
        this.isTopLevel = isTopLevel;
    }

    /** Delegates to the parent implementation unchanged. */
    @Override
    public Query rewrite(IndexSearcher indexSearcher) throws IOException {
        return super.rewrite(indexSearcher);
    }

    /**
     * Recursively unwraps single-clause boolean queries (plain or approximate) down to their only clause.
     * A boolean query with any other number of clauses is returned as-is; every other query type is
     * rewritten against the searcher.
     *
     * @param query         the query to unwrap or rewrite
     * @param indexSearcher the searcher used to rewrite non-boolean queries
     * @return the unwrapped or rewritten query
     * @throws RuntimeException wrapping the {@link IOException} if rewriting fails
     */
    public static Query boolRewrite(Query query, IndexSearcher indexSearcher) {
        final BooleanQuery bool;
        if (query instanceof BooleanQuery plainBool) {
            bool = plainBool;
        } else if (query instanceof ApproximateBooleanQuery appxBool) {
            bool = appxBool.getBooleanQuery();
        } else {
            bool = null;
        }

        if (bool != null) {
            final List<BooleanClause> boolClauses = bool.clauses();
            return boolClauses.size() == 1 ? boolRewrite(boolClauses.get(0).query(), indexSearcher) : query;
        }

        try {
            return query.rewrite(indexSearcher);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Decides whether this boolean query may be approximated for the given search.
     * <p>
     * Approximation is refused when there is no context, when accurate total hits are requested,
     * when aggregations or highlighting are present, or when this query is not top level.
     * A single non-{@code MUST_NOT} clause is approximable iff it is an {@link ApproximateScoreQuery}
     * whose approximation query is itself approximable. Otherwise every clause must be a
     * {@code FILTER} clause, none may be a nested boolean query, and at least one must wrap an
     * {@link ApproximatePointRangeQuery}.
     *
     * @param context the current search context, may be {@code null}
     * @return {@code true} if the approximate execution path can be used
     */
    @Override
    protected boolean canApproximate(SearchContext context) {
        if (context == null) {
            return false;
        }

        // Don't approximate if we need accurate total hits
        if (context.trackTotalHitsUpTo() == SearchContext.TRACK_TOTAL_HITS_ACCURATE) {
            return false;
        }

        // Don't approximate if we have aggregations
        if (context.aggregations() != null) {
            return false;
        }

        // Don't approximate if highlighting is enabled
        if (context.highlight() != null) {
            return false;
        }

        if (!isTopLevel) {
            return false;
        }

        // For single clause boolean queries, check if the clause can be approximated
        if (clauses.size() == 1 && clauses.get(0).occur() != BooleanClause.Occur.MUST_NOT) {
            // Virtual dispatch already covers a nested ApproximateBooleanQuery, so no special case is needed.
            if (clauses.get(0).query() instanceof ApproximateScoreQuery approximateScoreQuery) {
                return approximateScoreQuery.getApproximationQuery().canApproximate(context);
            }
            return false;
        }

        boolean hasApproximate = false;

        // multi clause case - we might want to consider strategies for nested cases, for now limit to just top level
        for (BooleanClause clause : clauses) {
            if (clause.occur() != BooleanClause.Occur.FILTER) {
                return false;
            }

            final Query clauseQuery = clause.query();

            // Nested boolean clauses are not supported yet. This check has to sit outside the
            // ApproximateScoreQuery branch: a plain BooleanQuery is never an ApproximateScoreQuery.
            if (clauseQuery instanceof BooleanQuery) {
                return false;
            }

            if (clauseQuery instanceof ApproximateScoreQuery appxScore) {
                final Query approximation = appxScore.getApproximationQuery();
                if (approximation instanceof ApproximateBooleanQuery) {
                    return false;
                }
                if (approximation instanceof ApproximatePointRangeQuery) {
                    hasApproximate = true;
                }
            }
        }

        return hasApproximate;
    }

    /**
     * Creates the weight for this query. A single non-{@code MUST_NOT} clause is delegated to that
     * clause (wrapped in a {@link ConstantScoreQuery} if it is not one already); anything else uses
     * the custom multi-clause weight.
     */
    @Override
    public ConstantScoreWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
        // For single clause boolean queries, delegate to the clause's createWeight
        if (clauses.size() == 1 && clauses.get(0).occur() != BooleanClause.Occur.MUST_NOT) {
            Query clauseQuery = clauses.get(0).query();

            // If it's a scoring query, wrap it in a ConstantScoreQuery to ensure constant scoring
            if (!(clauseQuery instanceof ConstantScoreQuery)) {
                clauseQuery = new ConstantScoreQuery(clauseQuery);
            }

            final Weight clauseWeight = clauseQuery.createWeight(searcher, scoreMode, boost);
            // ConstantScoreQuery may hand back its inner weight directly when scores are not needed,
            // and that weight is not guaranteed to be a ConstantScoreWeight; avoid a blind downcast.
            if (clauseWeight instanceof ConstantScoreWeight constantScoreWeight) {
                return constantScoreWeight;
            }
            return new DelegatingConstantScoreWeight(clauseWeight, boost);
        }

        // For multi-clause boolean queries, create a custom weight
        return new ApproximateBooleanWeight(searcher, scoreMode, boost);
    }

    /**
     * Adapts an arbitrary {@link Weight} to the {@link ConstantScoreWeight} return type of
     * {@link #createWeight(IndexSearcher, ScoreMode, float)} by forwarding to the wrapped weight.
     */
    private class DelegatingConstantScoreWeight extends ConstantScoreWeight {
        private final Weight delegate;

        DelegatingConstantScoreWeight(Weight delegate, float boost) {
            super(ApproximateBooleanQuery.this, boost);
            this.delegate = delegate;
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            return delegate.isCacheable(ctx);
        }

        @Override
        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
            return delegate.scorerSupplier(context);
        }
    }

    /**
     * Custom Weight implementation for ApproximateBooleanQuery that handles multi-clause boolean queries.
     * It builds one weight per clause and hands them to {@code ApproximateBooleanScorerSupplier}.
     */
    private class ApproximateBooleanWeight extends ConstantScoreWeight {
        private final ScoreMode scoreMode;
        private final IndexSearcher searcher;
        private final float boost;

        public ApproximateBooleanWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
            super(ApproximateBooleanQuery.this, boost);
            // Clause weights are created lazily in scorerSupplier; only the inputs are captured here.
            this.scoreMode = scoreMode;
            this.searcher = searcher;
            this.boost = boost;
        }

        /** Never cacheable. */
        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            return false;
        }

        @Override
        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
            // For multi-clause boolean queries, create a custom scorer supplier.
            // NOTE(review): clause weights are rebuilt on every call (i.e. per leaf); hoisting them into
            // the constructor looks possible but needs confirming against any per-weight state.
            List<Weight> clauseWeights = new ArrayList<>(clauses.size());
            for (BooleanClause clause : clauses) {
                Weight weight = clause.query().createWeight(searcher, scoreMode, boost);
                clauseWeights.add(weight);
            }

            return new ApproximateBooleanScorerSupplier(clauseWeights, scoreMode, boost, size, context);
        }

    }

    @Override
    public String toString(String s) {
        return "ApproximateBooleanQuery(" + boolQuery.toString(s) + ")";
    }

    /** Visits the wrapped boolean query. */
    @Override
    public void visit(QueryVisitor queryVisitor) {
        boolQuery.visit(queryVisitor);
    }

    /** Two instances are equal when they have the same class, the same size and equal wrapped queries. */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        ApproximateBooleanQuery that = (ApproximateBooleanQuery) o;
        return size == that.size && boolQuery.equals(that.boolQuery);
    }

    /** Hashes the same components that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = classHash();
        result = 31 * result + boolQuery.hashCode();
        result = 31 * result + size;
        return result;
    }
}
Loading
Loading