-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Term query I/O concurrency optimization #20415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -468,8 +468,13 @@ private SimpleTopDocsCollectorContext( | |
| totalHitsSupplier = () -> new TotalHits(0, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO); | ||
| hitCount = -1; | ||
| } else { | ||
| boolean deferShortcutTotalHitCount = deferShortcutTotalHitCount(hasFilterCollector, reader.hasDeletions(), query); | ||
| if (deferShortcutTotalHitCount) { | ||
| this.hitCount = 0; | ||
| } else { | ||
| this.hitCount = hasFilterCollector ? -1 : shortcutTotalHitCount(reader, query); | ||
| } | ||
| // implicit total hit counts are valid only when there is no filter collector in the chain | ||
| this.hitCount = hasFilterCollector ? -1 : shortcutTotalHitCount(reader, query); | ||
| if (this.hitCount == -1) { | ||
| topDocsCollector = createCollector(sortAndFormats, numHits, searchAfter, trackTotalHitsUpTo); | ||
| topDocsSupplier = new CachedSupplier<>(topDocsCollector::topDocs); | ||
|
|
@@ -478,7 +483,7 @@ private SimpleTopDocsCollectorContext( | |
| // don't compute hit counts via the collector | ||
| topDocsCollector = createCollector(sortAndFormats, numHits, searchAfter, 1); | ||
| topDocsSupplier = new CachedSupplier<>(topDocsCollector::topDocs); | ||
| totalHitsSupplier = () -> new TotalHits(this.hitCount, TotalHits.Relation.EQUAL_TO); | ||
| totalHitsSupplier = shortcutTotalHitCountSupplier(deferShortcutTotalHitCount, this.hitCount, reader, query); | ||
| } | ||
| } | ||
| MaxScoreCollector maxScoreCollector = null; | ||
|
|
@@ -511,6 +516,21 @@ private SimpleTopDocsCollectorContext( | |
| this.collector = MultiCollector.wrap(topDocsCollector, maxScoreCollector); | ||
| } | ||
|
|
||
| private Supplier<TotalHits> shortcutTotalHitCountSupplier( | ||
| boolean deferShortcutTotalHitCount, | ||
| int computedHitCount, | ||
| IndexReader reader, | ||
| Query query | ||
| ) throws IOException { | ||
| long shortcutTotalHitCnt; | ||
| if (deferShortcutTotalHitCount) { | ||
| shortcutTotalHitCnt = shortcutTotalHitCount(reader, query); | ||
| } else { | ||
| shortcutTotalHitCnt = computedHitCount; | ||
| } | ||
| return () -> (new TotalHits(shortcutTotalHitCnt, TotalHits.Relation.EQUAL_TO)); | ||
| } | ||
|
|
||
| private class SimpleTopDocsCollectorManager | ||
| implements | ||
| CollectorManager<Collector, ReduceableSearchResult>, | ||
|
|
@@ -621,7 +641,7 @@ TopDocsAndMaxScore newTopDocs(final TopDocs topDocs, final float maxScore, final | |
| if (hitCount == -1) { | ||
| totalHits = topDocs.totalHits; | ||
| } else { | ||
| totalHits = new TotalHits(hitCount, TotalHits.Relation.EQUAL_TO); | ||
| totalHits = totalHitsSupplier.get(); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -765,21 +785,7 @@ void postProcess(QuerySearchResult result) throws IOException { | |
| * -1 otherwise. | ||
| */ | ||
| static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOException { | ||
| while (true) { | ||
| // remove wrappers that don't matter for counts | ||
| // this is necessary so that we don't only optimize match_all | ||
| // queries but also match_all queries that are nested in | ||
| // a constant_score query | ||
| if (query instanceof ConstantScoreQuery constantScoreQuery) { | ||
| query = constantScoreQuery.getQuery(); | ||
| } else if (query instanceof BoostQuery boostQuery) { | ||
| query = boostQuery.getQuery(); | ||
| } else if (query instanceof ApproximateScoreQuery approximateScoreQuery) { | ||
| query = approximateScoreQuery.getOriginalQuery(); | ||
| } else { | ||
| break; | ||
| } | ||
| } | ||
| query = removeWrappersFromQuery(query); | ||
| if (query.getClass() == MatchAllDocsQuery.class) { | ||
| return reader.numDocs(); | ||
| } else if (query.getClass() == TermQuery.class && reader.hasDeletions() == false) { | ||
|
|
@@ -817,6 +823,33 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep | |
| } | ||
| } | ||
|
|
||
| static boolean deferShortcutTotalHitCount(boolean hasFilterCollector, boolean hasDeletions, Query query) { | ||
| if (hasFilterCollector) { | ||
| return false; | ||
| } | ||
| query = removeWrappersFromQuery(query); | ||
| return (query.getClass() == TermQuery.class && hasDeletions == false); | ||
| } | ||
|
|
||
| static Query removeWrappersFromQuery(Query query) { | ||
| while (true) { | ||
| // remove wrappers that don't matter for counts | ||
| // this is necessary so that we don't only optimize match_all | ||
| // queries but also match_all queries that are nested in | ||
| // a constant_score query | ||
| if (query instanceof ConstantScoreQuery constantScoreQuery) { | ||
| query = constantScoreQuery.getQuery(); | ||
| } else if (query instanceof BoostQuery boostQuery) { | ||
| query = boostQuery.getQuery(); | ||
| } else if (query instanceof ApproximateScoreQuery approximateScoreQuery) { | ||
| query = approximateScoreQuery.getOriginalQuery(); | ||
| } else { | ||
| break; | ||
| } | ||
| } | ||
| return query; | ||
| } | ||
|
Comment on lines
+834
to
+851
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Find classes that extend Query and have a getQuery() method
cd server/src/main/java || cd .
rg -t java 'class \w+.*extends.*Query' -A 20 | grep -E '(^[^:]*class|getQuery\(\)|private.*Query|public.*Query)' | head -100Repository: opensearch-project/OpenSearch Length of output: 13497 🏁 Script executed: # More targeted search for Query wrapper types
find . -name "*.java" -type f | xargs grep -l 'extends.*Query' | head -20Repository: opensearch-project/OpenSearch Length of output: 1945 🏁 Script executed: # Check what wrapper types are actually used in TopDocsCollectorContext
rg -B 5 -A 5 'instanceof.*Query' server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.javaRepository: opensearch-project/OpenSearch Length of output: 1295 🏁 Script executed: # Check if IndexOrDocValuesQuery is used in codebase
rg -i 'IndexOrDocValuesQuery' --type javaRepository: opensearch-project/OpenSearch Length of output: 33093 🏁 Script executed: # Find all usages of removeWrappersFromQuery to understand its purpose
rg 'removeWrappersFromQuery' -B 3 -A 3 server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.javaRepository: opensearch-project/OpenSearch Length of output: 848 🏁 Script executed: # Check what query wrapper types extend Query and have getter methods
rg -t java 'class \w+.*Query.*extends.*Query' -A 5 | grep -E '(class.*Query|getQuery\(|get.*Query\(|Original)' | head -50Repository: opensearch-project/OpenSearch Length of output: 7940 🏁 Script executed: # Confirm the method context and impact
rg -B 10 'shortcutTotalHitCount' server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java | head -30Repository: opensearch-project/OpenSearch Length of output: 2188 Add handling for IndexOrDocValuesQuery wrapper type. The Add an else-if branch: } else if (query instanceof IndexOrDocValuesQuery indexOrDocValuesQuery) {
query = indexOrDocValuesQuery.getIndexQuery();🤖 Prompt for AI Agents |
||
|
|
||
| /** | ||
| * Creates a {@link TopDocsCollectorContext} from the provided <code>searchContext</code>. | ||
| * @param hasFilterCollector True if the collector chain contains at least one collector that can filters document. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Deferral does not actually defer I/O — computation happens eagerly.
The
shortcutTotalHitCount(reader, query)call at line 527 executes immediately during object construction, not when the supplier is invoked. This defeats the I/O concurrency optimization since the term counting I/O still blocks the constructor.For true deferral, move the computation inside the supplier. You'll need to handle the
IOException— consider wrapping withCachedSupplierand converting toUncheckedIOException:Proposed fix for lazy evaluation
private Supplier<TotalHits> shortcutTotalHitCountSupplier( boolean deferShortcutTotalHitCount, int computedHitCount, IndexReader reader, Query query -) throws IOException { - long shortcutTotalHitCnt; +) { if (deferShortcutTotalHitCount) { - shortcutTotalHitCnt = shortcutTotalHitCount(reader, query); + return new CachedSupplier<>(() -> { + try { + return new TotalHits(shortcutTotalHitCount(reader, query), TotalHits.Relation.EQUAL_TO); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); } else { - shortcutTotalHitCnt = computedHitCount; + return () -> new TotalHits(computedHitCount, TotalHits.Relation.EQUAL_TO); } - return () -> (new TotalHits(shortcutTotalHitCnt, TotalHits.Relation.EQUAL_TO)); }Based on learnings, avoid evaluating a supplier upfront during object construction; resolve at use time to preserve lazy behavior.
🤖 Prompt for AI Agents