Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Service does not start on Windows with OpenJDK ([#20615](https://github.com/opensearch-project/OpenSearch/pull/20615))
- Update RemoteClusterStateCleanupManager to perform batched deletions of stale ClusterMetadataManifests and address deletion timeout issues ([#20566](https://github.com/opensearch-project/OpenSearch/pull/20566))
- Fix the regression of terms agg optimization at high cardinality ([#20623](https://github.com/opensearch-project/OpenSearch/pull/20623))
- Leveraging segment-global ordinal mapping for efficient terms aggregation ([#20624](https://github.com/opensearch-project/OpenSearch/pull/20624))
- Support Docker distribution builds for ppc64le, arm64 and s390x ([#20678](https://github.com/opensearch-project/OpenSearch/pull/20678))

### Dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,28 +207,21 @@ boolean tryCollectFromTermFrequencies(LeafReaderContext ctx, BiConsumer<Long, In
return false;
}

TermsEnum indexTermsEnum = segmentTerms.iterator();
BytesRef indexTerm = indexTermsEnum.next();
final SortedSetDocValues globalOrds = this.getGlobalOrds(ctx);
TermsEnum globalOrdinalTermsEnum = globalOrds.termsEnum();
BytesRef ordinalTerm = globalOrdinalTermsEnum.next();

// Iterate over the terms in the segment, look for matches in the global ordinal terms,
// and increment bucket count when segment terms match global ordinal terms.
while (indexTerm != null && ordinalTerm != null) {
int compare = indexTerm.compareTo(ordinalTerm);
if (compare == 0) {
if (acceptedGlobalOrdinals.test(globalOrdinalTermsEnum.ord())) {
ordCountConsumer.accept(globalOrdinalTermsEnum.ord(), indexTermsEnum.docFreq());
}
indexTerm = indexTermsEnum.next();
ordinalTerm = globalOrdinalTermsEnum.next();
} else if (compare < 0) {
indexTerm = indexTermsEnum.next();
} else {
ordinalTerm = globalOrdinalTermsEnum.next();
final TermsEnum segmentTermsEnum = segmentTerms.iterator();
final LongUnaryOperator globalOrdsMapping = valuesSource.globalOrdinalsMapping(ctx);

// Iterate over the ordinals in the segment, map each one to its global ordinal,
// and report the term's doc frequency for every global ordinal that is accepted.
for (long segmentOrd = 0; segmentOrd < termCount; segmentOrd++) {
segmentTermsEnum.next();
long globalOrd = globalOrdsMapping.applyAsLong(segmentOrd);
if (acceptedGlobalOrdinals.test(globalOrd)) {
ordCountConsumer.accept(globalOrd, segmentTermsEnum.docFreq());
}
}

// Segment terms enum should be fully exhausted after iterating over all the ordinals
assert segmentTermsEnum.next() == null : "Segment terms enum must be fully exhausted";
return true;
}

Expand Down Expand Up @@ -565,11 +558,10 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
@Override
protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException {
if (subAggregators.length == 0) {
if (mapping != null) {
mapSegmentCountsToGlobalCounts(mapping);
}
mapping = valuesSource.globalOrdinalsMapping(ctx);
return tryCollectFromTermFrequencies(ctx, (ord, docCount) -> incrementBucketDocCount(mapping.applyAsLong(ord), docCount));
return tryCollectFromTermFrequencies(
ctx,
(globalOrd, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, globalOrd), docCount)
);
}
return tryStarTreePrecompute(ctx);
}
Expand Down
Loading