From 0d792574406f5d58504c616578f313f2e2bc6049 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 16:09:57 -0800 Subject: [PATCH 1/8] Leveraging segment-global ordinal mapping for efficient terms aggregation Signed-off-by: Ankit Jain --- .../GlobalOrdinalsStringTermsAggregator.java | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 79cb9e39608fe..adcb86b5c428e 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -92,6 +92,7 @@ import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_DOCS; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; /** * An aggregator of string values that relies on global ordinals in order to build buckets. @@ -207,27 +208,18 @@ boolean tryCollectFromTermFrequencies(LeafReaderContext ctx, BiConsumer Date: Thu, 19 Feb 2026 17:13:20 -0800 Subject: [PATCH 2/8] Fixing minor bug Signed-off-by: Ankit Jain --- .../GlobalOrdinalsStringTermsAggregator.java | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index adcb86b5c428e..6cbe6b751d627 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -210,17 +210,18 @@ boolean tryCollectFromTermFrequencies(LeafReaderContext ctx, BiConsumer incrementBucketDocCount(mapping.applyAsLong(ord), docCount)); + return tryCollectFromTermFrequencies(ctx, (globalOrd, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, globalOrd), docCount)); } return tryStarTreePrecompute(ctx); } From 0196bc19c27a98f0efbd16cc94daff3876b482e6 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 18:56:27 -0800 Subject: [PATCH 3/8] Applying spotless fix Signed-off-by: Ankit Jain --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 6cbe6b751d627..476b1ee6b2291 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -558,7 +558,10 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { @Override protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException { if (subAggregators.length == 0) { - return tryCollectFromTermFrequencies(ctx, (globalOrd, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, globalOrd), docCount)); + return tryCollectFromTermFrequencies( + ctx, + (globalOrd, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, globalOrd), docCount) + ); } return tryStarTreePrecompute(ctx); } From 04b98b15bfe260b2ecb3816034be90969495f0e1 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 20:50:17 -0800 Subject: [PATCH 4/8] Adding changelog entry Signed-off-by: Ankit Jain --- CHANGELOG.md | 1 + .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b37c3e4e74161..d92fd2314380d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Service does not start on Windows with OpenJDK ([#20615](https://github.com/opensearch-project/OpenSearch/pull/20615)) - Update RemoteClusterStateCleanupManager to performed batched deletions of stale ClusterMetadataManifests and address deletion timeout issues ([#20566](https://github.com/opensearch-project/OpenSearch/pull/20566)) - Fix the regression of terms agg optimization at high cardinality ([#20623](https://github.com/opensearch-project/OpenSearch/pull/20623)) +- Leveraging segment-global ordinal mapping for efficient terms aggregation ([#20624](https://github.com/opensearch-project/OpenSearch/pull/20624)) - Support Docker distribution builds for ppc64le, arm64 and s390x ([#20678](https://github.com/opensearch-project/OpenSearch/pull/20678)) ### Dependencies diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 476b1ee6b2291..88215513eafef 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -91,7 +91,6 @@ import java.util.function.LongUnaryOperator; import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; -import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_DOCS; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; /** From d2a4d36be69d943b263cf55a1afde7546efcd2fc Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 21:33:27 -0800 Subject: [PATCH 5/8] Reverting unintended change Signed-off-by: Ankit Jain --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 88215513eafef..42610f2dd45da 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -91,7 +91,7 @@ import java.util.function.LongUnaryOperator; import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; -import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; +import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_DOCS; /** * An aggregator of string values that relies on global ordinals in order to build buckets. From 0e0882c09f8b712d876e4951af31118eaf8b2caf Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Fri, 20 Feb 2026 23:13:31 -0800 Subject: [PATCH 6/8] Temporarily increasing the max precompute limit to test performance Signed-off-by: Ankit Jain --- server/src/main/java/org/opensearch/search/SearchService.java | 2 +- .../main/java/org/opensearch/search/internal/SearchContext.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 4a74170f71271..256dd2a0a492c 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -444,7 +444,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv public static final Setting TERMS_AGGREGATION_MAX_PRECOMPUTE_CARDINALITY = Setting.longSetting( "search.aggregations.terms.max_precompute_cardinality", - 30_000L, + 30_000_000_000L, 0L, Property.Dynamic, Property.NodeScope diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index 45b0a7a11cb29..924709906934a 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -562,7 +562,7 @@ public int cardinalityAggregationPruningThreshold() { @ExperimentalApi public long termsAggregationMaxPrecomputeCardinality() { - return 30_000L; + return 30_000_000_000L; } public CardinalityAggregationContext cardinalityAggregationContext() { From 8e3729bce03530c125fa3013bb3cd90907f3aa86 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Sun, 22 Feb 2026 23:35:50 -0800 Subject: [PATCH 7/8] Revert "Temporarily increasing the max precompute limit to test performance" This reverts commit 0e0882c09f8b712d876e4951af31118eaf8b2caf. Signed-off-by: Ankit Jain --- server/src/main/java/org/opensearch/search/SearchService.java | 2 +- .../main/java/org/opensearch/search/internal/SearchContext.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 256dd2a0a492c..4a74170f71271 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -444,7 +444,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv public static final Setting TERMS_AGGREGATION_MAX_PRECOMPUTE_CARDINALITY = Setting.longSetting( "search.aggregations.terms.max_precompute_cardinality", - 30_000_000_000L, + 30_000L, 0L, Property.Dynamic, Property.NodeScope diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index 924709906934a..45b0a7a11cb29 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -562,7 +562,7 @@ public int cardinalityAggregationPruningThreshold() { @ExperimentalApi public long termsAggregationMaxPrecomputeCardinality() { - return 30_000_000_000L; + return 30_000L; } public CardinalityAggregationContext cardinalityAggregationContext() { From 3e785914986451f40eb107c1a25fdcd76322f9b4 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Sun, 22 Feb 2026 23:36:19 -0800 Subject: [PATCH 8/8] Minor refactor Signed-off-by: Ankit Jain --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 42610f2dd45da..f94d5b13df286 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -209,18 +209,19 @@ boolean tryCollectFromTermFrequencies(LeafReaderContext ctx, BiConsumer