From 009b8066433c0bd5ce279865650319af81339498 Mon Sep 17 00:00:00 2001 From: panguixin Date: Mon, 22 Jul 2024 14:08:40 +0800 Subject: [PATCH 1/5] search_after query optimization with missing value comparison Signed-off-by: panguixin --- .../test/search/90_search_after.yml | 58 ++++++++++ .../org/opensearch/search/SearchService.java | 52 +++++++-- .../search/internal/ContextIndexSearcher.java | 2 +- .../searchafter/SearchAfterBuilder.java | 4 + .../opensearch/search/SearchServiceTests.java | 108 +++++++++++------- 5 files changed, 172 insertions(+), 52 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml index 89fc705a24b34..043a2e8a9e24c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml @@ -779,3 +779,61 @@ - match: { hits.hits.2._index: test } - match: { hits.hits.2._source.unsignedlong: null } - match: { hits.hits.2._source.id: 22 } + +--- +"search with null search after value": + - do: + indices.create: + index: test + body: + mappings: + properties: + doc: + type: keyword + name: + type: keyword + + - do: + bulk: + refresh: true + index: test + body: | + {"index":{}} + { "doc": "doc1", "name": "bob"} + {"index":{}} + { "doc": "doc2", "name": null} + {"index":{}} + { "doc": "doc3", "name": null} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + size: 2 + track_total_hits: false + sort: [{ name: desc }, { doc: desc }] + + - match: {hits.total: -1 } + - length: {hits.hits: 2 } + - match: {hits.hits.0._index: test } + - match: {hits.hits.0._source.doc: doc1 } + - match: {hits.hits.1._index: test } + - match: {hits.hits.1._source.doc: doc3 } + - match: {hits.hits.1.sort: [null, "doc3"] } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + size: 1 + track_total_hits: false + sort: [{ name: desc }, { doc: desc }] + search_after: [null, "doc3"] + + - match: {hits.total: -1 } + - length: {hits.hits: 1 } + - match: {hits.hits.0._index: test } + - match: {hits.hits.0._source.doc: doc2 } + - match: {hits.hits.0.sort: [null, "doc2"] } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 65bbfe62c994e..351ca4613ef85 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -36,7 +36,9 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.BytesRef; import org.opensearch.OpenSearchException; import org.opensearch.action.ActionRunnable; import org.opensearch.action.IndicesRequest; @@ -1692,7 +1694,10 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre ); Rewriteable.rewrite(request.getRewriteable(), context, false); final boolean aliasFilterCanMatch = request.getAliasFilter().getQueryBuilder() instanceof MatchNoneQueryBuilder == false; - FieldSortBuilder sortBuilder = FieldSortBuilder.getPrimaryFieldSortOrNull(request.source()); + final FieldSortBuilder sortBuilder = FieldSortBuilder.getPrimaryFieldSortOrNull(request.source()); + final SortAndFormats primarySort = sortBuilder != null + ? SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get() + : null; MinAndMax minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null; boolean canMatch; if (canRewriteToMatchNone(request.source())) { @@ -1704,7 +1709,7 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre } final FieldDoc searchAfterFieldDoc = getSearchAfterFieldDoc(request, context); final Integer trackTotalHitsUpto = request.source() == null ? null : request.source().trackTotalHitsUpTo(); - canMatch = canMatch && canMatchSearchAfter(searchAfterFieldDoc, minMax, sortBuilder, trackTotalHitsUpto); + canMatch = canMatch && canMatchSearchAfter(searchAfterFieldDoc, minMax, primarySort, trackTotalHitsUpto); return new CanMatchResponse(canMatch || hasRefreshPending, minMax); } @@ -1714,7 +1719,7 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre public static boolean canMatchSearchAfter( FieldDoc searchAfter, MinAndMax minMax, - FieldSortBuilder primarySortField, + SortAndFormats primarySort, Integer trackTotalHitsUpto ) { // Check for sort.missing == null, since in case of missing values sort queries, if segment/shard's min/max @@ -1722,26 +1727,57 @@ public static boolean canMatchSearchAfter( // Skipping search on shard/segment entirely can cause mismatch on total_tracking_hits, hence skip only if // track_total_hits is false. if (searchAfter != null + && searchAfter.fields[0] != null && minMax != null - && primarySortField != null - && primarySortField.missing() == null + && primarySort != null && Objects.equals(trackTotalHitsUpto, TRACK_TOTAL_HITS_DISABLED)) { final Object searchAfterPrimary = searchAfter.fields[0]; - if (primarySortField.order() == SortOrder.DESC) { + SortField primarySortField = primarySort.sort.getSort()[0]; + if (primarySortField.getReverse()) { if (minMax.compareMin(searchAfterPrimary) > 0) { // In Desc order, if segment/shard minimum is gt search_after, the segment/shard won't be competitive - return false; + return canMatchMissingValue(primarySortField, searchAfterPrimary); } } else { if (minMax.compareMax(searchAfterPrimary) < 0) { // In ASC order, if segment/shard maximum is lt search_after, the segment/shard won't be competitive - return false; + return canMatchMissingValue(primarySortField, searchAfterPrimary); } } } return true; } + private static boolean canMatchMissingValue(SortField primarySortField, Object primarySearchAfter) { + final Object missingValue = primarySortField.getMissingValue(); + if (primarySortField.getReverse()) { + // the missing value of Type.STRING can only be SortField.STRING_FIRS or SortField.STRING_LAST + if (primarySearchAfter instanceof BytesRef) { + return missingValue == SortField.STRING_FIRST; + } + return compare(primarySearchAfter, missingValue) >= 0; + } else { + if (primarySearchAfter instanceof BytesRef) { + return missingValue == SortField.STRING_LAST; + } + return compare(primarySearchAfter, missingValue) <= 0; + } + } + + private static int compare(Object one, Object two) { + if (one instanceof Long) { + return Long.compare((Long) one, (Long) two); + } else if (one instanceof Integer) { + return Integer.compare((Integer) one, (Integer) two); + } else if (one instanceof Float) { + return Float.compare((Float) one, (Float) two); + } else if (one instanceof Double) { + return Double.compare((Double) one, (Double) two); + } else { + throw new UnsupportedOperationException("compare type not supported : " + one.getClass()); + } + } + private static FieldDoc getSearchAfterFieldDoc(ShardSearchRequest request, QueryShardContext context) throws IOException { if (context != null && request != null && request.source() != null && request.source().sorts() != null) { final List> sorts = request.source().sorts(); diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index bf570a0dd0d60..aefa336c4de79 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -594,7 +594,7 @@ private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException { return SearchService.canMatchSearchAfter( searchContext.searchAfter(), minMax, - primarySortField, + searchContext.sort(), searchContext.trackTotalHitsUpTo() ); } diff --git a/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java b/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java index a45b2bd40c03d..e033756ef6af8 100644 --- a/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java +++ b/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java @@ -138,6 +138,10 @@ public static FieldDoc buildFieldDoc(SortAndFormats sort, Object[] values) { if (values[i] != null) { fieldValues[i] = convertValueFromSortField(values[i], sortField, format); } else { + SortField.Type sortType = extractSortType(sortField); + if (sortType != SortField.Type.STRING && sortType != SortField.Type.STRING_VAL) { + throw new IllegalArgumentException("search after value of type [" + sortType + "] cannot be null"); + } fieldValues[i] = null; } } diff --git a/server/src/test/java/org/opensearch/search/SearchServiceTests.java b/server/src/test/java/org/opensearch/search/SearchServiceTests.java index 9cf0d543d0874..8abb1a103290c 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceTests.java @@ -36,6 +36,8 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.AlreadyClosedException; import org.opensearch.OpenSearchException; import org.opensearch.action.OriginalIndices; @@ -110,9 +112,8 @@ import org.opensearch.search.internal.ShardSearchContextId; import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.search.query.QuerySearchResult; -import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.MinAndMax; -import org.opensearch.search.sort.SortOrder; +import org.opensearch.search.sort.SortAndFormats; import org.opensearch.search.suggest.SuggestBuilder; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.ThreadPool; @@ -2156,111 +2157,132 @@ public void validatePitStats(String index, long expectedPitCurrent, long expecte /** * Test for ASC order search_after query. - * Min = 0L, Max = 9L, search_after = 10L + * Min = 0L, Max = 9L, search_after = 10L, missing = Long.MIN_VALUE * Expected result is canMatch = false */ - public void testCanMatchSearchAfterAscGreaterThanMax() throws IOException { + public void testCanMatchSearchAfterAscGreaterThanMaxAndMissing() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 10L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.ASC); - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), false); + SortField sortField = new SortField("test", SortField.Type.LONG); + sortField.setMissingValue(Long.MIN_VALUE); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); } /** * Test for ASC order search_after query. - * Min = 0L, Max = 9L, search_after = 7L + * Min = 0L, Max = 9L, search_after = 10L, missing = 11L + * Expected result is canMatch = true + */ + public void testCanMatchSearchAfterAscGreaterThanMaxAndLessThanMissing() throws IOException { + FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 10L }); + MinAndMax minMax = new MinAndMax(0L, 9L); + SortField sortField = new SortField("test", SortField.Type.LONG); + sortField.setMissingValue(11L); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + } + + /** + * Test for ASC order search_after query. + * Min = 0L, Max = 9L, search_after = 7L, missing = Long.MIN_VALUE/10L * Expected result is canMatch = true */ public void testCanMatchSearchAfterAscLessThanMax() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 7L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.ASC); - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), true); + SortField sortField = new SortField("test", SortField.Type.LONG); + sortField.setMissingValue(randomFrom(Long.MIN_VALUE, 10L)); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); } /** * Test for ASC order search_after query. - * Min = 0L, Max = 9L, search_after = 9L + * Min = 0L, Max = 9L, search_after = 9L, missing = Long.MIN_VALUE/10L * Expected result is canMatch = true */ public void testCanMatchSearchAfterAscEqualMax() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 9L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.ASC); - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), true); + SortField sortField = new SortField("test", SortField.Type.LONG); + sortField.setMissingValue(randomFrom(Long.MIN_VALUE, 10L)); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); } /** * Test for DESC order search_after query. - * Min = 0L, Max = 9L, search_after = 10L + * Min = 0L, Max = 9L, search_after = 10L, missing = Long.MAX_VALUE/Long.MIN_VALUE * Expected result is canMatch = true */ public void testCanMatchSearchAfterDescGreaterThanMin() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 10L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.DESC); - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), true); + SortField sortField = new SortField("test", SortField.Type.LONG, true); + sortField.setMissingValue(randomFrom(Long.MAX_VALUE, Long.MIN_VALUE)); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); } /** * Test for DESC order search_after query. - * Min = 0L, Max = 9L, search_after = -1L + * Min = 0L, Max = 9L, search_after = -1L, missing > search_after * Expected result is canMatch = false */ - public void testCanMatchSearchAfterDescLessThanMin() throws IOException { + public void testCanMatchSearchAfterDescLessThanMinAndMissing() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { -1L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.DESC); - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), false); + SortField sortField = new SortField("test", SortField.Type.LONG, true); + sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); } /** * Test for DESC order search_after query. - * Min = 0L, Max = 9L, search_after = 0L + * Min = 0L, Max = 9L, search_after = 0L, missing > search_after * Expected result is canMatch = true */ - public void testCanMatchSearchAfterDescEqualMin() throws IOException { + public void testCanMatchSearchAfterDescEqualMinAndLessThanMissing() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 0L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.DESC); - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), true); + SortField sortField = new SortField("test", SortField.Type.LONG, true); + sortField.setMissingValue(randomLongBetween(1L, Long.MAX_VALUE)); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); } /** * Test canMatchSearchAfter with missing value, even if min/max is out of range * Min = 0L, Max = 9L, search_after = -1L - * Expected result is canMatch = true */ - public void testCanMatchSearchAfterWithMissing() throws IOException { + public void testCanMatchSearchAfterWithMinMaxOutOfRangeAndDifferentMissing() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { -1L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.DESC); - // Should be false without missing values - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), false); - primarySort.missing("_last"); - // Should be true with missing values - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED), true); + SortField sortField = new SortField("test", SortField.Type.LONG, true); + sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + // Should be false with missing values is larger than search_after + assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + sortField.setMissingValue(Long.MIN_VALUE); + // Should be true with missing values is less than search_after + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); } /** * Test for DESC order search_after query with track_total_hits=true. - * Min = 0L, Max = 9L, search_after = -1L + * Min = 0L, Max = 9L, search_after = -1L, missing > search_after * With above min/max and search_after, it should not match, but since * track_total_hits = true, * Expected result is canMatch = true */ - public void testCanMatchSearchAfterDescLessThanMinWithTrackTotalhits() throws IOException { + public void testCanMatchSearchAfterDescLessThanMinAndMissingWithTrackTotalHits() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { -1L }); MinAndMax minMax = new MinAndMax(0L, 9L); - FieldSortBuilder primarySort = new FieldSortBuilder("test"); - primarySort.order(SortOrder.DESC); - assertEquals(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, 1000), true); + SortField sortField = new SortField("test", SortField.Type.LONG, true); + sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); + final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, 1000)); } } From 49997a08e272850d9c111895c2d52ffafb5d0fcf Mon Sep 17 00:00:00 2001 From: panguixin Date: Mon, 22 Jul 2024 23:41:44 +0800 Subject: [PATCH 2/5] add test Signed-off-by: panguixin --- .../test/search/90_search_after.yml | 58 ------- .../opensearch/search/SearchServiceTests.java | 159 ++++++++++++++++++ 2 files changed, 159 insertions(+), 58 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml index 043a2e8a9e24c..89fc705a24b34 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml @@ -779,61 +779,3 @@ - match: { hits.hits.2._index: test } - match: { hits.hits.2._source.unsignedlong: null } - match: { hits.hits.2._source.id: 22 } - ---- -"search with null search after value": - - do: - indices.create: - index: test - body: - mappings: - properties: - doc: - type: keyword - name: - type: keyword - - - do: - bulk: - refresh: true - index: test - body: | - {"index":{}} - { "doc": "doc1", "name": "bob"} - {"index":{}} - { "doc": "doc2", "name": null} - {"index":{}} - { "doc": "doc3", "name": null} - - - do: - search: - rest_total_hits_as_int: true - index: test - body: - size: 2 - track_total_hits: false - sort: [{ name: desc }, { doc: desc }] - - - match: {hits.total: -1 } - - length: {hits.hits: 2 } - - match: {hits.hits.0._index: test } - - match: {hits.hits.0._source.doc: doc1 } - - match: {hits.hits.1._index: test } - - match: {hits.hits.1._source.doc: doc3 } - - match: {hits.hits.1.sort: [null, "doc3"] } - - - do: - search: - rest_total_hits_as_int: true - index: test - body: - size: 1 - track_total_hits: false - sort: [{ name: desc }, { doc: desc }] - search_after: [null, "doc3"] - - - match: {hits.total: -1 } - - length: {hits.hits: 1 } - - match: {hits.hits.0._index: test } - - match: {hits.hits.0._source.doc: doc2 } - - match: {hits.hits.0.sort: [null, "doc2"] } diff --git a/server/src/test/java/org/opensearch/search/SearchServiceTests.java b/server/src/test/java/org/opensearch/search/SearchServiceTests.java index 8abb1a103290c..7cb52868d6da6 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceTests.java @@ -41,6 +41,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.opensearch.OpenSearchException; import org.opensearch.action.OriginalIndices; +import org.opensearch.action.bulk.BulkRequestBuilder; import org.opensearch.action.index.IndexResponse; import org.opensearch.action.search.ClearScrollRequest; import org.opensearch.action.search.DeletePitResponse; @@ -57,11 +58,14 @@ import org.opensearch.action.support.PlainActionFuture; import org.opensearch.action.support.WriteRequest; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.CheckedTriFunction; +import org.opensearch.common.Numbers; import org.opensearch.common.UUIDs; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; @@ -70,6 +74,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexModule; import org.opensearch.index.IndexNotFoundException; @@ -77,6 +82,8 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.Engine; import org.opensearch.index.mapper.DerivedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.query.AbstractQueryBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchNoneQueryBuilder; @@ -114,13 +121,17 @@ import org.opensearch.search.query.QuerySearchResult; import org.opensearch.search.sort.MinAndMax; import org.opensearch.search.sort.SortAndFormats; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; import org.opensearch.search.suggest.SuggestBuilder; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.ThreadPool; import org.junit.Before; import java.io.IOException; +import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.LinkedList; @@ -1176,6 +1187,154 @@ public void testCanRewriteToMatchNone() { ); } + private Number randomNumber(NumberFieldMapper.NumberType type) { + switch (type) { + case BYTE: + return randomByte(); + case SHORT: + return randomShort(); + case INTEGER: + return randomInt(10000); + case LONG: + return randomLongBetween(0L, 10000L); + case DOUBLE: + return randomDoubleBetween(0, 10000, false); + case FLOAT: + case HALF_FLOAT: + return (float) randomDoubleBetween(0, 10000, false); + case UNSIGNED_LONG: + BigInteger ul = randomUnsignedLong(); + while (ul.compareTo(Numbers.MIN_UNSIGNED_LONG_VALUE) == 0 || ul.compareTo(Numbers.MAX_UNSIGNED_LONG_VALUE) == 0) { + ul = randomUnsignedLong(); + } + return ul; + default: + throw new AssertionError(); + } + } + + private Number incDecNumber(Number number, NumberFieldMapper.NumberType type, boolean inc) { + switch (type) { + case BYTE: + case SHORT: + case INTEGER: + return number.intValue() + (inc ? 1 : -1); + case LONG: + return number.longValue() + (inc ? 1 : -1); + case DOUBLE: + return number.doubleValue() + (inc ? 1 : -1); + case FLOAT: + case HALF_FLOAT: + return number.floatValue() + (inc ? 1 : -1); + case UNSIGNED_LONG: + return ((BigInteger) number).add(inc ? BigInteger.valueOf(1) : BigInteger.valueOf(-1)); + default: + throw new AssertionError(); + } + } + + private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType type) throws Exception { + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("num1") + .field("type", type.typeName()) + .endObject() + .startObject("num2") + .field("type", type.typeName()) + .endObject() + .endObject() + .endObject(); + IndexService indexService = createIndex("test", Settings.EMPTY, MapperService.SINGLE_MAPPING_NAME, mapping); + ensureGreen(); + + final BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); + final int numDocs = randomIntBetween(10, 20); + final Number[] nums1 = new Number[numDocs]; + final Number[] nums2 = new Number[numDocs]; + for (int i = 0; i < numDocs; i++) { + nums1[i] = randomNumber(type); + nums2[i] = randomNumber(type); + bulkRequestBuilder.add( + client().prepareIndex("test") + .setId(String.valueOf(i)) + .setSource(String.format(Locale.ROOT, "{\"num1\": %s, \"num2\": %s}", nums1[i].toString(), nums2[i].toString()), MediaTypeRegistry.JSON) + ); + } + bulkRequestBuilder.get(); + client().admin().indices().prepareRefresh().get(); + Arrays.sort(nums1); + Arrays.sort(nums2); + + final IndexShard shard = indexService.getShard(0); + final SearchService service = getInstanceFromNode(SearchService.class); + final ShardSearchRequest shardRequest = new ShardSearchRequest( + OriginalIndices.NONE, + new SearchRequest().allowPartialSearchResults(true), + shard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1f, + -1, + null, + null + ); + assertFalse(shard.hasRefreshPending()); + + final CheckedTriFunction outOfRangeTester = (outOfRange, missingMatch, reverse) -> { + Number searchAfter; + Object missing; + if (outOfRange) { + searchAfter = reverse ? incDecNumber(nums1[0], type, false) : incDecNumber(nums1[numDocs - 1], type, true); + } else { + searchAfter = randomFrom(nums1); + } + if (missingMatch) { + missing = reverse ? (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, false)) : (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, true)); + } else { + missing = reverse ? (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, true)) : (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, false)); + } + shardRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).trackTotalHits(false)); + shardRequest.source() + .sort(SortBuilders.fieldSort("num1").missing(missing).order(reverse ? SortOrder.DESC : SortOrder.ASC)); + List searchAfterFields = new ArrayList<>(); + searchAfterFields.add(searchAfter); + if (randomBoolean()) { + shardRequest.source().sort("num2"); + searchAfterFields.add(randomFrom(nums2)); + } + shardRequest.source().searchAfter(searchAfterFields.toArray()); + SearchService.CanMatchResponse response = service.canMatch(shardRequest); + + if (type == NumberFieldMapper.NumberType.HALF_FLOAT || type == NumberFieldMapper.NumberType.UNSIGNED_LONG) { + assertTrue(response.canMatch()); + return null; + } + + if (outOfRange == false || missingMatch) { + assertTrue(response.canMatch()); + } else { + assertFalse(response.canMatch()); + } + return null; + }; + for (boolean outOfRange : new boolean[] { true, false }) { + for (boolean missingMatch : new boolean[] { true, false }) { + for (boolean reverse : new boolean[] { true, false }) { + outOfRangeTester.apply(outOfRange, missingMatch, reverse); + } + } + } + client().admin().indices().prepareDelete("test").get(); + ensureGreen(); + } + + public void testNumericCanMatch() throws Exception { + for (var type: NumberFieldMapper.NumberType.values()) { + canMatchSearchAfterNumericTestCase(type); + } + } + public void testSetSearchThrottled() { createIndex("throttled_threadpool_index"); client().execute( From 489d614a371148960061b54f0d50d0e4e0369a85 Mon Sep 17 00:00:00 2001 From: panguixin Date: Wed, 7 Aug 2024 18:49:30 +0800 Subject: [PATCH 3/5] take field density into consideration Signed-off-by: panguixin --- .../org/opensearch/search/SearchService.java | 21 +++++-- .../search/internal/ContextIndexSearcher.java | 10 +-- .../search/sort/FieldSortBuilder.java | 62 +++++++++++-------- .../opensearch/search/sort/FieldStats.java | 40 ++++++++++++ .../opensearch/search/SearchServiceTests.java | 57 +++++++++-------- .../search/sort/FieldSortBuilderTests.java | 44 ++++++++----- 6 files changed, 156 insertions(+), 78 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/sort/FieldStats.java diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 351ca4613ef85..8d35d084114e8 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -138,6 +138,7 @@ import org.opensearch.search.rescore.RescorerBuilder; import org.opensearch.search.searchafter.SearchAfterBuilder; import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.FieldStats; import org.opensearch.search.sort.MinAndMax; import org.opensearch.search.sort.SortAndFormats; import org.opensearch.search.sort.SortBuilder; @@ -1698,7 +1699,7 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre final SortAndFormats primarySort = sortBuilder != null ? SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get() : null; - MinAndMax minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null; + FieldStats stats = sortBuilder != null ? FieldSortBuilder.getFieldStatsForShard(context, sortBuilder) : FieldStats.UNKNOWN; boolean canMatch; if (canRewriteToMatchNone(request.source())) { QueryBuilder queryBuilder = request.source().query(); @@ -1709,9 +1710,16 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre } final FieldDoc searchAfterFieldDoc = getSearchAfterFieldDoc(request, context); final Integer trackTotalHitsUpto = request.source() == null ? null : request.source().trackTotalHitsUpTo(); - canMatch = canMatch && canMatchSearchAfter(searchAfterFieldDoc, minMax, primarySort, trackTotalHitsUpto); + canMatch = canMatch + && canMatchSearchAfter( + searchAfterFieldDoc, + stats.getMinAndMax(), + primarySort, + trackTotalHitsUpto, + stats.allDocsNonMissing() + ); - return new CanMatchResponse(canMatch || hasRefreshPending, minMax); + return new CanMatchResponse(canMatch || hasRefreshPending, stats.getMinAndMax()); } } } @@ -1720,7 +1728,8 @@ public static boolean canMatchSearchAfter( FieldDoc searchAfter, MinAndMax minMax, SortAndFormats primarySort, - Integer trackTotalHitsUpto + Integer trackTotalHitsUpto, + boolean allDocsNonMissing ) { // Check for sort.missing == null, since in case of missing values sort queries, if segment/shard's min/max // is out of search_after range, it still should be printed and hence we should not skip segment/shard. @@ -1736,12 +1745,12 @@ public static boolean canMatchSearchAfter( if (primarySortField.getReverse()) { if (minMax.compareMin(searchAfterPrimary) > 0) { // In Desc order, if segment/shard minimum is gt search_after, the segment/shard won't be competitive - return canMatchMissingValue(primarySortField, searchAfterPrimary); + return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary); } } else { if (minMax.compareMax(searchAfterPrimary) < 0) { // In ASC order, if segment/shard maximum is lt search_after, the segment/shard won't be competitive - return canMatchMissingValue(primarySortField, searchAfterPrimary); + return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary); } } } diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index aefa336c4de79..82c6c07cd63a8 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -81,7 +81,7 @@ import org.opensearch.search.query.QueryPhase; import org.opensearch.search.query.QuerySearchResult; import org.opensearch.search.sort.FieldSortBuilder; -import org.opensearch.search.sort.MinAndMax; +import org.opensearch.search.sort.FieldStats; import java.io.IOException; import java.util.ArrayList; @@ -585,17 +585,19 @@ private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException { // Only applied on primary sort field and primary search_after. FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(searchContext.request().source()); if (primarySortField != null) { - MinAndMax minMax = FieldSortBuilder.getMinMaxOrNullForSegment( + FieldStats stats = FieldSortBuilder.getFieldStatsForSegment( this.searchContext.getQueryShardContext(), ctx, primarySortField, searchContext.sort() ); + assert stats != null; return SearchService.canMatchSearchAfter( searchContext.searchAfter(), - minMax, + stats.getMinAndMax(), searchContext.sort(), - searchContext.trackTotalHitsUpTo() + searchContext.trackTotalHitsUpTo(), + stats.allDocsNonMissing() ); } } diff --git a/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java b/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java index e592b2567d8ff..148d6a0a6e09c 100644 --- a/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java +++ b/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java @@ -613,30 +613,30 @@ public static FieldSortBuilder getPrimaryFieldSortOrNull(SearchSourceBuilder sou } /** - * Return the {@link MinAndMax} indexed value for shard from the provided {@link FieldSortBuilder} or null if unknown. + * Return the {@link FieldStats} indexed value for shard from the provided {@link FieldSortBuilder} or {@link FieldStats#UNKNOWN} if unknown. * The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields - * and configurations return null. + * and configurations return {@link FieldStats#UNKNOWN}. */ - public static MinAndMax getMinMaxOrNull(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException { + public static FieldStats getFieldStatsForShard(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException { final SortAndFormats sort = SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get(); - return getMinMaxOrNullInternal(context.getIndexReader(), context, sortBuilder, sort); + return getFieldStatsInternal(context.getIndexReader(), context, sortBuilder, sort); } /** - * Return the {@link MinAndMax} indexed value for segment from the provided {@link FieldSortBuilder} or null if unknown. + * Return the {@link FieldStats} indexed value for segment from the provided {@link FieldSortBuilder} or {@link FieldStats#UNKNOWN} if unknown. * The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields - * and configurations return null. + * and configurations return {@link FieldStats#UNKNOWN}. */ - public static MinAndMax getMinMaxOrNullForSegment( + public static FieldStats getFieldStatsForSegment( QueryShardContext context, LeafReaderContext ctx, FieldSortBuilder sortBuilder, SortAndFormats sort ) throws IOException { - return getMinMaxOrNullInternal(ctx.reader(), context, sortBuilder, sort); + return getFieldStatsInternal(ctx.reader(), context, sortBuilder, sort); } - private static MinAndMax getMinMaxOrNullInternal( + private static FieldStats getFieldStatsInternal( IndexReader reader, QueryShardContext context, FieldSortBuilder sortBuilder, @@ -644,43 +644,47 @@ private static MinAndMax getMinMaxOrNullInternal( ) throws IOException { SortField sortField = sort.sort.getSort()[0]; if (sortField.getField() == null) { - return null; + return FieldStats.UNKNOWN; } MappedFieldType fieldType = context.fieldMapper(sortField.getField()); if (reader == null || (fieldType == null || fieldType.isSearchable() == false)) { - return null; + return FieldStats.UNKNOWN; } switch (IndexSortConfig.getSortFieldType(sortField)) { case LONG: case INT: case DOUBLE: case FLOAT: - return extractNumericMinAndMax(reader, sortField, fieldType, sortBuilder); + return extractNumericFieldStats(reader, sortField, fieldType, sortBuilder); case STRING: case STRING_VAL: if (fieldType.unwrap() instanceof KeywordFieldMapper.KeywordFieldType) { Terms terms = MultiTerms.getTerms(reader, fieldType.name()); if (terms == null) { - return null; + return FieldStats.UNKNOWN; } - return terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null; + MinAndMax minAndMax = terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null; + return new FieldStats(minAndMax, terms.getDocCount() == reader.maxDoc()); } break; } - return null; + return FieldStats.UNKNOWN; } - private static MinAndMax extractNumericMinAndMax( + private static FieldStats extractNumericFieldStats( IndexReader reader, SortField sortField, MappedFieldType fieldType, FieldSortBuilder sortBuilder ) throws IOException { String fieldName = fieldType.name(); - if (PointValues.size(reader, fieldName) == 0) { - return null; + final int docCount = PointValues.getDocCount(reader, fieldName); + if (docCount == 0) { + return FieldStats.UNKNOWN; } - if (fieldType.unwrap() instanceof NumberFieldType) { + final boolean allDocsNonMissing = docCount == reader.maxDoc(); + MinAndMax minAndMax = null; + if (fieldType instanceof NumberFieldType) { NumberFieldType numberFieldType = (NumberFieldType) fieldType; Number minPoint = numberFieldType.parsePoint(PointValues.getMinPackedValue(reader, fieldName)); Number maxPoint = numberFieldType.parsePoint(PointValues.getMaxPackedValue(reader, fieldName)); @@ -688,27 +692,31 @@ private static MinAndMax extractNumericMinAndMax( case LONG: if (numberFieldType.numericType() == NumericType.UNSIGNED_LONG) { // The min and max are expected to be BigInteger numbers - return new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint); + minAndMax = new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint); } else { - return new MinAndMax<>(minPoint.longValue(), maxPoint.longValue()); + minAndMax = new MinAndMax<>(minPoint.longValue(), maxPoint.longValue()); } + break; case INT: - return new MinAndMax<>(minPoint.intValue(), maxPoint.intValue()); + minAndMax = new MinAndMax<>(minPoint.intValue(), maxPoint.intValue()); + break; case DOUBLE: - return new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue()); + minAndMax = new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue()); + break; case FLOAT: - return new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue()); + minAndMax = new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue()); + break; default: - return null; + // no-op } } else if (fieldType.unwrap() instanceof DateFieldType) { DateFieldType dateFieldType = (DateFieldType) fieldType; Function dateConverter = createDateConverter(sortBuilder, dateFieldType); Long min = dateConverter.apply(PointValues.getMinPackedValue(reader, fieldName)); Long max = dateConverter.apply(PointValues.getMaxPackedValue(reader, fieldName)); - return new MinAndMax<>(min, max); + minAndMax = new MinAndMax<>(min, max); } - return null; + return new FieldStats(minAndMax, allDocsNonMissing); } private static Function createDateConverter(FieldSortBuilder sortBuilder, DateFieldType dateFieldType) { diff --git a/server/src/main/java/org/opensearch/search/sort/FieldStats.java b/server/src/main/java/org/opensearch/search/sort/FieldStats.java new file mode 100644 index 0000000000000..4c0efebd05288 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/sort/FieldStats.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.sort; + +/** + * A class that encapsulates some stats about a field, including min/max etc. + * + * @opensearch.internal + */ +public class FieldStats { + public static final FieldStats UNKNOWN = new FieldStats(null, false); + + private final MinAndMax minAndMax; + private final boolean allDocsNonMissing; + + public FieldStats(MinAndMax minAndMax, boolean allDocsNonMissing) { + this.minAndMax = minAndMax; + this.allDocsNonMissing = allDocsNonMissing; + } + + /** + * Return the minimum and maximum value. + */ + public MinAndMax getMinAndMax() { + return minAndMax; + } + + /** + * Indicates whether all docs have values for corresponding field + */ + public boolean allDocsNonMissing() { + return allDocsNonMissing; + } +} diff --git a/server/src/test/java/org/opensearch/search/SearchServiceTests.java b/server/src/test/java/org/opensearch/search/SearchServiceTests.java index 7cb52868d6da6..3741a7292b2a7 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceTests.java @@ -1248,18 +1248,22 @@ private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType typ IndexService indexService = createIndex("test", Settings.EMPTY, MapperService.SINGLE_MAPPING_NAME, mapping); ensureGreen(); + final boolean allDocsNonMissing = randomBoolean(); final BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); final int numDocs = randomIntBetween(10, 20); - final Number[] nums1 = new Number[numDocs]; + final Number[] nums1 = new Number[allDocsNonMissing ? numDocs : numDocs - 1]; final Number[] nums2 = new Number[numDocs]; for (int i = 0; i < numDocs; i++) { - nums1[i] = randomNumber(type); - nums2[i] = randomNumber(type); - bulkRequestBuilder.add( - client().prepareIndex("test") - .setId(String.valueOf(i)) - .setSource(String.format(Locale.ROOT, "{\"num1\": %s, \"num2\": %s}", nums1[i].toString(), nums2[i].toString()), MediaTypeRegistry.JSON) - ); + String source; + if (i < numDocs - 1 || allDocsNonMissing) { + nums1[i] = randomNumber(type); + nums2[i] = randomNumber(type); + source = String.format(Locale.ROOT, "{\"num1\": %s, \"num2\": %s}", nums1[i].toString(), nums2[i].toString()); + } else { + nums2[i] = randomNumber(type); + source = String.format(Locale.ROOT, "{\"num2\": %s}", nums2[i].toString()); + } + bulkRequestBuilder.add(client().prepareIndex("test").setId(String.valueOf(i)).setSource(source, MediaTypeRegistry.JSON)); } bulkRequestBuilder.get(); client().admin().indices().prepareRefresh().get(); @@ -1285,18 +1289,21 @@ private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType typ Number searchAfter; Object missing; if (outOfRange) { - searchAfter = reverse ? incDecNumber(nums1[0], type, false) : incDecNumber(nums1[numDocs - 1], type, true); + searchAfter = reverse ? incDecNumber(nums1[0], type, false) : incDecNumber(nums1[nums1.length - 1], type, true); } else { searchAfter = randomFrom(nums1); } if (missingMatch) { - missing = reverse ? (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, false)) : (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, true)); + missing = reverse + ? (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, false)) + : (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, true)); } else { - missing = reverse ? (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, true)) : (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, false)); + missing = reverse + ? (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, true)) + : (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, false)); } shardRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).trackTotalHits(false)); - shardRequest.source() - .sort(SortBuilders.fieldSort("num1").missing(missing).order(reverse ? SortOrder.DESC : SortOrder.ASC)); + shardRequest.source().sort(SortBuilders.fieldSort("num1").missing(missing).order(reverse ? SortOrder.DESC : SortOrder.ASC)); List searchAfterFields = new ArrayList<>(); searchAfterFields.add(searchAfter); if (randomBoolean()) { @@ -1311,7 +1318,7 @@ private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType typ return null; } - if (outOfRange == false || missingMatch) { + if (outOfRange == false || (allDocsNonMissing == false && missingMatch)) { assertTrue(response.canMatch()); } else { assertFalse(response.canMatch()); @@ -1330,7 +1337,7 @@ private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType typ } public void testNumericCanMatch() throws Exception { - for (var type: NumberFieldMapper.NumberType.values()) { + for (var type : NumberFieldMapper.NumberType.values()) { canMatchSearchAfterNumericTestCase(type); } } @@ -2325,7 +2332,7 @@ public void testCanMatchSearchAfterAscGreaterThanMaxAndMissing() throws IOExcept SortField sortField = new SortField("test", SortField.Type.LONG); sortField.setMissingValue(Long.MIN_VALUE); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2339,7 +2346,7 @@ public void testCanMatchSearchAfterAscGreaterThanMaxAndLessThanMissing() throws SortField sortField = new SortField("test", SortField.Type.LONG); sortField.setMissingValue(11L); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2353,7 +2360,7 @@ public void testCanMatchSearchAfterAscLessThanMax() throws IOException { SortField sortField = new SortField("test", SortField.Type.LONG); sortField.setMissingValue(randomFrom(Long.MIN_VALUE, 10L)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2367,7 +2374,7 @@ public void testCanMatchSearchAfterAscEqualMax() throws IOException { SortField sortField = new SortField("test", SortField.Type.LONG); sortField.setMissingValue(randomFrom(Long.MIN_VALUE, 10L)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2381,7 +2388,7 @@ public void testCanMatchSearchAfterDescGreaterThanMin() throws IOException { SortField sortField = new SortField("test", SortField.Type.LONG, true); sortField.setMissingValue(randomFrom(Long.MAX_VALUE, Long.MIN_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2395,7 +2402,7 @@ public void testCanMatchSearchAfterDescLessThanMinAndMissing() throws IOExceptio SortField sortField = new SortField("test", SortField.Type.LONG, true); sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2409,7 +2416,7 @@ public void testCanMatchSearchAfterDescEqualMinAndLessThanMissing() throws IOExc SortField sortField = new SortField("test", SortField.Type.LONG, true); sortField.setMissingValue(randomLongBetween(1L, Long.MAX_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2423,10 +2430,10 @@ public void testCanMatchSearchAfterWithMinMaxOutOfRangeAndDifferentMissing() thr sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); // Should be false with missing values is larger than search_after - assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); sortField.setMissingValue(Long.MIN_VALUE); // Should be true with missing values is less than search_after - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED)); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); } /** @@ -2442,6 +2449,6 @@ public void testCanMatchSearchAfterDescLessThanMinAndMissingWithTrackTotalHits() SortField sortField = new SortField("test", SortField.Type.LONG, true); sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, 1000)); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, 1000, false)); } } diff --git a/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java b/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java index 8ffd4f888a117..4fd3bef1ff1e1 100644 --- a/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java +++ b/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java @@ -88,7 +88,7 @@ import java.util.Locale; import java.util.Set; -import static org.opensearch.search.sort.FieldSortBuilder.getMinMaxOrNull; +import static org.opensearch.search.sort.FieldSortBuilder.getFieldStatsForShard; import static org.opensearch.search.sort.FieldSortBuilder.getPrimaryFieldSortOrNull; import static org.opensearch.search.sort.NestedSortBuilderTests.createRandomNestedSort; import static org.hamcrest.Matchers.instanceOf; @@ -490,8 +490,8 @@ public void testGetMaxNumericSortValue() throws IOException { QueryShardContext context = createMockShardContext(); for (NumberFieldMapper.NumberType numberType : NumberFieldMapper.NumberType.values()) { String fieldName = "custom-" + numberType.numericType(); - assertNull(getMinMaxOrNull(context, SortBuilders.fieldSort(fieldName))); - assertNull(getMinMaxOrNull(context, SortBuilders.fieldSort(fieldName + "-ni"))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); try (Directory dir = newDirectory()) { int numDocs = randomIntBetween(10, 30); @@ -558,12 +558,18 @@ public void testGetMaxNumericSortValue() throws IOException { QueryShardContext newContext = createMockShardContext(new AssertingIndexSearcher(random(), reader)); if (numberType == NumberFieldMapper.NumberType.HALF_FLOAT || numberType == NumberFieldMapper.NumberType.UNSIGNED_LONG) { - assertNull(getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); - assertNull(getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName))); } else { - assertNull(getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); - assertEquals(values[numDocs - 1], getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName)).getMax()); - assertEquals(values[0], getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName)).getMin()); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); + assertEquals( + values[numDocs - 1], + getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMax() + ); + assertEquals( + values[0], + getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMin() + ); } } } @@ -574,8 +580,8 @@ public void testGetMaxNumericSortValue() throws IOException { public void testGetMaxNumericDateValue() throws IOException { QueryShardContext context = createMockShardContext(); String fieldName = "custom-date"; - assertNull(getMinMaxOrNull(context, SortBuilders.fieldSort(fieldName))); - assertNull(getMinMaxOrNull(context, SortBuilders.fieldSort(fieldName + "-ni"))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); try (Directory dir = newDirectory()) { int numDocs = randomIntBetween(10, 30); final long[] values = new long[numDocs]; @@ -589,8 +595,11 @@ public void testGetMaxNumericDateValue() throws IOException { Arrays.sort(values); try (DirectoryReader reader = writer.getReader()) { QueryShardContext newContext = createMockShardContext(new AssertingIndexSearcher(random(), reader)); - assertEquals(values[numDocs - 1], getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName)).getMax()); - assertEquals(values[0], getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName)).getMin()); + assertEquals( + values[numDocs - 1], + getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMax() + ); + assertEquals(values[0], getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMin()); } } } @@ -599,8 +608,8 @@ public void testGetMaxNumericDateValue() throws IOException { public void testGetMaxKeywordValue() throws IOException { QueryShardContext context = createMockShardContext(); String fieldName = "custom-keyword"; - assertNull(getMinMaxOrNull(context, SortBuilders.fieldSort(fieldName))); - assertNull(getMinMaxOrNull(context, SortBuilders.fieldSort(fieldName + "-ni"))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName))); + assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); try (Directory dir = newDirectory()) { int numDocs = randomIntBetween(10, 30); final BytesRef[] values = new BytesRef[numDocs]; @@ -614,8 +623,11 @@ public void testGetMaxKeywordValue() throws IOException { Arrays.sort(values); try (DirectoryReader reader = writer.getReader()) { QueryShardContext newContext = createMockShardContext(new AssertingIndexSearcher(random(), reader)); - assertEquals(values[numDocs - 1], getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName)).getMax()); - assertEquals(values[0], getMinMaxOrNull(newContext, SortBuilders.fieldSort(fieldName)).getMin()); + assertEquals( + values[numDocs - 1], + getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMax() + ); + assertEquals(values[0], getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMin()); } } } From 5092bd8c06927a097ea1faa2a96acb5ed2fd9fc5 Mon Sep 17 00:00:00 2001 From: panguixin Date: Tue, 20 May 2025 16:58:11 +0800 Subject: [PATCH 4/5] single sort Signed-off-by: panguixin --- .../org/opensearch/search/SearchService.java | 62 +-- .../search/internal/ContextIndexSearcher.java | 7 +- .../search/sort/FieldSortBuilder.java | 59 +-- .../opensearch/search/sort/FieldStats.java | 32 +- .../org/opensearch/search/sort/MinAndMax.java | 26 +- .../opensearch/search/SearchServiceTests.java | 455 +++++++++++++----- .../search/sort/FieldSortBuilderTests.java | 38 +- 7 files changed, 441 insertions(+), 238 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 8d35d084114e8..760f17cdbc1ea 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -81,6 +81,7 @@ import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; +import org.opensearch.index.IndexSortConfig; import org.opensearch.index.engine.Engine; import org.opensearch.index.mapper.DerivedFieldResolver; import org.opensearch.index.mapper.DerivedFieldResolverFactory; @@ -142,7 +143,6 @@ import org.opensearch.search.sort.MinAndMax; import org.opensearch.search.sort.SortAndFormats; import org.opensearch.search.sort.SortBuilder; -import org.opensearch.search.sort.SortOrder; import org.opensearch.search.startree.StarTreeQueryContext; import org.opensearch.search.startree.StarTreeQueryHelper; import org.opensearch.search.suggest.Suggest; @@ -1699,7 +1699,9 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre final SortAndFormats primarySort = sortBuilder != null ? SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get() : null; - FieldStats stats = sortBuilder != null ? FieldSortBuilder.getFieldStatsForShard(context, sortBuilder) : FieldStats.UNKNOWN; + final FieldStats fieldStats = sortBuilder != null + ? FieldSortBuilder.getFieldStatsOrNullForShard(context, sortBuilder) + : null; boolean canMatch; if (canRewriteToMatchNone(request.source())) { QueryBuilder queryBuilder = request.source().query(); @@ -1713,78 +1715,78 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre canMatch = canMatch && canMatchSearchAfter( searchAfterFieldDoc, - stats.getMinAndMax(), + fieldStats, primarySort, trackTotalHitsUpto, - stats.allDocsNonMissing() + FieldSortBuilder.isSingleSort(request.source()) ); - return new CanMatchResponse(canMatch || hasRefreshPending, stats.getMinAndMax()); + return new CanMatchResponse(canMatch || hasRefreshPending, fieldStats != null ? fieldStats.minAndMax() : null); } } } public static boolean canMatchSearchAfter( FieldDoc searchAfter, - MinAndMax minMax, + FieldStats fieldStats, SortAndFormats primarySort, Integer trackTotalHitsUpto, - boolean allDocsNonMissing + boolean singleSort ) { - // Check for sort.missing == null, since in case of missing values sort queries, if segment/shard's min/max - // is out of search_after range, it still should be printed and hence we should not skip segment/shard. // Skipping search on shard/segment entirely can cause mismatch on total_tracking_hits, hence skip only if // track_total_hits is false. if (searchAfter != null && searchAfter.fields[0] != null - && minMax != null + && fieldStats != null && primarySort != null && Objects.equals(trackTotalHitsUpto, TRACK_TOTAL_HITS_DISABLED)) { final Object searchAfterPrimary = searchAfter.fields[0]; - SortField primarySortField = primarySort.sort.getSort()[0]; + final SortField primarySortField = primarySort.sort.getSort()[0]; if (primarySortField.getReverse()) { - if (minMax.compareMin(searchAfterPrimary) > 0) { + final boolean nonMatch = fieldStats.minAndMax().compareMin(searchAfterPrimary) > (singleSort ? -1 : 0); + if (nonMatch) { // In Desc order, if segment/shard minimum is gt search_after, the segment/shard won't be competitive - return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary); + return fieldStats.allDocsHaveValue() == false && canMatchMissingValue(primarySortField, searchAfterPrimary, singleSort); } } else { - if (minMax.compareMax(searchAfterPrimary) < 0) { + final boolean nonMatch = fieldStats.minAndMax().compareMax(searchAfterPrimary) < (singleSort ? 1 : 0); + if (nonMatch) { // In ASC order, if segment/shard maximum is lt search_after, the segment/shard won't be competitive - return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary); + return fieldStats.allDocsHaveValue() == false && canMatchMissingValue(primarySortField, searchAfterPrimary, singleSort); } } } return true; } - private static boolean canMatchMissingValue(SortField primarySortField, Object primarySearchAfter) { + private static boolean canMatchMissingValue(SortField primarySortField, Object primarySearchAfter, boolean singleSort) { final Object missingValue = primarySortField.getMissingValue(); if (primarySortField.getReverse()) { // the missing value of Type.STRING can only be SortField.STRING_FIRS or SortField.STRING_LAST if (primarySearchAfter instanceof BytesRef) { + assert IndexSortConfig.getSortFieldType(primarySortField) == SortField.Type.STRING; return missingValue == SortField.STRING_FIRST; } - return compare(primarySearchAfter, missingValue) >= 0; + return compare(primarySearchAfter, missingValue) > (singleSort ? 0 : -1); } else { if (primarySearchAfter instanceof BytesRef) { + assert IndexSortConfig.getSortFieldType(primarySortField) == SortField.Type.STRING; return missingValue == SortField.STRING_LAST; } - return compare(primarySearchAfter, missingValue) <= 0; + return compare(primarySearchAfter, missingValue) < (singleSort ? 0 : 1); } } - private static int compare(Object one, Object two) { - if (one instanceof Long) { - return Long.compare((Long) one, (Long) two); - } else if (one instanceof Integer) { - return Integer.compare((Integer) one, (Integer) two); - } else if (one instanceof Float) { - return Float.compare((Float) one, (Float) two); - } else if (one instanceof Double) { - return Double.compare((Double) one, (Double) two); - } else { - throw new UnsupportedOperationException("compare type not supported : " + one.getClass()); - } + private static int compare(Object lh, Object rh) { + assert lh != null; + assert rh != null; + return switch (lh) { + case Long l -> Long.compare(l, (Long) rh); + case Integer i -> Integer.compare(i, (Integer) rh); + case Float v -> Float.compare(v, (Float) rh); + case Double v -> Double.compare(v, (Double) rh); + default -> throw new UnsupportedOperationException("compare type not supported : " + lh.getClass()); + }; } private static FieldDoc getSearchAfterFieldDoc(ShardSearchRequest request, QueryShardContext context) throws IOException { diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index 82c6c07cd63a8..0fcddc0e21a5a 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -585,19 +585,18 @@ private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException { // Only applied on primary sort field and primary search_after. FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(searchContext.request().source()); if (primarySortField != null) { - FieldStats stats = FieldSortBuilder.getFieldStatsForSegment( + final FieldStats fieldStats = FieldSortBuilder.getFieldStatsOrNullForSegment( this.searchContext.getQueryShardContext(), ctx, primarySortField, searchContext.sort() ); - assert stats != null; return SearchService.canMatchSearchAfter( searchContext.searchAfter(), - stats.getMinAndMax(), + fieldStats, searchContext.sort(), searchContext.trackTotalHitsUpTo(), - stats.allDocsNonMissing() + FieldSortBuilder.isSingleSort(searchContext.request().source()) ); } } diff --git a/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java b/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java index 148d6a0a6e09c..eca0c541a0d62 100644 --- a/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java +++ b/server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java @@ -72,7 +72,6 @@ import org.opensearch.search.builder.SearchSourceBuilder; import java.io.IOException; -import java.math.BigInteger; import java.util.Collections; import java.util.Locale; import java.util.Objects; @@ -613,30 +612,39 @@ public static FieldSortBuilder getPrimaryFieldSortOrNull(SearchSourceBuilder sou } /** - * Return the {@link FieldStats} indexed value for shard from the provided {@link FieldSortBuilder} or {@link FieldStats#UNKNOWN} if unknown. + * Indicates whether the sort is based on a single sort field or not. + * + * @return {@code true} if the sort is based on a single sort field, {@code false} otherwise + */ + public static boolean isSingleSort(SearchSourceBuilder source) { + return source != null && source.sorts() != null && source.sorts().size() == 1; + } + + /** + * Return the {@link FieldStats} indexed value for shard from the provided {@link FieldSortBuilder} or {@code null} if unknown. * The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields - * and configurations return {@link FieldStats#UNKNOWN}. + * and configurations return {@code null}. */ - public static FieldStats getFieldStatsForShard(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException { + public static FieldStats getFieldStatsOrNullForShard(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException { final SortAndFormats sort = SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get(); - return getFieldStatsInternal(context.getIndexReader(), context, sortBuilder, sort); + return getFieldStatsOrNullInternal(context.getIndexReader(), context, sortBuilder, sort); } /** - * Return the {@link FieldStats} indexed value for segment from the provided {@link FieldSortBuilder} or {@link FieldStats#UNKNOWN} if unknown. + * Return the {@link FieldStats} indexed value for segment from the provided {@link FieldSortBuilder} or {@code null} if unknown. * The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields - * and configurations return {@link FieldStats#UNKNOWN}. + * and configurations return {@code null}. */ - public static FieldStats getFieldStatsForSegment( + public static FieldStats getFieldStatsOrNullForSegment( QueryShardContext context, LeafReaderContext ctx, FieldSortBuilder sortBuilder, SortAndFormats sort ) throws IOException { - return getFieldStatsInternal(ctx.reader(), context, sortBuilder, sort); + return getFieldStatsOrNullInternal(ctx.reader(), context, sortBuilder, sort); } - private static FieldStats getFieldStatsInternal( + private static FieldStats getFieldStatsOrNullInternal( IndexReader reader, QueryShardContext context, FieldSortBuilder sortBuilder, @@ -644,11 +652,11 @@ private static FieldStats getFieldStatsInternal( ) throws IOException { SortField sortField = sort.sort.getSort()[0]; if (sortField.getField() == null) { - return FieldStats.UNKNOWN; + return null; } MappedFieldType fieldType = context.fieldMapper(sortField.getField()); if (reader == null || (fieldType == null || fieldType.isSearchable() == false)) { - return FieldStats.UNKNOWN; + return null; } switch (IndexSortConfig.getSortFieldType(sortField)) { case LONG: @@ -661,14 +669,14 @@ private static FieldStats getFieldStatsInternal( if (fieldType.unwrap() instanceof KeywordFieldMapper.KeywordFieldType) { Terms terms = MultiTerms.getTerms(reader, fieldType.name()); if (terms == null) { - return FieldStats.UNKNOWN; + return null; } MinAndMax minAndMax = terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null; return new FieldStats(minAndMax, terms.getDocCount() == reader.maxDoc()); } break; } - return FieldStats.UNKNOWN; + return null; } private static FieldStats extractNumericFieldStats( @@ -679,23 +687,19 @@ private static FieldStats extractNumericFieldStats( ) throws IOException { String fieldName = fieldType.name(); final int docCount = PointValues.getDocCount(reader, fieldName); + // TODO: should we deal with the case that all docs have no value? if (docCount == 0) { - return FieldStats.UNKNOWN; + return null; } - final boolean allDocsNonMissing = docCount == reader.maxDoc(); + final boolean allDocsHaveValue = docCount == reader.maxDoc(); MinAndMax minAndMax = null; - if (fieldType instanceof NumberFieldType) { - NumberFieldType numberFieldType = (NumberFieldType) fieldType; + if (fieldType.unwrap() instanceof NumberFieldType numberFieldType) { Number minPoint = numberFieldType.parsePoint(PointValues.getMinPackedValue(reader, fieldName)); Number maxPoint = numberFieldType.parsePoint(PointValues.getMaxPackedValue(reader, fieldName)); + // TODO: deal with half float and unsigned long switch (IndexSortConfig.getSortFieldType(sortField)) { case LONG: - if (numberFieldType.numericType() == NumericType.UNSIGNED_LONG) { - // The min and max are expected to be BigInteger numbers - minAndMax = new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint); - } else { - minAndMax = new MinAndMax<>(minPoint.longValue(), maxPoint.longValue()); - } + minAndMax = new MinAndMax<>(minPoint.longValue(), maxPoint.longValue()); break; case INT: minAndMax = new MinAndMax<>(minPoint.intValue(), maxPoint.intValue()); @@ -707,16 +711,15 @@ private static FieldStats extractNumericFieldStats( minAndMax = new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue()); break; default: - // no-op + return null; } - } else if (fieldType.unwrap() instanceof DateFieldType) { - DateFieldType dateFieldType = (DateFieldType) fieldType; + } else if (fieldType.unwrap() instanceof DateFieldType dateFieldType) { Function dateConverter = createDateConverter(sortBuilder, dateFieldType); Long min = dateConverter.apply(PointValues.getMinPackedValue(reader, fieldName)); Long max = dateConverter.apply(PointValues.getMaxPackedValue(reader, fieldName)); minAndMax = new MinAndMax<>(min, max); } - return new FieldStats(minAndMax, allDocsNonMissing); + return new FieldStats(minAndMax, allDocsHaveValue); } private static Function createDateConverter(FieldSortBuilder sortBuilder, DateFieldType dateFieldType) { diff --git a/server/src/main/java/org/opensearch/search/sort/FieldStats.java b/server/src/main/java/org/opensearch/search/sort/FieldStats.java index 4c0efebd05288..0f011da278bc6 100644 --- a/server/src/main/java/org/opensearch/search/sort/FieldStats.java +++ b/server/src/main/java/org/opensearch/search/sort/FieldStats.java @@ -8,33 +8,17 @@ package org.opensearch.search.sort; +import java.util.Objects; + /** - * A class that encapsulates some stats about a field, including min/max etc. + * A class that encapsulates some stats for a given field. * + * @param minAndMax the minimum and maximum value of the given field + * @param allDocsHaveValue whether all docs have value for the given field * @opensearch.internal */ -public class FieldStats { - public static final FieldStats UNKNOWN = new FieldStats(null, false); - - private final MinAndMax minAndMax; - private final boolean allDocsNonMissing; - - public FieldStats(MinAndMax minAndMax, boolean allDocsNonMissing) { - this.minAndMax = minAndMax; - this.allDocsNonMissing = allDocsNonMissing; - } - - /** - * Return the minimum and maximum value. - */ - public MinAndMax getMinAndMax() { - return minAndMax; - } - - /** - * Indicates whether all docs have values for corresponding field - */ - public boolean allDocsNonMissing() { - return allDocsNonMissing; +public record FieldStats(MinAndMax minAndMax, boolean allDocsHaveValue) { + public FieldStats { + Objects.requireNonNull(minAndMax); } } diff --git a/server/src/main/java/org/opensearch/search/sort/MinAndMax.java b/server/src/main/java/org/opensearch/search/sort/MinAndMax.java index 98213ec630e33..bb7e2f38421d5 100644 --- a/server/src/main/java/org/opensearch/search/sort/MinAndMax.java +++ b/server/src/main/java/org/opensearch/search/sort/MinAndMax.java @@ -110,21 +110,15 @@ public int compareMax(Object object) { return compare(getMax(), object); } - private int compare(T one, Object two) { - if (one instanceof Long) { - return Long.compare((Long) one, (Long) two); - } else if (one instanceof Integer) { - return Integer.compare((Integer) one, (Integer) two); - } else if (one instanceof Float) { - return Float.compare((Float) one, (Float) two); - } else if (one instanceof Double) { - return Double.compare((Double) one, (Double) two); - } else if (one instanceof BigInteger) { - return ((BigInteger) one).compareTo((BigInteger) two); - } else if (one instanceof BytesRef) { - return ((BytesRef) one).compareTo((BytesRef) two); - } else { - throw new UnsupportedOperationException("compare type not supported : " + one.getClass()); - } + private static int compare(Object one, Object two) { + return switch (one) { + case Long v -> Long.compare(v, (Long) two); + case Integer v -> Integer.compare(v, (Integer) two); + case Float v -> Float.compare(v, (Float) two); + case Double v -> Double.compare(v, (Double) two); + case BigInteger bigInteger -> bigInteger.compareTo((BigInteger) two); + case BytesRef bytesRef -> bytesRef.compareTo((BytesRef) two); + default -> throw new UnsupportedOperationException("compare type not supported : " + one.getClass()); + }; } } diff --git a/server/src/test/java/org/opensearch/search/SearchServiceTests.java b/server/src/test/java/org/opensearch/search/SearchServiceTests.java index 3741a7292b2a7..e7d249877137a 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceTests.java @@ -39,6 +39,7 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.BytesRef; import org.opensearch.OpenSearchException; import org.opensearch.action.OriginalIndices; import org.opensearch.action.bulk.BulkRequestBuilder; @@ -58,7 +59,6 @@ import org.opensearch.action.support.PlainActionFuture; import org.opensearch.action.support.WriteRequest; import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.Numbers; import org.opensearch.common.UUIDs; import org.opensearch.common.settings.Setting; @@ -119,6 +119,7 @@ import org.opensearch.search.internal.ShardSearchContextId; import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.search.query.QuerySearchResult; +import org.opensearch.search.sort.FieldStats; import org.opensearch.search.sort.MinAndMax; import org.opensearch.search.sort.SortAndFormats; import org.opensearch.search.sort.SortBuilders; @@ -1188,52 +1189,34 @@ public void testCanRewriteToMatchNone() { } private Number randomNumber(NumberFieldMapper.NumberType type) { - switch (type) { - case BYTE: - return randomByte(); - case SHORT: - return randomShort(); - case INTEGER: - return randomInt(10000); - case LONG: - return randomLongBetween(0L, 10000L); - case DOUBLE: - return randomDoubleBetween(0, 10000, false); - case FLOAT: - case HALF_FLOAT: - return (float) randomDoubleBetween(0, 10000, false); - case UNSIGNED_LONG: + return switch (type) { + case BYTE -> randomByte(); + case SHORT -> randomShort(); + case INTEGER -> randomInt(10000); + case LONG -> randomLongBetween(0L, 10000L); + case DOUBLE -> randomDoubleBetween(0, 10000, false); + case FLOAT, HALF_FLOAT -> (float) randomDoubleBetween(0, 10000, false); + case UNSIGNED_LONG -> { BigInteger ul = randomUnsignedLong(); while (ul.compareTo(Numbers.MIN_UNSIGNED_LONG_VALUE) == 0 || ul.compareTo(Numbers.MAX_UNSIGNED_LONG_VALUE) == 0) { ul = randomUnsignedLong(); } - return ul; - default: - throw new AssertionError(); - } + yield ul; + } + }; } - private Number incDecNumber(Number number, NumberFieldMapper.NumberType type, boolean inc) { - switch (type) { - case BYTE: - case SHORT: - case INTEGER: - return number.intValue() + (inc ? 1 : -1); - case LONG: - return number.longValue() + (inc ? 1 : -1); - case DOUBLE: - return number.doubleValue() + (inc ? 1 : -1); - case FLOAT: - case HALF_FLOAT: - return number.floatValue() + (inc ? 1 : -1); - case UNSIGNED_LONG: - return ((BigInteger) number).add(inc ? BigInteger.valueOf(1) : BigInteger.valueOf(-1)); - default: - throw new AssertionError(); - } + private Number incOrDecNumber(Number number, NumberFieldMapper.NumberType type, boolean inc) { + return switch (type) { + case BYTE, SHORT, INTEGER -> number.intValue() + (inc ? 1 : -1); + case LONG -> number.longValue() + (inc ? 1 : -1); + case DOUBLE -> number.doubleValue() + (inc ? 1 : -1); + case FLOAT, HALF_FLOAT -> number.floatValue() + (inc ? 1 : -1); + case UNSIGNED_LONG -> ((BigInteger) number).add(inc ? BigInteger.valueOf(1) : BigInteger.valueOf(-1)); + }; } - private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType type) throws Exception { + private void canMatchSearchAfterForOneNumericType(NumberFieldMapper.NumberType type) throws Exception { XContentBuilder mapping = XContentFactory.jsonBuilder() .startObject() .startObject("properties") @@ -1245,17 +1228,22 @@ private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType typ .endObject() .endObject() .endObject(); - IndexService indexService = createIndex("test", Settings.EMPTY, MapperService.SINGLE_MAPPING_NAME, mapping); + IndexService indexService = createIndex( + "test", + Settings.builder().put("number_of_shards", 1).put("number_of_replicas", 0).build(), + MapperService.SINGLE_MAPPING_NAME, + mapping + ); ensureGreen(); - final boolean allDocsNonMissing = randomBoolean(); + final boolean allDocsHaveValue = randomBoolean(); final BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); final int numDocs = randomIntBetween(10, 20); - final Number[] nums1 = new Number[allDocsNonMissing ? numDocs : numDocs - 1]; + final Number[] nums1 = new Number[allDocsHaveValue ? numDocs : numDocs - 1]; final Number[] nums2 = new Number[numDocs]; for (int i = 0; i < numDocs; i++) { String source; - if (i < numDocs - 1 || allDocsNonMissing) { + if (i < numDocs - 1 || allDocsHaveValue) { nums1[i] = randomNumber(type); nums2[i] = randomNumber(type); source = String.format(Locale.ROOT, "{\"num1\": %s, \"num2\": %s}", nums1[i].toString(), nums2[i].toString()); @@ -1267,11 +1255,8 @@ private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType typ } bulkRequestBuilder.get(); client().admin().indices().prepareRefresh().get(); - Arrays.sort(nums1); - Arrays.sort(nums2); final IndexShard shard = indexService.getShard(0); - final SearchService service = getInstanceFromNode(SearchService.class); final ShardSearchRequest shardRequest = new ShardSearchRequest( OriginalIndices.NONE, new SearchRequest().allowPartialSearchResults(true), @@ -1283,62 +1268,253 @@ private void canMatchSearchAfterNumericTestCase(NumberFieldMapper.NumberType typ null, null ); + shardRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).trackTotalHits(false)); assertFalse(shard.hasRefreshPending()); - final CheckedTriFunction outOfRangeTester = (outOfRange, missingMatch, reverse) -> { - Number searchAfter; - Object missing; - if (outOfRange) { - searchAfter = reverse ? incDecNumber(nums1[0], type, false) : incDecNumber(nums1[nums1.length - 1], type, true); - } else { - searchAfter = randomFrom(nums1); + for (boolean singleSort : new boolean[] { true, false }) { + for (boolean searchAfterOutOfRange : new boolean[] { true, false }) { + for (boolean searchAfterEqualsToBoundary : new boolean[] { true, false }) { + for (boolean missingValueOutOfRange : new boolean[] { true, false }) { + for (boolean missingValueEqualsToSearchAfter : new boolean[] { true, false }) { + runNumericCanMatchTest( + type, + shardRequest, + nums1, + nums2, + allDocsHaveValue, + singleSort, + searchAfterOutOfRange, + searchAfterEqualsToBoundary, + missingValueOutOfRange, + missingValueEqualsToSearchAfter + ); + } + } + } } - if (missingMatch) { - missing = reverse - ? (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, false)) - : (randomBoolean() ? "_last" : incDecNumber(searchAfter, type, true)); + } + client().admin().indices().prepareDelete("test").get(); + ensureGreen(); + } + + private void runNumericCanMatchTest( + NumberFieldMapper.NumberType type, + ShardSearchRequest shardRequest, + Number[] nums1, + Number[] nums2, + boolean allDocsHaveValue, + boolean singleSort, + boolean searchAfterOutOfRange, + boolean searchAfterEqualsToBoundary, + boolean missingValueOutOfRange, + boolean missingValueEqualsToSearchAfter + ) throws IOException { + Number searchAfter; + Object missingValue; + Arrays.sort(nums1); + Arrays.sort(nums2); + final boolean reverse = randomBoolean(); + if (searchAfterOutOfRange) { + searchAfter = reverse ? incOrDecNumber(nums1[0], type, false) : incOrDecNumber(nums1[nums1.length - 1], type, true); + } else { + if (searchAfterEqualsToBoundary) { + searchAfter = reverse ? nums1[0] : nums1[nums1.length - 1]; } else { - missing = reverse - ? (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, true)) - : (randomBoolean() ? "_first" : incDecNumber(searchAfter, type, false)); + int from = 1, to = nums1.length - 1; + while (nums1[from].equals(nums1[from - 1])) + from++; + while (nums1[to - 1].equals(nums1[to])) + to--; + assert from < to; + searchAfter = randomFrom(Arrays.asList(nums1).subList(from, to)); } - shardRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).trackTotalHits(false)); - shardRequest.source().sort(SortBuilders.fieldSort("num1").missing(missing).order(reverse ? SortOrder.DESC : SortOrder.ASC)); - List searchAfterFields = new ArrayList<>(); - searchAfterFields.add(searchAfter); - if (randomBoolean()) { - shardRequest.source().sort("num2"); - searchAfterFields.add(randomFrom(nums2)); - } - shardRequest.source().searchAfter(searchAfterFields.toArray()); - SearchService.CanMatchResponse response = service.canMatch(shardRequest); + } + if (missingValueOutOfRange) { + missingValue = reverse + ? (randomBoolean() ? "_first" : incOrDecNumber(searchAfter, type, true)) + : (randomBoolean() ? "_first" : incOrDecNumber(searchAfter, type, false)); + } else { + missingValue = missingValueEqualsToSearchAfter ? searchAfter + : reverse ? (randomBoolean() ? "_last" : incOrDecNumber(searchAfter, type, false)) + : (randomBoolean() ? "_last" : incOrDecNumber(searchAfter, type, true)); + } + shardRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).trackTotalHits(false)); + shardRequest.source().sort(SortBuilders.fieldSort("num1").missing(missingValue).order(reverse ? SortOrder.DESC : SortOrder.ASC)); + List searchAfterFields = new ArrayList<>(); + searchAfterFields.add(searchAfter); + if (singleSort == false) { + shardRequest.source().sort("num2"); + searchAfterFields.add(randomFrom(nums2)); + } + shardRequest.source().searchAfter(searchAfterFields.toArray()); + final SearchService service = getInstanceFromNode(SearchService.class); + SearchService.CanMatchResponse response = service.canMatch(shardRequest); - if (type == NumberFieldMapper.NumberType.HALF_FLOAT || type == NumberFieldMapper.NumberType.UNSIGNED_LONG) { - assertTrue(response.canMatch()); - return null; - } + if (type == NumberFieldMapper.NumberType.HALF_FLOAT || type == NumberFieldMapper.NumberType.UNSIGNED_LONG) { + assertTrue(response.canMatch()); + return; + } - if (outOfRange == false || (allDocsNonMissing == false && missingMatch)) { - assertTrue(response.canMatch()); + final boolean nonMatch; + if (allDocsHaveValue) { + nonMatch = searchAfterOutOfRange || (singleSort && searchAfterEqualsToBoundary); + } else { + if (searchAfterOutOfRange) { + nonMatch = missingValueOutOfRange || (singleSort && missingValueEqualsToSearchAfter); } else { - assertFalse(response.canMatch()); - } - return null; - }; - for (boolean outOfRange : new boolean[] { true, false }) { - for (boolean missingMatch : new boolean[] { true, false }) { - for (boolean reverse : new boolean[] { true, false }) { - outOfRangeTester.apply(outOfRange, missingMatch, reverse); - } + nonMatch = singleSort && searchAfterEqualsToBoundary && (missingValueOutOfRange || missingValueEqualsToSearchAfter); } } - client().admin().indices().prepareDelete("test").get(); - ensureGreen(); + + boolean canMatch = response.canMatch(); + assertEquals(nonMatch == false, canMatch); } public void testNumericCanMatch() throws Exception { for (var type : NumberFieldMapper.NumberType.values()) { - canMatchSearchAfterNumericTestCase(type); + canMatchSearchAfterForOneNumericType(type); + } + } + + private void runStringCanMatchTest( + ShardSearchRequest shardRequest, + BytesRef[] values, + boolean allDocsHaveValue, + boolean singleSort, + boolean searchAfterOutOfRange, + boolean searchAfterEqualsToBoundary, + boolean customMissingValue, + boolean sortMissingLast + ) throws IOException { + Object searchAfter; + Object missingValue; + Arrays.sort(values, BytesRef::compareTo); + final String minValue = values[0].utf8ToString(); + final String maxValue = values[values.length - 1].utf8ToString(); + final boolean reverse = randomBoolean(); + if (searchAfterOutOfRange) { + searchAfter = reverse ? minValue.substring(0, minValue.length() - 1) : maxValue + randomAlphaOfLength(1); + } else { + if (searchAfterEqualsToBoundary) { + searchAfter = reverse ? minValue : maxValue; + } else { + int from = 1, to = values.length - 1; + while (values[from].equals(values[0])) + from++; + while (values[to - 1].equals(values[values.length - 1])) + to--; + assert from < to; + searchAfter = randomFrom(Arrays.asList(values).subList(from, to)).utf8ToString(); + } + } + if (customMissingValue) { + missingValue = randomBoolean() ? randomFrom(values).utf8ToString() : searchAfter; + } else { + missingValue = sortMissingLast ? "_last" : "_first"; + } + shardRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).trackTotalHits(false)); + shardRequest.source().sort(SortBuilders.fieldSort("field1").missing(missingValue).order(reverse ? SortOrder.DESC : SortOrder.ASC)); + List searchAfterFields = new ArrayList<>(); + searchAfterFields.add(searchAfter); + if (singleSort == false) { + shardRequest.source().sort("field2"); + searchAfterFields.add(randomAlphaOfLength(10)); + } + shardRequest.source().searchAfter(searchAfterFields.toArray()); + final SearchService service = getInstanceFromNode(SearchService.class); + SearchService.CanMatchResponse response = service.canMatch(shardRequest); + + if (customMissingValue) { + assertTrue(response.canMatch()); + return; + } + + final boolean nonMatch; + if (allDocsHaveValue) { + nonMatch = searchAfterOutOfRange || (singleSort && searchAfterEqualsToBoundary); + } else { + if (searchAfterOutOfRange) { + nonMatch = sortMissingLast == false; + } else { + nonMatch = singleSort && searchAfterEqualsToBoundary && sortMissingLast == false; + } + } + + boolean canMatch = response.canMatch(); + assertEquals(nonMatch == false, canMatch); + } + + public void testStringCanMatch() throws Exception { + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("field1") + .field("type", "keyword") + .endObject() + .startObject("field2") + .field("type", "keyword") + .endObject() + .endObject() + .endObject(); + IndexService indexService = createIndex( + "test", + Settings.builder().put("number_of_shards", 1).put("number_of_replicas", 0).build(), + MapperService.SINGLE_MAPPING_NAME, + mapping + ); + ensureGreen(); + + final boolean allDocsHaveValue = randomBoolean(); + final BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); + final int numDocs = randomIntBetween(10, 20); + final BytesRef[] field1 = new BytesRef[allDocsHaveValue ? numDocs : numDocs - 1]; + for (int i = 0; i < numDocs; i++) { + String source; + if (i < numDocs - 1 || allDocsHaveValue) { + String str = randomAlphaOfLengthBetween(10, 20); + field1[i] = new BytesRef(str); + source = String.format(Locale.ROOT, "{\"field1\": \"%s\", \"field2\": \"%s\"}", str, randomAlphaOfLength(10)); + } else { + source = String.format(Locale.ROOT, "{\"field2\": \"%s\"}", randomAlphaOfLength(10)); + } + bulkRequestBuilder.add(client().prepareIndex("test").setId(String.valueOf(i)).setSource(source, MediaTypeRegistry.JSON)); + } + bulkRequestBuilder.get(); + client().admin().indices().prepareRefresh().get(); + + final IndexShard shard = indexService.getShard(0); + final ShardSearchRequest shardRequest = new ShardSearchRequest( + OriginalIndices.NONE, + new SearchRequest().allowPartialSearchResults(true), + shard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1f, + -1, + null, + null + ); + assertFalse(shard.hasRefreshPending()); + + for (boolean singleSort : new boolean[] { false, true }) { + for (boolean searchAfterOutOfRange : new boolean[] { false, true }) { + for (boolean searchAfterEqualsToBoundary : new boolean[] { false, true }) { + for (boolean customMissingValue : new boolean[] { false, true }) { + for (boolean sortMissingLast : new boolean[] { false, true }) { + runStringCanMatchTest( + shardRequest, + field1, + allDocsHaveValue, + singleSort, + searchAfterOutOfRange, + searchAfterEqualsToBoundary, + customMissingValue, + sortMissingLast + ); + } + } + } + } } } @@ -2330,9 +2506,18 @@ public void testCanMatchSearchAfterAscGreaterThanMaxAndMissing() throws IOExcept FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 10L }); MinAndMax minMax = new MinAndMax(0L, 9L); SortField sortField = new SortField("test", SortField.Type.LONG); - sortField.setMissingValue(Long.MIN_VALUE); + sortField.setMissingValue(randomLongBetween(Long.MIN_VALUE, 9L)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + assertFalse( + SearchService.canMatchSearchAfter( + searchAfter, + fieldStats, + primarySort, + SearchContext.TRACK_TOTAL_HITS_DISABLED, + randomBoolean() + ) + ); } /** @@ -2346,7 +2531,16 @@ public void testCanMatchSearchAfterAscGreaterThanMaxAndLessThanMissing() throws SortField sortField = new SortField("test", SortField.Type.LONG); sortField.setMissingValue(11L); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + assertTrue( + SearchService.canMatchSearchAfter( + searchAfter, + fieldStats, + primarySort, + SearchContext.TRACK_TOTAL_HITS_DISABLED, + randomBoolean() + ) + ); } /** @@ -2360,21 +2554,35 @@ public void testCanMatchSearchAfterAscLessThanMax() throws IOException { SortField sortField = new SortField("test", SortField.Type.LONG); sortField.setMissingValue(randomFrom(Long.MIN_VALUE, 10L)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + assertTrue( + SearchService.canMatchSearchAfter( + searchAfter, + fieldStats, + primarySort, + SearchContext.TRACK_TOTAL_HITS_DISABLED, + randomBoolean() + ) + ); } /** * Test for ASC order search_after query. - * Min = 0L, Max = 9L, search_after = 9L, missing = Long.MIN_VALUE/10L + * Min = 0L, Max = 9L, search_after = 9L, missing = 9L * Expected result is canMatch = true */ - public void testCanMatchSearchAfterAscEqualMax() throws IOException { + public void testCanMatchSearchAfterAscEqualMaxAndMissing() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 9L }); MinAndMax minMax = new MinAndMax(0L, 9L); SortField sortField = new SortField("test", SortField.Type.LONG); - sortField.setMissingValue(randomFrom(Long.MIN_VALUE, 10L)); + sortField.setMissingValue(9L); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + final boolean singleSort = randomBoolean(); + assertEquals( + singleSort == false, + SearchService.canMatchSearchAfter(searchAfter, fieldStats, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, singleSort) + ); } /** @@ -2388,7 +2596,16 @@ public void testCanMatchSearchAfterDescGreaterThanMin() throws IOException { SortField sortField = new SortField("test", SortField.Type.LONG, true); sortField.setMissingValue(randomFrom(Long.MAX_VALUE, Long.MIN_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + assertTrue( + SearchService.canMatchSearchAfter( + searchAfter, + fieldStats, + primarySort, + SearchContext.TRACK_TOTAL_HITS_DISABLED, + randomBoolean() + ) + ); } /** @@ -2402,38 +2619,35 @@ public void testCanMatchSearchAfterDescLessThanMinAndMissing() throws IOExceptio SortField sortField = new SortField("test", SortField.Type.LONG, true); sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + assertFalse( + SearchService.canMatchSearchAfter( + searchAfter, + fieldStats, + primarySort, + SearchContext.TRACK_TOTAL_HITS_DISABLED, + randomBoolean() + ) + ); } /** * Test for DESC order search_after query. - * Min = 0L, Max = 9L, search_after = 0L, missing > search_after + * Min = 0L, Max = 9L, search_after = 0L, missing = 0L * Expected result is canMatch = true */ - public void testCanMatchSearchAfterDescEqualMinAndLessThanMissing() throws IOException { + public void testCanMatchSearchAfterDescEqualMinAndMissing() throws IOException { FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { 0L }); MinAndMax minMax = new MinAndMax(0L, 9L); SortField sortField = new SortField("test", SortField.Type.LONG, true); - sortField.setMissingValue(randomLongBetween(1L, Long.MAX_VALUE)); - final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); - } - - /** - * Test canMatchSearchAfter with missing value, even if min/max is out of range - * Min = 0L, Max = 9L, search_after = -1L - */ - public void testCanMatchSearchAfterWithMinMaxOutOfRangeAndDifferentMissing() throws IOException { - FieldDoc searchAfter = new FieldDoc(0, 0, new Long[] { -1L }); - MinAndMax minMax = new MinAndMax(0L, 9L); - SortField sortField = new SortField("test", SortField.Type.LONG, true); - sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); + sortField.setMissingValue(0L); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - // Should be false with missing values is larger than search_after - assertFalse(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); - sortField.setMissingValue(Long.MIN_VALUE); - // Should be true with missing values is less than search_after - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + final boolean singleSort = randomBoolean(); + assertEquals( + singleSort == false, + SearchService.canMatchSearchAfter(searchAfter, fieldStats, primarySort, SearchContext.TRACK_TOTAL_HITS_DISABLED, singleSort) + ); } /** @@ -2449,6 +2663,7 @@ public void testCanMatchSearchAfterDescLessThanMinAndMissingWithTrackTotalHits() SortField sortField = new SortField("test", SortField.Type.LONG, true); sortField.setMissingValue(randomLongBetween(0L, Long.MAX_VALUE)); final SortAndFormats primarySort = new SortAndFormats(new Sort(sortField), new DocValueFormat[] { DocValueFormat.RAW }); - assertTrue(SearchService.canMatchSearchAfter(searchAfter, minMax, primarySort, 1000, false)); + final FieldStats fieldStats = new FieldStats(minMax, randomBoolean()); + assertTrue(SearchService.canMatchSearchAfter(searchAfter, fieldStats, primarySort, 1000, randomBoolean())); } } diff --git a/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java b/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java index 4fd3bef1ff1e1..1978dbac89906 100644 --- a/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java +++ b/server/src/test/java/org/opensearch/search/sort/FieldSortBuilderTests.java @@ -88,7 +88,7 @@ import java.util.Locale; import java.util.Set; -import static org.opensearch.search.sort.FieldSortBuilder.getFieldStatsForShard; +import static org.opensearch.search.sort.FieldSortBuilder.getFieldStatsOrNullForShard; import static org.opensearch.search.sort.FieldSortBuilder.getPrimaryFieldSortOrNull; import static org.opensearch.search.sort.NestedSortBuilderTests.createRandomNestedSort; import static org.hamcrest.Matchers.instanceOf; @@ -490,8 +490,8 @@ public void testGetMaxNumericSortValue() throws IOException { QueryShardContext context = createMockShardContext(); for (NumberFieldMapper.NumberType numberType : NumberFieldMapper.NumberType.values()) { String fieldName = "custom-" + numberType.numericType(); - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName))); - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); + assertNull(getFieldStatsOrNullForShard(context, SortBuilders.fieldSort(fieldName))); + assertNull(getFieldStatsOrNullForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); try (Directory dir = newDirectory()) { int numDocs = randomIntBetween(10, 30); @@ -558,17 +558,17 @@ public void testGetMaxNumericSortValue() throws IOException { QueryShardContext newContext = createMockShardContext(new AssertingIndexSearcher(random(), reader)); if (numberType == NumberFieldMapper.NumberType.HALF_FLOAT || numberType == NumberFieldMapper.NumberType.UNSIGNED_LONG) { - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName))); + assertNull(getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); + assertNull(getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName))); } else { - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); + assertNull(getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName + "-ni"))); assertEquals( values[numDocs - 1], - getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMax() + getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName)).minAndMax().getMax() ); assertEquals( values[0], - getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMin() + getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName)).minAndMax().getMin() ); } } @@ -580,8 +580,8 @@ public void testGetMaxNumericSortValue() throws IOException { public void testGetMaxNumericDateValue() throws IOException { QueryShardContext context = createMockShardContext(); String fieldName = "custom-date"; - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName))); - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); + assertNull(getFieldStatsOrNullForShard(context, SortBuilders.fieldSort(fieldName))); + assertNull(getFieldStatsOrNullForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); try (Directory dir = newDirectory()) { int numDocs = randomIntBetween(10, 30); final long[] values = new long[numDocs]; @@ -597,9 +597,12 @@ public void testGetMaxNumericDateValue() throws IOException { QueryShardContext newContext = createMockShardContext(new AssertingIndexSearcher(random(), reader)); assertEquals( values[numDocs - 1], - getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMax() + getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName)).minAndMax().getMax() + ); + assertEquals( + values[0], + getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName)).minAndMax().getMin() ); - assertEquals(values[0], getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMin()); } } } @@ -608,8 +611,8 @@ public void testGetMaxNumericDateValue() throws IOException { public void testGetMaxKeywordValue() throws IOException { QueryShardContext context = createMockShardContext(); String fieldName = "custom-keyword"; - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName))); - assertEquals(FieldStats.UNKNOWN, getFieldStatsForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); + assertNull(getFieldStatsOrNullForShard(context, SortBuilders.fieldSort(fieldName))); + assertNull(getFieldStatsOrNullForShard(context, SortBuilders.fieldSort(fieldName + "-ni"))); try (Directory dir = newDirectory()) { int numDocs = randomIntBetween(10, 30); final BytesRef[] values = new BytesRef[numDocs]; @@ -625,9 +628,12 @@ public void testGetMaxKeywordValue() throws IOException { QueryShardContext newContext = createMockShardContext(new AssertingIndexSearcher(random(), reader)); assertEquals( values[numDocs - 1], - getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMax() + getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName)).minAndMax().getMax() + ); + assertEquals( + values[0], + getFieldStatsOrNullForShard(newContext, SortBuilders.fieldSort(fieldName)).minAndMax().getMin() ); - assertEquals(values[0], getFieldStatsForShard(newContext, SortBuilders.fieldSort(fieldName)).getMinAndMax().getMin()); } } } From 9f7c4497786aa812af1743b86b6ac88dd67b7b73 Mon Sep 17 00:00:00 2001 From: panguixin Date: Wed, 21 May 2025 14:45:21 +0800 Subject: [PATCH 5/5] remove method Signed-off-by: panguixin --- .../org/opensearch/search/SearchService.java | 16 ++-------------- .../org/opensearch/search/sort/MinAndMax.java | 2 +- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 760f17cdbc1ea..7211aaf8c8d68 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -1767,28 +1767,16 @@ private static boolean canMatchMissingValue(SortField primarySortField, Object p assert IndexSortConfig.getSortFieldType(primarySortField) == SortField.Type.STRING; return missingValue == SortField.STRING_FIRST; } - return compare(primarySearchAfter, missingValue) > (singleSort ? 0 : -1); + return MinAndMax.compare(primarySearchAfter, missingValue) > (singleSort ? 0 : -1); } else { if (primarySearchAfter instanceof BytesRef) { assert IndexSortConfig.getSortFieldType(primarySortField) == SortField.Type.STRING; return missingValue == SortField.STRING_LAST; } - return compare(primarySearchAfter, missingValue) < (singleSort ? 0 : 1); + return MinAndMax.compare(primarySearchAfter, missingValue) < (singleSort ? 0 : 1); } } - private static int compare(Object lh, Object rh) { - assert lh != null; - assert rh != null; - return switch (lh) { - case Long l -> Long.compare(l, (Long) rh); - case Integer i -> Integer.compare(i, (Integer) rh); - case Float v -> Float.compare(v, (Float) rh); - case Double v -> Double.compare(v, (Double) rh); - default -> throw new UnsupportedOperationException("compare type not supported : " + lh.getClass()); - }; - } - private static FieldDoc getSearchAfterFieldDoc(ShardSearchRequest request, QueryShardContext context) throws IOException { if (context != null && request != null && request.source() != null && request.source().sorts() != null) { final List> sorts = request.source().sorts(); diff --git a/server/src/main/java/org/opensearch/search/sort/MinAndMax.java b/server/src/main/java/org/opensearch/search/sort/MinAndMax.java index bb7e2f38421d5..ce21b050691e1 100644 --- a/server/src/main/java/org/opensearch/search/sort/MinAndMax.java +++ b/server/src/main/java/org/opensearch/search/sort/MinAndMax.java @@ -110,7 +110,7 @@ public int compareMax(Object object) { return compare(getMax(), object); } - private static int compare(Object one, Object two) { + public static int compare(Object one, Object two) { return switch (one) { case Long v -> Long.compare(v, (Long) two); case Integer v -> Integer.compare(v, (Integer) two);