diff --git a/CHANGELOG.md b/CHANGELOG.md index 12951aeeb75f7..218719d0d5e77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Replace centos:8 with almalinux:8 since centos docker images are deprecated ([#19154](https://github.com/opensearch-project/OpenSearch/pull/19154)) - Add CompletionStage variants to IndicesAdminClient as an alternative to ActionListener ([#19161](https://github.com/opensearch-project/OpenSearch/pull/19161)) - Remove cap on Java version used by forbidden APIs ([#19163](https://github.com/opensearch-project/OpenSearch/pull/19163)) +- Disable pruning for `doc_values` for the wildcard field mapper ([#18568](https://github.com/opensearch-project/OpenSearch/pull/18568)) ### Fixed - Fix unnecessary refreshes on update preparation failures ([#15261](https://github.com/opensearch-project/OpenSearch/issues/15261)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml index a85399feefd25..045f3ed3eb320 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml @@ -393,3 +393,23 @@ setup: terms: { my_field: [ "\\*" ] } - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "9" } + +--- +"sort on doc value enabled wildcard": + - skip: + version: " - 3.2.99" + reason: "sorting on doc value enabled wildcard has bug before 3.3.0" + - do: + search: + index: test + body: + query: + wildcard: + my_field: + value: "*" + sort: + - my_field.doc_values: + order: asc + size: 4 + - match: { hits.total.value: 8 } + - match: { hits.hits.0._id: "8" } diff --git a/server/src/main/java/org/opensearch/common/lucene/Lucene.java b/server/src/main/java/org/opensearch/common/lucene/Lucene.java index 0dcb95ed9a9de..0de73245ab95b 100644 --- a/server/src/main/java/org/opensearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/opensearch/common/lucene/Lucene.java @@ -92,6 +92,7 @@ import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.plain.NonPruningSortedSetOrdinalsIndexFieldData.NonPruningSortField; import org.opensearch.search.sort.SortedWiderNumericSortField; import java.io.IOException; @@ -576,6 +577,24 @@ public static void writeSortField(StreamOutput out, SortField sortField) throws ); newSortField.setMissingValue(sortField.getMissingValue()); sortField = newSortField; + } else if (sortField instanceof NonPruningSortField) { + // There are 2 cases of how NonPruningSortField wraps around its underlying sort field. + // Which are through the SortField class or SortedSetSortField class + // We will serialize the sort field based on the type of underlying sort field + // Here the underlying sort field is SortedSetSortField, therefore, we will follow the + // logic in serializing SortedSetSortField and also unwrap the SortField case. + NonPruningSortField nonPruningSortField = (NonPruningSortField) sortField; + if (nonPruningSortField.getDelegate().getClass() == SortedSetSortField.class) { + SortField newSortField = new SortField( + nonPruningSortField.getField(), + SortField.Type.STRING, + nonPruningSortField.getReverse() + ); + newSortField.setMissingValue(nonPruningSortField.getMissingValue()); + sortField = newSortField; + } else if (nonPruningSortField.getDelegate().getClass() == SortField.class) { + sortField = nonPruningSortField.getDelegate(); + } } if (sortField.getClass() != SortField.class) { diff --git a/server/src/main/java/org/opensearch/index/fielddata/plain/NonPruningSortedSetOrdinalsIndexFieldData.java b/server/src/main/java/org/opensearch/index/fielddata/plain/NonPruningSortedSetOrdinalsIndexFieldData.java new file mode 100644 index 0000000000000..fb96085dbb737 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/fielddata/plain/NonPruningSortedSetOrdinalsIndexFieldData.java @@ -0,0 +1,244 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.fielddata.plain; + +import org.apache.lucene.index.IndexSorter; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.FieldComparatorSource; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Pruning; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.Nullable; +import org.opensearch.core.indices.breaker.CircuitBreakerService; +import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; +import org.opensearch.index.fielddata.IndexFieldDataCache; +import org.opensearch.index.fielddata.ScriptDocValues; +import org.opensearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; +import org.opensearch.index.mapper.WildcardFieldMapper; +import org.opensearch.search.MultiValueMode; +import org.opensearch.search.aggregations.support.ValuesSourceType; + +import java.io.IOException; +import java.util.Comparator; +import java.util.function.Function; + +/** + * Wrapper for {@link SortedSetOrdinalsIndexFieldData} which disables pruning optimization for + * sorting. Used in {@link WildcardFieldMapper}. + * + * @opensearch.internal + */ +public class NonPruningSortedSetOrdinalsIndexFieldData extends SortedSetOrdinalsIndexFieldData { + + /** + * Builder for non-pruning sorted set ordinals + * + * @opensearch.internal + */ + public static class Builder implements IndexFieldData.Builder { + private final String name; + private final Function> scriptFunction; + private final ValuesSourceType valuesSourceType; + + public Builder(String name, ValuesSourceType valuesSourceType) { + this(name, AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION, valuesSourceType); + } + + public Builder(String name, Function> scriptFunction, ValuesSourceType valuesSourceType) { + this.name = name; + this.scriptFunction = scriptFunction; + this.valuesSourceType = valuesSourceType; + } + + @Override + public NonPruningSortedSetOrdinalsIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) { + return new NonPruningSortedSetOrdinalsIndexFieldData(cache, name, valuesSourceType, breakerService, scriptFunction); + } + } + + public NonPruningSortedSetOrdinalsIndexFieldData( + IndexFieldDataCache cache, + String fieldName, + ValuesSourceType valuesSourceType, + CircuitBreakerService breakerService, + Function> scriptFunction + ) { + super(cache, fieldName, valuesSourceType, breakerService, scriptFunction); + } + + @Override + public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) { + XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested); + source.disableSkipping(); + /* + Check if we can use a simple {@link SortedSetSortField} compatible with index sorting and + returns a custom sort field otherwise. + */ + if (nested != null + || (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN) + || (source.sortMissingLast(missingValue) == false && source.sortMissingFirst(missingValue) == false)) { + return new NonPruningSortField(new SortField(getFieldName(), source, reverse)); + } + SortField sortField = new NonPruningSortField( + new SortedSetSortField( + getFieldName(), + reverse, + sortMode == MultiValueMode.MAX ? SortedSetSelector.Type.MAX : SortedSetSelector.Type.MIN + ) + ); + sortField.setMissingValue( + source.sortMissingLast(missingValue) ^ reverse ? SortedSetSortField.STRING_LAST : SortedSetSortField.STRING_FIRST + ); + return sortField; + } + + /** + * {@link SortField} implementation which delegates calls to another {@link SortField}. + * + */ + public abstract class FilteredSortField extends SortField { + protected final SortField delegate; + + protected FilteredSortField(SortField sortField) { + super(sortField.getField(), sortField.getType()); + this.delegate = sortField; + } + + @Override + public Object getMissingValue() { + return delegate.getMissingValue(); + } + + @Override + public void setMissingValue(Object missingValue) { + delegate.setMissingValue(missingValue); + } + + @Override + public String getField() { + return delegate.getField(); + } + + @Override + public Type getType() { + return delegate.getType(); + } + + @Override + public boolean getReverse() { + return delegate.getReverse(); + } + + @Override + public FieldComparatorSource getComparatorSource() { + return delegate.getComparatorSource(); + } + + @Override + public String toString() { + return delegate.toString(); + } + + @Override + public boolean equals(Object o) { + return delegate.equals(o); + } + + @Override + public int hashCode() { + return delegate.hashCode(); + } + + @Override + public void setBytesComparator(Comparator b) { + delegate.setBytesComparator(b); + } + + @Override + public Comparator getBytesComparator() { + return delegate.getBytesComparator(); + } + + @Override + public FieldComparator getComparator(int numHits, Pruning pruning) { + return delegate.getComparator(numHits, pruning); + } + + @Override + public SortField rewrite(IndexSearcher searcher) throws IOException { + return delegate.rewrite(searcher); + } + + @Override + public boolean needsScores() { + return delegate.needsScores(); + } + + @Override + public IndexSorter getIndexSorter() { + return delegate.getIndexSorter(); + } + + @Deprecated + @Override + public void setOptimizeSortWithIndexedData(boolean optimizeSortWithIndexedData) { + delegate.setOptimizeSortWithIndexedData(optimizeSortWithIndexedData); + } + + @Deprecated + @Override + public boolean getOptimizeSortWithIndexedData() { + return delegate.getOptimizeSortWithIndexedData(); + } + + @Deprecated + @Override + public void setOptimizeSortWithPoints(boolean optimizeSortWithPoints) { + delegate.setOptimizeSortWithPoints(optimizeSortWithPoints); + } + + @Deprecated + @Override + public boolean getOptimizeSortWithPoints() { + return delegate.getOptimizeSortWithPoints(); + } + } + + /** + * {@link SortField} extension which disables pruning in the comparator. + * + * @opensearch.internal + */ + public final class NonPruningSortField extends FilteredSortField { + + private NonPruningSortField(SortField sortField) { + super(sortField); + } + + public static Type readType(DataInput in) throws IOException { + return SortField.readType(in); + } + + @Override + public FieldComparator getComparator(int numHits, Pruning pruning) { + // explictly disable pruning + return delegate.getComparator(numHits, Pruning.NONE); + } + + public SortField getDelegate() { + return delegate; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java index 21179122c0b5e..b10371f301a59 100644 --- a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java @@ -47,7 +47,7 @@ import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.fielddata.IndexFieldData; -import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; +import org.opensearch.index.fielddata.plain.NonPruningSortedSetOrdinalsIndexFieldData; import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.support.CoreValuesSourceType; @@ -366,7 +366,7 @@ NamedAnalyzer normalizer() { @Override public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { failIfNoDocValues(); - return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES); + return new NonPruningSortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES); } @Override diff --git a/server/src/test/java/org/opensearch/common/lucene/LuceneTests.java b/server/src/test/java/org/opensearch/common/lucene/LuceneTests.java index edcb7f2475693..3e9d3b72b1db2 100644 --- a/server/src/test/java/org/opensearch/common/lucene/LuceneTests.java +++ b/server/src/test/java/org/opensearch/common/lucene/LuceneTests.java @@ -77,11 +77,13 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource; import org.opensearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; import org.opensearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; import org.opensearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource; import org.opensearch.index.fielddata.fieldcomparator.IntValuesComparatorSource; import org.opensearch.index.fielddata.fieldcomparator.LongValuesComparatorSource; +import org.opensearch.index.fielddata.plain.NonPruningSortedSetOrdinalsIndexFieldData; import org.opensearch.search.MultiValueMode; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; @@ -582,6 +584,44 @@ public void testSortFieldSerialization() throws IOException { assertEquals(sortFieldTuple.v2(), deserialized); } + public void testNonpruningSortFieldSerialization() throws IOException { + NonPruningSortedSetOrdinalsIndexFieldData fieldData = new NonPruningSortedSetOrdinalsIndexFieldData( + null, + "field", + null, + null, + null + ); + + SortField nonPruningSortedSetField = fieldData.sortField(null, MultiValueMode.MAX, null, true); + SortField expected = new SortField( + nonPruningSortedSetField.getField(), + SortField.Type.STRING, + nonPruningSortedSetField.getReverse() + ); + expected.setMissingValue(SortField.STRING_FIRST); + SortField deserialized1 = copyInstance( + nonPruningSortedSetField, + EMPTY_REGISTRY, + Lucene::writeSortField, + Lucene::readSortField, + VersionUtils.randomVersion(random()) + ); + assertEquals(expected, deserialized1); + + SortField nonPruningSortField = fieldData.sortField(SortField.STRING_FIRST, MultiValueMode.SUM, null, true); + XFieldComparatorSource source = new BytesRefFieldComparatorSource(null, SortField.STRING_FIRST, MultiValueMode.SUM, null); + expected = new SortField(nonPruningSortField.getField(), source.reducedType(), nonPruningSortField.getReverse()); + SortField deserialized2 = copyInstance( + nonPruningSortField, + EMPTY_REGISTRY, + Lucene::writeSortField, + Lucene::readSortField, + VersionUtils.randomVersion(random()) + ); + assertEquals(expected, deserialized2); + } + public void testSortValueSerialization() throws IOException { Object sortValue = randomSortValue(); Object deserialized = copyInstance( diff --git a/server/src/test/java/org/opensearch/index/fielddata/plain/NonPruningSortedSetOrdinalsIndexFieldDataTests.java b/server/src/test/java/org/opensearch/index/fielddata/plain/NonPruningSortedSetOrdinalsIndexFieldDataTests.java new file mode 100644 index 0000000000000..d5d3299871c86 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/fielddata/plain/NonPruningSortedSetOrdinalsIndexFieldDataTests.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.fielddata.plain; + +import org.apache.lucene.index.IndexSorter; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.Pruning; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedSetSortField; +import org.opensearch.index.IndexService; +import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.IndexFieldDataService; +import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.Mapper.BuilderContext; +import org.opensearch.index.mapper.WildcardFieldMapper; +import org.opensearch.indices.IndicesService; +import org.opensearch.search.MultiValueMode; +import org.opensearch.test.OpenSearchSingleNodeTestCase; + +import java.io.IOException; +import java.util.Comparator; +import java.util.Objects; + +public class NonPruningSortedSetOrdinalsIndexFieldDataTests extends OpenSearchSingleNodeTestCase { + IndexService indexService; + IndicesService indicesService; + IndexFieldDataService ifdService; + BuilderContext ctx; + MappedFieldType stringMapper; + SortField field; + + @Override + public void setUp() throws Exception { + super.setUp(); + indexService = createIndex("test"); + indicesService = getInstanceFromNode(IndicesService.class); + ifdService = new IndexFieldDataService( + indexService.getIndexSettings(), + indicesService.getIndicesFieldDataCache(), + indicesService.getCircuitBreakerService(), + indexService.mapperService(), + indexService.getThreadPool() + ); + ctx = new BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1)); + stringMapper = new WildcardFieldMapper.Builder("string").docValues(true).build(ctx).fieldType(); + ifdService.clear(); + IndexFieldData fd = ifdService.getForField(stringMapper, "test", () -> { throw new UnsupportedOperationException(); }); + field = ((NonPruningSortedSetOrdinalsIndexFieldData) fd).sortField(null, MultiValueMode.MAX, null, false); + field.setMissingValue(SortedSetSortField.STRING_FIRST); + field.setOptimizeSortWithIndexedData(false); + field.setOptimizeSortWithPoints(false); + } + + public void testNonPruningSortedSetOrdinalsIndexFieldDataSerialization() throws IOException { + assertEquals(" missingValue=SortField.STRING_FIRST selector=MAX", field.toString()); + } + + public void testNonPruningSortedSetOrdinalsIndexFieldDataComparator() throws IOException { + assertTrue(field.getBytesComparator().equals(Comparator.naturalOrder())); + assertTrue(field.getComparator(0, Pruning.NONE) instanceof FieldComparator); + assertTrue(field.getIndexSorter() instanceof IndexSorter); + } + + public void testNonPruningSortedSetOrdinalsIndexFieldDataSorting() throws IOException { + assertFalse(field.getOptimizeSortWithIndexedData()); + assertFalse(field.getOptimizeSortWithPoints()); + assertFalse(field.needsScores()); + assertTrue(field.getIndexSorter() instanceof IndexSorter); + } + + public void testNonPruningSortedSetOrdinalsIndexFieldDataEquality() throws IOException { + assertFalse(field.equals(field)); + assertNotEquals( + Objects.hash(field.getField(), field.getType(), field.getReverse(), field.getComparatorSource(), field.getMissingValue()), + field.hashCode() + ); + } +}