Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395))
- Performance improvement for Datetime field caching ([#4558](https://github.com/opensearch-project/OpenSearch/issues/4558))
- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273))
- Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057))
- [BUG] Disable sort optimization for HALF_FLOAT ([#10999](https://github.com/opensearch-project/OpenSearch/pull/10999))

### Deprecated
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
"search on keyword fields with doc_values enabled":
- do:
indices.create:
index: test
body:
mappings:
properties:
"some_keyword":
type: "keyword"
index: true
doc_values: true

- do:
bulk:
index: test
refresh: true
body:
- '{"index": {"_index": "test", "_id": "1" }}'
- '{ "some_keyword": "ingesting some random keyword data" }'
- '{ "index": { "_index": "test", "_id": "2" }}'
- '{ "some_keyword": "400" }'
- '{ "index": { "_index": "test", "_id": "3" } }'
- '{ "some_keyword": "5" }'

- do:
search:
index: test
body:
query:
prefix:
some_keyword: "ing"

- match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" }

- do:
search:
index: test
body:
query:
range: {
"some_keyword": {
"lt": 500
} }

- match: { hits.total.value: 2 }
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,24 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Operations;
import org.opensearch.OpenSearchException;
import org.opensearch.common.Nullable;
import org.opensearch.common.lucene.BytesRefs;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.lucene.search.AutomatonQueries;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.NamedAnalyzer;
Expand All @@ -62,6 +75,8 @@
import java.util.Objects;
import java.util.function.Supplier;

import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;

/**
* A field mapper for keywords. This mapper accepts strings and indexes them as-is.
*
Expand Down Expand Up @@ -317,7 +332,7 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S
@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
if (format != null) {
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't " + "support formats.");
}

return new SourceValueFetcher(name(), context, nullValue) {
Expand Down Expand Up @@ -372,17 +387,226 @@ protected BytesRef indexedValueForSearch(Object value) {
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
}

@Override
public Query termsQuery(List<?> values, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
// has index and doc_values enabled
if (isSearchable() && hasDocValues()) {
BytesRef[] bytesRefs = new BytesRef[values.size()];
for (int i = 0; i < bytesRefs.length; i++) {
bytesRefs[i] = indexedValueForSearch(values.get(i));
}
Query indexQuery = new TermInSetQuery(name(), bytesRefs);
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
}
// if we only have doc_values enabled, we construct a new query with doc_values re-written
if (hasDocValues()) {
BytesRef[] bytesRefs = new BytesRef[values.size()];
for (int i = 0; i < bytesRefs.length; i++) {
bytesRefs[i] = indexedValueForSearch(values.get(i));
}
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs);
}
// has index enabled, we're going to return the query as is
return super.termsQuery(values, context);
}

@Override
public Query prefixQuery(
String value,
@Nullable MultiTermQuery.RewriteMethod method,
boolean caseInsensitive,
QueryShardContext context
) {
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
"[prefix] queries cannot be executed when '"
+ ALLOW_EXPENSIVE_QUERIES.getKey()
+ "' is set to false. For optimised prefix queries on text "
+ "fields please enable [index_prefixes]."
);
}
failIfNotIndexedAndNoDocValues();
if (isSearchable() && hasDocValues()) {
Query indexQuery = super.prefixQuery(value, method, caseInsensitive, context);
Query dvQuery = super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
}
if (hasDocValues()) {
if (caseInsensitive) {
return AutomatonQueries.caseInsensitivePrefixQuery(
(new Term(name(), indexedValueForSearch(value))),
MultiTermQuery.DOC_VALUES_REWRITE
);
}
return new PrefixQuery(new Term(name(), indexedValueForSearch(value)), MultiTermQuery.DOC_VALUES_REWRITE);
}
return super.prefixQuery(value, method, caseInsensitive, context);
}

@Override
public Query regexpQuery(
String value,
int syntaxFlags,
int matchFlags,
int maxDeterminizedStates,
@Nullable MultiTermQuery.RewriteMethod method,
QueryShardContext context
) {
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
"[regexp] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false."
);
}
failIfNotIndexedAndNoDocValues();
if (isSearchable() && hasDocValues()) {
Query indexQuery = super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
Query dvQuery = super.regexpQuery(
value,
syntaxFlags,
matchFlags,
maxDeterminizedStates,
MultiTermQuery.DOC_VALUES_REWRITE,
context
);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
}
if (hasDocValues()) {
return new RegexpQuery(
new Term(name(), indexedValueForSearch(value)),
syntaxFlags,
matchFlags,
RegexpQuery.DEFAULT_PROVIDER,
maxDeterminizedStates,
MultiTermQuery.DOC_VALUES_REWRITE
);
}
return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
}

@Override
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) {
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
"[range] queries on [text] or [keyword] fields cannot be executed when '"
+ ALLOW_EXPENSIVE_QUERIES.getKey()
+ "' is set to false."
);
}
failIfNotIndexedAndNoDocValues();
if (isSearchable() && hasDocValues()) {
Query indexQuery = new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper
);
Query dvQuery = new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper,
MultiTermQuery.DOC_VALUES_REWRITE
);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
}
if (hasDocValues()) {
return new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper,
MultiTermQuery.DOC_VALUES_REWRITE
);
}
return new TermRangeQuery(
name(),
lowerTerm == null ? null : indexedValueForSearch(lowerTerm),
upperTerm == null ? null : indexedValueForSearch(upperTerm),
includeLower,
includeUpper
);
}

@Override
public Query fuzzyQuery(
Object value,
Fuzziness fuzziness,
int prefixLength,
int maxExpansions,
boolean transpositions,
@Nullable MultiTermQuery.RewriteMethod method,
QueryShardContext context
) {
failIfNotIndexedAndNoDocValues();
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
"[fuzzy] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false."
);
}
if (isSearchable() && hasDocValues()) {
Query indexQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context);
Query dvQuery = super.fuzzyQuery(
value,
fuzziness,
prefixLength,
maxExpansions,
transpositions,
MultiTermQuery.DOC_VALUES_REWRITE,
context
);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
}
if (hasDocValues()) {
return new FuzzyQuery(
new Term(name(), indexedValueForSearch(value)),
fuzziness.asDistance(BytesRefs.toString(value)),
prefixLength,
maxExpansions,
transpositions,
MultiTermQuery.DOC_VALUES_REWRITE
);
}
return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context);
}

@Override
public Query wildcardQuery(
String value,
@Nullable MultiTermQuery.RewriteMethod method,
boolean caseInsensitve,
boolean caseInsensitive,
QueryShardContext context
) {
// keyword field types are always normalized, so ignore case sensitivity and force normalize the wildcard
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
"[wildcard] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false."
);
}
failIfNotIndexedAndNoDocValues();
// keyword field types are always normalized, so ignore case sensitivity and force normalize the
// wildcard
// query text
return super.wildcardQuery(value, method, caseInsensitve, true, context);
if (isSearchable() && hasDocValues()) {
Query indexQuery = super.wildcardQuery(value, method, caseInsensitive, true, context);
Query dvQuery = super.wildcardQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, true, context);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
}
if (hasDocValues()) {
Term term;
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
term = new Term(name(), value);
if (caseInsensitive) {
return AutomatonQueries.caseInsensitiveWildcardQuery(term, method);
}
return new WildcardQuery(term, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, MultiTermQuery.DOC_VALUES_REWRITE);
}
return super.wildcardQuery(value, method, caseInsensitive, true, context);
}

}

private final boolean indexed;
Expand Down Expand Up @@ -422,8 +646,10 @@ protected KeywordFieldMapper(
this.indexAnalyzers = builder.indexAnalyzers;
}

/** Values that have more chars than the return value of this method will
* be skipped at parsing time. */
/**
* Values that have more chars than the return value of this method will
* be skipped at parsing time.
*/
public int ignoreAbove() {
return ignoreAbove;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,21 @@ public Query fuzzyQuery(
);
}

// Fuzzy Query with re-write method
public Query fuzzyQuery(
Object value,
Fuzziness fuzziness,
int prefixLength,
int maxExpansions,
boolean transpositions,
@Nullable MultiTermQuery.RewriteMethod method,
QueryShardContext context
) {
throw new IllegalArgumentException(
"Can only use fuzzy queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}

// Case sensitive form of prefix query
public final Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
return prefixQuery(value, method, false, context);
Expand Down Expand Up @@ -433,6 +448,15 @@ protected final void failIfNotIndexed() {
}
}

protected final void failIfNotIndexedAndNoDocValues() {
// we fail if a field is both not indexed and does not have doc_values enabled
if (isIndexed == false && hasDocValues() == false) {
throw new IllegalArgumentException(
"Cannot search on field [" + name() + "] since it is both not indexed," + " and does not have doc_values enabled."
);
}
}

public boolean eagerGlobalOrdinals() {
return eagerGlobalOrdinals;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.search.fetch.subphase.highlight;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
Expand Down Expand Up @@ -104,6 +105,8 @@ protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> t
super.extract(((FunctionScoreQuery) query).getSubQuery(), boost, terms);
} else if (query instanceof OpenSearchToParentBlockJoinQuery) {
super.extract(((OpenSearchToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
} else if (query instanceof IndexOrDocValuesQuery) {
super.extract(((IndexOrDocValuesQuery) query).getIndexQuery(), boost, terms);
} else {
super.extract(query, boost, terms);
}
Expand Down
Loading