Skip to content

Commit 0f5485c

Browse files
committed
Implementation for match_only_text field
Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com>
1 parent cb39d00 commit 0f5485c

7 files changed

Lines changed: 341 additions & 11 deletions

File tree

server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,23 +352,34 @@ public Query existsQuery(QueryShardContext context) {
352352
}
353353

354354
public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
355+
return phraseQuery(stream, slop, enablePositionIncrements, null);
356+
}
357+
358+
public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
355359
throw new IllegalArgumentException(
356360
"Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
357361
);
358362
}
359363

360364
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
365+
return multiPhraseQuery(stream, slop, enablePositionIncrements, null);
366+
}
367+
368+
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
361369
throw new IllegalArgumentException(
362370
"Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
363371
);
364372
}
365373

366374
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
375+
return phrasePrefixQuery(stream, slop, maxExpansions, null);
376+
}
377+
378+
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
367379
throw new IllegalArgumentException(
368380
"Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
369381
);
370382
}
371-
372383
public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) {
373384
throw new IllegalArgumentException(
374385
"Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.mapper;
10+
11+
import org.apache.lucene.analysis.TokenStream;
12+
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
13+
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
14+
import org.apache.lucene.document.FieldType;
15+
import org.apache.lucene.index.IndexOptions;
16+
import org.apache.lucene.index.Term;
17+
import org.apache.lucene.search.BooleanClause;
18+
import org.apache.lucene.search.BooleanQuery;
19+
import org.apache.lucene.search.MultiPhraseQuery;
20+
import org.apache.lucene.search.PhraseQuery;
21+
import org.apache.lucene.search.Query;
22+
import org.apache.lucene.search.TermQuery;
23+
import org.opensearch.Version;
24+
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
25+
import org.opensearch.index.analysis.IndexAnalyzers;
26+
import org.opensearch.index.query.QueryShardContext;
27+
import org.opensearch.index.query.SourceFieldMatchQuery;
28+
29+
import java.io.IOException;
30+
import java.util.ArrayList;
31+
import java.util.List;
32+
33+
public class MatchOnlyTextFieldMapper extends TextFieldMapper {
34+
35+
public static final FieldType FIELD_TYPE = new FieldType();
36+
public static final String CONTENT_TYPE = "match_only_text";
37+
38+
@Override
39+
protected String contentType() {
40+
return CONTENT_TYPE;
41+
}
42+
43+
static {
44+
FIELD_TYPE.setTokenized(true);
45+
FIELD_TYPE.setStored(false);
46+
FIELD_TYPE.setStoreTermVectors(false);
47+
FIELD_TYPE.setOmitNorms(true);
48+
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
49+
FIELD_TYPE.freeze();
50+
}
51+
52+
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
53+
54+
protected MatchOnlyTextFieldMapper(String simpleName, FieldType fieldType, MatchOnlyTextFieldType mappedFieldType,
55+
TextFieldMapper.PrefixFieldMapper prefixFieldMapper,
56+
TextFieldMapper.PhraseFieldMapper phraseFieldMapper,
57+
MultiFields multiFields, CopyTo copyTo, Builder builder) {
58+
59+
super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, phraseFieldMapper, multiFields, copyTo, builder);
60+
}
61+
62+
public static class Builder extends TextFieldMapper.Builder {
63+
64+
public Builder(String name, IndexAnalyzers indexAnalyzers) {
65+
super(name, indexAnalyzers);
66+
}
67+
68+
public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
69+
super(name, indexCreatedVersion, indexAnalyzers);
70+
}
71+
72+
@Override
73+
public MatchOnlyTextFieldMapper build(BuilderContext context) {
74+
FieldType fieldType = FIELD_TYPE;
75+
MatchOnlyTextFieldType tft = new MatchOnlyTextFieldType(buildFieldType(fieldType, context));
76+
return new MatchOnlyTextFieldMapper(
77+
name,
78+
fieldType,
79+
tft,
80+
buildPrefixMapper(context, fieldType, tft),
81+
buildPhraseMapper(fieldType, tft),
82+
multiFieldsBuilder.build(this, context),
83+
copyTo.build(),
84+
this
85+
);
86+
}
87+
}
88+
89+
public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFieldType {
90+
91+
@Override
92+
public String typeName() {
93+
return CONTENT_TYPE;
94+
}
95+
96+
public MatchOnlyTextFieldType(TextFieldMapper.TextFieldType tft) {
97+
super(tft.name(), tft.isSearchable(), tft.isStored(), tft.getTextSearchInfo(), tft.meta());
98+
}
99+
100+
@Override
101+
public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException {
102+
PhraseQuery phraseQuery = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements);
103+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
104+
for (Term term: phraseQuery.getTerms()) {
105+
builder.add(new TermQuery(term), BooleanClause.Occur.FILTER);
106+
}
107+
return new SourceFieldMatchQuery(builder.build(), phraseQuery, this,
108+
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup());
109+
}
110+
111+
@Override
112+
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
113+
MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements);
114+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
115+
for (Term[] terms : multiPhraseQuery.getTermArrays()) {
116+
BooleanQuery.Builder disjunctions = new BooleanQuery.Builder();
117+
for (Term term: terms) {
118+
disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
119+
}
120+
builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
121+
}
122+
return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this,
123+
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup());
124+
}
125+
126+
@Override
127+
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
128+
Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions);
129+
List<List<Term>> termArray = getTermsFromTokenStream(stream);
130+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
131+
for (int i = 0; i < termArray.size(); i++) {
132+
BooleanQuery.Builder disjunctions = new BooleanQuery.Builder();
133+
for (Term term: termArray.get(i)) {
134+
if (i == termArray.size() - 1) {
135+
MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name());
136+
mqb.add(term);
137+
disjunctions.add(mqb, BooleanClause.Occur.SHOULD);
138+
} else {
139+
disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
140+
}
141+
}
142+
builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
143+
}
144+
return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this,
145+
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup());
146+
}
147+
148+
private List<List<Term>> getTermsFromTokenStream(TokenStream stream) throws IOException {
149+
final List<List<Term>> termArray = new ArrayList<>();
150+
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
151+
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
152+
List<Term> currentTerms = new ArrayList<>();
153+
stream.reset();
154+
while (stream.incrementToken()) {
155+
if (posIncrAtt.getPositionIncrement() != 0) {
156+
if (currentTerms.isEmpty() == false) {
157+
termArray.add(List.copyOf(currentTerms));
158+
}
159+
currentTerms.clear();
160+
}
161+
currentTerms.add(new Term(name(), termAtt.getBytesRef()));
162+
}
163+
termArray.add(List.copyOf(currentTerms));
164+
return termArray;
165+
}
166+
}
167+
}

server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ protected List<Parameter<?>> getParameters() {
395395
);
396396
}
397397

398-
private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) {
398+
protected TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) {
399399
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
400400
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
401401
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
@@ -420,7 +420,7 @@ private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context
420420
return ft;
421421
}
422422

423-
private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) {
423+
protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) {
424424
if (indexPrefixes.get() == null) {
425425
return null;
426426
}
@@ -454,7 +454,7 @@ private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fi
454454
return new PrefixFieldMapper(pft, prefixFieldType);
455455
}
456456

457-
private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) {
457+
protected PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) {
458458
if (indexPhrases.get() == false) {
459459
return null;
460460
}
@@ -683,7 +683,7 @@ public Query existsQuery(QueryShardContext context) {
683683
*
684684
* @opensearch.internal
685685
*/
686-
private static final class PhraseFieldMapper extends FieldMapper {
686+
protected static final class PhraseFieldMapper extends FieldMapper {
687687

688688
PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType) {
689689
super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty());
@@ -710,7 +710,7 @@ protected String contentType() {
710710
*
711711
* @opensearch.internal
712712
*/
713-
private static final class PrefixFieldMapper extends FieldMapper {
713+
protected static final class PrefixFieldMapper extends FieldMapper {
714714

715715
protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType) {
716716
super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty());
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.query;
10+
11+
import org.apache.lucene.index.LeafReaderContext;
12+
import org.apache.lucene.index.memory.MemoryIndex;
13+
import org.apache.lucene.search.ConstantScoreScorer;
14+
import org.apache.lucene.search.ConstantScoreWeight;
15+
import org.apache.lucene.search.DocIdSetIterator;
16+
import org.apache.lucene.search.IndexSearcher;
17+
import org.apache.lucene.search.Query;
18+
import org.apache.lucene.search.QueryVisitor;
19+
import org.apache.lucene.search.ScoreMode;
20+
import org.apache.lucene.search.Scorer;
21+
import org.apache.lucene.search.TwoPhaseIterator;
22+
import org.apache.lucene.search.Weight;
23+
import org.opensearch.index.mapper.MappedFieldType;
24+
import org.opensearch.index.mapper.SourceValueFetcher;
25+
import org.opensearch.search.lookup.LeafSearchLookup;
26+
import org.opensearch.search.lookup.SearchLookup;
27+
28+
import java.io.IOException;
29+
import java.util.List;
30+
import java.util.Objects;
31+
32+
/**
33+
* A query that matches against each document from the parent query by filtering using the source field values.
34+
* Useful to query against field type which doesn't store positional data and field is not stored/computed dynamically.
35+
*/
36+
public class SourceFieldMatchQuery extends Query {
37+
final private Query delegateQuery;
38+
final private Query filter;
39+
final private SearchLookup lookup;
40+
final private MappedFieldType fieldType;
41+
final private SourceValueFetcher valueFetcher;
42+
43+
/**
44+
* Constructs a SourceFieldMatchQuery.
45+
*
46+
* @param delegateQuery The parent query to use to find matches.
47+
* @param filter The query used to filter further by running against field value computed using _source field.
48+
* @param fieldType The mapped field type.
49+
* @param valueFetcher The source value fetcher.
50+
* @param lookup The search lookup.
51+
*/
52+
public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType,
53+
SourceValueFetcher valueFetcher, SearchLookup lookup) {
54+
this.delegateQuery = delegateQuery;
55+
this.filter = filter;
56+
this.fieldType = fieldType;
57+
this.valueFetcher = valueFetcher;
58+
this.lookup = lookup;
59+
}
60+
61+
@Override
62+
public void visit(QueryVisitor visitor) {
63+
delegateQuery.visit(visitor);
64+
}
65+
66+
@Override
67+
public Query rewrite(IndexSearcher searcher) throws IOException {
68+
return delegateQuery.rewrite(searcher);
69+
}
70+
71+
@Override
72+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
73+
74+
Weight weight = delegateQuery.createWeight(searcher, scoreMode, boost);
75+
76+
return new ConstantScoreWeight(this, boost) {
77+
78+
@Override
79+
public Scorer scorer(LeafReaderContext context) throws IOException {
80+
81+
Scorer scorer = weight.scorer(context);
82+
DocIdSetIterator approximation = scorer.iterator();
83+
LeafSearchLookup leafSearchLookup = lookup.getLeafSearchLookup(context);
84+
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
85+
86+
@Override
87+
public boolean matches() {
88+
leafSearchLookup.setDocument(approximation.docID());
89+
List<Object> values = valueFetcher.fetchValues(leafSearchLookup.source());
90+
MemoryIndex memoryIndex = new MemoryIndex();
91+
for (Object value : values) {
92+
memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer());
93+
}
94+
float score = memoryIndex.search(delegateQuery);
95+
return score > 0.0f;
96+
}
97+
98+
@Override
99+
public float matchCost() {
100+
// arbitrary cost
101+
return 1000f;
102+
}
103+
};
104+
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
105+
}
106+
107+
@Override
108+
public boolean isCacheable(LeafReaderContext ctx) {
109+
// It is fine to cache if delegate query weight is cacheable since additional logic here
110+
// is just a filter on top of delegate query matches
111+
return weight.isCacheable(ctx);
112+
}
113+
};
114+
}
115+
116+
@Override
117+
public boolean equals(Object o) {
118+
if (this == o) {
119+
return true;
120+
}
121+
if (sameClassAs(o) == false) {
122+
return false;
123+
}
124+
SourceFieldMatchQuery other = (SourceFieldMatchQuery) o;
125+
return Objects.equals(this.delegateQuery, other.delegateQuery)
126+
&& this.filter == other.filter
127+
&& Objects.equals(this.lookup, other.lookup)
128+
&& Objects.equals(this.fieldType, other.fieldType)
129+
&& Objects.equals(this.valueFetcher, other.valueFetcher);
130+
}
131+
132+
@Override
133+
public int hashCode() {
134+
return Objects.hash(classHash(), delegateQuery, filter, lookup, fieldType, valueFetcher);
135+
}
136+
137+
@Override
138+
public String toString(String f) {
139+
return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) +
140+
" ], filter query: [ " + filter.toString(f) + "])";
141+
}
142+
}

0 commit comments

Comments
 (0)