From 2d2bf973f9dce571243ed312a7b69316a43b3f6d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 4 Oct 2025 00:24:39 +0000 Subject: [PATCH] Fix derived query rewrite (#19496) Signed-off-by: Harsha Vamsi Kalluri (cherry picked from commit 0c3a3130d9e0f249260a395b5e133f5c32467c20) Signed-off-by: github-actions[bot] --- CHANGELOG.md | 1 + .../index/query/DerivedFieldQuery.java | 59 ++++-- .../mapper/DerivedFieldMapperQueryTests.java | 2 +- .../index/query/DerivedFieldQueryTests.java | 199 ++++++++++++++++++ 4 files changed, 244 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47a135e8046fd..deb7f77a6892d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,6 +106,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Setting number of sharedArenaMaxPermits to 1 ([#19503](https://github.com/opensearch-project/OpenSearch/pull/19503)) - Handle negative search request nodes stats ([#19340](https://github.com/opensearch-project/OpenSearch/pull/19340)) - Remove unnecessary iteration per-shard in request cache cleanup ([#19263](https://github.com/opensearch-project/OpenSearch/pull/19263)) +- Fix derived field rewrite to handle range queries ([#19496](https://github.com/opensearch-project/OpenSearch/pull/19496)) ### Dependencies - Bump `com.gradleup.shadow:shadow-gradle-plugin` from 8.3.5 to 8.3.9 ([#19400](https://github.com/opensearch-project/OpenSearch/pull/19400)) diff --git a/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java b/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java index e53e0d52d062f..28ae4bc50c1af 100644 --- a/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java +++ b/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java @@ -15,7 +15,9 @@ import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScoreMode; @@ -24,6 +26,7 @@ import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.Weight; import org.opensearch.index.mapper.DerivedFieldValueFetcher; +import org.opensearch.search.approximate.ApproximateScoreQuery; import org.opensearch.search.lookup.LeafSearchLookup; import org.opensearch.search.lookup.SearchLookup; @@ -72,22 +75,46 @@ public void visit(QueryVisitor visitor) { query.visit(visitor); } - // @Override - // public Query rewrite(IndexSearcher indexSearcher) throws IOException { - // Query rewritten = query.rewrite(indexSearcher); - // if (rewritten == query) { - // return this; - // } - // ; - // return new DerivedFieldQuery( - // rewritten, - // valueFetcherSupplier, - // searchLookup, - // indexAnalyzer, - // indexableFieldGenerator, - // ignoreMalformed - // ); - // } + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (!needsRewrite()) { + return this; + } + Query rewritten = query.rewrite(indexSearcher); + if (rewritten == query) { + return this; + } + ; + return new DerivedFieldQuery( + rewritten, + valueFetcherSupplier, + searchLookup, + indexAnalyzer, + indexableFieldGenerator, + ignoreMalformed + ); + } + + private boolean needsRewrite() { + if (query instanceof PointRangeQuery) { + return false; + } + + if (query instanceof ApproximateScoreQuery) { + Query originalQuery = ((ApproximateScoreQuery) query).getOriginalQuery(); + if (originalQuery instanceof IndexOrDocValuesQuery) { + Query indexQuery = ((IndexOrDocValuesQuery) originalQuery).getIndexQuery(); + return !(indexQuery instanceof PointRangeQuery); + } + } + + if (query instanceof IndexOrDocValuesQuery) { + Query indexQuery = ((IndexOrDocValuesQuery) query).getIndexQuery(); + return !(indexQuery instanceof PointRangeQuery); + } + + return true; + } @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java index bc471571a8c2c..34652b8703034 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java @@ -551,7 +551,7 @@ public void execute() { query = new MatchPhrasePrefixQueryBuilder("object_field.text_field", "document number").toQuery(queryShardContext); topDocs = searcher.search(query, 10); - assertEquals(10, topDocs.totalHits.value()); + assertEquals(0, topDocs.totalHits.value()); // Multi Phrase Query query = QueryBuilders.multiMatchQuery("GET", "object_field.nested_field.sub_field_1", "object_field.keyword_field") diff --git a/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java b/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java index e46b60dc460aa..26f2f9e19c6b7 100644 --- a/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java @@ -10,6 +10,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; @@ -18,7 +19,10 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; @@ -181,4 +185,199 @@ public void execute() { } } } + + public void testNeedsRewriteWithPointRangeQuery() throws IOException { + // Create lucene documents + List docs = new ArrayList<>(); + for (String[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_request", request[0], Field.Store.YES)); + document.add(new KeywordField("status", request[1], Field.Store.YES)); + docs.add(document); + } + + // Mock SearchLookup + SearchLookup searchLookup = mock(SearchLookup.class); + SourceLookup sourceLookup = new SourceLookup(); + LeafSearchLookup leafLookup = mock(LeafSearchLookup.class); + when(leafLookup.source()).thenReturn(sourceLookup); + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> { + when(searchLookup.getLeafSearchLookup(ctx)).thenReturn(leafLookup); + return new DerivedFieldScript(params, lookup, ctx) { + @Override + public void execute() { + addEmittedValue(raw_requests[sourceLookup.docId()][2]); + } + }; + }; + + // Create ValueFetcher from mocked DerivedFieldScript.Factory + DerivedFieldScript.LeafFactory leafFactory = factory.newFactory((new Script("")).getParams(), searchLookup); + Function indexableFieldFunction = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType( + "keyword", + "ip_from_raw_request" + ); + DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher(leafFactory, null); + + // Create a real PointRangeQuery using IntPoint + Query pointRangeQuery = IntPoint.newRangeQuery("test_field", 0, 100); + + // Create DerivedFieldQuery with PointRangeQuery + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + pointRangeQuery, + () -> valueFetcher, + searchLookup, + Lucene.STANDARD_ANALYZER, + indexableFieldFunction, + true + ); + + // Index and Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + + // Rewrite should return the same query without attempting rewrite + Query rewritten = derivedFieldQuery.rewrite(searcher); + assertSame(derivedFieldQuery, rewritten); + } + } + } + + public void testNeedsRewriteWithIndexOrDocValuesQueryAndPointRange() throws IOException { + // Create lucene documents + List docs = new ArrayList<>(); + for (String[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_request", request[0], Field.Store.YES)); + document.add(new KeywordField("status", request[1], Field.Store.YES)); + docs.add(document); + } + + // Mock SearchLookup + SearchLookup searchLookup = mock(SearchLookup.class); + SourceLookup sourceLookup = new SourceLookup(); + LeafSearchLookup leafLookup = mock(LeafSearchLookup.class); + when(leafLookup.source()).thenReturn(sourceLookup); + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> { + when(searchLookup.getLeafSearchLookup(ctx)).thenReturn(leafLookup); + return new DerivedFieldScript(params, lookup, ctx) { + @Override + public void execute() { + addEmittedValue(raw_requests[sourceLookup.docId()][2]); + } + }; + }; + + // Create ValueFetcher from mocked DerivedFieldScript.Factory + DerivedFieldScript.LeafFactory leafFactory = factory.newFactory((new Script("")).getParams(), searchLookup); + Function indexableFieldFunction = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType( + "keyword", + "ip_from_raw_request" + ); + DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher(leafFactory, null); + + // Create real queries: PointRangeQuery -> IndexOrDocValuesQuery + Query pointRangeQuery = IntPoint.newRangeQuery("test_field", 0, 100); + Query dvQuery = new MatchNoDocsQuery(); + IndexOrDocValuesQuery indexOrDocValuesQuery = new IndexOrDocValuesQuery(pointRangeQuery, dvQuery); + + // Create DerivedFieldQuery with IndexOrDocValuesQuery with PointRangeQuery + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + indexOrDocValuesQuery, + () -> valueFetcher, + searchLookup, + Lucene.STANDARD_ANALYZER, + indexableFieldFunction, + true + ); + + // Index and Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + + // Rewrite should return the same query without attempting rewrite + Query rewritten = derivedFieldQuery.rewrite(searcher); + assertSame(derivedFieldQuery, rewritten); + } + } + } + + public void testNeedsRewriteWithRegularQuery() throws IOException { + // Create lucene documents + List docs = new ArrayList<>(); + for (String[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_request", request[0], Field.Store.YES)); + document.add(new KeywordField("status", request[1], Field.Store.YES)); + docs.add(document); + } + + // Mock SearchLookup + SearchLookup searchLookup = mock(SearchLookup.class); + SourceLookup sourceLookup = new SourceLookup(); + LeafSearchLookup leafLookup = mock(LeafSearchLookup.class); + when(leafLookup.source()).thenReturn(sourceLookup); + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> { + when(searchLookup.getLeafSearchLookup(ctx)).thenReturn(leafLookup); + return new DerivedFieldScript(params, lookup, ctx) { + @Override + public void execute() { + addEmittedValue(raw_requests[sourceLookup.docId()][2]); + } + }; + }; + + // Create ValueFetcher from mocked DerivedFieldScript.Factory + DerivedFieldScript.LeafFactory leafFactory = factory.newFactory((new Script("")).getParams(), searchLookup); + Function indexableFieldFunction = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType( + "keyword", + "ip_from_raw_request" + ); + DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher(leafFactory, null); + + // Create DerivedFieldQuery with regular TermQuery + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + new TermQuery(new Term("ip_from_raw_request", "247.37.0.0")), + () -> valueFetcher, + searchLookup, + Lucene.STANDARD_ANALYZER, + indexableFieldFunction, + true + ); + + // Index and Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + + // Rewrite should perform rewrite on the inner query (TermQuery doesn't change in this case) + Query rewritten = derivedFieldQuery.rewrite(searcher); + // Since TermQuery doesn't rewrite to something different, we still get the same DerivedFieldQuery back + assertSame(derivedFieldQuery, rewritten); + } + } + } }