From 9c17ad3d0e5a39987a6c2a6282ec4370e6485373 Mon Sep 17 00:00:00 2001
From: Elliot V Pourmand
Date: Mon, 28 Aug 2023 00:12:34 -0700
Subject: [PATCH] Avoid negative scores in multi_match

Cap the adjusted document frequency at the field's docCount, in addition
to the existing maxDoc cap, and add a test that blends a term across a
field present in every document and a field present in only some.

Signed-off-by: Elliot V Pourmand
---
 .../lucene/queries/BlendedTermQuery.java      |  6 +++-
 .../lucene/queries/BlendedTermQueryTests.java | 33 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java
index 345be330f048c..f366cce19e222 100644
--- a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java
+++ b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java
@@ -176,7 +176,11 @@ protected int compare(int i, int j) {
             if (prev > current) {
                 actualDf++;
             }
-            contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf));
+            // Clamp the blended df by the field's docCount so it never exceeds the number of documents
+            // that contain the field, and keep the maxDoc cap that the replaced line already applied.
+            int docCount = reader.getDocCount(terms[i].field());
+            int newDocFreq = Math.min(Math.min(actualDf, docCount), maxDoc);
+            contexts[i] = ctx = adjustDF(reader.getContext(), ctx, newDocFreq);
             prev = current;
             sumTTF += ctx.totalTermFreq();
         }
diff --git a/server/src/test/java/org/opensearch/lucene/queries/BlendedTermQueryTests.java b/server/src/test/java/org/opensearch/lucene/queries/BlendedTermQueryTests.java
index ca420bfbf8fbb..a2bb3fa3b0dd5 100644
--- a/server/src/test/java/org/opensearch/lucene/queries/BlendedTermQueryTests.java
+++ b/server/src/test/java/org/opensearch/lucene/queries/BlendedTermQueryTests.java
@@ -276,6 +276,39 @@ public void testMinTTF() throws IOException {
         dir.close();
     }
 
+    public void testMissingFields() throws IOException {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
+        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+        ft.freeze();
+
+        for (int i = 0; i < 10; i++) {
+            Document d = new Document();
+            d.add(new TextField("id", Integer.toString(i), Field.Store.YES));
+            d.add(new Field("dense", "foo", ft));
+            // Add a sparse field with high totalTermFreq but low docCount
+            if (i % 5 == 0) {
+                d.add(new Field("sparse", "foo", ft));
+                d.add(new Field("sparse", "one two three four five size", ft));
+            }
+            w.addDocument(d);
+        }
+        w.commit();
+
+        DirectoryReader reader = DirectoryReader.open(w);
+        IndexSearcher searcher = setSimilarity(newSearcher(reader));
+
+        String[] fields = new String[] { "dense", "sparse" };
+        Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 0.1f);
+        TopDocs search = searcher.search(query, 10);
+        ScoreDoc[] scoreDocs = search.scoreDocs;
+        assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue());
+
+        reader.close();
+        w.close();
+        dir.close();
+    }
+
     public void testEqualsAndHash() {
         String[] fields = new String[1 + random().nextInt(10)];
         for (int i = 0; i < fields.length; i++) {