diff --git a/CHANGELOG.md b/CHANGELOG.md index 648cd3d0963e6..96bde718bcef1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Null check field names in QueryStringQueryBuilder ([#18194](https://github.com/opensearch-project/OpenSearch/pull/18194)) - Avoid NPE if on SnapshotInfo if 'shallow' boolean not present ([#18187](https://github.com/opensearch-project/OpenSearch/issues/18187)) - Fix 'system call filter not installed' caused when network.host: 0.0.0.0 ([#18309](https://github.com/opensearch-project/OpenSearch/pull/18309)) +- DocValues-only IP field supports `terms_query` with more than 1025 IP masks ([#18357](https://github.com/opensearch-project/OpenSearch/pull/18357) - Fix MatrixStatsAggregator reuse when mode parameter changes ([#18242](https://github.com/opensearch-project/OpenSearch/issues/18242)) - Replace the deprecated construction method of TopScoreDocCollectorManager with the new method ([#18395](https://github.com/opensearch-project/OpenSearch/pull/18395)) - Fixed Approximate Framework regression with Lucene 10.2.1 by updating `intersectRight` BKD walk and `IntRef` visit method ([#18358](https://github.com/opensearch-project/OpenSearch/issues/18358 diff --git a/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchIpFieldTermsIT.java b/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchIpFieldTermsIT.java index 301af04f7204f..506842a8be536 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchIpFieldTermsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchIpFieldTermsIT.java @@ -10,7 +10,6 @@ import org.apache.lucene.search.IndexSearcher; import org.opensearch.action.bulk.BulkRequestBuilder; -import org.opensearch.action.search.SearchPhaseExecutionException; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.network.InetAddresses; import org.opensearch.common.xcontent.XContentFactory; @@ -109,19 +108,15 @@ public void testLessThanMaxClauses() throws IOException { assertTermsHitCount(indexName, "addr", toQuery, expectMatches); assertTermsHitCount(indexName, "addr.idx", toQuery, expectMatches); assertTermsHitCount(indexName, "addr.dv", toQuery, expectMatches); - // passing dummy filter crushes on rewriting - SearchPhaseExecutionException ose = assertThrows(SearchPhaseExecutionException.class, () -> { - assertTermsHitCount( - indexName, - "addr.dv", - toQuery, - expectMatches, - (boolBuilder) -> boolBuilder.filter(QueryBuilders.termsQuery("dummy_filter", "1", "2", "3")) - .filter(QueryBuilders.termsQuery("dummy_filter", "1", "2", "3", "4")) - .filter(QueryBuilders.termsQuery("dummy_filter", "1", "2", "3", "4", "5")) - ); - }); - assertTrue("exceeding on query rewrite", ose.shardFailures()[0].getCause() instanceof IndexSearcher.TooManyNestedClauses); + assertTermsHitCount( + indexName, + "addr.dv", + toQuery, + expectMatches, + (boolBuilder) -> boolBuilder.filter(QueryBuilders.termsQuery("dummy_filter", "1", "2", "3")) + .filter(QueryBuilders.termsQuery("dummy_filter", "1", "2", "3", "4")) + .filter(QueryBuilders.termsQuery("dummy_filter", "1", "2", "3", "4", "5")) + ); } public void testExceedMaxClauses() throws IOException { @@ -130,12 +125,7 @@ public void testExceedMaxClauses() throws IOException { int expectMatches = createIndex(indexName, IndexSearcher.getMaxClauseCount() + (rarely() ? 0 : atLeast(10)), toQuery); assertTermsHitCount(indexName, "addr", toQuery, expectMatches); assertTermsHitCount(indexName, "addr.idx", toQuery, expectMatches); - // error from mapper/parser - final SearchPhaseExecutionException ose = assertThrows( - SearchPhaseExecutionException.class, - () -> assertTermsHitCount(indexName, "addr.dv", toQuery, expectMatches) - ); - assertTrue("exceeding on query building", ose.shardFailures()[0].getCause().getCause() instanceof IndexSearcher.TooManyClauses); + assertTermsHitCount(indexName, "addr.dv", toQuery, expectMatches); } private static String getFirstThreeOctets(String ipAddress) { diff --git a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java index 59bbc73844642..1a36dc5b1420a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java @@ -39,6 +39,7 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.sandbox.search.DocValuesMultiRangeQuery; import org.apache.lucene.sandbox.search.MultiRangeQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -299,99 +300,117 @@ public Query termQuery(Object value, @Nullable QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); - Tuple, List> ipsMasks = splitIpsAndMasks(values); - List combiner = new ArrayList<>(); - convertIps(ipsMasks.v1(), combiner); - convertMasks(ipsMasks.v2(), context, combiner); - if (combiner.size() == 1) { - return combiner.get(0); + + List concreteIPs = new ArrayList<>(); + List masks = new ArrayList<>(); + parseIps(values, concreteIPs, masks); + + if (!isSearchable()) { + return hasDocValues() ? docValuesTermsQuery(concreteIPs, masks) : new MatchNoDocsQuery("never happened"); } - return new ConstantScoreQuery(union(combiner)); - } - private Query union(List combiner) { - BooleanQuery.Builder bqb = new BooleanQuery.Builder(); - for (Query q : combiner) { - bqb.add(q, BooleanClause.Occur.SHOULD); + if (!hasDocValues()) { + return indexTermsQuery(concreteIPs, masks); } - return bqb.build(); + + // Both searchable and doc values available - create composite query + return new IndexOrDocValuesQuery(indexTermsQuery(concreteIPs, masks), docValuesTermsQuery(concreteIPs, masks)); } - private void convertIps(List inetAddresses, List sink) { - if (!inetAddresses.isEmpty() && (isSearchable() || hasDocValues())) { - Query pointsQuery = null; - if (isSearchable()) { - pointsQuery = inetAddresses.size() == 1 - ? InetAddressPoint.newExactQuery(name(), inetAddresses.iterator().next()) - : InetAddressPoint.newSetQuery(name(), inetAddresses.toArray(new InetAddress[0])); - } - Query dvQuery = null; - if (hasDocValues()) { - List set = new ArrayList<>(inetAddresses.size()); - for (final InetAddress address : inetAddresses) { - set.add(new BytesRef(InetAddressPoint.encode(address))); - } - if (set.size() == 1) { - dvQuery = SortedSetDocValuesField.newSlowExactQuery(name(), set.iterator().next()); - } else { - dvQuery = SortedSetDocValuesField.newSlowSetQuery(name(), set); - } + private void parseIps(List values, List concreteIPs, List masks) { + for (Object value : values) { + if (value instanceof InetAddress) { + concreteIPs.add((InetAddress) value); + continue; } - final Query out; - if (isSearchable() && hasDocValues()) { - out = new IndexOrDocValuesQuery(pointsQuery, dvQuery); + + String strVal = value instanceof BytesRef ? ((BytesRef) value).utf8ToString() : value.toString(); + + if (strVal.contains("/")) { + Tuple cidr = InetAddresses.parseCidr(strVal); + masks.add((PointRangeQuery) InetAddressPoint.newPrefixQuery(name(), cidr.v1(), cidr.v2())); } else { - out = isSearchable() ? pointsQuery : dvQuery; + concreteIPs.add(InetAddresses.forString(strVal)); } - sink.add(out); } } - private void convertMasks(List masks, QueryShardContext context, List sink) { - if (!masks.isEmpty() && (isSearchable() || hasDocValues())) { - MultiIpRangeQueryBuilder multiRange = null; - for (String mask : masks) { - final Tuple cidr = InetAddresses.parseCidr(mask); - PointRangeQuery query = (PointRangeQuery) InetAddressPoint.newPrefixQuery(name(), cidr.v1(), cidr.v2()); - if (isSearchable()) { // even there is DV we don't go with it, since we can't guess clauses limit - if (multiRange == null) { - multiRange = new MultiIpRangeQueryBuilder(name()); - } - multiRange.add(query.getLowerPoint(), query.getUpperPoint()); - } else { // it may hit clauses limit sooner or later - Query dvRange = SortedSetDocValuesField.newSlowRangeQuery( - name(), - new BytesRef(query.getLowerPoint()), - new BytesRef(query.getUpperPoint()), - true, - true - ); - sink.add(dvRange); - } - } - // never IndexOrDocValuesQuery() since we can't guess clauses limit - if (multiRange != null) { - sink.add(multiRange.build()); - } + private Query indexTermsQuery(List concreteIPs, List masks) { + List queries = new ArrayList<>(); + addConcreteIpQuery(concreteIPs, queries); + addMaskQueries(masks, queries); + + return combineQueries(queries); + } + + private void addConcreteIpQuery(List ips, List queries) { + if (ips.isEmpty()) return; + + queries.add( + ips.size() == 1 + ? InetAddressPoint.newExactQuery(name(), ips.getFirst()) + : InetAddressPoint.newSetQuery(name(), ips.toArray(new InetAddress[0])) + ); + } + + private void addMaskQueries(List masks, List queries) { + if (masks.isEmpty()) return; + + if (masks.size() == 1) { + queries.add(masks.getFirst()); + } else { + MultiIpRangeQueryBuilder multiRange = new MultiIpRangeQueryBuilder(name()); + masks.forEach(q -> multiRange.add(q.getLowerPoint(), q.getUpperPoint())); + queries.add(multiRange.build()); } } - private static Tuple, List> splitIpsAndMasks(List values) { - List concreteIPs = new ArrayList<>(); - List masks = new ArrayList<>(); - for (final Object value : values) { - if (value instanceof InetAddress) { - concreteIPs.add((InetAddress) value); + private Query combineQueries(List queries) { + return switch (queries.size()) { + case 0 -> new MatchNoDocsQuery(); + case 1 -> queries.getFirst(); + default -> new ConstantScoreQuery(union(queries)); + }; + } + + private Query docValuesTermsQuery(List concreteIPs, List masks) { + List ipsBytes = concreteIPs.stream().map(addr -> new BytesRef(InetAddressPoint.encode(addr))).toList(); + + if (ipsBytes.isEmpty() && masks.isEmpty()) { + return new MatchNoDocsQuery(); + } + if (masks.isEmpty()) { + if (ipsBytes.size() == 1) { + return SortedSetDocValuesField.newSlowExactQuery(name(), ipsBytes.getFirst()); } else { - final String strVal = (value instanceof BytesRef) ? ((BytesRef) value).utf8ToString() : value.toString(); - if (strVal.contains("/")) { - masks.add(strVal); - } else { - concreteIPs.add(InetAddresses.forString(strVal)); - } + return SortedSetDocValuesField.newSlowSetQuery(name(), ipsBytes); } + } else { + if (masks.size() == 1 && ipsBytes.isEmpty()) { + return SortedSetDocValuesField.newSlowRangeQuery( + name(), + new BytesRef(masks.getFirst().getLowerPoint()), + new BytesRef(masks.getFirst().getUpperPoint()), + true, + true + ); + } else { + DocValuesMultiRangeQuery.SortedSetStabbingBuilder builder = new DocValuesMultiRangeQuery.SortedSetStabbingBuilder( + name() + ); + masks.forEach(q -> builder.add(new BytesRef(q.getLowerPoint()), new BytesRef(q.getUpperPoint()))); + ipsBytes.forEach(builder::add); + return builder.build(); + } + } + } + + private Query union(List combiner) { + BooleanQuery.Builder bqb = new BooleanQuery.Builder(); + for (Query q : combiner) { + bqb.add(q, BooleanClause.Occur.SHOULD); } - return Tuple.tuple(concreteIPs, masks); + return bqb.build(); } @Override diff --git a/server/src/test/java/org/opensearch/index/mapper/IpFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/IpFieldTypeTests.java index ffb7bc5acea08..d11d32cf65bc1 100644 --- a/server/src/test/java/org/opensearch/index/mapper/IpFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/IpFieldTypeTests.java @@ -33,6 +33,7 @@ import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.sandbox.search.DocValuesMultiRangeQuery; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; @@ -224,24 +225,34 @@ public void testDvOnlyTermsQuery() { SortedSetDocValuesField.newSlowSetQuery("field", List.of(ipToByteRef("::2"), ipToByteRef("::5"))), dvOnly.termsQuery(Arrays.asList("::2", "::5"), null) ); - - // if the list includes a prefix query we fallback to a bool query + assertEquals(SortedSetDocValuesField.newSlowExactQuery("field", ipToByteRef("::2")), dvOnly.termsQuery(List.of("::2"), null)); assertEquals( - new ConstantScoreQuery( - new BooleanQuery.Builder().add(dvOnly.termQuery("::42", null), Occur.SHOULD) - .add(dvOnly.termQuery("::2/16", null), Occur.SHOULD) - .build() + SortedSetDocValuesField.newSlowRangeQuery( + "field", + ipToByteRef("::"), + ipToByteRef("::ffff:ffff:ffff:ffff:ffff:ffff:ffff"), + true, + true ), - dvOnly.termsQuery(Arrays.asList("::42", "::2/16"), null) + dvOnly.termsQuery(List.of("::2/16"), null) ); + // multirange handles both + DocValuesMultiRangeQuery.SortedSetStabbingBuilder expect = new DocValuesMultiRangeQuery.SortedSetStabbingBuilder("field"); + expect.add(ipToByteRef("::42")); + expect.add(ipToByteRef("::"), ipToByteRef("::ffff:ffff:ffff:ffff:ffff:ffff:ffff")); + assertEquals(expect.build(), dvOnly.termsQuery(Arrays.asList("::42", "::2/16"), null)); } public void testDvVsPoint() { MappedFieldType indexOnly = new IpFieldMapper.IpFieldType("field", true, false, false, null, Collections.emptyMap()); MappedFieldType dvOnly = new IpFieldMapper.IpFieldType("field", false, false, true, null, Collections.emptyMap()); MappedFieldType indexDv = new IpFieldMapper.IpFieldType("field", true, false, true, null, Collections.emptyMap()); - assertEquals("ignore DV", indexOnly.termsQuery(List.of("::2/16"), null), indexDv.termsQuery(List.of("::2/16"), null)); + assertNotEquals("obey DocValues", indexOnly.termsQuery(List.of("::2/16"), null), indexDv.termsQuery(List.of("::2/16"), null)); assertEquals(dvOnly.termQuery("::2/16", null), dvOnly.termsQuery(List.of("::2/16"), null)); + assertEquals( + new IndexOrDocValuesQuery(indexOnly.termsQuery(List.of("::2/16"), null), dvOnly.termsQuery(List.of("::2/16"), null)), + indexDv.termsQuery(List.of("::2/16"), null) + ); } public void testRangeQuery() {