Skip to content

Commit 6a4e638

Browse files
committed
Add Roaring64NavigableMap support for bitmap filtering on long fields
Extend bitmap-based term filtering to long fields using 64-bit roaring bitmaps with portable serialization. This mirrors the existing 32-bit RoaringBitmap support for integer fields.

Key changes:
- Add Bitmap64IndexQuery (BKD tree merge-join for long point values)
- Add Bitmap64DocValuesQuery (doc values filtering for long fields)
- Add LONG.bitmapQuery() override in NumberFieldMapper
- Graceful cross-format fallback: long fields accept 32-bit bitmaps (up-convert) and integer fields accept 64-bit bitmaps (down-convert with range validation)
- Unit tests, integration tests, and REST API YAML tests

Signed-off-by: Paul De Audney <pdeaudney@gmail.com>
1 parent 42abf94 commit 6a4e638

9 files changed

Lines changed: 1272 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55

66
## [Unreleased 3.x]
77
### Added
8+
- Add Roaring64NavigableMap support for bitmap filtering on long fields ([#XXXXX](https://github.com/opensearch-project/OpenSearch/pull/XXXXX))
89
- Add getWrappedScorer method to ProfileScorer for plugin access to wrapped scorers ([#20548](https://github.com/opensearch-project/OpenSearch/issues/20548))
910
- Support expected cluster name with validation in CCS Sniff mode ([#20532](https://github.com/opensearch-project/OpenSearch/pull/20532))
1011
- Add security policy to allow `accessUnixDomainSocket` in `transport-grpc` module ([#20463](https://github.com/opensearch-project/OpenSearch/pull/20463))

rest-api-spec/src/main/resources/rest-api-spec/test/search/380_bitmap_filtering.yml

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,114 @@ setup:
182182
- match: { hits.hits.1._source.student_id: 111 }
183183
- match: { hits.hits.2._source.name: John Doe }
184184
- match: { hits.hits.2._source.student_id: 333 }
185+
186+
---
187+
"Terms query accepting bitmap for long field":
188+
- skip:
189+
version: " - 3.5.99"
190+
reason: The bitmap filtering for long fields is available in 3.6 and later.
191+
- do:
192+
indices.create:
193+
index: students_long
194+
body:
195+
settings:
196+
number_of_shards: 1
197+
number_of_replicas: 0
198+
mappings:
199+
properties:
200+
student_id:
201+
type: long
202+
- do:
203+
bulk:
204+
refresh: true
205+
body:
206+
- { "index": { "_index": "students_long", "_id": "1" } }
207+
- { "name": "Jane Doe", "student_id": 111 }
208+
- { "index": { "_index": "students_long", "_id": "2" } }
209+
- { "name": "Mary Major", "student_id": 222 }
210+
- { "index": { "_index": "students_long", "_id": "3" } }
211+
- { "name": "John Doe", "student_id": 333 }
212+
- do:
213+
search:
214+
rest_total_hits_as_int: true
215+
index: students_long
216+
body: {
217+
"query": {
218+
"terms": {
219+
"student_id": ["AQAAAAAAAAAAAAAAOjAAAAEAAAAAAAEAEAAAAG8A3gA="],
220+
"value_type": "bitmap"
221+
}
222+
}
223+
}
224+
- match: { hits.total: 2 }
225+
- match: { hits.hits.0._source.name: Jane Doe }
226+
- match: { hits.hits.0._source.student_id: 111 }
227+
- match: { hits.hits.1._source.name: Mary Major }
228+
- match: { hits.hits.1._source.student_id: 222 }
229+
230+
---
231+
"Terms lookup on a binary field with bitmap for long field":
232+
- skip:
233+
version: " - 3.5.99"
234+
reason: The bitmap filtering for long fields is available in 3.6 and later.
235+
- do:
236+
indices.create:
237+
index: students_long_lookup
238+
body:
239+
settings:
240+
number_of_shards: 1
241+
number_of_replicas: 0
242+
mappings:
243+
properties:
244+
student_id:
245+
type: long
246+
- do:
247+
bulk:
248+
refresh: true
249+
body:
250+
- { "index": { "_index": "students_long_lookup", "_id": "1" } }
251+
- { "name": "Jane Doe", "student_id": 111 }
252+
- { "index": { "_index": "students_long_lookup", "_id": "2" } }
253+
- { "name": "Mary Major", "student_id": 222 }
254+
- { "index": { "_index": "students_long_lookup", "_id": "3" } }
255+
- { "name": "John Doe", "student_id": 333 }
256+
- do:
257+
indices.create:
258+
index: classes_long
259+
body:
260+
settings:
261+
number_of_shards: 1
262+
number_of_replicas: 0
263+
mappings:
264+
properties:
265+
enrolled:
266+
type: binary
267+
store: true
268+
- do:
269+
bulk:
270+
refresh: true
271+
body:
272+
- { "index": { "_index": "classes_long", "_id": "101" } }
273+
- { "enrolled": "AQAAAAAAAAAAAAAAOjAAAAEAAAAAAAEAEAAAAG8A3gA=" }
274+
- do:
275+
search:
276+
rest_total_hits_as_int: true
277+
index: students_long_lookup
278+
body: {
279+
"query": {
280+
"terms": {
281+
"student_id": {
282+
"index": "classes_long",
283+
"id": "101",
284+
"path": "enrolled",
285+
"store": true
286+
},
287+
"value_type": "bitmap"
288+
}
289+
}
290+
}
291+
- match: { hits.total: 2 }
292+
- match: { hits.hits.0._source.name: Jane Doe }
293+
- match: { hits.hits.0._source.student_id: 111 }
294+
- match: { hits.hits.1._source.name: Mary Major }
295+
- match: { hits.hits.1._source.student_id: 222 }

server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;
8585
import org.opensearch.test.junit.annotations.TestIssueLogging;
8686

87+
import java.io.ByteArrayOutputStream;
88+
import java.io.DataOutputStream;
8789
import java.io.IOException;
8890
import java.io.Reader;
8991
import java.nio.ByteBuffer;
@@ -102,6 +104,7 @@
102104
import java.util.regex.Pattern;
103105

104106
import org.roaringbitmap.RoaringBitmap;
107+
import org.roaringbitmap.longlong.Roaring64NavigableMap;
105108

106109
import static java.util.Collections.singletonMap;
107110
import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
@@ -1197,6 +1200,74 @@ public void testTermsQueryWithBitmapDocValuesQuery() throws Exception {
11971200
assertSearchHits(searchResponse, "1", "3", "4");
11981201
}
11991202

1203+
public void testTermsQueryWithBitmapLongField() throws Exception {
1204+
assertAcked(
1205+
prepareCreate("products_long").setMapping(
1206+
jsonBuilder().startObject()
1207+
.startObject("properties")
1208+
.startObject("product")
1209+
.field("type", "long")
1210+
.endObject()
1211+
.endObject()
1212+
.endObject()
1213+
)
1214+
);
1215+
indexRandom(
1216+
true,
1217+
client().prepareIndex("products_long").setId("1").setSource("product", 1L),
1218+
client().prepareIndex("products_long").setId("2").setSource("product", 2L),
1219+
client().prepareIndex("products_long").setId("3").setSource("product", new long[] { 1L, 3L }),
1220+
client().prepareIndex("products_long").setId("4").setSource("product", 4L)
1221+
);
1222+
1223+
Roaring64NavigableMap r = new Roaring64NavigableMap(true);
1224+
r.addLong(1L);
1225+
r.addLong(4L);
1226+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
1227+
r.serializePortable(new DataOutputStream(baos));
1228+
BytesArray bitmap = new BytesArray(baos.toByteArray());
1229+
SearchResponse searchResponse = client().prepareSearch("products_long")
1230+
.setQuery(constantScoreQuery(termsQuery("product", bitmap).valueType(TermsQueryBuilder.ValueType.BITMAP)))
1231+
.get();
1232+
assertHitCount(searchResponse, 3L);
1233+
assertSearchHits(searchResponse, "1", "3", "4");
1234+
}
1235+
1236+
public void testTermsQueryWithBitmapLongFieldLargeValues() throws Exception {
1237+
assertAcked(
1238+
prepareCreate("products_long_large").setMapping(
1239+
jsonBuilder().startObject()
1240+
.startObject("properties")
1241+
.startObject("product")
1242+
.field("type", "long")
1243+
.endObject()
1244+
.endObject()
1245+
.endObject()
1246+
)
1247+
);
1248+
long largeVal1 = Integer.MAX_VALUE + 100L;
1249+
long largeVal2 = Integer.MAX_VALUE + 200L;
1250+
long largeVal3 = Integer.MAX_VALUE + 300L;
1251+
indexRandom(
1252+
true,
1253+
client().prepareIndex("products_long_large").setId("1").setSource("product", largeVal1),
1254+
client().prepareIndex("products_long_large").setId("2").setSource("product", largeVal2),
1255+
client().prepareIndex("products_long_large").setId("3").setSource("product", largeVal3)
1256+
);
1257+
1258+
Roaring64NavigableMap r = new Roaring64NavigableMap(true);
1259+
r.addLong(largeVal1);
1260+
r.addLong(largeVal3);
1261+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
1262+
r.serializePortable(new DataOutputStream(baos));
1263+
BytesArray bitmap = new BytesArray(baos.toByteArray());
1264+
SearchResponse searchResponse = client().prepareSearch("products_long_large")
1265+
.setQuery(constantScoreQuery(termsQuery("product", bitmap).valueType(TermsQueryBuilder.ValueType.BITMAP)))
1266+
.get();
1267+
assertHitCount(searchResponse, 2L);
1268+
assertSearchHits(searchResponse, "1", "3");
1269+
}
1270+
12001271
public void testTermsLookupFilter() throws Exception {
12011272
assertAcked(prepareCreate("lookup").setMapping("terms", "type=text", "other", "type=text"));
12021273
indexRandomForConcurrentSearch("lookup");

server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,13 @@
7474
import org.opensearch.search.approximate.ApproximatePointRangeQuery;
7575
import org.opensearch.search.approximate.ApproximateScoreQuery;
7676
import org.opensearch.search.lookup.SearchLookup;
77+
import org.opensearch.search.query.Bitmap64DocValuesQuery;
78+
import org.opensearch.search.query.Bitmap64IndexQuery;
7779
import org.opensearch.search.query.BitmapDocValuesQuery;
7880
import org.opensearch.search.query.BitmapIndexQuery;
7981

82+
import java.io.ByteArrayInputStream;
83+
import java.io.DataInputStream;
8084
import java.io.IOException;
8185
import java.math.BigInteger;
8286
import java.nio.ByteBuffer;
@@ -93,6 +97,8 @@
9397
import java.util.function.Supplier;
9498

9599
import org.roaringbitmap.RoaringBitmap;
100+
import org.roaringbitmap.longlong.LongIterator;
101+
import org.roaringbitmap.longlong.Roaring64NavigableMap;
96102

97103
/**
98104
* A {@link FieldMapper} for numeric types: byte, short, int, long, float, double and unsigned long.
@@ -1162,7 +1168,27 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha
11621168
try {
11631169
bitmap.deserialize(ByteBuffer.wrap(bitmapArray.array()));
11641170
} catch (Exception e) {
1165-
throw new IllegalArgumentException("Failed to deserialize the bitmap.", e);
1171+
// Fallback: try 64-bit Roaring64NavigableMap and down-convert.
1172+
// The two formats have distinct cookies so deserialization failure is reliable.
1173+
// All values must fit in [Integer.MIN_VALUE, Integer.MAX_VALUE] or an error is thrown.
1174+
try {
1175+
Roaring64NavigableMap bitmap64 = new Roaring64NavigableMap(true);
1176+
bitmap64.deserializePortable(new DataInputStream(new ByteArrayInputStream(bitmapArray.array())));
1177+
LongIterator iter = bitmap64.getLongIterator();
1178+
while (iter.hasNext()) {
1179+
long value = iter.next();
1180+
if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
1181+
throw new IllegalArgumentException(
1182+
"Bitmap contains value " + value + " which is out of range for integer field"
1183+
);
1184+
}
1185+
bitmap.add((int) value);
1186+
}
1187+
} catch (IllegalArgumentException iae) {
1188+
throw iae;
1189+
} catch (Exception e2) {
1190+
throw new IllegalArgumentException("Failed to deserialize the bitmap.", e);
1191+
}
11661192
}
11671193

11681194
if (isSearchable && hasDocValues) {
@@ -1437,11 +1463,59 @@ public List<Field> createFields(
14371463
return fields;
14381464
}
14391465

1466+
/**
1467+
* Bitmap query support for long fields using Roaring64NavigableMap with portable serialization.
1468+
* <p>
1469+
* Signed mode (signedLongs=true) is required so the bitmap iterator produces values in the
1470+
* same order as Lucene's LongPoint BKD tree encoding. The default Roaring64NavigableMap
1471+
* constructor uses unsigned mode, which would break the merge-join for negative values.
1472+
* <p>
1473+
* Clients should serialize bitmaps using {@code Roaring64NavigableMap.serializePortable()}.
1474+
* If a 32-bit RoaringBitmap blob is received (detected via cookie-based format validation),
1475+
* values are up-converted from int to long, which is always safe.
1476+
* <p>
1477+
* Cross-language compatibility: other implementations (C/CRoaring, Go, Python) typically
1478+
* use unsigned 64-bit semantics. For values in the range 0 to 2^63-1, the bit patterns
1479+
* are identical and fully interoperable. Negative Java longs correspond to unsigned values
1480+
* greater than or equal to 2^63 in other implementations.
1481+
* <p>
1482+
* Not applicable to unsigned_long fields, which use BigIntegerPoint (16-byte encoding)
1483+
* and are incompatible with the 8-byte Roaring64NavigableMap representation.
1484+
*/
1485+
@Override
1486+
public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable, boolean hasDocValues) {
1487+
// signedLongs=true is critical: ensures iterator order matches LongPoint BKD tree sort order
1488+
Roaring64NavigableMap bitmap = new Roaring64NavigableMap(true);
1489+
try {
1490+
bitmap.deserializePortable(new DataInputStream(new ByteArrayInputStream(bitmapArray.array())));
1491+
} catch (Exception e) {
1492+
// Fallback: try 32-bit RoaringBitmap and up-convert (int -> long is always safe).
1493+
// The two formats have distinct cookies so deserialization failure is reliable.
1494+
try {
1495+
RoaringBitmap bitmap32 = new RoaringBitmap();
1496+
bitmap32.deserialize(ByteBuffer.wrap(bitmapArray.array()));
1497+
bitmap32.forEach((int value) -> bitmap.addLong(value));
1498+
} catch (Exception e2) {
1499+
throw new IllegalArgumentException("Failed to deserialize the bitmap.", e);
1500+
}
1501+
}
1502+
1503+
if (isSearchable && hasDocValues) {
1504+
return new IndexOrDocValuesQuery(new Bitmap64IndexQuery(field, bitmap), new Bitmap64DocValuesQuery(field, bitmap));
1505+
}
1506+
if (isSearchable) {
1507+
return new Bitmap64IndexQuery(field, bitmap);
1508+
}
1509+
return new Bitmap64DocValuesQuery(field, bitmap);
1510+
}
1511+
14401512
@Override
14411513
Number valueForSearch(String value) {
14421514
return Long.parseLong(value);
14431515
}
14441516
},
1517+
// Note: UNSIGNED_LONG does not support bitmap queries. It uses BigIntegerPoint (16-byte
1518+
// encoding) which is incompatible with Roaring64NavigableMap's 8-byte long representation.
14451519
UNSIGNED_LONG("unsigned_long", NumericType.UNSIGNED_LONG) {
14461520
@Override
14471521
public BigInteger parse(Object value, boolean coerce) {

0 commit comments

Comments
 (0)