Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Use an environment variable (OPENSEARCH_FIPS_MODE) to enable OpenSearch to run in FIPS-enforced mode instead of checking for the existence of bcFIPS jars ([#20625](https://github.com/opensearch-project/OpenSearch/pull/20625))
- Update streaming flag to use search request context ([#20530](https://github.com/opensearch-project/OpenSearch/pull/20530))
- Move pull-based ingestion classes from experimental to publicAPI ([#20704](https://github.com/opensearch-project/OpenSearch/pull/20704))
- Lazily initialize the stored field reader in SourceLookup ([#20827](https://github.com/opensearch-project/OpenSearch/pull/20827))

### Fixed
- Relax index template pattern overlap check to use minimum-string heuristic, allowing distinguishable multi-wildcard patterns at the same priority ([#20702](https://github.com/opensearch-project/OpenSearch/pull/20702))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
import org.opensearch.search.fetch.subphase.FetchSourceContext;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
Expand All @@ -65,7 +64,7 @@
public class SourceLookup implements Map {

private LeafReader reader;
CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;
private CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;

private int docId = -1;

Expand Down Expand Up @@ -100,6 +99,19 @@ public Map<String, Object> loadSourceIfNeeded() {
return source;
}
try {
// Lazily initialize fieldReader on first _source access.
// All the docs to fetch are adjacent but Lucene stored fields are optimized
// for random access and don't optimize for sequential access - except for merging.
// So we do a little hack here and pretend we're going to do merges in order to
// get better sequential access.
if (fieldReader == null) {
if (reader instanceof SequentialStoredFieldsLeafReader lf) {
fieldReader = lf.getSequentialStoredFieldsReader()::document;
} else {
fieldReader = reader.storedFields()::document;
}
}

FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
fieldReader.accept(docId, sourceFieldVisitor);
BytesReference source = sourceFieldVisitor.source();
Expand Down Expand Up @@ -132,20 +144,9 @@ public void setSegmentAndDocument(LeafReaderContext context, int docId) {
}
if (this.reader != context.reader()) {
this.reader = context.reader();
// only reset reader and fieldReader when reader changes
try {
if (context.reader() instanceof SequentialStoredFieldsLeafReader lf) {
// All the docs to fetch are adjacent but Lucene stored fields are optimized
// for random access and don't optimize for sequential access - except for merging.
// So we do a little hack here and pretend we're going to do merges in order to
// get better sequential access.
fieldReader = lf.getSequentialStoredFieldsReader()::document;
} else {
fieldReader = context.reader().storedFields()::document;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
// Lazily initialize fieldReader in loadSourceIfNeeded() to avoid
// unnecessary work when _source is never accessed.
this.fieldReader = null;
}
this.source = null;
this.sourceAsBytes = null;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.lookup;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
import org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.opensearch.core.index.Index;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.test.OpenSearchTestCase;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Map;

/**
 * Tests for {@link SourceLookup#loadSourceIfNeeded()} after positioning the lookup with
 * {@link SourceLookup#setSegmentAndDocument(LeafReaderContext, int)}.
 *
 * <p>Each test writes a single-document index, opens a reader over it, and checks the map
 * returned for document {@code 0}.
 */
public class SourceLookupTests extends OpenSearchTestCase {

    /**
     * Loads {@code _source} through a leaf reader that is not a
     * {@link SequentialStoredFieldsLeafReader}, then checks that re-selecting the same
     * reader and document hands back the very same map instance.
     */
    public void testLazyFieldReaderWithRegularReader() throws IOException {
        try (Directory directory = newDirectory()) {
            indexSourceDoc(directory, "{\"field\":\"value\"}");
            try (DirectoryReader plainReader = DirectoryReader.open(directory)) {
                LeafReaderContext leaf = firstLeaf(plainReader);
                assertFalse(leaf.reader() instanceof SequentialStoredFieldsLeafReader);

                SourceLookup sourceLookup = lookupPositionedAt(leaf, 0);
                Map<String, Object> firstLoad = sourceLookup.loadSourceIfNeeded();
                assertEquals("value", firstLoad.get("field"));

                // Re-selecting the identical reader and doc id must not discard the loaded source.
                sourceLookup.setSegmentAndDocument(leaf, 0);
                Map<String, Object> secondLoad = sourceLookup.loadSourceIfNeeded();
                assertSame(firstLoad, secondLoad);
            }
        }
    }

    /**
     * Loads {@code _source} through a leaf reader that is a
     * {@link SequentialStoredFieldsLeafReader}, obtained by wrapping the directory reader
     * with {@link OpenSearchDirectoryReader#wrap}.
     */
    public void testLazyFieldReaderWithSequentialReader() throws IOException {
        try (Directory directory = newDirectory()) {
            indexSourceDoc(directory, "{\"field\":\"value2\"}");
            try (DirectoryReader unwrapped = DirectoryReader.open(directory)) {
                // The wrapper is not closed on its own here; only the underlying reader is
                // closed, by the enclosing try-with-resources.
                ShardId shardId = new ShardId(new Index("test", "_na_"), 0);
                DirectoryReader sequentialReader = OpenSearchDirectoryReader.wrap(unwrapped, shardId);
                LeafReaderContext leaf = firstLeaf(sequentialReader);
                assertTrue(leaf.reader() instanceof SequentialStoredFieldsLeafReader);

                Map<String, Object> loaded = lookupPositionedAt(leaf, 0).loadSourceIfNeeded();
                assertEquals("value2", loaded.get("field"));
            }
        }
    }

    /**
     * Points one {@link SourceLookup} at a leaf from a first index, loads its source, then
     * points it at a leaf from a second, independent index and checks that the second
     * document's source is what gets returned.
     */
    public void testSetSegmentAndDocumentWithNewReaderDefersFieldReader() throws IOException {
        try (Directory firstDir = newDirectory(); Directory secondDir = newDirectory()) {
            indexSourceDoc(firstDir, "{\"a\":\"1\"}");
            indexSourceDoc(secondDir, "{\"b\":\"2\"}");
            try (
                DirectoryReader firstReader = DirectoryReader.open(firstDir);
                DirectoryReader secondReader = DirectoryReader.open(secondDir)
            ) {
                LeafReaderContext firstLeaf = firstLeaf(firstReader);
                LeafReaderContext secondLeaf = firstLeaf(secondReader);

                SourceLookup sourceLookup = lookupPositionedAt(firstLeaf, 0);
                assertEquals("1", sourceLookup.loadSourceIfNeeded().get("a"));

                // Moving to a leaf backed by a different reader must yield that reader's document.
                sourceLookup.setSegmentAndDocument(secondLeaf, 0);
                assertEquals("2", sourceLookup.loadSourceIfNeeded().get("b"));
            }
        }
    }

    /**
     * Indexes a document that has a stored field but no {@code _source} field and checks
     * that the loaded source map is empty.
     */
    public void testLoadSourceWithNoSourceReturnsEmptyMap() throws IOException {
        try (Directory directory = newDirectory()) {
            Document withoutSource = new Document();
            withoutSource.add(new StoredField("some_other_field", "data"));
            writeSingleDoc(directory, withoutSource);

            try (DirectoryReader plainReader = DirectoryReader.open(directory)) {
                Map<String, Object> loaded = lookupPositionedAt(firstLeaf(plainReader), 0).loadSourceIfNeeded();
                assertTrue(loaded.isEmpty());
            }
        }
    }

    /** Returns the first leaf context of {@code reader}. */
    private static LeafReaderContext firstLeaf(DirectoryReader reader) {
        return reader.leaves().get(0);
    }

    /** Creates a fresh {@link SourceLookup} and points it at {@code docId} within {@code leaf}. */
    private static SourceLookup lookupPositionedAt(LeafReaderContext leaf, int docId) {
        SourceLookup sourceLookup = new SourceLookup();
        sourceLookup.setSegmentAndDocument(leaf, docId);
        return sourceLookup;
    }

    /** Writes a single document whose {@code _source} stored field holds {@code jsonSource} as UTF-8 bytes. */
    private static void indexSourceDoc(Directory dir, String jsonSource) throws IOException {
        Document withSource = new Document();
        withSource.add(new StoredField("_source", jsonSource.getBytes(StandardCharsets.UTF_8)));
        writeSingleDoc(dir, withSource);
    }

    /** Adds {@code doc} to a new index in {@code dir} using a default writer config, then commits. */
    private static void writeSingleDoc(Directory dir, Document doc) throws IOException {
        try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig())) {
            indexWriter.addDocument(doc);
            indexWriter.commit();
        }
    }
}
Loading