opensearch-project · rishabhmaurya · Mar 23, 2026 · Mar 10, 2026 · Mar 19, 2026 · Mar 23, 2026
@@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Use env variable (OPENSEARCH_FIPS_MODE) to enable opensearch to run in FIPS enforced mode instead of checking for existence of bcFIPS jars ([#20625](https://github.com/opensearch-project/OpenSearch/pull/20625))
 - Update streaming flag to use search request context ([#20530](https://github.com/opensearch-project/OpenSearch/pull/20530))
 - Move pull-based ingestion classes from experimental to publicAPI ([#20704](https://github.com/opensearch-project/OpenSearch/pull/20704))
+- Lazily initialize the stored field reader in SourceLookup ([#20827](https://github.com/opensearch-project/OpenSearch/pull/20827))
 
 ### Fixed
 - Relax index template pattern overlap check to use minimum-string heuristic, allowing distinguishable multi-wildcard patterns at the same priority ([#20702](https://github.com/opensearch-project/OpenSearch/pull/20702))

@@ -48,7 +48,6 @@
 import org.opensearch.search.fetch.subphase.FetchSourceContext;
 
 import java.io.IOException;
-import java.io.UncheckedIOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@@ -65,7 +64,7 @@
 public class SourceLookup implements Map {
 
     private LeafReader reader;
-    CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;
+    private CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;
 
     private int docId = -1;
 
@@ -100,6 +99,19 @@ public Map<String, Object> loadSourceIfNeeded() {
             return source;
         }
         try {
+            // Lazily initialize fieldReader on first _source access.
+            // All the docs to fetch are adjacent but Lucene stored fields are optimized
+            // for random access and don't optimize for sequential access - except for merging.
+            // So we do a little hack here and pretend we're going to do merges in order to
+            // get better sequential access.
+            if (fieldReader == null) {
+                if (reader instanceof SequentialStoredFieldsLeafReader lf) {
+                    fieldReader = lf.getSequentialStoredFieldsReader()::document;
+                } else {
+                    fieldReader = reader.storedFields()::document;
+                }
+            }
+
             FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
             fieldReader.accept(docId, sourceFieldVisitor);
             BytesReference source = sourceFieldVisitor.source();
@@ -132,20 +144,9 @@ public void setSegmentAndDocument(LeafReaderContext context, int docId) {
         }
         if (this.reader != context.reader()) {
             this.reader = context.reader();
-            // only reset reader and fieldReader when reader changes
-            try {
-                if (context.reader() instanceof SequentialStoredFieldsLeafReader lf) {
-                    // All the docs to fetch are adjacent but Lucene stored fields are optimized
-                    // for random access and don't optimize for sequential access - except for merging.
-                    // So we do a little hack here and pretend we're going to do merges in order to
-                    // get better sequential access.
-                    fieldReader = lf.getSequentialStoredFieldsReader()::document;
-                } else {
-                    fieldReader = context.reader().storedFields()::document;
-                }
-            } catch (IOException e) {
-                throw new UncheckedIOException(e);
-            }
+            // Lazily initialize fieldReader in loadSourceIfNeeded() to avoid
+            // unnecessary work when _source is never accessed.
+            this.fieldReader = null;
         }
         this.source = null;
         this.sourceAsBytes = null;

@@ -0,0 +1,114 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search.lookup;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.store.Directory;
+import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
+import org.opensearch.common.lucene.index.SequentialStoredFieldsLeafReader;
+import org.opensearch.core.index.Index;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+
+/**
+ * Unit tests for {@link SourceLookup} covering how {@code _source} is loaded from plain and
+ * sequential stored-fields leaf readers, across a reader switch, and when no {@code _source}
+ * field is stored at all.
+ */
+public class SourceLookupTests extends OpenSearchTestCase {
+
+    /**
+     * Loads {@code _source} through a reader that is not a {@link SequentialStoredFieldsLeafReader}
+     * and checks that re-positioning on the same leaf and document returns the same map instance.
+     */
+    public void testLazyFieldReaderWithRegularReader() throws IOException {
+        try (Directory directory = newDirectory()) {
+            indexSourceDoc(directory, "{\"field\":\"value\"}");
+            try (DirectoryReader plainReader = DirectoryReader.open(directory)) {
+                LeafReaderContext leaf = firstLeaf(plainReader);
+                assertFalse(leaf.reader() instanceof SequentialStoredFieldsLeafReader);
+
+                SourceLookup sourceLookup = lookupAt(leaf, 0);
+                Map<String, Object> firstLoad = sourceLookup.loadSourceIfNeeded();
+                assertEquals("value", firstLoad.get("field"));
+
+                // Positioning again on the identical leaf and doc must not discard the loaded source.
+                sourceLookup.setSegmentAndDocument(leaf, 0);
+                Map<String, Object> secondLoad = sourceLookup.loadSourceIfNeeded();
+                assertSame(firstLoad, secondLoad);
+            }
+        }
+    }
+
+    /**
+     * Loads {@code _source} through a reader wrapped by {@link OpenSearchDirectoryReader}, whose
+     * leaves are asserted to be {@link SequentialStoredFieldsLeafReader} instances.
+     */
+    public void testLazyFieldReaderWithSequentialReader() throws IOException {
+        try (Directory directory = newDirectory()) {
+            indexSourceDoc(directory, "{\"field\":\"value2\"}");
+            try (DirectoryReader unwrapped = DirectoryReader.open(directory)) {
+                ShardId shardId = new ShardId(new Index("test", "_na_"), 0);
+                DirectoryReader wrapped = OpenSearchDirectoryReader.wrap(unwrapped, shardId);
+                LeafReaderContext leaf = firstLeaf(wrapped);
+                assertTrue(leaf.reader() instanceof SequentialStoredFieldsLeafReader);
+
+                SourceLookup sourceLookup = lookupAt(leaf, 0);
+                Map<String, Object> loaded = sourceLookup.loadSourceIfNeeded();
+                assertEquals("value2", loaded.get("field"));
+            }
+        }
+    }
+
+    /**
+     * Reads one document from each of two independent readers with a single {@link SourceLookup}
+     * and checks that the second read returns the second reader's document.
+     */
+    public void testSetSegmentAndDocumentWithNewReaderDefersFieldReader() throws IOException {
+        try (Directory firstDir = newDirectory(); Directory secondDir = newDirectory()) {
+            indexSourceDoc(firstDir, "{\"a\":\"1\"}");
+            indexSourceDoc(secondDir, "{\"b\":\"2\"}");
+            try (DirectoryReader firstReader = DirectoryReader.open(firstDir); DirectoryReader secondReader = DirectoryReader.open(secondDir)) {
+                LeafReaderContext firstLeaf = firstLeaf(firstReader);
+                LeafReaderContext secondLeaf = firstLeaf(secondReader);
+
+                SourceLookup sourceLookup = lookupAt(firstLeaf, 0);
+                Map<String, Object> fromFirst = sourceLookup.loadSourceIfNeeded();
+                assertEquals("1", fromFirst.get("a"));
+
+                // Moving to another reader has to drop whatever was loaded for the previous one.
+                sourceLookup.setSegmentAndDocument(secondLeaf, 0);
+                Map<String, Object> fromSecond = sourceLookup.loadSourceIfNeeded();
+                assertEquals("2", fromSecond.get("b"));
+            }
+        }
+    }
+
+    /**
+     * Indexes a document that stores only an unrelated field and checks that the loaded source
+     * map is empty.
+     */
+    public void testLoadSourceWithNoSourceReturnsEmptyMap() throws IOException {
+        try (Directory directory = newDirectory()) {
+            writeSingleDoc(directory, new StoredField("some_other_field", "data"));
+            try (DirectoryReader plainReader = DirectoryReader.open(directory)) {
+                SourceLookup sourceLookup = lookupAt(firstLeaf(plainReader), 0);
+                Map<String, Object> loaded = sourceLookup.loadSourceIfNeeded();
+                assertTrue(loaded.isEmpty());
+            }
+        }
+    }
+
+    /** Indexes one document whose {@code _source} stored field holds the UTF-8 bytes of {@code jsonSource}. */
+    private static void indexSourceDoc(Directory dir, String jsonSource) throws IOException {
+        byte[] sourceBytes = jsonSource.getBytes(StandardCharsets.UTF_8);
+        writeSingleDoc(dir, new StoredField("_source", sourceBytes));
+    }
+
+    /** Writes and commits a single document containing only {@code field} into {@code dir}. */
+    private static void writeSingleDoc(Directory dir, StoredField field) throws IOException {
+        try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig())) {
+            Document document = new Document();
+            document.add(field);
+            indexWriter.addDocument(document);
+            indexWriter.commit();
+        }
+    }
+
+    /** Returns the first leaf context of {@code reader}. */
+    private static LeafReaderContext firstLeaf(DirectoryReader reader) {
+        return reader.leaves().get(0);
+    }
+
+    /** Creates a fresh {@link SourceLookup} positioned on {@code docId} within {@code leaf}. */
+    private static SourceLookup lookupAt(LeafReaderContext leaf, int docId) {
+        SourceLookup sourceLookup = new SourceLookup();
+        sourceLookup.setSegmentAndDocument(leaf, docId);
+        return sourceLookup;
+    }
+}