Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package org.elasticsearch.common.lucene.index;

import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.elasticsearch.index.shard.ShardId;
Expand All @@ -26,7 +28,7 @@
* A {@link org.apache.lucene.index.FilterLeafReader} that exposes
* Elasticsearch internal per shard / index information like the shard ID.
*/
public final class ElasticsearchLeafReader extends FilterLeafReader {
public final class ElasticsearchCodecReader extends FilterCodecReader {

private final ShardId shardId;

Expand All @@ -36,7 +38,7 @@ public final class ElasticsearchLeafReader extends FilterLeafReader {
*
* @param in specified base reader.
*/
public ElasticsearchLeafReader(LeafReader in, ShardId shardId) {
public ElasticsearchCodecReader(CodecReader in, ShardId shardId) {
super(in);
this.shardId = shardId;
}
Expand All @@ -58,16 +60,17 @@ public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}

public static ElasticsearchLeafReader getElasticsearchLeafReader(LeafReader reader) {
if (reader instanceof FilterLeafReader) {
if (reader instanceof ElasticsearchLeafReader) {
return (ElasticsearchLeafReader) reader;
public static ElasticsearchCodecReader getElasticsearchCodecReader(LeafReader reader) {
reader = FilterLeafReader.unwrap(reader);
if (reader instanceof FilterCodecReader) {
if (reader instanceof ElasticsearchCodecReader) {
return (ElasticsearchCodecReader) reader;
} else {
// We need to use FilterLeafReader#getDelegate and not FilterLeafReader#unwrap, because
// We need to use FilterCodecReader#getDelegate and not FilterCodecReader#unwrap, because
// If there are multiple levels of filtered leaf readers then with the unwrap() method it immediately
// returns the most inner leaf reader and thus skipping of over any other filtered leaf reader that
// may be instance of ElasticsearchLeafReader. This can cause us to miss the shardId.
return getElasticsearchLeafReader(((FilterLeafReader) reader).getDelegate());
// returns the innermost leaf reader and thus skips over any other filtered codec reader that
// may be instance of ElasticsearchCodecReader. This can cause us to miss the shardId.
return getElasticsearchCodecReader(((FilterCodecReader) reader).getDelegate());
}
}
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.elasticsearch.common.lucene.index;

import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.IndexReader;
Expand Down Expand Up @@ -63,9 +64,12 @@ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOExc

/**
* Wraps the given reader in a {@link org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader} as
* well as all it's sub-readers in {@link org.elasticsearch.common.lucene.index.ElasticsearchLeafReader} to
* well as all its sub-readers in {@link org.elasticsearch.common.lucene.index.ElasticsearchCodecReader} to
* expose the given shard Id.
*
* This uses codec readers on leaves rather than plain leaf readers in order to give a chance for SourceLookup
* to optimize for sequential access by using the logic that is normally reserved to merges.
*
* @param reader the reader to wrap
* @param shardId the shard ID to expose via the elasticsearch internal reader wrappers.
*/
Expand All @@ -80,7 +84,7 @@ private static final class SubReaderWrapper extends FilterDirectoryReader.SubRea
}
@Override
public LeafReader wrap(LeafReader reader) {
return new ElasticsearchLeafReader(reader, shardId);
return new ElasticsearchCodecReader((CodecReader) reader, shardId);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import org.apache.lucene.index.LeafReader;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
import org.elasticsearch.common.lucene.index.ElasticsearchLeafReader;
import org.elasticsearch.common.lucene.index.ElasticsearchCodecReader;

public final class ShardUtils {

Expand All @@ -35,7 +35,7 @@ private ShardUtils() {}
*/
@Nullable
public static ShardId extractShardId(LeafReader reader) {
final ElasticsearchLeafReader esReader = ElasticsearchLeafReader.getElasticsearchLeafReader(reader);
final ElasticsearchCodecReader esReader = ElasticsearchCodecReader.getElasticsearchCodecReader(reader);
if (esReader != null) {
assert reader.getRefCount() > 0 : "ElasticsearchLeafReader is already closed";
return esReader.shardId();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
*/
package org.elasticsearch.search.lookup;

import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCodecReaderWrapper;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesReference;
Expand All @@ -30,6 +32,8 @@
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
Expand All @@ -40,6 +44,7 @@
public class SourceLookup implements Map<String, Object> {

private LeafReader reader;
private StoredFieldsReader storedFieldsReader;

private int docId = -1;

Expand Down Expand Up @@ -71,7 +76,7 @@ public Map<String, Object> loadSourceIfNeeded() {
}
try {
FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
reader.document(docId, sourceFieldVisitor);
storedFieldsReader.visitDocument(docId, sourceFieldVisitor);
BytesReference source = sourceFieldVisitor.source();
if (source == null) {
this.source = emptyMap();
Expand Down Expand Up @@ -101,6 +106,15 @@ public void setSegmentAndDocument(LeafReaderContext context, int docId) {
return;
}
this.reader = context.reader();
// Lucene stored fields are really optimized for random access and don't
// optimize for sequential access - except for merging. So we do a
// little hack here and pretend we're going to do merges in order to
// get better sequential access.
try {
this.storedFieldsReader = SlowCodecReaderWrapper.wrap(reader).getFieldsReader().getMergeInstance();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we do our best to always have a CodecReader here now so maybe we should make that the return type? Or assert that it is one and then hard cast? It'd be a shame to fall back to the "slow" implementation of this at run time accidentally.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting there requires more work. DLS and FLS currently use reader wrappers that would hide the codec reader.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you leave a comment then about how we're not using the "slow part" of SlowCodecReaderWrapper. I checked the code path a little more closely and I feel much better about it. The name is just scary. Really this just wraps the reader so you can call getMergeInstance which still returns this.

} catch (IOException e) {
throw new UncheckedIOException(e);
}
this.source = null;
this.sourceAsBytes = null;
this.docId = docId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
Expand Down Expand Up @@ -5932,4 +5933,23 @@ public void testNotWarmUpSearcherInEngineCtor() throws Exception {
}
}
}

public void testProducesCodecReader() throws Exception {
    // Every leaf produced by the engine must be a CodecReader: SourceLookup's
    // sequential-access optimization (useful in turn for runtime fields) depends on it.
    ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField("test"),
        new BytesArray("{}".getBytes(Charset.defaultCharset())), null);
    // Index via either the primary or the replica path; both must yield codec readers.
    final Engine.Index operation;
    if (randomBoolean()) {
        operation = appendOnlyPrimary(doc, false, 1);
    } else {
        operation = appendOnlyReplica(doc, false, 1, randomIntBetween(0, 5));
    }
    engine.index(operation);
    engine.refresh("test");
    try (Engine.Searcher searcher = engine.acquireSearcher("test")) {
        IndexReader indexReader = searcher.getIndexReader();
        assertThat(indexReader.leaves().size(), Matchers.greaterThanOrEqualTo(1));
        for (LeafReaderContext leaf : indexReader.leaves()) {
            assertThat(leaf.reader(), Matchers.instanceOf(CodecReader.class));
        }
    }
}
}