Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package org.elasticsearch.common.lucene.index;

import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.elasticsearch.index.shard.ShardId;
Expand All @@ -26,7 +28,7 @@
* A {@link org.apache.lucene.index.FilterLeafReader} that exposes
* Elasticsearch internal per shard / index information like the shard ID.
*/
public final class ElasticsearchLeafReader extends FilterLeafReader {
public final class ElasticsearchCodecReader extends FilterCodecReader {

private final ShardId shardId;

Expand All @@ -36,7 +38,7 @@ public final class ElasticsearchLeafReader extends FilterLeafReader {
*
* @param in specified base reader.
*/
public ElasticsearchLeafReader(LeafReader in, ShardId shardId) {
public ElasticsearchCodecReader(CodecReader in, ShardId shardId) {
super(in);
this.shardId = shardId;
}
Expand All @@ -58,16 +60,17 @@ public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}

public static ElasticsearchLeafReader getElasticsearchLeafReader(LeafReader reader) {
if (reader instanceof FilterLeafReader) {
if (reader instanceof ElasticsearchLeafReader) {
return (ElasticsearchLeafReader) reader;
public static ElasticsearchCodecReader getElasticsearchCodecReader(LeafReader reader) {
reader = FilterLeafReader.unwrap(reader);
if (reader instanceof FilterCodecReader) {
if (reader instanceof ElasticsearchCodecReader) {
return (ElasticsearchCodecReader) reader;
} else {
// We need to use FilterLeafReader#getDelegate and not FilterLeafReader#unwrap, because
// We need to use FilterCodecReader#getDelegate and not FilterCodecReader#unwrap, because
// If there are multiple levels of filtered leaf readers then with the unwrap() method it immediately
// returns the most inner leaf reader and thus skipping of over any other filtered leaf reader that
// may be instance of ElasticsearchLeafReader. This can cause us to miss the shardId.
return getElasticsearchLeafReader(((FilterLeafReader) reader).getDelegate());
// returns the innermost leaf reader and thus skips over any other filtered codec reader that
// may be instance of ElasticsearchCodecReader. This can cause us to miss the shardId.
return getElasticsearchCodecReader(((FilterCodecReader) reader).getDelegate());
}
}
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.elasticsearch.common.lucene.index;

import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.IndexReader;
Expand Down Expand Up @@ -63,9 +64,12 @@ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOExc

/**
* Wraps the given reader in a {@link org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader} as
* well as all it's sub-readers in {@link org.elasticsearch.common.lucene.index.ElasticsearchLeafReader} to
* well as all its sub-readers in {@link org.elasticsearch.common.lucene.index.ElasticsearchCodecReader} to
* expose the given shard Id.
*
* This uses codec readers on leaves rather than plain leaf readers in order to give a chance for SourceLookup
* to optimize for sequential access by using the logic that is normally reserved to merges.
*
* @param reader the reader to wrap
* @param shardId the shard ID to expose via the elasticsearch internal reader wrappers.
*/
Expand All @@ -80,7 +84,7 @@ private static final class SubReaderWrapper extends FilterDirectoryReader.SubRea
}
@Override
public LeafReader wrap(LeafReader reader) {
return new ElasticsearchLeafReader(reader, shardId);
return new ElasticsearchCodecReader((CodecReader) reader, shardId);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import org.apache.lucene.index.LeafReader;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
import org.elasticsearch.common.lucene.index.ElasticsearchLeafReader;
import org.elasticsearch.common.lucene.index.ElasticsearchCodecReader;

public final class ShardUtils {

Expand All @@ -35,7 +35,7 @@ private ShardUtils() {}
*/
@Nullable
public static ShardId extractShardId(LeafReader reader) {
final ElasticsearchLeafReader esReader = ElasticsearchLeafReader.getElasticsearchLeafReader(reader);
final ElasticsearchCodecReader esReader = ElasticsearchCodecReader.getElasticsearchCodecReader(reader);
if (esReader != null) {
assert reader.getRefCount() > 0 : "ElasticsearchLeafReader is already closed";
return esReader.shardId();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
*/
package org.elasticsearch.search.lookup;

import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCodecReaderWrapper;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesReference;
Expand All @@ -30,6 +32,8 @@
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
Expand All @@ -40,6 +44,7 @@
public class SourceLookup implements Map<String, Object> {

private LeafReader reader;
private StoredFieldsReader storedFieldsReader;

private int docId = -1;

Expand Down Expand Up @@ -71,7 +76,7 @@ public Map<String, Object> loadSourceIfNeeded() {
}
try {
FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
reader.document(docId, sourceFieldVisitor);
storedFieldsReader.visitDocument(docId, sourceFieldVisitor);
BytesReference source = sourceFieldVisitor.source();
if (source == null) {
this.source = emptyMap();
Expand Down Expand Up @@ -101,6 +106,15 @@ public void setSegmentAndDocument(LeafReaderContext context, int docId) {
return;
}
this.reader = context.reader();
// Lucene stored fields are really optimized for random access and don't
// optimize for sequential access - except for merging. So we do a
// little hack here and pretend we're going to do merges in order to
// get better sequential access.
try {
this.storedFieldsReader = SlowCodecReaderWrapper.wrap(reader).getFieldsReader().getMergeInstance();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we do our best to always have a CodecReader here now so maybe we should make that the return type? Or assert that it is one and then hard cast? It'd be a shame to fall back to the "slow" implementation of this at run time accidentally.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting there requires more work. DLS and FLS currently use reader wrappers that would hide the codec reader.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you leave a comment then about how we're not using the "slow part" of SlowCodecReaderWrapper. I checked the code path a little more closely and I feel much better about it. The name is just scary. Really this just wraps the reader so you can call getMergeInstance which still returns this.

} catch (IOException e) {
throw new UncheckedIOException(e);
}
this.source = null;
this.sourceAsBytes = null;
this.docId = docId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
Expand Down Expand Up @@ -5932,4 +5933,23 @@ public void testNotWarmUpSearcherInEngineCtor() throws Exception {
}
}
}

public void testProducesCodecReader() throws Exception {
    // Every leaf produced by the engine must be a CodecReader: SourceLookup's
    // sequential-access optimization (useful in turn for runtime fields) depends on it.
    ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField("test"),
        new BytesArray("{}".getBytes(Charset.defaultCharset())), null);
    // Index via either the primary or the replica path; both must yield codec readers.
    final Engine.Index operation;
    if (randomBoolean()) {
        operation = appendOnlyPrimary(doc, false, 1);
    } else {
        operation = appendOnlyReplica(doc, false, 1, randomIntBetween(0, 5));
    }
    engine.index(operation);
    engine.refresh("test");
    try (Engine.Searcher searcher = engine.acquireSearcher("test")) {
        IndexReader indexReader = searcher.getIndexReader();
        assertThat(indexReader.leaves().size(), Matchers.greaterThanOrEqualTo(1));
        for (LeafReaderContext leaf : indexReader.leaves()) {
            assertThat(leaf.reader(), Matchers.instanceOf(CodecReader.class));
        }
    }
}
}