Skip to content

Commit bccbb5c

Browse files
authored
Merge branch 'main' into fix_148138
2 parents 3af6996 + a05cb5f commit bccbb5c

16 files changed

Lines changed: 908 additions & 172 deletions

File tree

docs/changelog/148261.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
area: ES|QL
2+
issues: []
3+
pr: 148261
4+
summary: Skip filter eval when row group stats prove all rows pass
5+
type: enhancement

muted-tests.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,3 +548,9 @@ tests:
548548
- class: org.elasticsearch.xpack.watcher.test.integration.SingleNodeTests
549549
method: testThatLoadingWithNonExistingIndexWorks
550550
issue: https://github.com/elastic/elasticsearch/issues/148323
551+
- class: org.elasticsearch.xpack.stateless.lucene.BlobCacheIndexInputTests
552+
method: testWithByteBufferSlicesReturnsFalseAfterEviction
553+
issue: https://github.com/elastic/elasticsearch/issues/148330
554+
- class: org.elasticsearch.xpack.security.authc.esnative.ReservedRealmElasticAutoconfigIntegTests
555+
method: testAutoconfigFailedPasswordPromotion
556+
issue: https://github.com/elastic/elasticsearch/issues/122668

server/src/main/java/org/elasticsearch/index/codec/tsdb/AbstractTSDBDocValuesProducer.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,13 +387,12 @@ public BlockLoader.Block tryRead(
387387
}
388388
} else if (isDense(firstDocId, lastDocId, count)) {
389389
try (var builder = factory.singletonBytesRefs(count)) {
390-
long[] offsets = new long[count + 1];
390+
int[] offsets = new int[count + 1];
391391

392392
long startOffset = addresses.get(firstDocId);
393393
for (int i = offset, j = 1; i < docs.count(); i++, j++) {
394394
int docId = docs.get(i);
395-
long nextOffset = addresses.get(docId + 1) - startOffset;
396-
offsets[j] = nextOffset;
395+
offsets[j] = Math.toIntExact(addresses.get(docId + 1) - startOffset);
397396
}
398397

399398
int length = Math.toIntExact(addresses.get(lastDocId + 1L) - startOffset);
@@ -887,7 +886,7 @@ void decodeBulk(int numBlocks, int firstDocId, int lastDocId, int count, BlockLo
887886
final int bufferSize = computeMultipleBlockBufferSize(firstBlockId, endBlockId);
888887

889888
int offsetBufferIndex = 0;
890-
final long[] offsetBuffer = new long[count + 1];
889+
final int[] offsetBuffer = new int[count + 1];
891890
int valuesBufferIndex = 0;
892891
final byte[] valuesBuffer = new byte[bufferSize];
893892

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -798,12 +798,12 @@ interface SingletonBytesRefBuilder extends Builder {
798798
* Append multiple BytesRef. Offsets contains offsets of each BytesRef in the byte array.
799799
* The length of the offsets array is one more than the number of BytesRefs.
800800
*/
801-
SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long[] offsets) throws IOException;
801+
SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, int[] offsets) throws IOException;
802802

803803
/**
804804
* Append multiple BytesRefs, all with the same length.
805805
*/
806-
SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long bytesRefLengths) throws IOException;
806+
SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, int bytesRefLengths) throws IOException;
807807
}
808808

809809
interface FloatBuilder extends Builder {

test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,18 +124,18 @@ private BytesRefsBuilder() {
124124
}
125125

126126
@Override
127-
public BlockLoader.SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long[] offsets) throws IOException {
127+
public BlockLoader.SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, int[] offsets) {
128128
for (int i = 0; i < offsets.length - 1; i++) {
129-
BytesRef ref = new BytesRef(bytes, (int) offsets[i], (int) (offsets[i + 1] - offsets[i]));
129+
BytesRef ref = new BytesRef(bytes, offsets[i], offsets[i + 1] - offsets[i]);
130130
add(BytesRef.deepCopyOf(ref));
131131
}
132132
return this;
133133
}
134134

135135
@Override
136-
public BlockLoader.SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long bytesRefLengths) throws IOException {
136+
public BlockLoader.SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, int bytesRefLengths) {
137137
for (int i = 0; i < count; i++) {
138-
BytesRef ref = new BytesRef(bytes, (int) (i * bytesRefLengths), (int) bytesRefLengths);
138+
BytesRef ref = new BytesRef(bytes, (int) (i * bytesRefLengths), bytesRefLengths);
139139
add(BytesRef.deepCopyOf(ref));
140140
}
141141
return this;

x-pack/plugin/esql-datasource-parquet/src/main/java/org/elasticsearch/xpack/esql/datasource/parquet/OptimizedParquetColumnIterator.java

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.parquet.column.impl.ColumnReadStoreImpl;
1313
import org.apache.parquet.column.page.PageReader;
1414
import org.apache.parquet.compression.CompressionCodecFactory;
15+
import org.apache.parquet.filter2.predicate.FilterPredicate;
1516
import org.apache.parquet.hadoop.ParquetFileReader;
1617
import org.apache.parquet.hadoop.metadata.BlockMetaData;
1718
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -62,6 +63,13 @@
6263
* {@link BlockFactory#breaker()}) before async I/O starts. The reservation is released
6364
* when prefetched data is consumed and cleared. If the breaker would trip, prefetch is
6465
* skipped and the query falls back to synchronous I/O for that row group.
66+
*
67+
* <p><b>Trivially-passes guard:</b> when late materialization is enabled and row-group
68+
* statistics prove every row satisfies the pushed filter ({@link TriviallyPassesChecker}),
69+
* the iterator routes that row group through {@code nextStandard} for the remainder of the
70+
* row group, skipping per-row filter evaluation and survivor compaction. This benefits queries
71+
* that mix selective and non-selective row groups (e.g., time-bucketed data with skewed
72+
* filter selectivity).
6573
*/
6674
final class OptimizedParquetColumnIterator implements CloseableIterator<Page> {
6775

@@ -111,8 +119,28 @@ final class OptimizedParquetColumnIterator implements CloseableIterator<Page> {
111119
private final boolean lateMaterialization;
112120
private final boolean[] isPredicateColumn;
113121
private final ParquetPushedExpressions pushedExpressions;
122+
/**
123+
* The parquet-mr {@link FilterPredicate} resolved by {@code ParquetFormatReader} for this scan,
124+
* passed through unchanged so the trivially-passes check sees the same predicate that drove
125+
* row-group pruning and ColumnIndex {@link RowRanges} computation. Translating again here
126+
* would be wasted work and could subtly diverge if the caller's schema differs from
127+
* {@link #projectedSchema}.
128+
*
129+
* <p>{@code null} when the trivially-passes guard is inactive: late materialization is off,
130+
* the reader did not resolve a file-level predicate, or predicate resolution failed earlier
131+
* (in which case the reader logged a warning and passed {@code null} through).
132+
*/
133+
private final FilterPredicate triviallyPassesPredicate;
114134
private final WordMask survivorMask;
115135
private long rowsEliminatedByLateMaterialization;
136+
/**
137+
* When {@code true}, row-group statistics prove every row in the current row group satisfies
138+
* the pushed filter, so late materialization is bypassed for this row group: filter
139+
* evaluation and survivor compaction are skipped, and the standard read path is used.
140+
*/
141+
private boolean currentRowGroupTriviallyPasses;
142+
/** Diagnostic counter: number of row groups for which the filter was proven to trivially pass. */
143+
private long rowGroupsWithTrivialFilter;
116144

117145
OptimizedParquetColumnIterator(
118146
ParquetFileReader reader,
@@ -129,7 +157,8 @@ final class OptimizedParquetColumnIterator implements CloseableIterator<Page> {
129157
RowRanges[] allRowRanges,
130158
boolean[] survivingRowGroups,
131159
CompressionCodecFactory codecFactory,
132-
ParquetPushedExpressions pushedExpressions
160+
ParquetPushedExpressions pushedExpressions,
161+
FilterPredicate triviallyPassesPredicate
133162
) {
134163
this.reader = reader;
135164
this.projectedSchema = projectedSchema;
@@ -151,6 +180,9 @@ final class OptimizedParquetColumnIterator implements CloseableIterator<Page> {
151180
this.isPredicateColumn = classifyPredicateColumns(attributes, columnInfos, pushedExpressions);
152181
this.lateMaterialization = pushedExpressions != null && hasProjectionOnlyColumns(isPredicateColumn, columnInfos);
153182
this.survivorMask = lateMaterialization ? new WordMask() : null;
183+
// Caller supplies null when late materialization is off; defensively also drop it here so
184+
// the trivially-passes check is gated by a single condition below.
185+
this.triviallyPassesPredicate = lateMaterialization ? triviallyPassesPredicate : null;
154186

155187
this.projectedColumnPaths = buildProjectedColumnPaths(columnInfos);
156188
this.prefetchDepth = computePrefetchDepth(reader.getRowGroups(), this.projectedColumnPaths);
@@ -333,18 +365,34 @@ private boolean advanceRowGroup() throws IOException {
333365
if (rowsEliminatedByLateMaterialization > 0) {
334366
logger.debug("Late materialization eliminated [{}] rows in [{}]", rowsEliminatedByLateMaterialization, fileLocation);
335367
}
368+
if (rowGroupsWithTrivialFilter > 0) {
369+
logger.debug(
370+
"Trivially-passes guard skipped late-materialization for [{}] row groups in [{}]",
371+
rowGroupsWithTrivialFilter,
372+
fileLocation
373+
);
374+
}
336375
return false;
337376
}
338377
rowGroupOrdinal = nextOrdinal;
339378
pageBatchIndexInRowGroup = 0;
340379

341380
BlockMetaData block = reader.getRowGroups().get(rowGroupOrdinal);
381+
// Per-row-group trivially-passes check: when stats prove every row matches the filter,
382+
// the late-materialization machinery (decode predicate columns → evaluate filter → compact
383+
// survivors) is pure overhead. Switching to the standard path eliminates filter evaluation.
384+
currentRowGroupTriviallyPasses = triviallyPassesPredicate != null && TriviallyPassesChecker.check(triviallyPassesPredicate, block);
385+
if (currentRowGroupTriviallyPasses) {
386+
rowGroupsWithTrivialFilter++;
387+
}
342388
NavigableMap<Long, ColumnChunkPrefetcher.PrefetchedChunk> chunks = takePendingPrefetch(rowGroupOrdinal);
343389
try {
344390
RowRanges currentRowRanges = resolveCurrentRowRanges(block);
345391
// When late materialization is active, skip ColumnIndex page filtering — late-mat handles
346392
// row-level filtering itself via the survivor mask. Applying both ColumnIndex RowRanges
347393
// AND late-mat evaluation causes double-filtering that drops rows incorrectly.
394+
// The trivially-passes case is handled the same way: we already know all rows match,
395+
// so leaving page filtering off is consistent and safe (RowRanges would be all() anyway).
348396
RowRanges buildRowRanges = lateMaterialization ? null : currentRowRanges;
349397
rowGroup = PrefetchedRowGroupBuilder.build(
350398
block,
@@ -551,12 +599,13 @@ public Page next() {
551599
}
552600
int rowsToRead = (int) Math.min(effectiveBatch, rowsRemainingInGroup);
553601

554-
Page result = lateMaterialization ? nextWithLateMaterialization(rowsToRead) : nextStandard(rowsToRead);
602+
boolean useLateMaterialization = lateMaterialization && currentRowGroupTriviallyPasses == false;
603+
Page result = useLateMaterialization ? nextWithLateMaterialization(rowsToRead) : nextStandard(rowsToRead);
555604

556605
pageBatchIndexInRowGroup++;
557606
rowsRemainingInGroup -= rowsToRead;
558607
if (rowBudget != FormatReader.NO_LIMIT) {
559-
rowBudget -= lateMaterialization ? result.getPositionCount() : rowsToRead;
608+
rowBudget -= useLateMaterialization ? result.getPositionCount() : rowsToRead;
560609
}
561610
return result;
562611
}

x-pack/plugin/esql-datasource-parquet/src/main/java/org/elasticsearch/xpack/esql/datasource/parquet/ParquetFormatReader.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,12 @@ private CloseableIterator<Page> createOptimizedIterator(
813813
}
814814
}
815815

816+
// Reuse the FilterPredicate already resolved at the file level so the trivially-passes
817+
// guard sees the same predicate that drove row-group pruning and column-index RowRanges.
818+
// Only pass it through when late materialization is actually active; otherwise the
819+
// iterator has no use for it.
820+
FilterPredicate triviallyPassesPredicate = effectivePushed != null ? filterPredicate : null;
821+
816822
return new OptimizedParquetColumnIterator(
817823
reader,
818824
projectedSchema,
@@ -828,7 +834,8 @@ private CloseableIterator<Page> createOptimizedIterator(
828834
allRowRanges,
829835
survivingRowGroups,
830836
codecFactory,
831-
effectivePushed
837+
effectivePushed,
838+
triviallyPassesPredicate
832839
);
833840
}
834841

0 commit comments

Comments
 (0)