From 88f0bd51ad84ecf2938b473568fea21f9b2ba23c Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Tue, 14 Dec 2021 23:07:39 +0800 Subject: [PATCH 01/34] stash --- .../org/apache/lucene/util/bkd/BKDReader.java | 20 ++-- .../org/apache/lucene/util/bkd/BKDWriter.java | 5 +- .../apache/lucene/util/bkd/DocIdsWriter.java | 107 ++++++++++++++++-- .../lucene/util/bkd/TestDocIdsWriter.java | 27 +++-- 4 files changed, 133 insertions(+), 26 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index ff2584fd8ddf..97741c6ed1d0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -168,6 +168,7 @@ private static class BKDPointTree implements PointTree { scratchMaxIndexPackedValue; private final int[] commonPrefixLengths; private final BKDReaderDocIDSetIterator scratchIterator; + private final DocIdsWriter docIdsWriter; private BKDPointTree( IndexInput innerNodes, @@ -248,6 +249,7 @@ private BKDPointTree( this.scratchDataPackedValue = scratchDataPackedValue; this.scratchMinIndexPackedValue = scratchMinIndexPackedValue; this.scratchMaxIndexPackedValue = scratchMaxIndexPackedValue; + this.docIdsWriter = scratchIterator.docIdsWriter; } @Override @@ -514,7 +516,7 @@ public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws I // How many points are stored in this leaf cell: int count = leafNodes.readVInt(); // No need to call grow(), it has been called up-front - DocIdsWriter.readInts(leafNodes, count, visitor); + docIdsWriter.readInts(leafNodes, count, visitor); } else { pushLeft(); addAll(visitor, grown); @@ -577,7 +579,7 @@ private int readDocIDs(IndexInput in, long blockFP, BKDReaderDocIDSetIterator it // How many points are stored in this leaf cell: int count = in.readVInt(); - DocIdsWriter.readInts(in, count, iterator.docIDs); + docIdsWriter.readInts(in, count, 
iterator.docIDs); return count; } @@ -700,7 +702,7 @@ private void visitDocValuesNoCardinality( visitor.grow(count); if (r == PointValues.Relation.CELL_INSIDE_QUERY) { for (int i = 0; i < count; ++i) { - visitor.visit(scratchIterator.docIDs[i]); + visitor.visit((int) scratchIterator.docIDs[i]); } return; } @@ -765,7 +767,7 @@ private void visitDocValuesWithCardinality( if (r == PointValues.Relation.CELL_INSIDE_QUERY) { for (int i = 0; i < count; ++i) { - visitor.visit(scratchIterator.docIDs[i]); + visitor.visit((int) scratchIterator.docIDs[i]); } return; } @@ -865,7 +867,7 @@ private void visitCompressedDocValues( in.readBytes( scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix); } - visitor.visit(scratchIterator.docIDs[i + j], scratchPackedValue); + visitor.visit((int) scratchIterator.docIDs[i + j], scratchPackedValue); } i += runLen; } @@ -945,10 +947,12 @@ private static class BKDReaderDocIDSetIterator extends DocIdSetIterator { private int length; private int offset; private int docID; - final int[] docIDs; + final long[] docIDs; + private final DocIdsWriter docIdsWriter; public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) { - this.docIDs = new int[maxPointsInLeafNode]; + this.docIDs = new long[maxPointsInLeafNode]; + this.docIdsWriter = new DocIdsWriter(maxPointsInLeafNode); } @Override @@ -969,7 +973,7 @@ public int nextDoc() throws IOException { if (idx == length) { docID = DocIdSetIterator.NO_MORE_DOCS; } else { - docID = docIDs[offset + idx]; + docID = (int) docIDs[offset + idx]; idx++; } return docID; diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 655cf8cc610e..58f45df4077f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -130,6 +130,7 @@ public class BKDWriter implements Closeable { private final long totalPointCount; 
private final int maxDoc; + private final DocIdsWriter docIdsWriter; public BKDWriter( int maxDoc, @@ -165,7 +166,7 @@ public BKDWriter( // Maximum number of points we hold in memory at any time maxPointsSortInHeap = (int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc)); - + docIdsWriter = new DocIdsWriter(config.maxPointsInLeafNode); // Finally, we must be able to hold at least the leaf node in heap during build: if (maxPointsSortInHeap < config.maxPointsInLeafNode) { throw new IllegalArgumentException( @@ -1288,7 +1289,7 @@ private void writeLeafBlockDocs(DataOutput out, int[] docIDs, int start, int cou throws IOException { assert count > 0 : "config.maxPointsInLeafNode=" + config.maxPointsInLeafNode; out.writeVInt(count); - DocIdsWriter.writeDocIds(docIDs, start, count, out); + docIdsWriter.writeDocIds(docIDs, start, count, out); } private void writeLeafBlockPackedValues( diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 4e2741591881..23ce7a7004e6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -23,12 +23,18 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.DocBaseBitSetIterator; import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.packed.PackedInts; class DocIdsWriter { - private DocIdsWriter() {} + private final ForUtil forUtil = new ForUtil(); + private final long[] scratch; - static void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { + DocIdsWriter(int maxPointsInLeaf) { + scratch = new long[maxPointsInLeaf]; + } + + void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { // docs can be sorted either when all docs in a block have the same value // or when a segment is sorted boolean sorted = true; @@ -61,6 +67,39 @@ static void 
writeDocIds(int[] docIds, int start, int count, DataOutput out) thro return; } } + + // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) + if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { + if (sorted) { + out.writeByte((byte) 1); + long max = 0; + long[] delta = new long[count]; + int previous = docIds[start]; + for (int i = 1; i < count; i++) { + int doc = docIds[i + start]; + delta[i] = doc - previous; + max |= delta[i] & 0xffffffffL; + previous = doc; + } + int bitsPerValue = PackedInts.bitsRequired(max); + out.writeVInt(docIds[start]); + out.writeVInt(bitsPerValue); + forUtil.encode(delta, PackedInts.bitsRequired(max), out); + } else { + out.writeByte((byte) 2); + long[] longs = new long[count]; + long max = 0; + for (int i=0; i BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE); + rand(() -> 1 + random().nextInt(5000)); + } + + private void rand(IntSupplier size) throws Exception { int numIters = atLeast(100); try (Directory dir = newDirectory()) { for (int iter = 0; iter < numIters; ++iter) { - int[] docIDs = new int[1 + random().nextInt(5000)]; + int[] docIDs = new int[size.getAsInt()]; final int bpv = TestUtil.nextInt(random(), 1, 32); for (int i = 0; i < docIDs.length; ++i) { docIDs[i] = TestUtil.nextInt(random(), 0, (1 << bpv) - 1); @@ -46,10 +53,15 @@ public void testRandom() throws Exception { } public void testSorted() throws Exception { + sorted(() -> BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE); + sorted(() -> 1 + random().nextInt(5000)); + } + + private void sorted(IntSupplier size) throws Exception { int numIters = atLeast(100); try (Directory dir = newDirectory()) { for (int iter = 0; iter < numIters; ++iter) { - int[] docIDs = new int[1 + random().nextInt(5000)]; + int[] docIDs = new int[size.getAsInt()]; final int bpv = TestUtil.nextInt(random(), 1, 32); for (int i = 0; i < docIDs.length; ++i) { docIDs[i] = TestUtil.nextInt(random(), 0, (1 << bpv) - 1); @@ -93,22 +105,23 @@ public void 
testContinuousIds() throws Exception { private void test(Directory dir, int[] ints) throws Exception { final long len; + DocIdsWriter docIdsWriter = new DocIdsWriter(ints.length); try (IndexOutput out = dir.createOutput("tmp", IOContext.DEFAULT)) { - DocIdsWriter.writeDocIds(ints, 0, ints.length, out); + docIdsWriter.writeDocIds(ints, 0, ints.length, out); len = out.getFilePointer(); if (random().nextBoolean()) { out.writeLong(0); // garbage } } try (IndexInput in = dir.openInput("tmp", IOContext.READONCE)) { - int[] read = new int[ints.length]; - DocIdsWriter.readInts(in, ints.length, read); - assertArrayEquals(ints, read); + long[] read = new long[ints.length]; + docIdsWriter.readInts(in, ints.length, read); + assertArrayEquals(ints, Arrays.stream(read).mapToInt(i -> (int)i).toArray()); assertEquals(len, in.getFilePointer()); } try (IndexInput in = dir.openInput("tmp", IOContext.READONCE)) { int[] read = new int[ints.length]; - DocIdsWriter.readInts( + docIdsWriter.readInts( in, ints.length, new IntersectVisitor() { From 141dc408c2a4b4c319394199aa76a80b277effaa Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 15 Dec 2021 01:16:41 +0800 Subject: [PATCH 02/34] stash --- .../apache/lucene/util/bkd/DocIdsWriter.java | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 23ce7a7004e6..ee4f3617bc60 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -86,7 +86,6 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx out.writeVInt(bitsPerValue); forUtil.encode(delta, PackedInts.bitsRequired(max), out); } else { - out.writeByte((byte) 2); long[] longs = new long[count]; long max = 0; for (int i=0; i Date: Wed, 15 Dec 2021 02:00:35 +0800 Subject: [PATCH 03/34] check 
--- .../src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index ee4f3617bc60..6b7a8d0b0aa2 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -27,6 +27,12 @@ class DocIdsWriter { + static { + if (ForUtil.BLOCK_SIZE != BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { + throw new RuntimeException("ForUtils size need to be the same as BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE"); + } + } + private final ForUtil forUtil = new ForUtil(); private final long[] scratch; From 8b79de00401142c85b9eb995af1f812c34176c6a Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 15 Dec 2021 02:04:56 +0800 Subject: [PATCH 04/34] add forutil --- .../apache/lucene/util/bkd/DocIdsWriter.java | 18 +- .../org/apache/lucene/util/bkd/ForUtil.java | 1371 +++++++++++++++++ .../org/apache/lucene/util/bkd/gen_ForUtil.py | 452 ++++++ .../lucene/util/bkd/TestDocIdsWriter.java | 3 +- 4 files changed, 1835 insertions(+), 9 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 6b7a8d0b0aa2..895fd6bf56d9 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -29,7 +29,8 @@ class DocIdsWriter { static { if (ForUtil.BLOCK_SIZE != BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { - throw new RuntimeException("ForUtils size need to be the same as BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE"); + throw new RuntimeException( + "ForUtils size need to be the same 
as BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE"); } } @@ -94,7 +95,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } else { long[] longs = new long[count]; long max = 0; - for (int i=0; i> 1; + private static final int BLOCK_SIZE_DIV_4 = BLOCK_SIZE >> 2; + private static final int BLOCK_SIZE_DIV_8 = BLOCK_SIZE >> 3; + private static final int BLOCK_SIZE_DIV_64 = BLOCK_SIZE >> 6; + private static final int BLOCK_SIZE_DIV_8_MUL_1 = BLOCK_SIZE_DIV_8; + private static final int BLOCK_SIZE_DIV_8_MUL_2 = BLOCK_SIZE_DIV_8 * 2; + private static final int BLOCK_SIZE_DIV_8_MUL_3 = BLOCK_SIZE_DIV_8 * 3; + private static final int BLOCK_SIZE_DIV_8_MUL_4 = BLOCK_SIZE_DIV_8 * 4; + private static final int BLOCK_SIZE_DIV_8_MUL_5 = BLOCK_SIZE_DIV_8 * 5; + private static final int BLOCK_SIZE_DIV_8_MUL_6 = BLOCK_SIZE_DIV_8 * 6; + private static final int BLOCK_SIZE_DIV_8_MUL_7 = BLOCK_SIZE_DIV_8 * 7; + private static final int BLOCK_SIZE_LOG2 = MathUtil.log(BLOCK_SIZE, 2); + + private static long expandMask32(long mask32) { + return mask32 | (mask32 << 32); + } + + private static long expandMask16(long mask16) { + return expandMask32(mask16 | (mask16 << 16)); + } + + private static long expandMask8(long mask8) { + return expandMask16(mask8 | (mask8 << 8)); + } + + private static long mask32(int bitsPerValue) { + return expandMask32((1L << bitsPerValue) - 1); + } + + private static long mask16(int bitsPerValue) { + return expandMask16((1L << bitsPerValue) - 1); + } + + private static long mask8(int bitsPerValue) { + return expandMask8((1L << bitsPerValue) - 1); + } + + private static void expand8(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { + long l = arr[i]; + arr[i] = (l >>> 56) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 48) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 40) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = (l >>> 32) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 24) & 0xFFL; + 
arr[BLOCK_SIZE_DIV_8_MUL_5 + i] = (l >>> 16) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = (l >>> 8) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_7 + i] = l & 0xFFL; + } + } + + private static void expand8To32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { + long l = arr[i]; + arr[i] = (l >>> 24) & 0x000000FF000000FFL; + arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 16) & 0x000000FF000000FFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 8) & 0x000000FF000000FFL; + arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = l & 0x000000FF000000FFL; + } + } + + private static void collapse8(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { + arr[i] = + (arr[i] << 56) + | (arr[BLOCK_SIZE_DIV_8_MUL_1 + i] << 48) + | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 40) + | (arr[BLOCK_SIZE_DIV_8_MUL_3 + i] << 32) + | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 24) + | (arr[BLOCK_SIZE_DIV_8_MUL_5 + i] << 16) + | (arr[BLOCK_SIZE_DIV_8_MUL_6 + i] << 8) + | arr[BLOCK_SIZE_DIV_8_MUL_7 + i]; + } + } + + private static void expand16(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { + long l = arr[i]; + arr[i] = (l >>> 48) & 0xFFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 32) & 0xFFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 16) & 0xFFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = l & 0xFFFFL; + } + } + + private static void expand16To32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { + long l = arr[i]; + arr[i] = (l >>> 16) & 0x0000FFFF0000FFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = l & 0x0000FFFF0000FFFFL; + } + } + + private static void collapse16(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { + arr[i] = + (arr[i] << 48) + | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 32) + | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 16) + | arr[BLOCK_SIZE_DIV_8_MUL_6 + i]; + } + } + + private static void expand32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { + long l = arr[i]; + arr[i] = l >>> 32; + arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = l & 0xFFFFFFFFL; + } + } + + private static void 
collapse32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { + arr[i] = (arr[i] << 32) | arr[BLOCK_SIZE_DIV_8_MUL_4 + i]; + } + } + + private final long[] tmp = new long[BLOCK_SIZE_DIV_2]; + + /** Encode 128 integers from {@code longs} into {@code out}. */ + void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException { + final int nextPrimitive; + final int numLongs; + if (bitsPerValue <= 8) { + nextPrimitive = 8; + numLongs = BLOCK_SIZE_DIV_8; + collapse8(longs); + } else if (bitsPerValue <= 16) { + nextPrimitive = 16; + numLongs = BLOCK_SIZE_DIV_4; + collapse16(longs); + } else { + nextPrimitive = 32; + numLongs = BLOCK_SIZE_DIV_2; + collapse32(longs); + } + + final int numLongsPerShift = bitsPerValue * BLOCK_SIZE_DIV_64; + int idx = 0; + int shift = nextPrimitive - bitsPerValue; + for (int i = 0; i < numLongsPerShift; ++i) { + tmp[i] = longs[idx++] << shift; + } + for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) { + for (int i = 0; i < numLongsPerShift; ++i) { + tmp[i] |= longs[idx++] << shift; + } + } + + final int remainingBitsPerLong = shift + bitsPerValue; + final long maskRemainingBitsPerLong; + if (nextPrimitive == 8) { + maskRemainingBitsPerLong = MASKS8[remainingBitsPerLong]; + } else if (nextPrimitive == 16) { + maskRemainingBitsPerLong = MASKS16[remainingBitsPerLong]; + } else { + maskRemainingBitsPerLong = MASKS32[remainingBitsPerLong]; + } + + int tmpIdx = 0; + int remainingBitsPerValue = bitsPerValue; + while (idx < numLongs) { + if (remainingBitsPerValue >= remainingBitsPerLong) { + remainingBitsPerValue -= remainingBitsPerLong; + tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & maskRemainingBitsPerLong; + if (remainingBitsPerValue == 0) { + idx++; + remainingBitsPerValue = bitsPerValue; + } + } else { + final long mask1, mask2; + if (nextPrimitive == 8) { + mask1 = MASKS8[remainingBitsPerValue]; + mask2 = MASKS8[remainingBitsPerLong - remainingBitsPerValue]; + } else if (nextPrimitive == 
16) { + mask1 = MASKS16[remainingBitsPerValue]; + mask2 = MASKS16[remainingBitsPerLong - remainingBitsPerValue]; + } else { + mask1 = MASKS32[remainingBitsPerValue]; + mask2 = MASKS32[remainingBitsPerLong - remainingBitsPerValue]; + } + tmp[tmpIdx] |= (longs[idx++] & mask1) << (remainingBitsPerLong - remainingBitsPerValue); + remainingBitsPerValue = bitsPerValue - remainingBitsPerLong + remainingBitsPerValue; + tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & mask2; + } + } + + for (int i = 0; i < numLongsPerShift; ++i) { + out.writeLong(tmp[i]); + } + } + + /** Number of bytes required to encode 128 integers of {@code bitsPerValue} bits per value. */ + int numBytes(int bitsPerValue) { + return bitsPerValue << (BLOCK_SIZE_LOG2 - 3); + } + + private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) + throws IOException { + final int numLongs = bitsPerValue << 1; + in.readLongs(tmp, 0, numLongs); + final long mask = MASKS32[bitsPerValue]; + int longsIdx = 0; + int shift = 32 - bitsPerValue; + for (; shift >= 0; shift -= bitsPerValue) { + shiftLongs(tmp, numLongs, longs, longsIdx, shift, mask); + longsIdx += numLongs; + } + final int remainingBitsPerLong = shift + bitsPerValue; + final long mask32RemainingBitsPerLong = MASKS32[remainingBitsPerLong]; + int tmpIdx = 0; + int remainingBits = remainingBitsPerLong; + for (; longsIdx < BLOCK_SIZE_DIV_2; ++longsIdx) { + int b = bitsPerValue - remainingBits; + long l = (tmp[tmpIdx++] & MASKS32[remainingBits]) << b; + while (b >= remainingBitsPerLong) { + b -= remainingBitsPerLong; + l |= (tmp[tmpIdx++] & mask32RemainingBitsPerLong) << b; + } + if (b > 0) { + l |= (tmp[tmpIdx] >>> (remainingBitsPerLong - b)) & MASKS32[b]; + remainingBits = remainingBitsPerLong - b; + } else { + remainingBits = remainingBitsPerLong; + } + longs[longsIdx] = l; + } + } + + /** + * The pattern that this shiftLongs method applies is recognized by the C2 compiler, which + * generates SIMD instructions for it 
in order to shift multiple longs at once. + */ + private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, long mask) { + for (int i = 0; i < count; ++i) { + b[bi + i] = (a[i] >>> shift) & mask; + } + } + + private static final long[] MASKS8 = new long[8]; + private static final long[] MASKS16 = new long[16]; + private static final long[] MASKS32 = new long[32]; + + static { + for (int i = 0; i < 8; ++i) { + MASKS8[i] = mask8(i); + } + for (int i = 0; i < 16; ++i) { + MASKS16[i] = mask16(i); + } + for (int i = 0; i < 32; ++i) { + MASKS32[i] = mask32(i); + } + } + // mark values in array as final longs to avoid the cost of reading array, arrays should only be + // used when the idx is a variable + private static final long MASK8_1 = MASKS8[1]; + private static final long MASK8_2 = MASKS8[2]; + private static final long MASK8_3 = MASKS8[3]; + private static final long MASK8_4 = MASKS8[4]; + private static final long MASK8_5 = MASKS8[5]; + private static final long MASK8_6 = MASKS8[6]; + private static final long MASK8_7 = MASKS8[7]; + private static final long MASK16_1 = MASKS16[1]; + private static final long MASK16_2 = MASKS16[2]; + private static final long MASK16_3 = MASKS16[3]; + private static final long MASK16_4 = MASKS16[4]; + private static final long MASK16_5 = MASKS16[5]; + private static final long MASK16_6 = MASKS16[6]; + private static final long MASK16_7 = MASKS16[7]; + private static final long MASK16_9 = MASKS16[9]; + private static final long MASK16_10 = MASKS16[10]; + private static final long MASK16_11 = MASKS16[11]; + private static final long MASK16_12 = MASKS16[12]; + private static final long MASK16_13 = MASKS16[13]; + private static final long MASK16_14 = MASKS16[14]; + private static final long MASK16_15 = MASKS16[15]; + private static final long MASK32_1 = MASKS32[1]; + private static final long MASK32_2 = MASKS32[2]; + private static final long MASK32_3 = MASKS32[3]; + private static final long MASK32_4 = MASKS32[4]; 
+ private static final long MASK32_5 = MASKS32[5]; + private static final long MASK32_6 = MASKS32[6]; + private static final long MASK32_7 = MASKS32[7]; + private static final long MASK32_8 = MASKS32[8]; + private static final long MASK32_9 = MASKS32[9]; + private static final long MASK32_10 = MASKS32[10]; + private static final long MASK32_11 = MASKS32[11]; + private static final long MASK32_12 = MASKS32[12]; + private static final long MASK32_13 = MASKS32[13]; + private static final long MASK32_14 = MASKS32[14]; + private static final long MASK32_15 = MASKS32[15]; + private static final long MASK32_17 = MASKS32[17]; + private static final long MASK32_18 = MASKS32[18]; + private static final long MASK32_19 = MASKS32[19]; + private static final long MASK32_20 = MASKS32[20]; + private static final long MASK32_21 = MASKS32[21]; + private static final long MASK32_22 = MASKS32[22]; + private static final long MASK32_23 = MASKS32[23]; + private static final long MASK32_24 = MASKS32[24]; + private static final long MASK32_25 = MASKS32[25]; + private static final long MASK32_26 = MASKS32[26]; + private static final long MASK32_27 = MASKS32[27]; + private static final long MASK32_28 = MASKS32[28]; + private static final long MASK32_29 = MASKS32[29]; + private static final long MASK32_30 = MASKS32[30]; + private static final long MASK32_31 = MASKS32[31]; + + /** Decode 128 integers into {@code longs}. 
*/ + void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { + switch (bitsPerValue) { + case 1: + decode1(in, tmp, longs); + expand8(longs); + break; + case 2: + decode2(in, tmp, longs); + expand8(longs); + break; + case 3: + decode3(in, tmp, longs); + expand8(longs); + break; + case 4: + decode4(in, tmp, longs); + expand8(longs); + break; + case 5: + decode5(in, tmp, longs); + expand8(longs); + break; + case 6: + decode6(in, tmp, longs); + expand8(longs); + break; + case 7: + decode7(in, tmp, longs); + expand8(longs); + break; + case 8: + decode8(in, tmp, longs); + expand8(longs); + break; + case 9: + decode9(in, tmp, longs); + expand16(longs); + break; + case 10: + decode10(in, tmp, longs); + expand16(longs); + break; + case 11: + decode11(in, tmp, longs); + expand16(longs); + break; + case 12: + decode12(in, tmp, longs); + expand16(longs); + break; + case 13: + decode13(in, tmp, longs); + expand16(longs); + break; + case 14: + decode14(in, tmp, longs); + expand16(longs); + break; + case 15: + decode15(in, tmp, longs); + expand16(longs); + break; + case 16: + decode16(in, tmp, longs); + expand16(longs); + break; + case 17: + decode17(in, tmp, longs); + expand32(longs); + break; + case 18: + decode18(in, tmp, longs); + expand32(longs); + break; + case 19: + decode19(in, tmp, longs); + expand32(longs); + break; + case 20: + decode20(in, tmp, longs); + expand32(longs); + break; + case 21: + decode21(in, tmp, longs); + expand32(longs); + break; + case 22: + decode22(in, tmp, longs); + expand32(longs); + break; + case 23: + decode23(in, tmp, longs); + expand32(longs); + break; + case 24: + decode24(in, tmp, longs); + expand32(longs); + break; + case 25: + decode25(in, tmp, longs); + expand32(longs); + break; + case 26: + decode26(in, tmp, longs); + expand32(longs); + break; + case 27: + decode27(in, tmp, longs); + expand32(longs); + break; + case 28: + decode28(in, tmp, longs); + expand32(longs); + break; + case 29: + decode29(in, tmp, longs); 
+ expand32(longs); + break; + case 30: + decode30(in, tmp, longs); + expand32(longs); + break; + case 31: + decode31(in, tmp, longs); + expand32(longs); + break; + case 32: + decode32(in, tmp, longs); + expand32(longs); + break; + default: + decodeSlow(bitsPerValue, in, tmp, longs); + expand32(longs); + break; + } + } + + /** + * Decodes 128 integers into 64 {@code longs} such that each long contains two values, each + * represented with 32 bits. Values [0..63] are encoded in the high-order bits of {@code longs} + * [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This + * representation may allow subsequent operations to be performed on two values at a time. + */ + void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException { + switch (bitsPerValue) { + case 1: + decode1(in, tmp, longs); + expand8To32(longs); + break; + case 2: + decode2(in, tmp, longs); + expand8To32(longs); + break; + case 3: + decode3(in, tmp, longs); + expand8To32(longs); + break; + case 4: + decode4(in, tmp, longs); + expand8To32(longs); + break; + case 5: + decode5(in, tmp, longs); + expand8To32(longs); + break; + case 6: + decode6(in, tmp, longs); + expand8To32(longs); + break; + case 7: + decode7(in, tmp, longs); + expand8To32(longs); + break; + case 8: + decode8(in, tmp, longs); + expand8To32(longs); + break; + case 9: + decode9(in, tmp, longs); + expand16To32(longs); + break; + case 10: + decode10(in, tmp, longs); + expand16To32(longs); + break; + case 11: + decode11(in, tmp, longs); + expand16To32(longs); + break; + case 12: + decode12(in, tmp, longs); + expand16To32(longs); + break; + case 13: + decode13(in, tmp, longs); + expand16To32(longs); + break; + case 14: + decode14(in, tmp, longs); + expand16To32(longs); + break; + case 15: + decode15(in, tmp, longs); + expand16To32(longs); + break; + case 16: + decode16(in, tmp, longs); + expand16To32(longs); + break; + case 17: + decode17(in, tmp, longs); + break; + case 18: + 
decode18(in, tmp, longs); + break; + case 19: + decode19(in, tmp, longs); + break; + case 20: + decode20(in, tmp, longs); + break; + case 21: + decode21(in, tmp, longs); + break; + case 22: + decode22(in, tmp, longs); + break; + case 23: + decode23(in, tmp, longs); + break; + case 24: + decode24(in, tmp, longs); + break; + case 25: + decode25(in, tmp, longs); + break; + case 26: + decode26(in, tmp, longs); + break; + case 27: + decode27(in, tmp, longs); + break; + case 28: + decode28(in, tmp, longs); + break; + case 29: + decode29(in, tmp, longs); + break; + case 30: + decode30(in, tmp, longs); + break; + case 31: + decode31(in, tmp, longs); + break; + case 32: + decode32(in, tmp, longs); + break; + default: + decodeSlow(bitsPerValue, in, tmp, longs); + break; + } + } + + private static void decode1(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 8); + shiftLongs(tmp, 8, longs, 0, 7, MASK8_1); + shiftLongs(tmp, 8, longs, 8, 6, MASK8_1); + shiftLongs(tmp, 8, longs, 16, 5, MASK8_1); + shiftLongs(tmp, 8, longs, 24, 4, MASK8_1); + shiftLongs(tmp, 8, longs, 32, 3, MASK8_1); + shiftLongs(tmp, 8, longs, 40, 2, MASK8_1); + shiftLongs(tmp, 8, longs, 48, 1, MASK8_1); + shiftLongs(tmp, 8, longs, 56, 0, MASK8_1); + } + + private static void decode2(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 16); + shiftLongs(tmp, 16, longs, 0, 6, MASK8_2); + shiftLongs(tmp, 16, longs, 16, 4, MASK8_2); + shiftLongs(tmp, 16, longs, 32, 2, MASK8_2); + shiftLongs(tmp, 16, longs, 48, 0, MASK8_2); + } + + private static void decode3(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 24); + shiftLongs(tmp, 24, longs, 0, 5, MASK8_3); + shiftLongs(tmp, 24, longs, 24, 2, MASK8_3); + for (int iter = 0, tmpIdx = 0, longsIdx = 48; iter < 8; ++iter, tmpIdx += 3, longsIdx += 2) { + long l0 = (tmp[tmpIdx + 0] & MASK8_2) << 1; + l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK8_1; + longs[longsIdx + 0] = l0; + 
long l1 = (tmp[tmpIdx + 1] & MASK8_1) << 2; + l1 |= (tmp[tmpIdx + 2] & MASK8_2) << 0; + longs[longsIdx + 1] = l1; + } + } + + private static void decode4(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 32); + shiftLongs(tmp, 32, longs, 0, 4, MASK8_4); + shiftLongs(tmp, 32, longs, 32, 0, MASK8_4); + } + + private static void decode5(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 40); + shiftLongs(tmp, 40, longs, 0, 3, MASK8_5); + for (int iter = 0, tmpIdx = 0, longsIdx = 40; iter < 8; ++iter, tmpIdx += 5, longsIdx += 3) { + long l0 = (tmp[tmpIdx + 0] & MASK8_3) << 2; + l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK8_2; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK8_1) << 4; + l1 |= (tmp[tmpIdx + 2] & MASK8_3) << 1; + l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK8_1; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 3] & MASK8_2) << 3; + l2 |= (tmp[tmpIdx + 4] & MASK8_3) << 0; + longs[longsIdx + 2] = l2; + } + } + + private static void decode6(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 48); + shiftLongs(tmp, 48, longs, 0, 2, MASK8_6); + shiftLongs(tmp, 48, tmp, 0, 0, MASK8_2); + for (int iter = 0, tmpIdx = 0, longsIdx = 48; iter < 16; ++iter, tmpIdx += 3, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 4; + l0 |= tmp[tmpIdx + 1] << 2; + l0 |= tmp[tmpIdx + 2] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode7(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 56); + shiftLongs(tmp, 56, longs, 0, 1, MASK8_7); + shiftLongs(tmp, 56, tmp, 0, 0, MASK8_1); + for (int iter = 0, tmpIdx = 0, longsIdx = 56; iter < 8; ++iter, tmpIdx += 7, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 6; + l0 |= tmp[tmpIdx + 1] << 5; + l0 |= tmp[tmpIdx + 2] << 4; + l0 |= tmp[tmpIdx + 3] << 3; + l0 |= tmp[tmpIdx + 4] << 2; + l0 |= tmp[tmpIdx + 5] << 1; + l0 |= tmp[tmpIdx + 6] << 0; + longs[longsIdx + 0] = l0; + } + } + + 
private static void decode8(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(longs, 0, 64); + } + + private static void decode9(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 72); + shiftLongs(tmp, 72, longs, 0, 7, MASK16_9); + for (int iter = 0, tmpIdx = 0, longsIdx = 72; iter < 8; ++iter, tmpIdx += 9, longsIdx += 7) { + long l0 = (tmp[tmpIdx + 0] & MASK16_7) << 2; + l0 |= (tmp[tmpIdx + 1] >>> 5) & MASK16_2; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK16_5) << 4; + l1 |= (tmp[tmpIdx + 2] >>> 3) & MASK16_4; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 2] & MASK16_3) << 6; + l2 |= (tmp[tmpIdx + 3] >>> 1) & MASK16_6; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 3] & MASK16_1) << 8; + l3 |= (tmp[tmpIdx + 4] & MASK16_7) << 1; + l3 |= (tmp[tmpIdx + 5] >>> 6) & MASK16_1; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 5] & MASK16_6) << 3; + l4 |= (tmp[tmpIdx + 6] >>> 4) & MASK16_3; + longs[longsIdx + 4] = l4; + long l5 = (tmp[tmpIdx + 6] & MASK16_4) << 5; + l5 |= (tmp[tmpIdx + 7] >>> 2) & MASK16_5; + longs[longsIdx + 5] = l5; + long l6 = (tmp[tmpIdx + 7] & MASK16_2) << 7; + l6 |= (tmp[tmpIdx + 8] & MASK16_7) << 0; + longs[longsIdx + 6] = l6; + } + } + + private static void decode10(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 80); + shiftLongs(tmp, 80, longs, 0, 6, MASK16_10); + for (int iter = 0, tmpIdx = 0, longsIdx = 80; iter < 16; ++iter, tmpIdx += 5, longsIdx += 3) { + long l0 = (tmp[tmpIdx + 0] & MASK16_6) << 4; + l0 |= (tmp[tmpIdx + 1] >>> 2) & MASK16_4; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK16_2) << 8; + l1 |= (tmp[tmpIdx + 2] & MASK16_6) << 2; + l1 |= (tmp[tmpIdx + 3] >>> 4) & MASK16_2; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 3] & MASK16_4) << 6; + l2 |= (tmp[tmpIdx + 4] & MASK16_6) << 0; + longs[longsIdx + 2] = l2; + } + } + + private static void decode11(DataInput in, long[] 
tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 88); + shiftLongs(tmp, 88, longs, 0, 5, MASK16_11); + for (int iter = 0, tmpIdx = 0, longsIdx = 88; iter < 8; ++iter, tmpIdx += 11, longsIdx += 5) { + long l0 = (tmp[tmpIdx + 0] & MASK16_5) << 6; + l0 |= (tmp[tmpIdx + 1] & MASK16_5) << 1; + l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK16_1; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 2] & MASK16_4) << 7; + l1 |= (tmp[tmpIdx + 3] & MASK16_5) << 2; + l1 |= (tmp[tmpIdx + 4] >>> 3) & MASK16_2; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 4] & MASK16_3) << 8; + l2 |= (tmp[tmpIdx + 5] & MASK16_5) << 3; + l2 |= (tmp[tmpIdx + 6] >>> 2) & MASK16_3; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 6] & MASK16_2) << 9; + l3 |= (tmp[tmpIdx + 7] & MASK16_5) << 4; + l3 |= (tmp[tmpIdx + 8] >>> 1) & MASK16_4; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 8] & MASK16_1) << 10; + l4 |= (tmp[tmpIdx + 9] & MASK16_5) << 5; + l4 |= (tmp[tmpIdx + 10] & MASK16_5) << 0; + longs[longsIdx + 4] = l4; + } + } + + private static void decode12(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 96); + shiftLongs(tmp, 96, longs, 0, 4, MASK16_12); + shiftLongs(tmp, 96, tmp, 0, 0, MASK16_4); + for (int iter = 0, tmpIdx = 0, longsIdx = 96; iter < 32; ++iter, tmpIdx += 3, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 8; + l0 |= tmp[tmpIdx + 1] << 4; + l0 |= tmp[tmpIdx + 2] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode13(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 104); + shiftLongs(tmp, 104, longs, 0, 3, MASK16_13); + for (int iter = 0, tmpIdx = 0, longsIdx = 104; iter < 8; ++iter, tmpIdx += 13, longsIdx += 3) { + long l0 = (tmp[tmpIdx + 0] & MASK16_3) << 10; + l0 |= (tmp[tmpIdx + 1] & MASK16_3) << 7; + l0 |= (tmp[tmpIdx + 2] & MASK16_3) << 4; + l0 |= (tmp[tmpIdx + 3] & MASK16_3) << 1; + l0 |= (tmp[tmpIdx + 4] >>> 2) & MASK16_1; + longs[longsIdx + 0] = l0; + long l1 
= (tmp[tmpIdx + 4] & MASK16_2) << 11; + l1 |= (tmp[tmpIdx + 5] & MASK16_3) << 8; + l1 |= (tmp[tmpIdx + 6] & MASK16_3) << 5; + l1 |= (tmp[tmpIdx + 7] & MASK16_3) << 2; + l1 |= (tmp[tmpIdx + 8] >>> 1) & MASK16_2; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 8] & MASK16_1) << 12; + l2 |= (tmp[tmpIdx + 9] & MASK16_3) << 9; + l2 |= (tmp[tmpIdx + 10] & MASK16_3) << 6; + l2 |= (tmp[tmpIdx + 11] & MASK16_3) << 3; + l2 |= (tmp[tmpIdx + 12] & MASK16_3) << 0; + longs[longsIdx + 2] = l2; + } + } + + private static void decode14(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 112); + shiftLongs(tmp, 112, longs, 0, 2, MASK16_14); + shiftLongs(tmp, 112, tmp, 0, 0, MASK16_2); + for (int iter = 0, tmpIdx = 0, longsIdx = 112; iter < 16; ++iter, tmpIdx += 7, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 12; + l0 |= tmp[tmpIdx + 1] << 10; + l0 |= tmp[tmpIdx + 2] << 8; + l0 |= tmp[tmpIdx + 3] << 6; + l0 |= tmp[tmpIdx + 4] << 4; + l0 |= tmp[tmpIdx + 5] << 2; + l0 |= tmp[tmpIdx + 6] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode15(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 120); + shiftLongs(tmp, 120, longs, 0, 1, MASK16_15); + shiftLongs(tmp, 120, tmp, 0, 0, MASK16_1); + for (int iter = 0, tmpIdx = 0, longsIdx = 120; iter < 8; ++iter, tmpIdx += 15, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 14; + l0 |= tmp[tmpIdx + 1] << 13; + l0 |= tmp[tmpIdx + 2] << 12; + l0 |= tmp[tmpIdx + 3] << 11; + l0 |= tmp[tmpIdx + 4] << 10; + l0 |= tmp[tmpIdx + 5] << 9; + l0 |= tmp[tmpIdx + 6] << 8; + l0 |= tmp[tmpIdx + 7] << 7; + l0 |= tmp[tmpIdx + 8] << 6; + l0 |= tmp[tmpIdx + 9] << 5; + l0 |= tmp[tmpIdx + 10] << 4; + l0 |= tmp[tmpIdx + 11] << 3; + l0 |= tmp[tmpIdx + 12] << 2; + l0 |= tmp[tmpIdx + 13] << 1; + l0 |= tmp[tmpIdx + 14] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode16(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(longs, 0, 
128); + } + + private static void decode17(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 136); + shiftLongs(tmp, 136, longs, 0, 15, MASK32_17); + for (int iter = 0, tmpIdx = 0, longsIdx = 136; iter < 8; ++iter, tmpIdx += 17, longsIdx += 15) { + long l0 = (tmp[tmpIdx + 0] & MASK32_15) << 2; + l0 |= (tmp[tmpIdx + 1] >>> 13) & MASK32_2; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK32_13) << 4; + l1 |= (tmp[tmpIdx + 2] >>> 11) & MASK32_4; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 2] & MASK32_11) << 6; + l2 |= (tmp[tmpIdx + 3] >>> 9) & MASK32_6; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 3] & MASK32_9) << 8; + l3 |= (tmp[tmpIdx + 4] >>> 7) & MASK32_8; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 4] & MASK32_7) << 10; + l4 |= (tmp[tmpIdx + 5] >>> 5) & MASK32_10; + longs[longsIdx + 4] = l4; + long l5 = (tmp[tmpIdx + 5] & MASK32_5) << 12; + l5 |= (tmp[tmpIdx + 6] >>> 3) & MASK32_12; + longs[longsIdx + 5] = l5; + long l6 = (tmp[tmpIdx + 6] & MASK32_3) << 14; + l6 |= (tmp[tmpIdx + 7] >>> 1) & MASK32_14; + longs[longsIdx + 6] = l6; + long l7 = (tmp[tmpIdx + 7] & MASK32_1) << 16; + l7 |= (tmp[tmpIdx + 8] & MASK32_15) << 1; + l7 |= (tmp[tmpIdx + 9] >>> 14) & MASK32_1; + longs[longsIdx + 7] = l7; + long l8 = (tmp[tmpIdx + 9] & MASK32_14) << 3; + l8 |= (tmp[tmpIdx + 10] >>> 12) & MASK32_3; + longs[longsIdx + 8] = l8; + long l9 = (tmp[tmpIdx + 10] & MASK32_12) << 5; + l9 |= (tmp[tmpIdx + 11] >>> 10) & MASK32_5; + longs[longsIdx + 9] = l9; + long l10 = (tmp[tmpIdx + 11] & MASK32_10) << 7; + l10 |= (tmp[tmpIdx + 12] >>> 8) & MASK32_7; + longs[longsIdx + 10] = l10; + long l11 = (tmp[tmpIdx + 12] & MASK32_8) << 9; + l11 |= (tmp[tmpIdx + 13] >>> 6) & MASK32_9; + longs[longsIdx + 11] = l11; + long l12 = (tmp[tmpIdx + 13] & MASK32_6) << 11; + l12 |= (tmp[tmpIdx + 14] >>> 4) & MASK32_11; + longs[longsIdx + 12] = l12; + long l13 = (tmp[tmpIdx + 14] & MASK32_4) << 13; + l13 |= (tmp[tmpIdx + 15] >>> 2) & 
MASK32_13; + longs[longsIdx + 13] = l13; + long l14 = (tmp[tmpIdx + 15] & MASK32_2) << 15; + l14 |= (tmp[tmpIdx + 16] & MASK32_15) << 0; + longs[longsIdx + 14] = l14; + } + } + + private static void decode18(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 144); + shiftLongs(tmp, 144, longs, 0, 14, MASK32_18); + for (int iter = 0, tmpIdx = 0, longsIdx = 144; iter < 16; ++iter, tmpIdx += 9, longsIdx += 7) { + long l0 = (tmp[tmpIdx + 0] & MASK32_14) << 4; + l0 |= (tmp[tmpIdx + 1] >>> 10) & MASK32_4; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK32_10) << 8; + l1 |= (tmp[tmpIdx + 2] >>> 6) & MASK32_8; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 2] & MASK32_6) << 12; + l2 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_12; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 3] & MASK32_2) << 16; + l3 |= (tmp[tmpIdx + 4] & MASK32_14) << 2; + l3 |= (tmp[tmpIdx + 5] >>> 12) & MASK32_2; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 5] & MASK32_12) << 6; + l4 |= (tmp[tmpIdx + 6] >>> 8) & MASK32_6; + longs[longsIdx + 4] = l4; + long l5 = (tmp[tmpIdx + 6] & MASK32_8) << 10; + l5 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_10; + longs[longsIdx + 5] = l5; + long l6 = (tmp[tmpIdx + 7] & MASK32_4) << 14; + l6 |= (tmp[tmpIdx + 8] & MASK32_14) << 0; + longs[longsIdx + 6] = l6; + } + } + + private static void decode19(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 152); + shiftLongs(tmp, 152, longs, 0, 13, MASK32_19); + for (int iter = 0, tmpIdx = 0, longsIdx = 152; iter < 8; ++iter, tmpIdx += 19, longsIdx += 13) { + long l0 = (tmp[tmpIdx + 0] & MASK32_13) << 6; + l0 |= (tmp[tmpIdx + 1] >>> 7) & MASK32_6; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK32_7) << 12; + l1 |= (tmp[tmpIdx + 2] >>> 1) & MASK32_12; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 2] & MASK32_1) << 18; + l2 |= (tmp[tmpIdx + 3] & MASK32_13) << 5; + l2 |= (tmp[tmpIdx + 4] >>> 8) & MASK32_5; + 
longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 4] & MASK32_8) << 11; + l3 |= (tmp[tmpIdx + 5] >>> 2) & MASK32_11; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 5] & MASK32_2) << 17; + l4 |= (tmp[tmpIdx + 6] & MASK32_13) << 4; + l4 |= (tmp[tmpIdx + 7] >>> 9) & MASK32_4; + longs[longsIdx + 4] = l4; + long l5 = (tmp[tmpIdx + 7] & MASK32_9) << 10; + l5 |= (tmp[tmpIdx + 8] >>> 3) & MASK32_10; + longs[longsIdx + 5] = l5; + long l6 = (tmp[tmpIdx + 8] & MASK32_3) << 16; + l6 |= (tmp[tmpIdx + 9] & MASK32_13) << 3; + l6 |= (tmp[tmpIdx + 10] >>> 10) & MASK32_3; + longs[longsIdx + 6] = l6; + long l7 = (tmp[tmpIdx + 10] & MASK32_10) << 9; + l7 |= (tmp[tmpIdx + 11] >>> 4) & MASK32_9; + longs[longsIdx + 7] = l7; + long l8 = (tmp[tmpIdx + 11] & MASK32_4) << 15; + l8 |= (tmp[tmpIdx + 12] & MASK32_13) << 2; + l8 |= (tmp[tmpIdx + 13] >>> 11) & MASK32_2; + longs[longsIdx + 8] = l8; + long l9 = (tmp[tmpIdx + 13] & MASK32_11) << 8; + l9 |= (tmp[tmpIdx + 14] >>> 5) & MASK32_8; + longs[longsIdx + 9] = l9; + long l10 = (tmp[tmpIdx + 14] & MASK32_5) << 14; + l10 |= (tmp[tmpIdx + 15] & MASK32_13) << 1; + l10 |= (tmp[tmpIdx + 16] >>> 12) & MASK32_1; + longs[longsIdx + 10] = l10; + long l11 = (tmp[tmpIdx + 16] & MASK32_12) << 7; + l11 |= (tmp[tmpIdx + 17] >>> 6) & MASK32_7; + longs[longsIdx + 11] = l11; + long l12 = (tmp[tmpIdx + 17] & MASK32_6) << 13; + l12 |= (tmp[tmpIdx + 18] & MASK32_13) << 0; + longs[longsIdx + 12] = l12; + } + } + + private static void decode20(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 160); + shiftLongs(tmp, 160, longs, 0, 12, MASK32_20); + for (int iter = 0, tmpIdx = 0, longsIdx = 160; iter < 32; ++iter, tmpIdx += 5, longsIdx += 3) { + long l0 = (tmp[tmpIdx + 0] & MASK32_12) << 8; + l0 |= (tmp[tmpIdx + 1] >>> 4) & MASK32_8; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK32_4) << 16; + l1 |= (tmp[tmpIdx + 2] & MASK32_12) << 4; + l1 |= (tmp[tmpIdx + 3] >>> 8) & MASK32_4; + longs[longsIdx + 1] = l1; + 
long l2 = (tmp[tmpIdx + 3] & MASK32_8) << 12; + l2 |= (tmp[tmpIdx + 4] & MASK32_12) << 0; + longs[longsIdx + 2] = l2; + } + } + + private static void decode21(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 168); + shiftLongs(tmp, 168, longs, 0, 11, MASK32_21); + for (int iter = 0, tmpIdx = 0, longsIdx = 168; iter < 8; ++iter, tmpIdx += 21, longsIdx += 11) { + long l0 = (tmp[tmpIdx + 0] & MASK32_11) << 10; + l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK32_10; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 1] & MASK32_1) << 20; + l1 |= (tmp[tmpIdx + 2] & MASK32_11) << 9; + l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_9; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 3] & MASK32_2) << 19; + l2 |= (tmp[tmpIdx + 4] & MASK32_11) << 8; + l2 |= (tmp[tmpIdx + 5] >>> 3) & MASK32_8; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 5] & MASK32_3) << 18; + l3 |= (tmp[tmpIdx + 6] & MASK32_11) << 7; + l3 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_7; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 7] & MASK32_4) << 17; + l4 |= (tmp[tmpIdx + 8] & MASK32_11) << 6; + l4 |= (tmp[tmpIdx + 9] >>> 5) & MASK32_6; + longs[longsIdx + 4] = l4; + long l5 = (tmp[tmpIdx + 9] & MASK32_5) << 16; + l5 |= (tmp[tmpIdx + 10] & MASK32_11) << 5; + l5 |= (tmp[tmpIdx + 11] >>> 6) & MASK32_5; + longs[longsIdx + 5] = l5; + long l6 = (tmp[tmpIdx + 11] & MASK32_6) << 15; + l6 |= (tmp[tmpIdx + 12] & MASK32_11) << 4; + l6 |= (tmp[tmpIdx + 13] >>> 7) & MASK32_4; + longs[longsIdx + 6] = l6; + long l7 = (tmp[tmpIdx + 13] & MASK32_7) << 14; + l7 |= (tmp[tmpIdx + 14] & MASK32_11) << 3; + l7 |= (tmp[tmpIdx + 15] >>> 8) & MASK32_3; + longs[longsIdx + 7] = l7; + long l8 = (tmp[tmpIdx + 15] & MASK32_8) << 13; + l8 |= (tmp[tmpIdx + 16] & MASK32_11) << 2; + l8 |= (tmp[tmpIdx + 17] >>> 9) & MASK32_2; + longs[longsIdx + 8] = l8; + long l9 = (tmp[tmpIdx + 17] & MASK32_9) << 12; + l9 |= (tmp[tmpIdx + 18] & MASK32_11) << 1; + l9 |= (tmp[tmpIdx + 19] >>> 10) & MASK32_1; + longs[longsIdx 
+ 9] = l9; + long l10 = (tmp[tmpIdx + 19] & MASK32_10) << 11; + l10 |= (tmp[tmpIdx + 20] & MASK32_11) << 0; + longs[longsIdx + 10] = l10; + } + } + + private static void decode22(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 176); + shiftLongs(tmp, 176, longs, 0, 10, MASK32_22); + for (int iter = 0, tmpIdx = 0, longsIdx = 176; iter < 16; ++iter, tmpIdx += 11, longsIdx += 5) { + long l0 = (tmp[tmpIdx + 0] & MASK32_10) << 12; + l0 |= (tmp[tmpIdx + 1] & MASK32_10) << 2; + l0 |= (tmp[tmpIdx + 2] >>> 8) & MASK32_2; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 2] & MASK32_8) << 14; + l1 |= (tmp[tmpIdx + 3] & MASK32_10) << 4; + l1 |= (tmp[tmpIdx + 4] >>> 6) & MASK32_4; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 4] & MASK32_6) << 16; + l2 |= (tmp[tmpIdx + 5] & MASK32_10) << 6; + l2 |= (tmp[tmpIdx + 6] >>> 4) & MASK32_6; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 6] & MASK32_4) << 18; + l3 |= (tmp[tmpIdx + 7] & MASK32_10) << 8; + l3 |= (tmp[tmpIdx + 8] >>> 2) & MASK32_8; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 8] & MASK32_2) << 20; + l4 |= (tmp[tmpIdx + 9] & MASK32_10) << 10; + l4 |= (tmp[tmpIdx + 10] & MASK32_10) << 0; + longs[longsIdx + 4] = l4; + } + } + + private static void decode23(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 184); + shiftLongs(tmp, 184, longs, 0, 9, MASK32_23); + for (int iter = 0, tmpIdx = 0, longsIdx = 184; iter < 8; ++iter, tmpIdx += 23, longsIdx += 9) { + long l0 = (tmp[tmpIdx + 0] & MASK32_9) << 14; + l0 |= (tmp[tmpIdx + 1] & MASK32_9) << 5; + l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK32_5; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 2] & MASK32_4) << 19; + l1 |= (tmp[tmpIdx + 3] & MASK32_9) << 10; + l1 |= (tmp[tmpIdx + 4] & MASK32_9) << 1; + l1 |= (tmp[tmpIdx + 5] >>> 8) & MASK32_1; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 5] & MASK32_8) << 15; + l2 |= (tmp[tmpIdx + 6] & MASK32_9) << 6; + l2 |= (tmp[tmpIdx + 
7] >>> 3) & MASK32_6; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 7] & MASK32_3) << 20; + l3 |= (tmp[tmpIdx + 8] & MASK32_9) << 11; + l3 |= (tmp[tmpIdx + 9] & MASK32_9) << 2; + l3 |= (tmp[tmpIdx + 10] >>> 7) & MASK32_2; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 10] & MASK32_7) << 16; + l4 |= (tmp[tmpIdx + 11] & MASK32_9) << 7; + l4 |= (tmp[tmpIdx + 12] >>> 2) & MASK32_7; + longs[longsIdx + 4] = l4; + long l5 = (tmp[tmpIdx + 12] & MASK32_2) << 21; + l5 |= (tmp[tmpIdx + 13] & MASK32_9) << 12; + l5 |= (tmp[tmpIdx + 14] & MASK32_9) << 3; + l5 |= (tmp[tmpIdx + 15] >>> 6) & MASK32_3; + longs[longsIdx + 5] = l5; + long l6 = (tmp[tmpIdx + 15] & MASK32_6) << 17; + l6 |= (tmp[tmpIdx + 16] & MASK32_9) << 8; + l6 |= (tmp[tmpIdx + 17] >>> 1) & MASK32_8; + longs[longsIdx + 6] = l6; + long l7 = (tmp[tmpIdx + 17] & MASK32_1) << 22; + l7 |= (tmp[tmpIdx + 18] & MASK32_9) << 13; + l7 |= (tmp[tmpIdx + 19] & MASK32_9) << 4; + l7 |= (tmp[tmpIdx + 20] >>> 5) & MASK32_4; + longs[longsIdx + 7] = l7; + long l8 = (tmp[tmpIdx + 20] & MASK32_5) << 18; + l8 |= (tmp[tmpIdx + 21] & MASK32_9) << 9; + l8 |= (tmp[tmpIdx + 22] & MASK32_9) << 0; + longs[longsIdx + 8] = l8; + } + } + + private static void decode24(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 192); + shiftLongs(tmp, 192, longs, 0, 8, MASK32_24); + shiftLongs(tmp, 192, tmp, 0, 0, MASK32_8); + for (int iter = 0, tmpIdx = 0, longsIdx = 192; iter < 64; ++iter, tmpIdx += 3, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 16; + l0 |= tmp[tmpIdx + 1] << 8; + l0 |= tmp[tmpIdx + 2] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode25(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 200); + shiftLongs(tmp, 200, longs, 0, 7, MASK32_25); + for (int iter = 0, tmpIdx = 0, longsIdx = 200; iter < 8; ++iter, tmpIdx += 25, longsIdx += 7) { + long l0 = (tmp[tmpIdx + 0] & MASK32_7) << 18; + l0 |= (tmp[tmpIdx + 1] & MASK32_7) << 11; + l0 |= 
(tmp[tmpIdx + 2] & MASK32_7) << 4; + l0 |= (tmp[tmpIdx + 3] >>> 3) & MASK32_4; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 3] & MASK32_3) << 22; + l1 |= (tmp[tmpIdx + 4] & MASK32_7) << 15; + l1 |= (tmp[tmpIdx + 5] & MASK32_7) << 8; + l1 |= (tmp[tmpIdx + 6] & MASK32_7) << 1; + l1 |= (tmp[tmpIdx + 7] >>> 6) & MASK32_1; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 7] & MASK32_6) << 19; + l2 |= (tmp[tmpIdx + 8] & MASK32_7) << 12; + l2 |= (tmp[tmpIdx + 9] & MASK32_7) << 5; + l2 |= (tmp[tmpIdx + 10] >>> 2) & MASK32_5; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 10] & MASK32_2) << 23; + l3 |= (tmp[tmpIdx + 11] & MASK32_7) << 16; + l3 |= (tmp[tmpIdx + 12] & MASK32_7) << 9; + l3 |= (tmp[tmpIdx + 13] & MASK32_7) << 2; + l3 |= (tmp[tmpIdx + 14] >>> 5) & MASK32_2; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 14] & MASK32_5) << 20; + l4 |= (tmp[tmpIdx + 15] & MASK32_7) << 13; + l4 |= (tmp[tmpIdx + 16] & MASK32_7) << 6; + l4 |= (tmp[tmpIdx + 17] >>> 1) & MASK32_6; + longs[longsIdx + 4] = l4; + long l5 = (tmp[tmpIdx + 17] & MASK32_1) << 24; + l5 |= (tmp[tmpIdx + 18] & MASK32_7) << 17; + l5 |= (tmp[tmpIdx + 19] & MASK32_7) << 10; + l5 |= (tmp[tmpIdx + 20] & MASK32_7) << 3; + l5 |= (tmp[tmpIdx + 21] >>> 4) & MASK32_3; + longs[longsIdx + 5] = l5; + long l6 = (tmp[tmpIdx + 21] & MASK32_4) << 21; + l6 |= (tmp[tmpIdx + 22] & MASK32_7) << 14; + l6 |= (tmp[tmpIdx + 23] & MASK32_7) << 7; + l6 |= (tmp[tmpIdx + 24] & MASK32_7) << 0; + longs[longsIdx + 6] = l6; + } + } + + private static void decode26(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 208); + shiftLongs(tmp, 208, longs, 0, 6, MASK32_26); + for (int iter = 0, tmpIdx = 0, longsIdx = 208; iter < 16; ++iter, tmpIdx += 13, longsIdx += 3) { + long l0 = (tmp[tmpIdx + 0] & MASK32_6) << 20; + l0 |= (tmp[tmpIdx + 1] & MASK32_6) << 14; + l0 |= (tmp[tmpIdx + 2] & MASK32_6) << 8; + l0 |= (tmp[tmpIdx + 3] & MASK32_6) << 2; + l0 |= (tmp[tmpIdx + 4] >>> 4) & MASK32_2; + 
longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 4] & MASK32_4) << 22; + l1 |= (tmp[tmpIdx + 5] & MASK32_6) << 16; + l1 |= (tmp[tmpIdx + 6] & MASK32_6) << 10; + l1 |= (tmp[tmpIdx + 7] & MASK32_6) << 4; + l1 |= (tmp[tmpIdx + 8] >>> 2) & MASK32_4; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 8] & MASK32_2) << 24; + l2 |= (tmp[tmpIdx + 9] & MASK32_6) << 18; + l2 |= (tmp[tmpIdx + 10] & MASK32_6) << 12; + l2 |= (tmp[tmpIdx + 11] & MASK32_6) << 6; + l2 |= (tmp[tmpIdx + 12] & MASK32_6) << 0; + longs[longsIdx + 2] = l2; + } + } + + private static void decode27(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 216); + shiftLongs(tmp, 216, longs, 0, 5, MASK32_27); + for (int iter = 0, tmpIdx = 0, longsIdx = 216; iter < 8; ++iter, tmpIdx += 27, longsIdx += 5) { + long l0 = (tmp[tmpIdx + 0] & MASK32_5) << 22; + l0 |= (tmp[tmpIdx + 1] & MASK32_5) << 17; + l0 |= (tmp[tmpIdx + 2] & MASK32_5) << 12; + l0 |= (tmp[tmpIdx + 3] & MASK32_5) << 7; + l0 |= (tmp[tmpIdx + 4] & MASK32_5) << 2; + l0 |= (tmp[tmpIdx + 5] >>> 3) & MASK32_2; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 5] & MASK32_3) << 24; + l1 |= (tmp[tmpIdx + 6] & MASK32_5) << 19; + l1 |= (tmp[tmpIdx + 7] & MASK32_5) << 14; + l1 |= (tmp[tmpIdx + 8] & MASK32_5) << 9; + l1 |= (tmp[tmpIdx + 9] & MASK32_5) << 4; + l1 |= (tmp[tmpIdx + 10] >>> 1) & MASK32_4; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 10] & MASK32_1) << 26; + l2 |= (tmp[tmpIdx + 11] & MASK32_5) << 21; + l2 |= (tmp[tmpIdx + 12] & MASK32_5) << 16; + l2 |= (tmp[tmpIdx + 13] & MASK32_5) << 11; + l2 |= (tmp[tmpIdx + 14] & MASK32_5) << 6; + l2 |= (tmp[tmpIdx + 15] & MASK32_5) << 1; + l2 |= (tmp[tmpIdx + 16] >>> 4) & MASK32_1; + longs[longsIdx + 2] = l2; + long l3 = (tmp[tmpIdx + 16] & MASK32_4) << 23; + l3 |= (tmp[tmpIdx + 17] & MASK32_5) << 18; + l3 |= (tmp[tmpIdx + 18] & MASK32_5) << 13; + l3 |= (tmp[tmpIdx + 19] & MASK32_5) << 8; + l3 |= (tmp[tmpIdx + 20] & MASK32_5) << 3; + l3 |= (tmp[tmpIdx + 21] >>> 
2) & MASK32_3; + longs[longsIdx + 3] = l3; + long l4 = (tmp[tmpIdx + 21] & MASK32_2) << 25; + l4 |= (tmp[tmpIdx + 22] & MASK32_5) << 20; + l4 |= (tmp[tmpIdx + 23] & MASK32_5) << 15; + l4 |= (tmp[tmpIdx + 24] & MASK32_5) << 10; + l4 |= (tmp[tmpIdx + 25] & MASK32_5) << 5; + l4 |= (tmp[tmpIdx + 26] & MASK32_5) << 0; + longs[longsIdx + 4] = l4; + } + } + + private static void decode28(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 224); + shiftLongs(tmp, 224, longs, 0, 4, MASK32_28); + shiftLongs(tmp, 224, tmp, 0, 0, MASK32_4); + for (int iter = 0, tmpIdx = 0, longsIdx = 224; iter < 32; ++iter, tmpIdx += 7, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 24; + l0 |= tmp[tmpIdx + 1] << 20; + l0 |= tmp[tmpIdx + 2] << 16; + l0 |= tmp[tmpIdx + 3] << 12; + l0 |= tmp[tmpIdx + 4] << 8; + l0 |= tmp[tmpIdx + 5] << 4; + l0 |= tmp[tmpIdx + 6] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode29(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 232); + shiftLongs(tmp, 232, longs, 0, 3, MASK32_29); + for (int iter = 0, tmpIdx = 0, longsIdx = 232; iter < 8; ++iter, tmpIdx += 29, longsIdx += 3) { + long l0 = (tmp[tmpIdx + 0] & MASK32_3) << 26; + l0 |= (tmp[tmpIdx + 1] & MASK32_3) << 23; + l0 |= (tmp[tmpIdx + 2] & MASK32_3) << 20; + l0 |= (tmp[tmpIdx + 3] & MASK32_3) << 17; + l0 |= (tmp[tmpIdx + 4] & MASK32_3) << 14; + l0 |= (tmp[tmpIdx + 5] & MASK32_3) << 11; + l0 |= (tmp[tmpIdx + 6] & MASK32_3) << 8; + l0 |= (tmp[tmpIdx + 7] & MASK32_3) << 5; + l0 |= (tmp[tmpIdx + 8] & MASK32_3) << 2; + l0 |= (tmp[tmpIdx + 9] >>> 1) & MASK32_2; + longs[longsIdx + 0] = l0; + long l1 = (tmp[tmpIdx + 9] & MASK32_1) << 28; + l1 |= (tmp[tmpIdx + 10] & MASK32_3) << 25; + l1 |= (tmp[tmpIdx + 11] & MASK32_3) << 22; + l1 |= (tmp[tmpIdx + 12] & MASK32_3) << 19; + l1 |= (tmp[tmpIdx + 13] & MASK32_3) << 16; + l1 |= (tmp[tmpIdx + 14] & MASK32_3) << 13; + l1 |= (tmp[tmpIdx + 15] & MASK32_3) << 10; + l1 |= (tmp[tmpIdx + 
16] & MASK32_3) << 7; + l1 |= (tmp[tmpIdx + 17] & MASK32_3) << 4; + l1 |= (tmp[tmpIdx + 18] & MASK32_3) << 1; + l1 |= (tmp[tmpIdx + 19] >>> 2) & MASK32_1; + longs[longsIdx + 1] = l1; + long l2 = (tmp[tmpIdx + 19] & MASK32_2) << 27; + l2 |= (tmp[tmpIdx + 20] & MASK32_3) << 24; + l2 |= (tmp[tmpIdx + 21] & MASK32_3) << 21; + l2 |= (tmp[tmpIdx + 22] & MASK32_3) << 18; + l2 |= (tmp[tmpIdx + 23] & MASK32_3) << 15; + l2 |= (tmp[tmpIdx + 24] & MASK32_3) << 12; + l2 |= (tmp[tmpIdx + 25] & MASK32_3) << 9; + l2 |= (tmp[tmpIdx + 26] & MASK32_3) << 6; + l2 |= (tmp[tmpIdx + 27] & MASK32_3) << 3; + l2 |= (tmp[tmpIdx + 28] & MASK32_3) << 0; + longs[longsIdx + 2] = l2; + } + } + + private static void decode30(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 240); + shiftLongs(tmp, 240, longs, 0, 2, MASK32_30); + shiftLongs(tmp, 240, tmp, 0, 0, MASK32_2); + for (int iter = 0, tmpIdx = 0, longsIdx = 240; iter < 16; ++iter, tmpIdx += 15, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 28; + l0 |= tmp[tmpIdx + 1] << 26; + l0 |= tmp[tmpIdx + 2] << 24; + l0 |= tmp[tmpIdx + 3] << 22; + l0 |= tmp[tmpIdx + 4] << 20; + l0 |= tmp[tmpIdx + 5] << 18; + l0 |= tmp[tmpIdx + 6] << 16; + l0 |= tmp[tmpIdx + 7] << 14; + l0 |= tmp[tmpIdx + 8] << 12; + l0 |= tmp[tmpIdx + 9] << 10; + l0 |= tmp[tmpIdx + 10] << 8; + l0 |= tmp[tmpIdx + 11] << 6; + l0 |= tmp[tmpIdx + 12] << 4; + l0 |= tmp[tmpIdx + 13] << 2; + l0 |= tmp[tmpIdx + 14] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode31(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(tmp, 0, 248); + shiftLongs(tmp, 248, longs, 0, 1, MASK32_31); + shiftLongs(tmp, 248, tmp, 0, 0, MASK32_1); + for (int iter = 0, tmpIdx = 0, longsIdx = 248; iter < 8; ++iter, tmpIdx += 31, longsIdx += 1) { + long l0 = tmp[tmpIdx + 0] << 30; + l0 |= tmp[tmpIdx + 1] << 29; + l0 |= tmp[tmpIdx + 2] << 28; + l0 |= tmp[tmpIdx + 3] << 27; + l0 |= tmp[tmpIdx + 4] << 26; + l0 |= tmp[tmpIdx + 5] << 25; + l0 
|= tmp[tmpIdx + 6] << 24; + l0 |= tmp[tmpIdx + 7] << 23; + l0 |= tmp[tmpIdx + 8] << 22; + l0 |= tmp[tmpIdx + 9] << 21; + l0 |= tmp[tmpIdx + 10] << 20; + l0 |= tmp[tmpIdx + 11] << 19; + l0 |= tmp[tmpIdx + 12] << 18; + l0 |= tmp[tmpIdx + 13] << 17; + l0 |= tmp[tmpIdx + 14] << 16; + l0 |= tmp[tmpIdx + 15] << 15; + l0 |= tmp[tmpIdx + 16] << 14; + l0 |= tmp[tmpIdx + 17] << 13; + l0 |= tmp[tmpIdx + 18] << 12; + l0 |= tmp[tmpIdx + 19] << 11; + l0 |= tmp[tmpIdx + 20] << 10; + l0 |= tmp[tmpIdx + 21] << 9; + l0 |= tmp[tmpIdx + 22] << 8; + l0 |= tmp[tmpIdx + 23] << 7; + l0 |= tmp[tmpIdx + 24] << 6; + l0 |= tmp[tmpIdx + 25] << 5; + l0 |= tmp[tmpIdx + 26] << 4; + l0 |= tmp[tmpIdx + 27] << 3; + l0 |= tmp[tmpIdx + 28] << 2; + l0 |= tmp[tmpIdx + 29] << 1; + l0 |= tmp[tmpIdx + 30] << 0; + longs[longsIdx + 0] = l0; + } + } + + private static void decode32(DataInput in, long[] tmp, long[] longs) throws IOException { + in.readLongs(longs, 0, 256); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py b/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py new file mode 100644 index 000000000000..c170d03e9c7d --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py @@ -0,0 +1,452 @@ +#! /usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from fractions import gcd + +"""Code generation for ForUtil.java""" + +BLOCK_SIZE = 512 +MAX_SPECIALIZED_BITS_PER_VALUE = 32 +OUTPUT_FILE = "ForUtil.java" +PRIMITIVE_SIZE = [8, 16, 32] +HEADER = """// This file has been automatically generated, DO NOT EDIT + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util.bkd; + +import java.io.IOException; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.MathUtil; + +// Inspired from https://fulmicoton.com/posts/bitpacking/ +// Encodes multiple integers in a long to get SIMD-like speedups. 
+// If bitsPerValue <= 8 then we pack 8 ints per long +// else if bitsPerValue <= 16 we pack 4 ints per long +// else we pack 2 ints per long +final class ForUtil { + + static final int BLOCK_SIZE = """ + str(BLOCK_SIZE) + """; + private static final int BLOCK_SIZE_DIV_2 = BLOCK_SIZE >> 1; + private static final int BLOCK_SIZE_DIV_4 = BLOCK_SIZE >> 2; + private static final int BLOCK_SIZE_DIV_8 = BLOCK_SIZE >> 3; + private static final int BLOCK_SIZE_DIV_64 = BLOCK_SIZE >> 6; + private static final int BLOCK_SIZE_DIV_8_MUL_1 = BLOCK_SIZE_DIV_8; + private static final int BLOCK_SIZE_DIV_8_MUL_2 = BLOCK_SIZE_DIV_8 * 2; + private static final int BLOCK_SIZE_DIV_8_MUL_3 = BLOCK_SIZE_DIV_8 * 3; + private static final int BLOCK_SIZE_DIV_8_MUL_4 = BLOCK_SIZE_DIV_8 * 4; + private static final int BLOCK_SIZE_DIV_8_MUL_5 = BLOCK_SIZE_DIV_8 * 5; + private static final int BLOCK_SIZE_DIV_8_MUL_6 = BLOCK_SIZE_DIV_8 * 6; + private static final int BLOCK_SIZE_DIV_8_MUL_7 = BLOCK_SIZE_DIV_8 * 7; + private static final int BLOCK_SIZE_LOG2 = MathUtil.log(BLOCK_SIZE, 2); + + private static long expandMask32(long mask32) { + return mask32 | (mask32 << 32); + } + + private static long expandMask16(long mask16) { + return expandMask32(mask16 | (mask16 << 16)); + } + + private static long expandMask8(long mask8) { + return expandMask16(mask8 | (mask8 << 8)); + } + + private static long mask32(int bitsPerValue) { + return expandMask32((1L << bitsPerValue) - 1); + } + + private static long mask16(int bitsPerValue) { + return expandMask16((1L << bitsPerValue) - 1); + } + + private static long mask8(int bitsPerValue) { + return expandMask8((1L << bitsPerValue) - 1); + } + + private static void expand8(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { + long l = arr[i]; + arr[i] = (l >>> 56) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 48) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 40) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = (l >>> 32) & 0xFFL; + 
arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 24) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_5 + i] = (l >>> 16) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = (l >>> 8) & 0xFFL; + arr[BLOCK_SIZE_DIV_8_MUL_7 + i] = l & 0xFFL; + } + } + + private static void expand8To32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { + long l = arr[i]; + arr[i] = (l >>> 24) & 0x000000FF000000FFL; + arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 16) & 0x000000FF000000FFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 8) & 0x000000FF000000FFL; + arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = l & 0x000000FF000000FFL; + } + } + + private static void collapse8(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { + arr[i] = + (arr[i] << 56) + | (arr[BLOCK_SIZE_DIV_8_MUL_1 + i] << 48) + | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 40) + | (arr[BLOCK_SIZE_DIV_8_MUL_3 + i] << 32) + | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 24) + | (arr[BLOCK_SIZE_DIV_8_MUL_5 + i] << 16) + | (arr[BLOCK_SIZE_DIV_8_MUL_6 + i] << 8) + | arr[BLOCK_SIZE_DIV_8_MUL_7 + i]; + } + } + + private static void expand16(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { + long l = arr[i]; + arr[i] = (l >>> 48) & 0xFFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 32) & 0xFFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 16) & 0xFFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = l & 0xFFFFL; + } + } + + private static void expand16To32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { /* 16-bit packing fills BLOCK_SIZE / 4 longs; the second lane starts at BLOCK_SIZE_DIV_8_MUL_2 == BLOCK_SIZE / 4, so a larger bound would re-read slots already expanded */ + long l = arr[i]; + arr[i] = (l >>> 16) & 0x0000FFFF0000FFFFL; + arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = l & 0x0000FFFF0000FFFFL; + } + } + + private static void collapse16(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { + arr[i] = (arr[i] << 48) | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 32) | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 16) | arr[BLOCK_SIZE_DIV_8_MUL_6 + i]; + } + } + + private static void expand32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { + long l = arr[i]; + arr[i] = l >>> 32; + arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = l & 
0xFFFFFFFFL; + } + } + + private static void collapse32(long[] arr) { + for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { + arr[i] = (arr[i] << 32) | arr[BLOCK_SIZE_DIV_8_MUL_4 + i]; + } + } + + private final long[] tmp = new long[BLOCK_SIZE_DIV_2]; + + /** Encode 512 integers from {@code longs} into {@code out}. */ + void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException { + final int nextPrimitive; + final int numLongs; + if (bitsPerValue <= 8) { + nextPrimitive = 8; + numLongs = BLOCK_SIZE_DIV_8; + collapse8(longs); + } else if (bitsPerValue <= 16) { + nextPrimitive = 16; + numLongs = BLOCK_SIZE_DIV_4; + collapse16(longs); + } else { + nextPrimitive = 32; + numLongs = BLOCK_SIZE_DIV_2; + collapse32(longs); + } + + final int numLongsPerShift = bitsPerValue * BLOCK_SIZE_DIV_64; + int idx = 0; + int shift = nextPrimitive - bitsPerValue; + for (int i = 0; i < numLongsPerShift; ++i) { + tmp[i] = longs[idx++] << shift; + } + for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) { + for (int i = 0; i < numLongsPerShift; ++i) { + tmp[i] |= longs[idx++] << shift; + } + } + + final int remainingBitsPerLong = shift + bitsPerValue; + final long maskRemainingBitsPerLong; + if (nextPrimitive == 8) { + maskRemainingBitsPerLong = MASKS8[remainingBitsPerLong]; + } else if (nextPrimitive == 16) { + maskRemainingBitsPerLong = MASKS16[remainingBitsPerLong]; + } else { + maskRemainingBitsPerLong = MASKS32[remainingBitsPerLong]; + } + + int tmpIdx = 0; + int remainingBitsPerValue = bitsPerValue; + while (idx < numLongs) { + if (remainingBitsPerValue >= remainingBitsPerLong) { + remainingBitsPerValue -= remainingBitsPerLong; + tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & maskRemainingBitsPerLong; + if (remainingBitsPerValue == 0) { + idx++; + remainingBitsPerValue = bitsPerValue; + } + } else { + final long mask1, mask2; + if (nextPrimitive == 8) { + mask1 = MASKS8[remainingBitsPerValue]; + mask2 = MASKS8[remainingBitsPerLong - 
remainingBitsPerValue]; + } else if (nextPrimitive == 16) { + mask1 = MASKS16[remainingBitsPerValue]; + mask2 = MASKS16[remainingBitsPerLong - remainingBitsPerValue]; + } else { + mask1 = MASKS32[remainingBitsPerValue]; + mask2 = MASKS32[remainingBitsPerLong - remainingBitsPerValue]; + } + tmp[tmpIdx] |= (longs[idx++] & mask1) << (remainingBitsPerLong - remainingBitsPerValue); + remainingBitsPerValue = bitsPerValue - remainingBitsPerLong + remainingBitsPerValue; + tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & mask2; + } + } + + for (int i = 0; i < numLongsPerShift; ++i) { + out.writeLong(tmp[i]); + } + } + + /** Number of bytes required to encode 512 integers of {@code bitsPerValue} bits per value. */ + int numBytes(int bitsPerValue) { + return bitsPerValue << (BLOCK_SIZE_LOG2 - 3); + } + + private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) + throws IOException { + final int numLongs = bitsPerValue << 1; + in.readLongs(tmp, 0, numLongs); + final long mask = MASKS32[bitsPerValue]; + int longsIdx = 0; + int shift = 32 - bitsPerValue; + for (; shift >= 0; shift -= bitsPerValue) { + shiftLongs(tmp, numLongs, longs, longsIdx, shift, mask); + longsIdx += numLongs; + } + final int remainingBitsPerLong = shift + bitsPerValue; + final long mask32RemainingBitsPerLong = MASKS32[remainingBitsPerLong]; + int tmpIdx = 0; + int remainingBits = remainingBitsPerLong; + for (; longsIdx < BLOCK_SIZE_DIV_2; ++longsIdx) { + int b = bitsPerValue - remainingBits; + long l = (tmp[tmpIdx++] & MASKS32[remainingBits]) << b; + while (b >= remainingBitsPerLong) { + b -= remainingBitsPerLong; + l |= (tmp[tmpIdx++] & mask32RemainingBitsPerLong) << b; + } + if (b > 0) { + l |= (tmp[tmpIdx] >>> (remainingBitsPerLong - b)) & MASKS32[b]; + remainingBits = remainingBitsPerLong - b; + } else { + remainingBits = remainingBitsPerLong; + } + longs[longsIdx] = l; + } + } + + /** + * The pattern that this shiftLongs method applies is recognized by the C2 
compiler, which + * generates SIMD instructions for it in order to shift multiple longs at once. + */ + private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, long mask) { + for (int i = 0; i < count; ++i) { + b[bi + i] = (a[i] >>> shift) & mask; + } + } + +""" + +def writeRemainderWithSIMDOptimize(bpv, next_primitive, remaining_bits_per_long, o, num_values, f): + iteration = 1 + num_longs = bpv * num_values / remaining_bits_per_long + while num_longs % 2 == 0 and num_values % 2 == 0: + num_longs /= 2 + num_values /= 2 + iteration *= 2 + + f.write(' shiftLongs(tmp, %d, tmp, 0, 0, MASK%d_%d);\n' % (iteration * num_longs, next_primitive, remaining_bits_per_long)) + f.write(' for (int iter = 0, tmpIdx = 0, longsIdx = %d; iter < %d; ++iter, tmpIdx += %d, longsIdx += %d) {\n' %(o, iteration, num_longs, num_values)) + tmp_idx = 0 + b = bpv + b -= remaining_bits_per_long + f.write(' long l0 = tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b)) + tmp_idx += 1 + while b >= remaining_bits_per_long: + b -= remaining_bits_per_long + f.write(' l0 |= tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b)) + tmp_idx += 1 + f.write(' longs[longsIdx + 0] = l0;\n') + f.write(' }\n') + + +def writeRemainder(bpv, next_primitive, remaining_bits_per_long, o, num_values, f): + iteration = 1 + num_longs = bpv * num_values / remaining_bits_per_long + while num_longs % 2 == 0 and num_values % 2 == 0: + num_longs /= 2 + num_values /= 2 + iteration *= 2 + f.write(' for (int iter = 0, tmpIdx = 0, longsIdx = %d; iter < %d; ++iter, tmpIdx += %d, longsIdx += %d) {\n' %(o, iteration, num_longs, num_values)) + i = 0 + remaining_bits = 0 + tmp_idx = 0 + for i in range(num_values): + b = bpv + if remaining_bits == 0: + b -= remaining_bits_per_long + f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b)) + else: + b -= remaining_bits + f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, 
remaining_bits, b)) + tmp_idx += 1 + while b >= remaining_bits_per_long: + b -= remaining_bits_per_long + f.write(' l%d |= (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b)) + tmp_idx += 1 + if b > 0: + f.write(' l%d |= (tmp[tmpIdx + %d] >>> %d) & MASK%d_%d;\n' %(i, tmp_idx, remaining_bits_per_long-b, next_primitive, b)) + remaining_bits = remaining_bits_per_long-b + f.write(' longs[longsIdx + %d] = l%d;\n' %(i, i)) + f.write(' }\n') + + +def writeDecode(bpv, f): + next_primitive = 32 + if bpv <= 8: + next_primitive = 8 + elif bpv <= 16: + next_primitive = 16 + f.write(' private static void decode%d(DataInput in, long[] tmp, long[] longs) throws IOException {\n' %bpv) + num_values_per_long = 64 / next_primitive + if bpv == next_primitive: + f.write(' in.readLongs(longs, 0, %d);\n' %(bpv*(BLOCK_SIZE / 64))) + else: + f.write(' in.readLongs(tmp, 0, %d);\n' %(bpv*(BLOCK_SIZE / 64))) + shift = next_primitive - bpv + o = 0 + while shift >= 0: + f.write(' shiftLongs(tmp, %d, longs, %d, %d, MASK%d_%d);\n' %(bpv*(BLOCK_SIZE / 64), o, shift, next_primitive, bpv)) + o += bpv*(BLOCK_SIZE / 64) + shift -= bpv + if shift + bpv > 0: + if bpv % (next_primitive % bpv) == 0: + writeRemainderWithSIMDOptimize(bpv, next_primitive, shift + bpv, o, BLOCK_SIZE/num_values_per_long - o, f) + else: + writeRemainder(bpv, next_primitive, shift + bpv, o, BLOCK_SIZE/num_values_per_long - o, f) + f.write(' }\n') + + +if __name__ == '__main__': + f = open(OUTPUT_FILE, 'w') + f.write(HEADER) + for primitive_size in PRIMITIVE_SIZE: + f.write(' private static final long[] MASKS%d = new long[%d];\n' %(primitive_size, primitive_size)) + f.write('\n') + f.write(' static {\n') + for primitive_size in PRIMITIVE_SIZE: + f.write(' for (int i = 0; i < %d; ++i) {\n' %primitive_size) + f.write(' MASKS%d[i] = mask%d(i);\n' %(primitive_size, primitive_size)) + f.write(' }\n') + f.write(' }') + f.write(""" + // mark values in array as final longs to avoid the cost 
of reading array, arrays should only be + // used when the idx is a variable +""") + for primitive_size in PRIMITIVE_SIZE: + for bpv in range(1, min(MAX_SPECIALIZED_BITS_PER_VALUE + 1, primitive_size)): + if bpv * 2 != primitive_size or primitive_size == 8: + f.write(' private static final long MASK%d_%d = MASKS%d[%d];\n' %(primitive_size, bpv, primitive_size, bpv)) + + f.write(""" + /** Decode 512 integers into {@code longs}. */ + void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { + switch (bitsPerValue) { +""") + for bpv in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1): + next_primitive = 32 + if bpv <= 8: + next_primitive = 8 + elif bpv <= 16: + next_primitive = 16 + f.write(' case %d:\n' %bpv) + f.write(' decode%d(in, tmp, longs);\n' %bpv) + f.write(' expand%d(longs);\n' %next_primitive) + f.write(' break;\n') + f.write(' default:\n') + f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n') + f.write(' expand32(longs);\n') + f.write(' break;\n') + f.write(' }\n') + f.write(' }\n') + + f.write(""" + /** + * Decodes 512 integers into 64 {@code longs} such that each long contains two values, each + * represented with 32 bits. Values [0..63] are encoded in the high-order bits of {@code longs} + * [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This + * representation may allow subsequent operations to be performed on two values at a time. 
+ */ + void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException { + switch (bitsPerValue) { +""") + for bpv in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1): + next_primitive = 32 + if bpv <= 8: + next_primitive = 8 + elif bpv <= 16: + next_primitive = 16 + f.write(' case %d:\n' %bpv) + f.write(' decode%d(in, tmp, longs);\n' %bpv) + if next_primitive <= 16: + f.write(' expand%dTo32(longs);\n' %next_primitive) + f.write(' break;\n') + f.write(' default:\n') + f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n') + f.write(' break;\n') + f.write(' }\n') + f.write(' }\n') + + f.write('\n') + for i in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1): + writeDecode(i, f) + if i < MAX_SPECIALIZED_BITS_PER_VALUE: + f.write('\n') + + f.write('}\n') diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java index 45a491baa570..27c4309c0136 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java @@ -21,7 +21,6 @@ import java.util.HashSet; import java.util.Set; import java.util.function.IntSupplier; - import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.store.Directory; @@ -116,7 +115,7 @@ private void test(Directory dir, int[] ints) throws Exception { try (IndexInput in = dir.openInput("tmp", IOContext.READONCE)) { long[] read = new long[ints.length]; docIdsWriter.readInts(in, ints.length, read); - assertArrayEquals(ints, Arrays.stream(read).mapToInt(i -> (int)i).toArray()); + assertArrayEquals(ints, Arrays.stream(read).mapToInt(i -> (int) i).toArray()); assertEquals(len, in.getFilePointer()); } try (IndexInput in = dir.openInput("tmp", IOContext.READONCE)) { From 05d7ba2e645979295534da256e74004c82ec7300 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 15 Dec 2021 02:14:00 
+0800 Subject: [PATCH 05/34] name codes --- .../apache/lucene/util/bkd/DocIdsWriter.java | 57 +++++++++++-------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 895fd6bf56d9..ab23d3333fa9 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -34,6 +34,15 @@ class DocIdsWriter { } } + private static final byte CONTINUOUS_IDS = (byte) -2; + private static final byte BITSET_IDS = (byte) -1; + private static final byte DELTA_VINT = (byte) 0; + private static final byte DELTA_FOR_UTIL = (byte) 1; + private static final byte BPV_24 = (byte) 24; + private static final byte BPV_24_FOR_UTIL = (byte) 25; + private static final byte BPV_32 = (byte) 32; + private static final byte BPV_32_FOR_UTIL = (byte) 33; + private final ForUtil forUtil = new ForUtil(); private final long[] scratch; @@ -61,7 +70,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx if (strictlySorted) { if (min2max == count) { // continuous ids, typically happens when segment is sorted - out.writeByte((byte) -2); + out.writeByte(CONTINUOUS_IDS); out.writeVInt(docIds[start]); return; } else if (min2max <= (count << 4)) { @@ -69,7 +78,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx // Only trigger bitset optimization when max - min + 1 <= 16 * count in order to avoid // expanding too much storage. // A field with lower cardinality will have higher probability to trigger this optimization. 
- out.writeByte((byte) -1); + out.writeByte(BITSET_IDS); writeIdsAsBitSet(docIds, start, count, out); return; } @@ -78,7 +87,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { if (sorted) { - out.writeByte((byte) 1); + out.writeByte(DELTA_FOR_UTIL); long max = 0; long[] delta = new long[count]; int previous = docIds[start]; @@ -101,10 +110,10 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } int bitsPerValue = PackedInts.bitsRequired(max); if (bitsPerValue <= 24) { - out.writeByte((byte) 25); + out.writeByte(BPV_24_FOR_UTIL); forUtil.encode(longs, 24, out); } else { - out.writeByte((byte) 33); + out.writeByte(BPV_32_FOR_UTIL); forUtil.encode(longs, 32, out); } } @@ -112,7 +121,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } if (sorted) { - out.writeByte((byte) 0); + out.writeByte(DELTA_VINT); int previous = 0; for (int i = 0; i < count; ++i) { int doc = docIds[start + i]; @@ -125,7 +134,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx max |= Integer.toUnsignedLong(docIds[start + i]); } if (max <= 0xffffff) { - out.writeByte((byte) 24); + out.writeByte(BPV_24); // write them the same way we are reading them. 
int i; for (i = 0; i < count - 7; i += 8) { @@ -153,7 +162,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx out.writeByte((byte) docIds[start + i]); } } else { - out.writeByte((byte) 32); + out.writeByte(BPV_32); for (int i = 0; i < count; ++i) { out.writeInt(docIds[start + i]); } @@ -198,28 +207,28 @@ private static void writeIdsAsBitSet(int[] docIds, int start, int count, DataOut void readInts(IndexInput in, int count, long[] docIDs) throws IOException { final int bpv = in.readByte(); switch (bpv) { - case -2: + case CONTINUOUS_IDS: readContinuousIds(in, count, docIDs); break; - case -1: + case BITSET_IDS: readBitSet(in, count, docIDs); break; - case 0: + case DELTA_VINT: readDeltaVInts(in, count, docIDs); break; - case 1: + case DELTA_FOR_UTIL: readForUtilDelta(in, count, docIDs); break; - case 32: + case BPV_32: readInts32(in, count, docIDs); break; - case 33: + case BPV_32_FOR_UTIL: readForUtil32(in, count, docIDs); break; - case 24: + case BPV_24: readInts24(in, count, docIDs); break; - case 25: + case BPV_24_FOR_UTIL: readForUtil24(in, count, docIDs); break; default: @@ -314,28 +323,28 @@ private static void readInts24(IndexInput in, int count, long[] docIDs) throws I void readInts(IndexInput in, int count, IntersectVisitor visitor) throws IOException { final int bpv = in.readByte(); switch (bpv) { - case -2: + case CONTINUOUS_IDS: readContinuousIds(in, count, visitor); break; - case -1: + case BITSET_IDS: readBitSet(in, count, visitor); break; - case 0: + case DELTA_VINT: readDeltaVInts(in, count, visitor); break; - case 1: + case DELTA_FOR_UTIL: readForUtilDelta(in, count, visitor); break; - case 32: + case BPV_32: readInts32(in, count, visitor); break; - case 33: + case BPV_32_FOR_UTIL: readForUtil32(in, count, visitor); break; - case 24: + case BPV_24: readInts24(in, count, visitor); break; - case 25: + case BPV_24_FOR_UTIL: readForUtil24(in, count, visitor); break; default: From 
d80bb2879a4a605145873e47b855c31e878b9995 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 15 Dec 2021 02:20:23 +0800 Subject: [PATCH 06/34] note --- .../src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index ab23d3333fa9..a3c17df6b538 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -108,8 +108,9 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx longs[i] = docIds[i]; max |= longs[i] & 0xffffffffL; } - int bitsPerValue = PackedInts.bitsRequired(max); - if (bitsPerValue <= 24) { + // The 24/32 bit judgment is kept without calculating the bitsPerValue of max, thus + // ensuring the high efficiency of decoding. + if (max <= 0xffffff) { out.writeByte(BPV_24_FOR_UTIL); forUtil.encode(longs, 24, out); } else { From e4bd039922935306fd6d6ac3dca2b6fa5d6134b2 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 15 Dec 2021 02:22:30 +0800 Subject: [PATCH 07/34] for util --- .../java/org/apache/lucene/util/bkd/DocIdsWriter.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index a3c17df6b538..bb74a6b766e0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -27,13 +27,6 @@ class DocIdsWriter { - static { - if (ForUtil.BLOCK_SIZE != BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { - throw new RuntimeException( - "ForUtils size need to be the same as BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE"); - } - } - private static final byte CONTINUOUS_IDS = (byte) -2; private static 
final byte BITSET_IDS = (byte) -1; private static final byte DELTA_VINT = (byte) 0; @@ -85,7 +78,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) - if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) { + if (count == ForUtil.BLOCK_SIZE) { if (sorted) { out.writeByte(DELTA_FOR_UTIL); long max = 0; From 92bfc83dcb04b5078030db388f9ff582d48d1630 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 20 Dec 2021 16:23:52 +0800 Subject: [PATCH 08/34] reduce code num --- .../org/apache/lucene/util/bkd/ForUtil.java | 426 +----------------- .../org/apache/lucene/util/bkd/gen_ForUtil.py | 76 +--- 2 files changed, 18 insertions(+), 484 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java index ad2e9338bdbb..ea452083d707 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java @@ -31,7 +31,7 @@ final class ForUtil { static final int BLOCK_SIZE = 512; - private static final int BLOCK_SIZE_DIV_2 = BLOCK_SIZE >> 1; + static final int BLOCK_SIZE_DIV_2 = BLOCK_SIZE >> 1; private static final int BLOCK_SIZE_DIV_4 = BLOCK_SIZE >> 2; private static final int BLOCK_SIZE_DIV_8 = BLOCK_SIZE >> 3; private static final int BLOCK_SIZE_DIV_64 = BLOCK_SIZE >> 6; @@ -42,7 +42,6 @@ final class ForUtil { private static final int BLOCK_SIZE_DIV_8_MUL_5 = BLOCK_SIZE_DIV_8 * 5; private static final int BLOCK_SIZE_DIV_8_MUL_6 = BLOCK_SIZE_DIV_8 * 6; private static final int BLOCK_SIZE_DIV_8_MUL_7 = BLOCK_SIZE_DIV_8 * 7; - private static final int BLOCK_SIZE_LOG2 = MathUtil.log(BLOCK_SIZE, 2); private static long expandMask32(long mask32) { return mask32 | (mask32 << 32); @@ -82,16 +81,6 @@ private static void expand8(long[] arr) { } } - private static void expand8To32(long[] arr) { - for 
(int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { - long l = arr[i]; - arr[i] = (l >>> 24) & 0x000000FF000000FFL; - arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 16) & 0x000000FF000000FFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 8) & 0x000000FF000000FFL; - arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = l & 0x000000FF000000FFL; - } - } - private static void collapse8(long[] arr) { for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { arr[i] = @@ -116,14 +105,6 @@ private static void expand16(long[] arr) { } } - private static void expand16To32(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { - long l = arr[i]; - arr[i] = (l >>> 16) & 0x0000FFFF0000FFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = l & 0x0000FFFF0000FFFFL; - } - } - private static void collapse16(long[] arr) { for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { arr[i] = @@ -222,15 +203,10 @@ void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException { out.writeLong(tmp[i]); } } - - /** Number of bytes required to encode 128 integers of {@code bitsPerValue} bits per value. 
*/ - int numBytes(int bitsPerValue) { - return bitsPerValue << (BLOCK_SIZE_LOG2 - 3); - } - + private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) throws IOException { - final int numLongs = bitsPerValue << 1; + final int numLongs = bitsPerValue * BLOCK_SIZE_DIV_64; in.readLongs(tmp, 0, numLongs); final long mask = MASKS32[bitsPerValue]; int longsIdx = 0; @@ -331,13 +307,6 @@ private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, private static final long MASK32_22 = MASKS32[22]; private static final long MASK32_23 = MASKS32[23]; private static final long MASK32_24 = MASKS32[24]; - private static final long MASK32_25 = MASKS32[25]; - private static final long MASK32_26 = MASKS32[26]; - private static final long MASK32_27 = MASKS32[27]; - private static final long MASK32_28 = MASKS32[28]; - private static final long MASK32_29 = MASKS32[29]; - private static final long MASK32_30 = MASKS32[30]; - private static final long MASK32_31 = MASKS32[31]; /** Decode 128 integers into {@code longs}. 
*/ void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { @@ -438,34 +407,6 @@ void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { decode24(in, tmp, longs); expand32(longs); break; - case 25: - decode25(in, tmp, longs); - expand32(longs); - break; - case 26: - decode26(in, tmp, longs); - expand32(longs); - break; - case 27: - decode27(in, tmp, longs); - expand32(longs); - break; - case 28: - decode28(in, tmp, longs); - expand32(longs); - break; - case 29: - decode29(in, tmp, longs); - expand32(longs); - break; - case 30: - decode30(in, tmp, longs); - expand32(longs); - break; - case 31: - decode31(in, tmp, longs); - expand32(longs); - break; case 32: decode32(in, tmp, longs); expand32(longs); @@ -477,132 +418,6 @@ void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { } } - /** - * Decodes 128 integers into 64 {@code longs} such that each long contains two values, each - * represented with 32 bits. Values [0..63] are encoded in the high-order bits of {@code longs} - * [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This - * representation may allow subsequent operations to be performed on two values at a time. 
- */ - void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException { - switch (bitsPerValue) { - case 1: - decode1(in, tmp, longs); - expand8To32(longs); - break; - case 2: - decode2(in, tmp, longs); - expand8To32(longs); - break; - case 3: - decode3(in, tmp, longs); - expand8To32(longs); - break; - case 4: - decode4(in, tmp, longs); - expand8To32(longs); - break; - case 5: - decode5(in, tmp, longs); - expand8To32(longs); - break; - case 6: - decode6(in, tmp, longs); - expand8To32(longs); - break; - case 7: - decode7(in, tmp, longs); - expand8To32(longs); - break; - case 8: - decode8(in, tmp, longs); - expand8To32(longs); - break; - case 9: - decode9(in, tmp, longs); - expand16To32(longs); - break; - case 10: - decode10(in, tmp, longs); - expand16To32(longs); - break; - case 11: - decode11(in, tmp, longs); - expand16To32(longs); - break; - case 12: - decode12(in, tmp, longs); - expand16To32(longs); - break; - case 13: - decode13(in, tmp, longs); - expand16To32(longs); - break; - case 14: - decode14(in, tmp, longs); - expand16To32(longs); - break; - case 15: - decode15(in, tmp, longs); - expand16To32(longs); - break; - case 16: - decode16(in, tmp, longs); - expand16To32(longs); - break; - case 17: - decode17(in, tmp, longs); - break; - case 18: - decode18(in, tmp, longs); - break; - case 19: - decode19(in, tmp, longs); - break; - case 20: - decode20(in, tmp, longs); - break; - case 21: - decode21(in, tmp, longs); - break; - case 22: - decode22(in, tmp, longs); - break; - case 23: - decode23(in, tmp, longs); - break; - case 24: - decode24(in, tmp, longs); - break; - case 25: - decode25(in, tmp, longs); - break; - case 26: - decode26(in, tmp, longs); - break; - case 27: - decode27(in, tmp, longs); - break; - case 28: - decode28(in, tmp, longs); - break; - case 29: - decode29(in, tmp, longs); - break; - case 30: - decode30(in, tmp, longs); - break; - case 31: - decode31(in, tmp, longs); - break; - case 32: - decode32(in, tmp, longs); - break; - 
default: - decodeSlow(bitsPerValue, in, tmp, longs); - break; - } - } - private static void decode1(DataInput in, long[] tmp, long[] longs) throws IOException { in.readLongs(tmp, 0, 8); shiftLongs(tmp, 8, longs, 0, 7, MASK8_1); @@ -1130,241 +945,6 @@ private static void decode24(DataInput in, long[] tmp, long[] longs) throws IOEx longs[longsIdx + 0] = l0; } } - - private static void decode25(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 200); - shiftLongs(tmp, 200, longs, 0, 7, MASK32_25); - for (int iter = 0, tmpIdx = 0, longsIdx = 200; iter < 8; ++iter, tmpIdx += 25, longsIdx += 7) { - long l0 = (tmp[tmpIdx + 0] & MASK32_7) << 18; - l0 |= (tmp[tmpIdx + 1] & MASK32_7) << 11; - l0 |= (tmp[tmpIdx + 2] & MASK32_7) << 4; - l0 |= (tmp[tmpIdx + 3] >>> 3) & MASK32_4; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 3] & MASK32_3) << 22; - l1 |= (tmp[tmpIdx + 4] & MASK32_7) << 15; - l1 |= (tmp[tmpIdx + 5] & MASK32_7) << 8; - l1 |= (tmp[tmpIdx + 6] & MASK32_7) << 1; - l1 |= (tmp[tmpIdx + 7] >>> 6) & MASK32_1; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 7] & MASK32_6) << 19; - l2 |= (tmp[tmpIdx + 8] & MASK32_7) << 12; - l2 |= (tmp[tmpIdx + 9] & MASK32_7) << 5; - l2 |= (tmp[tmpIdx + 10] >>> 2) & MASK32_5; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 10] & MASK32_2) << 23; - l3 |= (tmp[tmpIdx + 11] & MASK32_7) << 16; - l3 |= (tmp[tmpIdx + 12] & MASK32_7) << 9; - l3 |= (tmp[tmpIdx + 13] & MASK32_7) << 2; - l3 |= (tmp[tmpIdx + 14] >>> 5) & MASK32_2; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 14] & MASK32_5) << 20; - l4 |= (tmp[tmpIdx + 15] & MASK32_7) << 13; - l4 |= (tmp[tmpIdx + 16] & MASK32_7) << 6; - l4 |= (tmp[tmpIdx + 17] >>> 1) & MASK32_6; - longs[longsIdx + 4] = l4; - long l5 = (tmp[tmpIdx + 17] & MASK32_1) << 24; - l5 |= (tmp[tmpIdx + 18] & MASK32_7) << 17; - l5 |= (tmp[tmpIdx + 19] & MASK32_7) << 10; - l5 |= (tmp[tmpIdx + 20] & MASK32_7) << 3; - l5 |= (tmp[tmpIdx + 21] >>> 4) & MASK32_3; - 
longs[longsIdx + 5] = l5; - long l6 = (tmp[tmpIdx + 21] & MASK32_4) << 21; - l6 |= (tmp[tmpIdx + 22] & MASK32_7) << 14; - l6 |= (tmp[tmpIdx + 23] & MASK32_7) << 7; - l6 |= (tmp[tmpIdx + 24] & MASK32_7) << 0; - longs[longsIdx + 6] = l6; - } - } - - private static void decode26(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 208); - shiftLongs(tmp, 208, longs, 0, 6, MASK32_26); - for (int iter = 0, tmpIdx = 0, longsIdx = 208; iter < 16; ++iter, tmpIdx += 13, longsIdx += 3) { - long l0 = (tmp[tmpIdx + 0] & MASK32_6) << 20; - l0 |= (tmp[tmpIdx + 1] & MASK32_6) << 14; - l0 |= (tmp[tmpIdx + 2] & MASK32_6) << 8; - l0 |= (tmp[tmpIdx + 3] & MASK32_6) << 2; - l0 |= (tmp[tmpIdx + 4] >>> 4) & MASK32_2; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 4] & MASK32_4) << 22; - l1 |= (tmp[tmpIdx + 5] & MASK32_6) << 16; - l1 |= (tmp[tmpIdx + 6] & MASK32_6) << 10; - l1 |= (tmp[tmpIdx + 7] & MASK32_6) << 4; - l1 |= (tmp[tmpIdx + 8] >>> 2) & MASK32_4; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 8] & MASK32_2) << 24; - l2 |= (tmp[tmpIdx + 9] & MASK32_6) << 18; - l2 |= (tmp[tmpIdx + 10] & MASK32_6) << 12; - l2 |= (tmp[tmpIdx + 11] & MASK32_6) << 6; - l2 |= (tmp[tmpIdx + 12] & MASK32_6) << 0; - longs[longsIdx + 2] = l2; - } - } - - private static void decode27(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 216); - shiftLongs(tmp, 216, longs, 0, 5, MASK32_27); - for (int iter = 0, tmpIdx = 0, longsIdx = 216; iter < 8; ++iter, tmpIdx += 27, longsIdx += 5) { - long l0 = (tmp[tmpIdx + 0] & MASK32_5) << 22; - l0 |= (tmp[tmpIdx + 1] & MASK32_5) << 17; - l0 |= (tmp[tmpIdx + 2] & MASK32_5) << 12; - l0 |= (tmp[tmpIdx + 3] & MASK32_5) << 7; - l0 |= (tmp[tmpIdx + 4] & MASK32_5) << 2; - l0 |= (tmp[tmpIdx + 5] >>> 3) & MASK32_2; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 5] & MASK32_3) << 24; - l1 |= (tmp[tmpIdx + 6] & MASK32_5) << 19; - l1 |= (tmp[tmpIdx + 7] & MASK32_5) << 14; - l1 |= (tmp[tmpIdx + 8] 
& MASK32_5) << 9; - l1 |= (tmp[tmpIdx + 9] & MASK32_5) << 4; - l1 |= (tmp[tmpIdx + 10] >>> 1) & MASK32_4; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 10] & MASK32_1) << 26; - l2 |= (tmp[tmpIdx + 11] & MASK32_5) << 21; - l2 |= (tmp[tmpIdx + 12] & MASK32_5) << 16; - l2 |= (tmp[tmpIdx + 13] & MASK32_5) << 11; - l2 |= (tmp[tmpIdx + 14] & MASK32_5) << 6; - l2 |= (tmp[tmpIdx + 15] & MASK32_5) << 1; - l2 |= (tmp[tmpIdx + 16] >>> 4) & MASK32_1; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 16] & MASK32_4) << 23; - l3 |= (tmp[tmpIdx + 17] & MASK32_5) << 18; - l3 |= (tmp[tmpIdx + 18] & MASK32_5) << 13; - l3 |= (tmp[tmpIdx + 19] & MASK32_5) << 8; - l3 |= (tmp[tmpIdx + 20] & MASK32_5) << 3; - l3 |= (tmp[tmpIdx + 21] >>> 2) & MASK32_3; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 21] & MASK32_2) << 25; - l4 |= (tmp[tmpIdx + 22] & MASK32_5) << 20; - l4 |= (tmp[tmpIdx + 23] & MASK32_5) << 15; - l4 |= (tmp[tmpIdx + 24] & MASK32_5) << 10; - l4 |= (tmp[tmpIdx + 25] & MASK32_5) << 5; - l4 |= (tmp[tmpIdx + 26] & MASK32_5) << 0; - longs[longsIdx + 4] = l4; - } - } - - private static void decode28(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 224); - shiftLongs(tmp, 224, longs, 0, 4, MASK32_28); - shiftLongs(tmp, 224, tmp, 0, 0, MASK32_4); - for (int iter = 0, tmpIdx = 0, longsIdx = 224; iter < 32; ++iter, tmpIdx += 7, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 24; - l0 |= tmp[tmpIdx + 1] << 20; - l0 |= tmp[tmpIdx + 2] << 16; - l0 |= tmp[tmpIdx + 3] << 12; - l0 |= tmp[tmpIdx + 4] << 8; - l0 |= tmp[tmpIdx + 5] << 4; - l0 |= tmp[tmpIdx + 6] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode29(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 232); - shiftLongs(tmp, 232, longs, 0, 3, MASK32_29); - for (int iter = 0, tmpIdx = 0, longsIdx = 232; iter < 8; ++iter, tmpIdx += 29, longsIdx += 3) { - long l0 = (tmp[tmpIdx + 0] & MASK32_3) << 26; - l0 |= (tmp[tmpIdx + 1] & 
MASK32_3) << 23; - l0 |= (tmp[tmpIdx + 2] & MASK32_3) << 20; - l0 |= (tmp[tmpIdx + 3] & MASK32_3) << 17; - l0 |= (tmp[tmpIdx + 4] & MASK32_3) << 14; - l0 |= (tmp[tmpIdx + 5] & MASK32_3) << 11; - l0 |= (tmp[tmpIdx + 6] & MASK32_3) << 8; - l0 |= (tmp[tmpIdx + 7] & MASK32_3) << 5; - l0 |= (tmp[tmpIdx + 8] & MASK32_3) << 2; - l0 |= (tmp[tmpIdx + 9] >>> 1) & MASK32_2; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 9] & MASK32_1) << 28; - l1 |= (tmp[tmpIdx + 10] & MASK32_3) << 25; - l1 |= (tmp[tmpIdx + 11] & MASK32_3) << 22; - l1 |= (tmp[tmpIdx + 12] & MASK32_3) << 19; - l1 |= (tmp[tmpIdx + 13] & MASK32_3) << 16; - l1 |= (tmp[tmpIdx + 14] & MASK32_3) << 13; - l1 |= (tmp[tmpIdx + 15] & MASK32_3) << 10; - l1 |= (tmp[tmpIdx + 16] & MASK32_3) << 7; - l1 |= (tmp[tmpIdx + 17] & MASK32_3) << 4; - l1 |= (tmp[tmpIdx + 18] & MASK32_3) << 1; - l1 |= (tmp[tmpIdx + 19] >>> 2) & MASK32_1; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 19] & MASK32_2) << 27; - l2 |= (tmp[tmpIdx + 20] & MASK32_3) << 24; - l2 |= (tmp[tmpIdx + 21] & MASK32_3) << 21; - l2 |= (tmp[tmpIdx + 22] & MASK32_3) << 18; - l2 |= (tmp[tmpIdx + 23] & MASK32_3) << 15; - l2 |= (tmp[tmpIdx + 24] & MASK32_3) << 12; - l2 |= (tmp[tmpIdx + 25] & MASK32_3) << 9; - l2 |= (tmp[tmpIdx + 26] & MASK32_3) << 6; - l2 |= (tmp[tmpIdx + 27] & MASK32_3) << 3; - l2 |= (tmp[tmpIdx + 28] & MASK32_3) << 0; - longs[longsIdx + 2] = l2; - } - } - - private static void decode30(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 240); - shiftLongs(tmp, 240, longs, 0, 2, MASK32_30); - shiftLongs(tmp, 240, tmp, 0, 0, MASK32_2); - for (int iter = 0, tmpIdx = 0, longsIdx = 240; iter < 16; ++iter, tmpIdx += 15, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 28; - l0 |= tmp[tmpIdx + 1] << 26; - l0 |= tmp[tmpIdx + 2] << 24; - l0 |= tmp[tmpIdx + 3] << 22; - l0 |= tmp[tmpIdx + 4] << 20; - l0 |= tmp[tmpIdx + 5] << 18; - l0 |= tmp[tmpIdx + 6] << 16; - l0 |= tmp[tmpIdx + 7] << 14; - l0 |= tmp[tmpIdx + 8] << 
12; - l0 |= tmp[tmpIdx + 9] << 10; - l0 |= tmp[tmpIdx + 10] << 8; - l0 |= tmp[tmpIdx + 11] << 6; - l0 |= tmp[tmpIdx + 12] << 4; - l0 |= tmp[tmpIdx + 13] << 2; - l0 |= tmp[tmpIdx + 14] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode31(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 248); - shiftLongs(tmp, 248, longs, 0, 1, MASK32_31); - shiftLongs(tmp, 248, tmp, 0, 0, MASK32_1); - for (int iter = 0, tmpIdx = 0, longsIdx = 248; iter < 8; ++iter, tmpIdx += 31, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 30; - l0 |= tmp[tmpIdx + 1] << 29; - l0 |= tmp[tmpIdx + 2] << 28; - l0 |= tmp[tmpIdx + 3] << 27; - l0 |= tmp[tmpIdx + 4] << 26; - l0 |= tmp[tmpIdx + 5] << 25; - l0 |= tmp[tmpIdx + 6] << 24; - l0 |= tmp[tmpIdx + 7] << 23; - l0 |= tmp[tmpIdx + 8] << 22; - l0 |= tmp[tmpIdx + 9] << 21; - l0 |= tmp[tmpIdx + 10] << 20; - l0 |= tmp[tmpIdx + 11] << 19; - l0 |= tmp[tmpIdx + 12] << 18; - l0 |= tmp[tmpIdx + 13] << 17; - l0 |= tmp[tmpIdx + 14] << 16; - l0 |= tmp[tmpIdx + 15] << 15; - l0 |= tmp[tmpIdx + 16] << 14; - l0 |= tmp[tmpIdx + 17] << 13; - l0 |= tmp[tmpIdx + 18] << 12; - l0 |= tmp[tmpIdx + 19] << 11; - l0 |= tmp[tmpIdx + 20] << 10; - l0 |= tmp[tmpIdx + 21] << 9; - l0 |= tmp[tmpIdx + 22] << 8; - l0 |= tmp[tmpIdx + 23] << 7; - l0 |= tmp[tmpIdx + 24] << 6; - l0 |= tmp[tmpIdx + 25] << 5; - l0 |= tmp[tmpIdx + 26] << 4; - l0 |= tmp[tmpIdx + 27] << 3; - l0 |= tmp[tmpIdx + 28] << 2; - l0 |= tmp[tmpIdx + 29] << 1; - l0 |= tmp[tmpIdx + 30] << 0; - longs[longsIdx + 0] = l0; - } - } - private static void decode32(DataInput in, long[] tmp, long[] longs) throws IOException { in.readLongs(longs, 0, 256); } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py b/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py index c170d03e9c7d..710411d85417 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py @@ 
-20,9 +20,10 @@ """Code generation for ForUtil.java""" BLOCK_SIZE = 512 -MAX_SPECIALIZED_BITS_PER_VALUE = 32 +MAX_SPECIALIZED_BITS_PER_VALUE = 24 OUTPUT_FILE = "ForUtil.java" PRIMITIVE_SIZE = [8, 16, 32] +SPECIALIZED_BITS_PER_VALUES = [i for i in range(1, MAX_SPECIALIZED_BITS_PER_VALUE + 1)] + [32] HEADER = """// This file has been automatically generated, DO NOT EDIT /* @@ -56,7 +57,7 @@ final class ForUtil { static final int BLOCK_SIZE = """ + str(BLOCK_SIZE) + """; - private static final int BLOCK_SIZE_DIV_2 = BLOCK_SIZE >> 1; + static final int BLOCK_SIZE_DIV_2 = BLOCK_SIZE >> 1; private static final int BLOCK_SIZE_DIV_4 = BLOCK_SIZE >> 2; private static final int BLOCK_SIZE_DIV_8 = BLOCK_SIZE >> 3; private static final int BLOCK_SIZE_DIV_64 = BLOCK_SIZE >> 6; @@ -67,7 +68,6 @@ private static final int BLOCK_SIZE_DIV_8_MUL_5 = BLOCK_SIZE_DIV_8 * 5; private static final int BLOCK_SIZE_DIV_8_MUL_6 = BLOCK_SIZE_DIV_8 * 6; private static final int BLOCK_SIZE_DIV_8_MUL_7 = BLOCK_SIZE_DIV_8 * 7; - private static final int BLOCK_SIZE_LOG2 = MathUtil.log(BLOCK_SIZE, 2); private static long expandMask32(long mask32) { return mask32 | (mask32 << 32); @@ -107,16 +107,6 @@ } } - private static void expand8To32(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { - long l = arr[i]; - arr[i] = (l >>> 24) & 0x000000FF000000FFL; - arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 16) & 0x000000FF000000FFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 8) & 0x000000FF000000FFL; - arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = l & 0x000000FF000000FFL; - } - } - private static void collapse8(long[] arr) { for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { arr[i] = @@ -141,17 +131,13 @@ } } - private static void expand16To32(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { - long l = arr[i]; - arr[i] = (l >>> 16) & 0x0000FFFF0000FFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = l & 0x0000FFFF0000FFFFL; - } - } - private static void collapse16(long[] arr) { for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) 
{ - arr[i] = (arr[i] << 48) | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 32) | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 16) | arr[BLOCK_SIZE_DIV_8_MUL_6 + i]; + arr[i] = + (arr[i] << 48) + | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 32) + | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 16) + | arr[BLOCK_SIZE_DIV_8_MUL_6 + i]; } } @@ -171,7 +157,7 @@ private final long[] tmp = new long[BLOCK_SIZE_DIV_2]; - /** Encode 512 integers from {@code longs} into {@code out}. */ + /** Encode 128 integers from {@code longs} into {@code out}. */ void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException { final int nextPrimitive; final int numLongs; @@ -243,15 +229,10 @@ out.writeLong(tmp[i]); } } - - /** Number of bytes required to encode 512 integers of {@code bitsPerValue} bits per value. */ - int numBytes(int bitsPerValue) { - return bitsPerValue << (BLOCK_SIZE_LOG2 - 3); - } - + private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) throws IOException { - final int numLongs = bitsPerValue << 1; + final int numLongs = bitsPerValue * BLOCK_SIZE_DIV_64; in.readLongs(tmp, 0, numLongs); final long mask = MASKS32[bitsPerValue]; int longsIdx = 0; @@ -395,11 +376,11 @@ def writeDecode(bpv, f): f.write(' private static final long MASK%d_%d = MASKS%d[%d];\n' %(primitive_size, bpv, primitive_size, bpv)) f.write(""" - /** Decode 512 integers into {@code longs}. */ + /** Decode 128 integers into {@code longs}. */ void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { switch (bitsPerValue) { """) - for bpv in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1): + for bpv in SPECIALIZED_BITS_PER_VALUES: next_primitive = 32 if bpv <= 8: next_primitive = 8 @@ -416,37 +397,10 @@ def writeDecode(bpv, f): f.write(' }\n') f.write(' }\n') - f.write(""" - /** - * Decodes 512 integers into 64 {@code longs} such that each long contains two values, each - * represented with 32 bits. 
Values [0..63] are encoded in the high-order bits of {@code longs} - * [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This - * representation may allow subsequent operations to be performed on two values at a time. - */ - void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException { - switch (bitsPerValue) { -""") - for bpv in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1): - next_primitive = 32 - if bpv <= 8: - next_primitive = 8 - elif bpv <= 16: - next_primitive = 16 - f.write(' case %d:\n' %bpv) - f.write(' decode%d(in, tmp, longs);\n' %bpv) - if next_primitive <= 16: - f.write(' expand%dTo32(longs);\n' %next_primitive) - f.write(' break;\n') - f.write(' default:\n') - f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n') - f.write(' break;\n') - f.write(' }\n') - f.write(' }\n') - f.write('\n') - for i in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1): + for i in SPECIALIZED_BITS_PER_VALUES: writeDecode(i, f) if i < MAX_SPECIALIZED_BITS_PER_VALUE: f.write('\n') - f.write('}\n') + f.write('}\n') \ No newline at end of file From 73d00ffbf09d2de6cb91a110451637e8c53eaa3c Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Tue, 21 Dec 2021 13:04:08 +0800 Subject: [PATCH 09/34] spotless --- lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java | 4 ++-- .../core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java index ea452083d707..de94a0b0e4d8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.util.MathUtil; // Inspired from https://fulmicoton.com/posts/bitpacking/ // 
Encodes multiple integers in a long to get SIMD-like speedups. @@ -203,7 +202,7 @@ void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException { out.writeLong(tmp[i]); } } - + private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) throws IOException { final int numLongs = bitsPerValue * BLOCK_SIZE_DIV_64; @@ -945,6 +944,7 @@ private static void decode24(DataInput in, long[] tmp, long[] longs) throws IOEx longs[longsIdx + 0] = l0; } } + private static void decode32(DataInput in, long[] tmp, long[] longs) throws IOException { in.readLongs(longs, 0, 256); } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py b/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py index 710411d85417..761650cb60ab 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py @@ -47,7 +47,6 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.util.MathUtil; // Inspired from https://fulmicoton.com/posts/bitpacking/ // Encodes multiple integers in a long to get SIMD-like speedups. 
From 4648f834df95191609c5ae55bcee1d92e20be16b Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 6 Jan 2022 22:46:17 +0800 Subject: [PATCH 10/34] limit bpv to 16/24/32 and using floor delta codec --- .../apache/lucene/util/bkd/BKDForUtil.java | 137 +++ .../apache/lucene/util/bkd/DocIdsWriter.java | 92 +- .../org/apache/lucene/util/bkd/ForUtil.java | 951 ------------------ .../org/apache/lucene/util/bkd/gen_ForUtil.py | 405 -------- 4 files changed, 179 insertions(+), 1406 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java delete mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java delete mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java new file mode 100644 index 000000000000..15ceacd476e5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -0,0 +1,137 @@ +// This file has been automatically generated, DO NOT EDIT +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.util.bkd; + +import java.io.IOException; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; + +// Inspired from https://fulmicoton.com/posts/bitpacking/ +// Encodes multiple integers in a long to get SIMD-like speedups. +// If bitsPerValue <= 8 then we pack 8 ints per long +// else if bitsPerValue <= 16 we pack 4 ints per long +// else we pack 2 ints per long +final class BKDForUtil { + + static final int BLOCK_SIZE = 512; + private static final long MASK32_8 = mask32(8); + private static final long MASK32_24 = mask32(24); + + private static long expandMask32(long mask32) { + return mask32 | (mask32 << 32); + } + + private static long mask32(int bitsPerValue) { + return expandMask32((1L << bitsPerValue) - 1); + } + + private static void expand16(long[] arr) { + for (int i = 0; i < 128; ++i) { + long l = arr[i]; + arr[i] = (l >>> 48) & 0xFFFFL; + arr[128 + i] = (l >>> 32) & 0xFFFFL; + arr[256 + i] = (l >>> 16) & 0xFFFFL; + arr[384 + i] = l & 0xFFFFL; + } + } + + private static void collapse16(long[] arr) { + for (int i = 0; i < 128; ++i) { + arr[i] = (arr[i] << 48) | (arr[128 + i] << 32) | (arr[256 + i] << 16) | arr[384 + i]; + } + } + + private static void expand32(long[] arr) { + for (int i = 0; i < 256; ++i) { + long l = arr[i]; + arr[i] = l >>> 32; + arr[256 + i] = l & 0xFFFFFFFFL; + } + } + + private static void collapse32(long[] arr) { + for (int i = 0; i < 256; ++i) { + arr[i] = (arr[i] << 32) | arr[256 + i]; + } + } + + private final long[] tmp = new long[256]; + + void encode16(long[] longs, DataOutput out) throws IOException { + collapse16(longs); + for (int i = 0; i < 128; i++) { + out.writeLong(longs[i]); + } + } + + void encode32(long[] longs, DataOutput out) throws IOException { + collapse32(longs); + for (int i = 0; i < 256; i++) { + out.writeLong(longs[i]); + } + } + + void encode24(long[] longs, DataOutput out) throws IOException { + collapse32(longs); + for (int i = 0; i < 192; 
++i) { + tmp[i] = longs[i] << 8; + } + int tmpIdx = 0; + for (int i = 192; i < 256; i++) { + tmp[tmpIdx++] |= (longs[i] >>> 16) & MASK32_8; + tmp[tmpIdx++] |= (longs[i] >>> 8) & MASK32_8; + tmp[tmpIdx++] |= longs[i] & MASK32_8; + } + for (int i = 0; i < 192; ++i) { + out.writeLong(tmp[i]); + } + } + + /** + * The pattern that this shiftLongs method applies is recognized by the C2 compiler, which + * generates SIMD instructions for it in order to shift multiple longs at once. + */ + private static void shiftLongs(long[] a, int count, long[] b, int shift, long mask) { + for (int i = 0; i < count; ++i) { + b[i] = (a[i] >>> shift) & mask; + } + } + + void decode16(DataInput in, long[] longs) throws IOException { + in.readLongs(longs, 0, 128); + expand16(longs); + } + + void decode24(DataInput in, long[] longs) throws IOException { + in.readLongs(tmp, 0, 192); + shiftLongs(tmp, 192, longs, 8, MASK32_24); + shiftLongs(tmp, 192, tmp, 0, MASK32_8); + for (int iter = 0, tmpIdx = 0, longsIdx = 192; iter < 64; ++iter, tmpIdx += 3, longsIdx += 1) { + long l0 = tmp[tmpIdx] << 16; + l0 |= tmp[tmpIdx + 1] << 8; + l0 |= tmp[tmpIdx + 2]; + longs[longsIdx] = l0; + } + expand32(longs); + } + + void decode32(DataInput in, long[] longs) throws IOException { + in.readLongs(longs, 0, 256); + expand32(longs); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index bb74a6b766e0..b648b6b489df 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -23,20 +23,19 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.DocBaseBitSetIterator; import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.packed.PackedInts; class DocIdsWriter { private static final byte CONTINUOUS_IDS = (byte) -2; private static final byte BITSET_IDS = (byte) -1; private static final 
byte DELTA_VINT = (byte) 0; - private static final byte DELTA_FOR_UTIL = (byte) 1; + private static final byte DELTA_FOR_UTIL = (byte) 32 + 16; private static final byte BPV_24 = (byte) 24; - private static final byte BPV_24_FOR_UTIL = (byte) 25; + private static final byte BPV_24_FOR_UTIL = (byte) 32 + 24; private static final byte BPV_32 = (byte) 32; - private static final byte BPV_32_FOR_UTIL = (byte) 33; + private static final byte BPV_32_FOR_UTIL = (byte) 32 + 32; - private final ForUtil forUtil = new ForUtil(); + private final BKDForUtil forUtil = new BKDForUtil(); private final long[] scratch; DocIdsWriter(int maxPointsInLeaf) { @@ -78,22 +77,16 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) - if (count == ForUtil.BLOCK_SIZE) { - if (sorted) { + if (count == BKDForUtil.BLOCK_SIZE) { + if (sorted && Integer.toUnsignedLong(min2max) <= 0xFFFFL) { out.writeByte(DELTA_FOR_UTIL); - long max = 0; long[] delta = new long[count]; - int previous = docIds[start]; - for (int i = 1; i < count; i++) { - int doc = docIds[i + start]; - delta[i] = doc - previous; - max |= delta[i] & 0xffffffffL; - previous = doc; + final int min = docIds[start]; + for (int i = 0; i < count; i++) { + delta[i] = docIds[i] - min; } - int bitsPerValue = PackedInts.bitsRequired(max); out.writeVInt(docIds[start]); - out.writeVInt(bitsPerValue); - forUtil.encode(delta, PackedInts.bitsRequired(max), out); + forUtil.encode16(delta, out); } else { long[] longs = new long[count]; long max = 0; @@ -101,14 +94,12 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx longs[i] = docIds[i]; max |= longs[i] & 0xffffffffL; } - // The 24/32 bit judgment is kept without calculating the bitsPerValue of max, thus - // ensuring the high efficiency of decoding. 
if (max <= 0xffffff) { out.writeByte(BPV_24_FOR_UTIL); - forUtil.encode(longs, 24, out); + forUtil.encode24(longs, out); } else { out.writeByte(BPV_32_FOR_UTIL); - forUtil.encode(longs, 32, out); + forUtil.encode32(longs, out); } } return; @@ -211,43 +202,40 @@ void readInts(IndexInput in, int count, long[] docIDs) throws IOException { readDeltaVInts(in, count, docIDs); break; case DELTA_FOR_UTIL: - readForUtilDelta(in, count, docIDs); + readBKDForUtilDelta(in, count, docIDs); break; case BPV_32: readInts32(in, count, docIDs); break; case BPV_32_FOR_UTIL: - readForUtil32(in, count, docIDs); + readBKDForUtil32(in, count, docIDs); break; case BPV_24: readInts24(in, count, docIDs); break; case BPV_24_FOR_UTIL: - readForUtil24(in, count, docIDs); + readBKDForUtil24(in, count, docIDs); break; default: throw new IOException("Unsupported number of bits per value: " + bpv); } } - private void readForUtilDelta(IndexInput in, int count, long[] docIDs) throws IOException { + private void readBKDForUtilDelta(IndexInput in, int count, long[] docIDs) throws IOException { assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; - docIDs[0] = in.readVInt(); - int bitsPerValue = in.readVInt(); - forUtil.decode(bitsPerValue, in, scratch); - for (int i = 1; i < count; i++) { - docIDs[i] = docIDs[i - 1] + scratch[i]; - } + final long min = in.readVInt(); + forUtil.decode16(in, docIDs); + plus(docIDs, min, count); } - private void readForUtil24(IndexInput in, int count, long[] docIDs) throws IOException { + private void readBKDForUtil24(IndexInput in, int count, long[] docIDs) throws IOException { assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; - forUtil.decode(24, in, docIDs); + forUtil.decode24(in, docIDs); } - private void readForUtil32(IndexInput in, int count, long[] docIDs) throws IOException { + private void readBKDForUtil32(IndexInput in, int count, long[] docIDs) throws IOException { assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; - forUtil.decode(32, 
in, docIDs); + forUtil.decode32(in, docIDs); } private static DocIdSetIterator readBitSetIterator(IndexInput in, int count) throws IOException { @@ -327,19 +315,19 @@ void readInts(IndexInput in, int count, IntersectVisitor visitor) throws IOExcep readDeltaVInts(in, count, visitor); break; case DELTA_FOR_UTIL: - readForUtilDelta(in, count, visitor); + readBKDForUtilDelta(in, count, visitor); break; case BPV_32: readInts32(in, count, visitor); break; case BPV_32_FOR_UTIL: - readForUtil32(in, count, visitor); + readBKDForUtil32(in, count, visitor); break; case BPV_24: readInts24(in, count, visitor); break; case BPV_24_FOR_UTIL: - readForUtil24(in, count, visitor); + readBKDForUtil24(in, count, visitor); break; default: throw new IOException("Unsupported number of bits per value: " + bpv); @@ -400,34 +388,38 @@ private static void readContinuousIds(IndexInput in, int count, IntersectVisitor visitor.visit(new DocBaseBitSetIterator(bitSet, count, offset)); } - private void readForUtilDelta(IndexInput in, int count, IntersectVisitor visitor) + private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visitor) throws IOException { assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; - int previous = in.readVInt(); - int bitsPerValue = in.readVInt(); - forUtil.decode(bitsPerValue, in, scratch); - visitor.visit(previous); - for (int i = 1; i < count; i++) { - previous += (int) scratch[i]; - visitor.visit(previous); + final int min = in.readVInt(); + forUtil.decode16(in, scratch); + plus(scratch, min, count); + for (long l : scratch) { + visitor.visit((int) l); } } - private void readForUtil24(IndexInput in, int count, IntersectVisitor visitor) + private void readBKDForUtil24(IndexInput in, int count, IntersectVisitor visitor) throws IOException { assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; - forUtil.decode(24, in, scratch); + forUtil.decode24(in, scratch); for (long l : scratch) { visitor.visit((int) l); } } - private void 
readForUtil32(IndexInput in, int count, IntersectVisitor visitor) + private void readBKDForUtil32(IndexInput in, int count, IntersectVisitor visitor) throws IOException { assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; - forUtil.decode(32, in, scratch); + forUtil.decode32(in, scratch); for (long l : scratch) { visitor.visit((int) l); } } + + private static void plus(long[] a, long min, int count) { + for (int i = 0; i < count; i++) { + a[i] = a[i] + min; + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java deleted file mode 100644 index de94a0b0e4d8..000000000000 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/ForUtil.java +++ /dev/null @@ -1,951 +0,0 @@ -// This file has been automatically generated, DO NOT EDIT - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.util.bkd; - -import java.io.IOException; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.DataOutput; - -// Inspired from https://fulmicoton.com/posts/bitpacking/ -// Encodes multiple integers in a long to get SIMD-like speedups. 
-// If bitsPerValue <= 8 then we pack 8 ints per long -// else if bitsPerValue <= 16 we pack 4 ints per long -// else we pack 2 ints per long -final class ForUtil { - - static final int BLOCK_SIZE = 512; - static final int BLOCK_SIZE_DIV_2 = BLOCK_SIZE >> 1; - private static final int BLOCK_SIZE_DIV_4 = BLOCK_SIZE >> 2; - private static final int BLOCK_SIZE_DIV_8 = BLOCK_SIZE >> 3; - private static final int BLOCK_SIZE_DIV_64 = BLOCK_SIZE >> 6; - private static final int BLOCK_SIZE_DIV_8_MUL_1 = BLOCK_SIZE_DIV_8; - private static final int BLOCK_SIZE_DIV_8_MUL_2 = BLOCK_SIZE_DIV_8 * 2; - private static final int BLOCK_SIZE_DIV_8_MUL_3 = BLOCK_SIZE_DIV_8 * 3; - private static final int BLOCK_SIZE_DIV_8_MUL_4 = BLOCK_SIZE_DIV_8 * 4; - private static final int BLOCK_SIZE_DIV_8_MUL_5 = BLOCK_SIZE_DIV_8 * 5; - private static final int BLOCK_SIZE_DIV_8_MUL_6 = BLOCK_SIZE_DIV_8 * 6; - private static final int BLOCK_SIZE_DIV_8_MUL_7 = BLOCK_SIZE_DIV_8 * 7; - - private static long expandMask32(long mask32) { - return mask32 | (mask32 << 32); - } - - private static long expandMask16(long mask16) { - return expandMask32(mask16 | (mask16 << 16)); - } - - private static long expandMask8(long mask8) { - return expandMask16(mask8 | (mask8 << 8)); - } - - private static long mask32(int bitsPerValue) { - return expandMask32((1L << bitsPerValue) - 1); - } - - private static long mask16(int bitsPerValue) { - return expandMask16((1L << bitsPerValue) - 1); - } - - private static long mask8(int bitsPerValue) { - return expandMask8((1L << bitsPerValue) - 1); - } - - private static void expand8(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { - long l = arr[i]; - arr[i] = (l >>> 56) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 48) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 40) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = (l >>> 32) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 24) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_5 + i] = (l >>> 16) & 0xFFL; - 
arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = (l >>> 8) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_7 + i] = l & 0xFFL; - } - } - - private static void collapse8(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { - arr[i] = - (arr[i] << 56) - | (arr[BLOCK_SIZE_DIV_8_MUL_1 + i] << 48) - | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 40) - | (arr[BLOCK_SIZE_DIV_8_MUL_3 + i] << 32) - | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 24) - | (arr[BLOCK_SIZE_DIV_8_MUL_5 + i] << 16) - | (arr[BLOCK_SIZE_DIV_8_MUL_6 + i] << 8) - | arr[BLOCK_SIZE_DIV_8_MUL_7 + i]; - } - } - - private static void expand16(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { - long l = arr[i]; - arr[i] = (l >>> 48) & 0xFFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 32) & 0xFFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 16) & 0xFFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = l & 0xFFFFL; - } - } - - private static void collapse16(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { - arr[i] = - (arr[i] << 48) - | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 32) - | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 16) - | arr[BLOCK_SIZE_DIV_8_MUL_6 + i]; - } - } - - private static void expand32(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { - long l = arr[i]; - arr[i] = l >>> 32; - arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = l & 0xFFFFFFFFL; - } - } - - private static void collapse32(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { - arr[i] = (arr[i] << 32) | arr[BLOCK_SIZE_DIV_8_MUL_4 + i]; - } - } - - private final long[] tmp = new long[BLOCK_SIZE_DIV_2]; - - /** Encode 128 integers from {@code longs} into {@code out}. 
*/ - void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException { - final int nextPrimitive; - final int numLongs; - if (bitsPerValue <= 8) { - nextPrimitive = 8; - numLongs = BLOCK_SIZE_DIV_8; - collapse8(longs); - } else if (bitsPerValue <= 16) { - nextPrimitive = 16; - numLongs = BLOCK_SIZE_DIV_4; - collapse16(longs); - } else { - nextPrimitive = 32; - numLongs = BLOCK_SIZE_DIV_2; - collapse32(longs); - } - - final int numLongsPerShift = bitsPerValue * BLOCK_SIZE_DIV_64; - int idx = 0; - int shift = nextPrimitive - bitsPerValue; - for (int i = 0; i < numLongsPerShift; ++i) { - tmp[i] = longs[idx++] << shift; - } - for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) { - for (int i = 0; i < numLongsPerShift; ++i) { - tmp[i] |= longs[idx++] << shift; - } - } - - final int remainingBitsPerLong = shift + bitsPerValue; - final long maskRemainingBitsPerLong; - if (nextPrimitive == 8) { - maskRemainingBitsPerLong = MASKS8[remainingBitsPerLong]; - } else if (nextPrimitive == 16) { - maskRemainingBitsPerLong = MASKS16[remainingBitsPerLong]; - } else { - maskRemainingBitsPerLong = MASKS32[remainingBitsPerLong]; - } - - int tmpIdx = 0; - int remainingBitsPerValue = bitsPerValue; - while (idx < numLongs) { - if (remainingBitsPerValue >= remainingBitsPerLong) { - remainingBitsPerValue -= remainingBitsPerLong; - tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & maskRemainingBitsPerLong; - if (remainingBitsPerValue == 0) { - idx++; - remainingBitsPerValue = bitsPerValue; - } - } else { - final long mask1, mask2; - if (nextPrimitive == 8) { - mask1 = MASKS8[remainingBitsPerValue]; - mask2 = MASKS8[remainingBitsPerLong - remainingBitsPerValue]; - } else if (nextPrimitive == 16) { - mask1 = MASKS16[remainingBitsPerValue]; - mask2 = MASKS16[remainingBitsPerLong - remainingBitsPerValue]; - } else { - mask1 = MASKS32[remainingBitsPerValue]; - mask2 = MASKS32[remainingBitsPerLong - remainingBitsPerValue]; - } - tmp[tmpIdx] |= (longs[idx++] 
& mask1) << (remainingBitsPerLong - remainingBitsPerValue); - remainingBitsPerValue = bitsPerValue - remainingBitsPerLong + remainingBitsPerValue; - tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & mask2; - } - } - - for (int i = 0; i < numLongsPerShift; ++i) { - out.writeLong(tmp[i]); - } - } - - private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) - throws IOException { - final int numLongs = bitsPerValue * BLOCK_SIZE_DIV_64; - in.readLongs(tmp, 0, numLongs); - final long mask = MASKS32[bitsPerValue]; - int longsIdx = 0; - int shift = 32 - bitsPerValue; - for (; shift >= 0; shift -= bitsPerValue) { - shiftLongs(tmp, numLongs, longs, longsIdx, shift, mask); - longsIdx += numLongs; - } - final int remainingBitsPerLong = shift + bitsPerValue; - final long mask32RemainingBitsPerLong = MASKS32[remainingBitsPerLong]; - int tmpIdx = 0; - int remainingBits = remainingBitsPerLong; - for (; longsIdx < BLOCK_SIZE_DIV_2; ++longsIdx) { - int b = bitsPerValue - remainingBits; - long l = (tmp[tmpIdx++] & MASKS32[remainingBits]) << b; - while (b >= remainingBitsPerLong) { - b -= remainingBitsPerLong; - l |= (tmp[tmpIdx++] & mask32RemainingBitsPerLong) << b; - } - if (b > 0) { - l |= (tmp[tmpIdx] >>> (remainingBitsPerLong - b)) & MASKS32[b]; - remainingBits = remainingBitsPerLong - b; - } else { - remainingBits = remainingBitsPerLong; - } - longs[longsIdx] = l; - } - } - - /** - * The pattern that this shiftLongs method applies is recognized by the C2 compiler, which - * generates SIMD instructions for it in order to shift multiple longs at once. 
- */ - private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, long mask) { - for (int i = 0; i < count; ++i) { - b[bi + i] = (a[i] >>> shift) & mask; - } - } - - private static final long[] MASKS8 = new long[8]; - private static final long[] MASKS16 = new long[16]; - private static final long[] MASKS32 = new long[32]; - - static { - for (int i = 0; i < 8; ++i) { - MASKS8[i] = mask8(i); - } - for (int i = 0; i < 16; ++i) { - MASKS16[i] = mask16(i); - } - for (int i = 0; i < 32; ++i) { - MASKS32[i] = mask32(i); - } - } - // mark values in array as final longs to avoid the cost of reading array, arrays should only be - // used when the idx is a variable - private static final long MASK8_1 = MASKS8[1]; - private static final long MASK8_2 = MASKS8[2]; - private static final long MASK8_3 = MASKS8[3]; - private static final long MASK8_4 = MASKS8[4]; - private static final long MASK8_5 = MASKS8[5]; - private static final long MASK8_6 = MASKS8[6]; - private static final long MASK8_7 = MASKS8[7]; - private static final long MASK16_1 = MASKS16[1]; - private static final long MASK16_2 = MASKS16[2]; - private static final long MASK16_3 = MASKS16[3]; - private static final long MASK16_4 = MASKS16[4]; - private static final long MASK16_5 = MASKS16[5]; - private static final long MASK16_6 = MASKS16[6]; - private static final long MASK16_7 = MASKS16[7]; - private static final long MASK16_9 = MASKS16[9]; - private static final long MASK16_10 = MASKS16[10]; - private static final long MASK16_11 = MASKS16[11]; - private static final long MASK16_12 = MASKS16[12]; - private static final long MASK16_13 = MASKS16[13]; - private static final long MASK16_14 = MASKS16[14]; - private static final long MASK16_15 = MASKS16[15]; - private static final long MASK32_1 = MASKS32[1]; - private static final long MASK32_2 = MASKS32[2]; - private static final long MASK32_3 = MASKS32[3]; - private static final long MASK32_4 = MASKS32[4]; - private static final long MASK32_5 = 
MASKS32[5]; - private static final long MASK32_6 = MASKS32[6]; - private static final long MASK32_7 = MASKS32[7]; - private static final long MASK32_8 = MASKS32[8]; - private static final long MASK32_9 = MASKS32[9]; - private static final long MASK32_10 = MASKS32[10]; - private static final long MASK32_11 = MASKS32[11]; - private static final long MASK32_12 = MASKS32[12]; - private static final long MASK32_13 = MASKS32[13]; - private static final long MASK32_14 = MASKS32[14]; - private static final long MASK32_15 = MASKS32[15]; - private static final long MASK32_17 = MASKS32[17]; - private static final long MASK32_18 = MASKS32[18]; - private static final long MASK32_19 = MASKS32[19]; - private static final long MASK32_20 = MASKS32[20]; - private static final long MASK32_21 = MASKS32[21]; - private static final long MASK32_22 = MASKS32[22]; - private static final long MASK32_23 = MASKS32[23]; - private static final long MASK32_24 = MASKS32[24]; - - /** Decode 128 integers into {@code longs}. 
*/ - void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { - switch (bitsPerValue) { - case 1: - decode1(in, tmp, longs); - expand8(longs); - break; - case 2: - decode2(in, tmp, longs); - expand8(longs); - break; - case 3: - decode3(in, tmp, longs); - expand8(longs); - break; - case 4: - decode4(in, tmp, longs); - expand8(longs); - break; - case 5: - decode5(in, tmp, longs); - expand8(longs); - break; - case 6: - decode6(in, tmp, longs); - expand8(longs); - break; - case 7: - decode7(in, tmp, longs); - expand8(longs); - break; - case 8: - decode8(in, tmp, longs); - expand8(longs); - break; - case 9: - decode9(in, tmp, longs); - expand16(longs); - break; - case 10: - decode10(in, tmp, longs); - expand16(longs); - break; - case 11: - decode11(in, tmp, longs); - expand16(longs); - break; - case 12: - decode12(in, tmp, longs); - expand16(longs); - break; - case 13: - decode13(in, tmp, longs); - expand16(longs); - break; - case 14: - decode14(in, tmp, longs); - expand16(longs); - break; - case 15: - decode15(in, tmp, longs); - expand16(longs); - break; - case 16: - decode16(in, tmp, longs); - expand16(longs); - break; - case 17: - decode17(in, tmp, longs); - expand32(longs); - break; - case 18: - decode18(in, tmp, longs); - expand32(longs); - break; - case 19: - decode19(in, tmp, longs); - expand32(longs); - break; - case 20: - decode20(in, tmp, longs); - expand32(longs); - break; - case 21: - decode21(in, tmp, longs); - expand32(longs); - break; - case 22: - decode22(in, tmp, longs); - expand32(longs); - break; - case 23: - decode23(in, tmp, longs); - expand32(longs); - break; - case 24: - decode24(in, tmp, longs); - expand32(longs); - break; - case 32: - decode32(in, tmp, longs); - expand32(longs); - break; - default: - decodeSlow(bitsPerValue, in, tmp, longs); - expand32(longs); - break; - } - } - - private static void decode1(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 8); - shiftLongs(tmp, 8, longs, 
0, 7, MASK8_1); - shiftLongs(tmp, 8, longs, 8, 6, MASK8_1); - shiftLongs(tmp, 8, longs, 16, 5, MASK8_1); - shiftLongs(tmp, 8, longs, 24, 4, MASK8_1); - shiftLongs(tmp, 8, longs, 32, 3, MASK8_1); - shiftLongs(tmp, 8, longs, 40, 2, MASK8_1); - shiftLongs(tmp, 8, longs, 48, 1, MASK8_1); - shiftLongs(tmp, 8, longs, 56, 0, MASK8_1); - } - - private static void decode2(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 16); - shiftLongs(tmp, 16, longs, 0, 6, MASK8_2); - shiftLongs(tmp, 16, longs, 16, 4, MASK8_2); - shiftLongs(tmp, 16, longs, 32, 2, MASK8_2); - shiftLongs(tmp, 16, longs, 48, 0, MASK8_2); - } - - private static void decode3(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 24); - shiftLongs(tmp, 24, longs, 0, 5, MASK8_3); - shiftLongs(tmp, 24, longs, 24, 2, MASK8_3); - for (int iter = 0, tmpIdx = 0, longsIdx = 48; iter < 8; ++iter, tmpIdx += 3, longsIdx += 2) { - long l0 = (tmp[tmpIdx + 0] & MASK8_2) << 1; - l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK8_1; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK8_1) << 2; - l1 |= (tmp[tmpIdx + 2] & MASK8_2) << 0; - longs[longsIdx + 1] = l1; - } - } - - private static void decode4(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 32); - shiftLongs(tmp, 32, longs, 0, 4, MASK8_4); - shiftLongs(tmp, 32, longs, 32, 0, MASK8_4); - } - - private static void decode5(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 40); - shiftLongs(tmp, 40, longs, 0, 3, MASK8_5); - for (int iter = 0, tmpIdx = 0, longsIdx = 40; iter < 8; ++iter, tmpIdx += 5, longsIdx += 3) { - long l0 = (tmp[tmpIdx + 0] & MASK8_3) << 2; - l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK8_2; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK8_1) << 4; - l1 |= (tmp[tmpIdx + 2] & MASK8_3) << 1; - l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK8_1; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 3] & MASK8_2) << 3; - l2 |= 
(tmp[tmpIdx + 4] & MASK8_3) << 0; - longs[longsIdx + 2] = l2; - } - } - - private static void decode6(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 48); - shiftLongs(tmp, 48, longs, 0, 2, MASK8_6); - shiftLongs(tmp, 48, tmp, 0, 0, MASK8_2); - for (int iter = 0, tmpIdx = 0, longsIdx = 48; iter < 16; ++iter, tmpIdx += 3, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 4; - l0 |= tmp[tmpIdx + 1] << 2; - l0 |= tmp[tmpIdx + 2] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode7(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 56); - shiftLongs(tmp, 56, longs, 0, 1, MASK8_7); - shiftLongs(tmp, 56, tmp, 0, 0, MASK8_1); - for (int iter = 0, tmpIdx = 0, longsIdx = 56; iter < 8; ++iter, tmpIdx += 7, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 6; - l0 |= tmp[tmpIdx + 1] << 5; - l0 |= tmp[tmpIdx + 2] << 4; - l0 |= tmp[tmpIdx + 3] << 3; - l0 |= tmp[tmpIdx + 4] << 2; - l0 |= tmp[tmpIdx + 5] << 1; - l0 |= tmp[tmpIdx + 6] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode8(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(longs, 0, 64); - } - - private static void decode9(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 72); - shiftLongs(tmp, 72, longs, 0, 7, MASK16_9); - for (int iter = 0, tmpIdx = 0, longsIdx = 72; iter < 8; ++iter, tmpIdx += 9, longsIdx += 7) { - long l0 = (tmp[tmpIdx + 0] & MASK16_7) << 2; - l0 |= (tmp[tmpIdx + 1] >>> 5) & MASK16_2; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK16_5) << 4; - l1 |= (tmp[tmpIdx + 2] >>> 3) & MASK16_4; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 2] & MASK16_3) << 6; - l2 |= (tmp[tmpIdx + 3] >>> 1) & MASK16_6; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 3] & MASK16_1) << 8; - l3 |= (tmp[tmpIdx + 4] & MASK16_7) << 1; - l3 |= (tmp[tmpIdx + 5] >>> 6) & MASK16_1; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 5] 
& MASK16_6) << 3; - l4 |= (tmp[tmpIdx + 6] >>> 4) & MASK16_3; - longs[longsIdx + 4] = l4; - long l5 = (tmp[tmpIdx + 6] & MASK16_4) << 5; - l5 |= (tmp[tmpIdx + 7] >>> 2) & MASK16_5; - longs[longsIdx + 5] = l5; - long l6 = (tmp[tmpIdx + 7] & MASK16_2) << 7; - l6 |= (tmp[tmpIdx + 8] & MASK16_7) << 0; - longs[longsIdx + 6] = l6; - } - } - - private static void decode10(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 80); - shiftLongs(tmp, 80, longs, 0, 6, MASK16_10); - for (int iter = 0, tmpIdx = 0, longsIdx = 80; iter < 16; ++iter, tmpIdx += 5, longsIdx += 3) { - long l0 = (tmp[tmpIdx + 0] & MASK16_6) << 4; - l0 |= (tmp[tmpIdx + 1] >>> 2) & MASK16_4; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK16_2) << 8; - l1 |= (tmp[tmpIdx + 2] & MASK16_6) << 2; - l1 |= (tmp[tmpIdx + 3] >>> 4) & MASK16_2; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 3] & MASK16_4) << 6; - l2 |= (tmp[tmpIdx + 4] & MASK16_6) << 0; - longs[longsIdx + 2] = l2; - } - } - - private static void decode11(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 88); - shiftLongs(tmp, 88, longs, 0, 5, MASK16_11); - for (int iter = 0, tmpIdx = 0, longsIdx = 88; iter < 8; ++iter, tmpIdx += 11, longsIdx += 5) { - long l0 = (tmp[tmpIdx + 0] & MASK16_5) << 6; - l0 |= (tmp[tmpIdx + 1] & MASK16_5) << 1; - l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK16_1; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 2] & MASK16_4) << 7; - l1 |= (tmp[tmpIdx + 3] & MASK16_5) << 2; - l1 |= (tmp[tmpIdx + 4] >>> 3) & MASK16_2; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 4] & MASK16_3) << 8; - l2 |= (tmp[tmpIdx + 5] & MASK16_5) << 3; - l2 |= (tmp[tmpIdx + 6] >>> 2) & MASK16_3; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 6] & MASK16_2) << 9; - l3 |= (tmp[tmpIdx + 7] & MASK16_5) << 4; - l3 |= (tmp[tmpIdx + 8] >>> 1) & MASK16_4; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 8] & MASK16_1) << 10; - l4 |= (tmp[tmpIdx + 9] & 
MASK16_5) << 5; - l4 |= (tmp[tmpIdx + 10] & MASK16_5) << 0; - longs[longsIdx + 4] = l4; - } - } - - private static void decode12(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 96); - shiftLongs(tmp, 96, longs, 0, 4, MASK16_12); - shiftLongs(tmp, 96, tmp, 0, 0, MASK16_4); - for (int iter = 0, tmpIdx = 0, longsIdx = 96; iter < 32; ++iter, tmpIdx += 3, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 8; - l0 |= tmp[tmpIdx + 1] << 4; - l0 |= tmp[tmpIdx + 2] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode13(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 104); - shiftLongs(tmp, 104, longs, 0, 3, MASK16_13); - for (int iter = 0, tmpIdx = 0, longsIdx = 104; iter < 8; ++iter, tmpIdx += 13, longsIdx += 3) { - long l0 = (tmp[tmpIdx + 0] & MASK16_3) << 10; - l0 |= (tmp[tmpIdx + 1] & MASK16_3) << 7; - l0 |= (tmp[tmpIdx + 2] & MASK16_3) << 4; - l0 |= (tmp[tmpIdx + 3] & MASK16_3) << 1; - l0 |= (tmp[tmpIdx + 4] >>> 2) & MASK16_1; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 4] & MASK16_2) << 11; - l1 |= (tmp[tmpIdx + 5] & MASK16_3) << 8; - l1 |= (tmp[tmpIdx + 6] & MASK16_3) << 5; - l1 |= (tmp[tmpIdx + 7] & MASK16_3) << 2; - l1 |= (tmp[tmpIdx + 8] >>> 1) & MASK16_2; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 8] & MASK16_1) << 12; - l2 |= (tmp[tmpIdx + 9] & MASK16_3) << 9; - l2 |= (tmp[tmpIdx + 10] & MASK16_3) << 6; - l2 |= (tmp[tmpIdx + 11] & MASK16_3) << 3; - l2 |= (tmp[tmpIdx + 12] & MASK16_3) << 0; - longs[longsIdx + 2] = l2; - } - } - - private static void decode14(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 112); - shiftLongs(tmp, 112, longs, 0, 2, MASK16_14); - shiftLongs(tmp, 112, tmp, 0, 0, MASK16_2); - for (int iter = 0, tmpIdx = 0, longsIdx = 112; iter < 16; ++iter, tmpIdx += 7, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 12; - l0 |= tmp[tmpIdx + 1] << 10; - l0 |= tmp[tmpIdx + 2] << 8; - l0 |= tmp[tmpIdx + 3] << 
6; - l0 |= tmp[tmpIdx + 4] << 4; - l0 |= tmp[tmpIdx + 5] << 2; - l0 |= tmp[tmpIdx + 6] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode15(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 120); - shiftLongs(tmp, 120, longs, 0, 1, MASK16_15); - shiftLongs(tmp, 120, tmp, 0, 0, MASK16_1); - for (int iter = 0, tmpIdx = 0, longsIdx = 120; iter < 8; ++iter, tmpIdx += 15, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 14; - l0 |= tmp[tmpIdx + 1] << 13; - l0 |= tmp[tmpIdx + 2] << 12; - l0 |= tmp[tmpIdx + 3] << 11; - l0 |= tmp[tmpIdx + 4] << 10; - l0 |= tmp[tmpIdx + 5] << 9; - l0 |= tmp[tmpIdx + 6] << 8; - l0 |= tmp[tmpIdx + 7] << 7; - l0 |= tmp[tmpIdx + 8] << 6; - l0 |= tmp[tmpIdx + 9] << 5; - l0 |= tmp[tmpIdx + 10] << 4; - l0 |= tmp[tmpIdx + 11] << 3; - l0 |= tmp[tmpIdx + 12] << 2; - l0 |= tmp[tmpIdx + 13] << 1; - l0 |= tmp[tmpIdx + 14] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode16(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(longs, 0, 128); - } - - private static void decode17(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 136); - shiftLongs(tmp, 136, longs, 0, 15, MASK32_17); - for (int iter = 0, tmpIdx = 0, longsIdx = 136; iter < 8; ++iter, tmpIdx += 17, longsIdx += 15) { - long l0 = (tmp[tmpIdx + 0] & MASK32_15) << 2; - l0 |= (tmp[tmpIdx + 1] >>> 13) & MASK32_2; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK32_13) << 4; - l1 |= (tmp[tmpIdx + 2] >>> 11) & MASK32_4; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 2] & MASK32_11) << 6; - l2 |= (tmp[tmpIdx + 3] >>> 9) & MASK32_6; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 3] & MASK32_9) << 8; - l3 |= (tmp[tmpIdx + 4] >>> 7) & MASK32_8; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 4] & MASK32_7) << 10; - l4 |= (tmp[tmpIdx + 5] >>> 5) & MASK32_10; - longs[longsIdx + 4] = l4; - long l5 = (tmp[tmpIdx + 5] & MASK32_5) << 12; - l5 
|= (tmp[tmpIdx + 6] >>> 3) & MASK32_12; - longs[longsIdx + 5] = l5; - long l6 = (tmp[tmpIdx + 6] & MASK32_3) << 14; - l6 |= (tmp[tmpIdx + 7] >>> 1) & MASK32_14; - longs[longsIdx + 6] = l6; - long l7 = (tmp[tmpIdx + 7] & MASK32_1) << 16; - l7 |= (tmp[tmpIdx + 8] & MASK32_15) << 1; - l7 |= (tmp[tmpIdx + 9] >>> 14) & MASK32_1; - longs[longsIdx + 7] = l7; - long l8 = (tmp[tmpIdx + 9] & MASK32_14) << 3; - l8 |= (tmp[tmpIdx + 10] >>> 12) & MASK32_3; - longs[longsIdx + 8] = l8; - long l9 = (tmp[tmpIdx + 10] & MASK32_12) << 5; - l9 |= (tmp[tmpIdx + 11] >>> 10) & MASK32_5; - longs[longsIdx + 9] = l9; - long l10 = (tmp[tmpIdx + 11] & MASK32_10) << 7; - l10 |= (tmp[tmpIdx + 12] >>> 8) & MASK32_7; - longs[longsIdx + 10] = l10; - long l11 = (tmp[tmpIdx + 12] & MASK32_8) << 9; - l11 |= (tmp[tmpIdx + 13] >>> 6) & MASK32_9; - longs[longsIdx + 11] = l11; - long l12 = (tmp[tmpIdx + 13] & MASK32_6) << 11; - l12 |= (tmp[tmpIdx + 14] >>> 4) & MASK32_11; - longs[longsIdx + 12] = l12; - long l13 = (tmp[tmpIdx + 14] & MASK32_4) << 13; - l13 |= (tmp[tmpIdx + 15] >>> 2) & MASK32_13; - longs[longsIdx + 13] = l13; - long l14 = (tmp[tmpIdx + 15] & MASK32_2) << 15; - l14 |= (tmp[tmpIdx + 16] & MASK32_15) << 0; - longs[longsIdx + 14] = l14; - } - } - - private static void decode18(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 144); - shiftLongs(tmp, 144, longs, 0, 14, MASK32_18); - for (int iter = 0, tmpIdx = 0, longsIdx = 144; iter < 16; ++iter, tmpIdx += 9, longsIdx += 7) { - long l0 = (tmp[tmpIdx + 0] & MASK32_14) << 4; - l0 |= (tmp[tmpIdx + 1] >>> 10) & MASK32_4; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK32_10) << 8; - l1 |= (tmp[tmpIdx + 2] >>> 6) & MASK32_8; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 2] & MASK32_6) << 12; - l2 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_12; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 3] & MASK32_2) << 16; - l3 |= (tmp[tmpIdx + 4] & MASK32_14) << 2; - l3 |= (tmp[tmpIdx + 5] >>> 12) & 
MASK32_2; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 5] & MASK32_12) << 6; - l4 |= (tmp[tmpIdx + 6] >>> 8) & MASK32_6; - longs[longsIdx + 4] = l4; - long l5 = (tmp[tmpIdx + 6] & MASK32_8) << 10; - l5 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_10; - longs[longsIdx + 5] = l5; - long l6 = (tmp[tmpIdx + 7] & MASK32_4) << 14; - l6 |= (tmp[tmpIdx + 8] & MASK32_14) << 0; - longs[longsIdx + 6] = l6; - } - } - - private static void decode19(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 152); - shiftLongs(tmp, 152, longs, 0, 13, MASK32_19); - for (int iter = 0, tmpIdx = 0, longsIdx = 152; iter < 8; ++iter, tmpIdx += 19, longsIdx += 13) { - long l0 = (tmp[tmpIdx + 0] & MASK32_13) << 6; - l0 |= (tmp[tmpIdx + 1] >>> 7) & MASK32_6; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK32_7) << 12; - l1 |= (tmp[tmpIdx + 2] >>> 1) & MASK32_12; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 2] & MASK32_1) << 18; - l2 |= (tmp[tmpIdx + 3] & MASK32_13) << 5; - l2 |= (tmp[tmpIdx + 4] >>> 8) & MASK32_5; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 4] & MASK32_8) << 11; - l3 |= (tmp[tmpIdx + 5] >>> 2) & MASK32_11; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 5] & MASK32_2) << 17; - l4 |= (tmp[tmpIdx + 6] & MASK32_13) << 4; - l4 |= (tmp[tmpIdx + 7] >>> 9) & MASK32_4; - longs[longsIdx + 4] = l4; - long l5 = (tmp[tmpIdx + 7] & MASK32_9) << 10; - l5 |= (tmp[tmpIdx + 8] >>> 3) & MASK32_10; - longs[longsIdx + 5] = l5; - long l6 = (tmp[tmpIdx + 8] & MASK32_3) << 16; - l6 |= (tmp[tmpIdx + 9] & MASK32_13) << 3; - l6 |= (tmp[tmpIdx + 10] >>> 10) & MASK32_3; - longs[longsIdx + 6] = l6; - long l7 = (tmp[tmpIdx + 10] & MASK32_10) << 9; - l7 |= (tmp[tmpIdx + 11] >>> 4) & MASK32_9; - longs[longsIdx + 7] = l7; - long l8 = (tmp[tmpIdx + 11] & MASK32_4) << 15; - l8 |= (tmp[tmpIdx + 12] & MASK32_13) << 2; - l8 |= (tmp[tmpIdx + 13] >>> 11) & MASK32_2; - longs[longsIdx + 8] = l8; - long l9 = (tmp[tmpIdx + 13] & MASK32_11) << 8; - l9 
|= (tmp[tmpIdx + 14] >>> 5) & MASK32_8; - longs[longsIdx + 9] = l9; - long l10 = (tmp[tmpIdx + 14] & MASK32_5) << 14; - l10 |= (tmp[tmpIdx + 15] & MASK32_13) << 1; - l10 |= (tmp[tmpIdx + 16] >>> 12) & MASK32_1; - longs[longsIdx + 10] = l10; - long l11 = (tmp[tmpIdx + 16] & MASK32_12) << 7; - l11 |= (tmp[tmpIdx + 17] >>> 6) & MASK32_7; - longs[longsIdx + 11] = l11; - long l12 = (tmp[tmpIdx + 17] & MASK32_6) << 13; - l12 |= (tmp[tmpIdx + 18] & MASK32_13) << 0; - longs[longsIdx + 12] = l12; - } - } - - private static void decode20(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 160); - shiftLongs(tmp, 160, longs, 0, 12, MASK32_20); - for (int iter = 0, tmpIdx = 0, longsIdx = 160; iter < 32; ++iter, tmpIdx += 5, longsIdx += 3) { - long l0 = (tmp[tmpIdx + 0] & MASK32_12) << 8; - l0 |= (tmp[tmpIdx + 1] >>> 4) & MASK32_8; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK32_4) << 16; - l1 |= (tmp[tmpIdx + 2] & MASK32_12) << 4; - l1 |= (tmp[tmpIdx + 3] >>> 8) & MASK32_4; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 3] & MASK32_8) << 12; - l2 |= (tmp[tmpIdx + 4] & MASK32_12) << 0; - longs[longsIdx + 2] = l2; - } - } - - private static void decode21(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 168); - shiftLongs(tmp, 168, longs, 0, 11, MASK32_21); - for (int iter = 0, tmpIdx = 0, longsIdx = 168; iter < 8; ++iter, tmpIdx += 21, longsIdx += 11) { - long l0 = (tmp[tmpIdx + 0] & MASK32_11) << 10; - l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK32_10; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 1] & MASK32_1) << 20; - l1 |= (tmp[tmpIdx + 2] & MASK32_11) << 9; - l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_9; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 3] & MASK32_2) << 19; - l2 |= (tmp[tmpIdx + 4] & MASK32_11) << 8; - l2 |= (tmp[tmpIdx + 5] >>> 3) & MASK32_8; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 5] & MASK32_3) << 18; - l3 |= (tmp[tmpIdx + 6] & MASK32_11) << 7; - 
l3 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_7; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 7] & MASK32_4) << 17; - l4 |= (tmp[tmpIdx + 8] & MASK32_11) << 6; - l4 |= (tmp[tmpIdx + 9] >>> 5) & MASK32_6; - longs[longsIdx + 4] = l4; - long l5 = (tmp[tmpIdx + 9] & MASK32_5) << 16; - l5 |= (tmp[tmpIdx + 10] & MASK32_11) << 5; - l5 |= (tmp[tmpIdx + 11] >>> 6) & MASK32_5; - longs[longsIdx + 5] = l5; - long l6 = (tmp[tmpIdx + 11] & MASK32_6) << 15; - l6 |= (tmp[tmpIdx + 12] & MASK32_11) << 4; - l6 |= (tmp[tmpIdx + 13] >>> 7) & MASK32_4; - longs[longsIdx + 6] = l6; - long l7 = (tmp[tmpIdx + 13] & MASK32_7) << 14; - l7 |= (tmp[tmpIdx + 14] & MASK32_11) << 3; - l7 |= (tmp[tmpIdx + 15] >>> 8) & MASK32_3; - longs[longsIdx + 7] = l7; - long l8 = (tmp[tmpIdx + 15] & MASK32_8) << 13; - l8 |= (tmp[tmpIdx + 16] & MASK32_11) << 2; - l8 |= (tmp[tmpIdx + 17] >>> 9) & MASK32_2; - longs[longsIdx + 8] = l8; - long l9 = (tmp[tmpIdx + 17] & MASK32_9) << 12; - l9 |= (tmp[tmpIdx + 18] & MASK32_11) << 1; - l9 |= (tmp[tmpIdx + 19] >>> 10) & MASK32_1; - longs[longsIdx + 9] = l9; - long l10 = (tmp[tmpIdx + 19] & MASK32_10) << 11; - l10 |= (tmp[tmpIdx + 20] & MASK32_11) << 0; - longs[longsIdx + 10] = l10; - } - } - - private static void decode22(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 176); - shiftLongs(tmp, 176, longs, 0, 10, MASK32_22); - for (int iter = 0, tmpIdx = 0, longsIdx = 176; iter < 16; ++iter, tmpIdx += 11, longsIdx += 5) { - long l0 = (tmp[tmpIdx + 0] & MASK32_10) << 12; - l0 |= (tmp[tmpIdx + 1] & MASK32_10) << 2; - l0 |= (tmp[tmpIdx + 2] >>> 8) & MASK32_2; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 2] & MASK32_8) << 14; - l1 |= (tmp[tmpIdx + 3] & MASK32_10) << 4; - l1 |= (tmp[tmpIdx + 4] >>> 6) & MASK32_4; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 4] & MASK32_6) << 16; - l2 |= (tmp[tmpIdx + 5] & MASK32_10) << 6; - l2 |= (tmp[tmpIdx + 6] >>> 4) & MASK32_6; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 6] & 
MASK32_4) << 18; - l3 |= (tmp[tmpIdx + 7] & MASK32_10) << 8; - l3 |= (tmp[tmpIdx + 8] >>> 2) & MASK32_8; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 8] & MASK32_2) << 20; - l4 |= (tmp[tmpIdx + 9] & MASK32_10) << 10; - l4 |= (tmp[tmpIdx + 10] & MASK32_10) << 0; - longs[longsIdx + 4] = l4; - } - } - - private static void decode23(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 184); - shiftLongs(tmp, 184, longs, 0, 9, MASK32_23); - for (int iter = 0, tmpIdx = 0, longsIdx = 184; iter < 8; ++iter, tmpIdx += 23, longsIdx += 9) { - long l0 = (tmp[tmpIdx + 0] & MASK32_9) << 14; - l0 |= (tmp[tmpIdx + 1] & MASK32_9) << 5; - l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK32_5; - longs[longsIdx + 0] = l0; - long l1 = (tmp[tmpIdx + 2] & MASK32_4) << 19; - l1 |= (tmp[tmpIdx + 3] & MASK32_9) << 10; - l1 |= (tmp[tmpIdx + 4] & MASK32_9) << 1; - l1 |= (tmp[tmpIdx + 5] >>> 8) & MASK32_1; - longs[longsIdx + 1] = l1; - long l2 = (tmp[tmpIdx + 5] & MASK32_8) << 15; - l2 |= (tmp[tmpIdx + 6] & MASK32_9) << 6; - l2 |= (tmp[tmpIdx + 7] >>> 3) & MASK32_6; - longs[longsIdx + 2] = l2; - long l3 = (tmp[tmpIdx + 7] & MASK32_3) << 20; - l3 |= (tmp[tmpIdx + 8] & MASK32_9) << 11; - l3 |= (tmp[tmpIdx + 9] & MASK32_9) << 2; - l3 |= (tmp[tmpIdx + 10] >>> 7) & MASK32_2; - longs[longsIdx + 3] = l3; - long l4 = (tmp[tmpIdx + 10] & MASK32_7) << 16; - l4 |= (tmp[tmpIdx + 11] & MASK32_9) << 7; - l4 |= (tmp[tmpIdx + 12] >>> 2) & MASK32_7; - longs[longsIdx + 4] = l4; - long l5 = (tmp[tmpIdx + 12] & MASK32_2) << 21; - l5 |= (tmp[tmpIdx + 13] & MASK32_9) << 12; - l5 |= (tmp[tmpIdx + 14] & MASK32_9) << 3; - l5 |= (tmp[tmpIdx + 15] >>> 6) & MASK32_3; - longs[longsIdx + 5] = l5; - long l6 = (tmp[tmpIdx + 15] & MASK32_6) << 17; - l6 |= (tmp[tmpIdx + 16] & MASK32_9) << 8; - l6 |= (tmp[tmpIdx + 17] >>> 1) & MASK32_8; - longs[longsIdx + 6] = l6; - long l7 = (tmp[tmpIdx + 17] & MASK32_1) << 22; - l7 |= (tmp[tmpIdx + 18] & MASK32_9) << 13; - l7 |= (tmp[tmpIdx + 19] & MASK32_9) << 4; - 
l7 |= (tmp[tmpIdx + 20] >>> 5) & MASK32_4; - longs[longsIdx + 7] = l7; - long l8 = (tmp[tmpIdx + 20] & MASK32_5) << 18; - l8 |= (tmp[tmpIdx + 21] & MASK32_9) << 9; - l8 |= (tmp[tmpIdx + 22] & MASK32_9) << 0; - longs[longsIdx + 8] = l8; - } - } - - private static void decode24(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(tmp, 0, 192); - shiftLongs(tmp, 192, longs, 0, 8, MASK32_24); - shiftLongs(tmp, 192, tmp, 0, 0, MASK32_8); - for (int iter = 0, tmpIdx = 0, longsIdx = 192; iter < 64; ++iter, tmpIdx += 3, longsIdx += 1) { - long l0 = tmp[tmpIdx + 0] << 16; - l0 |= tmp[tmpIdx + 1] << 8; - l0 |= tmp[tmpIdx + 2] << 0; - longs[longsIdx + 0] = l0; - } - } - - private static void decode32(DataInput in, long[] tmp, long[] longs) throws IOException { - in.readLongs(longs, 0, 256); - } -} diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py b/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py deleted file mode 100644 index 761650cb60ab..000000000000 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/gen_ForUtil.py +++ /dev/null @@ -1,405 +0,0 @@ -#! /usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from fractions import gcd - -"""Code generation for ForUtil.java""" - -BLOCK_SIZE = 512 -MAX_SPECIALIZED_BITS_PER_VALUE = 24 -OUTPUT_FILE = "ForUtil.java" -PRIMITIVE_SIZE = [8, 16, 32] -SPECIALIZED_BITS_PER_VALUES = [i for i in range(1, MAX_SPECIALIZED_BITS_PER_VALUE + 1)] + [32] -HEADER = """// This file has been automatically generated, DO NOT EDIT - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.util.bkd; - -import java.io.IOException; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.DataOutput; - -// Inspired from https://fulmicoton.com/posts/bitpacking/ -// Encodes multiple integers in a long to get SIMD-like speedups. 
-// If bitsPerValue <= 8 then we pack 8 ints per long -// else if bitsPerValue <= 16 we pack 4 ints per long -// else we pack 2 ints per long -final class ForUtil { - - static final int BLOCK_SIZE = """ + str(BLOCK_SIZE) + """; - static final int BLOCK_SIZE_DIV_2 = BLOCK_SIZE >> 1; - private static final int BLOCK_SIZE_DIV_4 = BLOCK_SIZE >> 2; - private static final int BLOCK_SIZE_DIV_8 = BLOCK_SIZE >> 3; - private static final int BLOCK_SIZE_DIV_64 = BLOCK_SIZE >> 6; - private static final int BLOCK_SIZE_DIV_8_MUL_1 = BLOCK_SIZE_DIV_8; - private static final int BLOCK_SIZE_DIV_8_MUL_2 = BLOCK_SIZE_DIV_8 * 2; - private static final int BLOCK_SIZE_DIV_8_MUL_3 = BLOCK_SIZE_DIV_8 * 3; - private static final int BLOCK_SIZE_DIV_8_MUL_4 = BLOCK_SIZE_DIV_8 * 4; - private static final int BLOCK_SIZE_DIV_8_MUL_5 = BLOCK_SIZE_DIV_8 * 5; - private static final int BLOCK_SIZE_DIV_8_MUL_6 = BLOCK_SIZE_DIV_8 * 6; - private static final int BLOCK_SIZE_DIV_8_MUL_7 = BLOCK_SIZE_DIV_8 * 7; - - private static long expandMask32(long mask32) { - return mask32 | (mask32 << 32); - } - - private static long expandMask16(long mask16) { - return expandMask32(mask16 | (mask16 << 16)); - } - - private static long expandMask8(long mask8) { - return expandMask16(mask8 | (mask8 << 8)); - } - - private static long mask32(int bitsPerValue) { - return expandMask32((1L << bitsPerValue) - 1); - } - - private static long mask16(int bitsPerValue) { - return expandMask16((1L << bitsPerValue) - 1); - } - - private static long mask8(int bitsPerValue) { - return expandMask8((1L << bitsPerValue) - 1); - } - - private static void expand8(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { - long l = arr[i]; - arr[i] = (l >>> 56) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_1 + i] = (l >>> 48) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 40) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_3 + i] = (l >>> 32) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 24) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_5 + i] = (l >>> 
16) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = (l >>> 8) & 0xFFL; - arr[BLOCK_SIZE_DIV_8_MUL_7 + i] = l & 0xFFL; - } - } - - private static void collapse8(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_8; ++i) { - arr[i] = - (arr[i] << 56) - | (arr[BLOCK_SIZE_DIV_8_MUL_1 + i] << 48) - | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 40) - | (arr[BLOCK_SIZE_DIV_8_MUL_3 + i] << 32) - | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 24) - | (arr[BLOCK_SIZE_DIV_8_MUL_5 + i] << 16) - | (arr[BLOCK_SIZE_DIV_8_MUL_6 + i] << 8) - | arr[BLOCK_SIZE_DIV_8_MUL_7 + i]; - } - } - - private static void expand16(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { - long l = arr[i]; - arr[i] = (l >>> 48) & 0xFFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_2 + i] = (l >>> 32) & 0xFFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = (l >>> 16) & 0xFFFFL; - arr[BLOCK_SIZE_DIV_8_MUL_6 + i] = l & 0xFFFFL; - } - } - - private static void collapse16(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_4; ++i) { - arr[i] = - (arr[i] << 48) - | (arr[BLOCK_SIZE_DIV_8_MUL_2 + i] << 32) - | (arr[BLOCK_SIZE_DIV_8_MUL_4 + i] << 16) - | arr[BLOCK_SIZE_DIV_8_MUL_6 + i]; - } - } - - private static void expand32(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { - long l = arr[i]; - arr[i] = l >>> 32; - arr[BLOCK_SIZE_DIV_8_MUL_4 + i] = l & 0xFFFFFFFFL; - } - } - - private static void collapse32(long[] arr) { - for (int i = 0; i < BLOCK_SIZE_DIV_2; ++i) { - arr[i] = (arr[i] << 32) | arr[BLOCK_SIZE_DIV_8_MUL_4 + i]; - } - } - - private final long[] tmp = new long[BLOCK_SIZE_DIV_2]; - - /** Encode 128 integers from {@code longs} into {@code out}. 
*/ - void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException { - final int nextPrimitive; - final int numLongs; - if (bitsPerValue <= 8) { - nextPrimitive = 8; - numLongs = BLOCK_SIZE_DIV_8; - collapse8(longs); - } else if (bitsPerValue <= 16) { - nextPrimitive = 16; - numLongs = BLOCK_SIZE_DIV_4; - collapse16(longs); - } else { - nextPrimitive = 32; - numLongs = BLOCK_SIZE_DIV_2; - collapse32(longs); - } - - final int numLongsPerShift = bitsPerValue * BLOCK_SIZE_DIV_64; - int idx = 0; - int shift = nextPrimitive - bitsPerValue; - for (int i = 0; i < numLongsPerShift; ++i) { - tmp[i] = longs[idx++] << shift; - } - for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) { - for (int i = 0; i < numLongsPerShift; ++i) { - tmp[i] |= longs[idx++] << shift; - } - } - - final int remainingBitsPerLong = shift + bitsPerValue; - final long maskRemainingBitsPerLong; - if (nextPrimitive == 8) { - maskRemainingBitsPerLong = MASKS8[remainingBitsPerLong]; - } else if (nextPrimitive == 16) { - maskRemainingBitsPerLong = MASKS16[remainingBitsPerLong]; - } else { - maskRemainingBitsPerLong = MASKS32[remainingBitsPerLong]; - } - - int tmpIdx = 0; - int remainingBitsPerValue = bitsPerValue; - while (idx < numLongs) { - if (remainingBitsPerValue >= remainingBitsPerLong) { - remainingBitsPerValue -= remainingBitsPerLong; - tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & maskRemainingBitsPerLong; - if (remainingBitsPerValue == 0) { - idx++; - remainingBitsPerValue = bitsPerValue; - } - } else { - final long mask1, mask2; - if (nextPrimitive == 8) { - mask1 = MASKS8[remainingBitsPerValue]; - mask2 = MASKS8[remainingBitsPerLong - remainingBitsPerValue]; - } else if (nextPrimitive == 16) { - mask1 = MASKS16[remainingBitsPerValue]; - mask2 = MASKS16[remainingBitsPerLong - remainingBitsPerValue]; - } else { - mask1 = MASKS32[remainingBitsPerValue]; - mask2 = MASKS32[remainingBitsPerLong - remainingBitsPerValue]; - } - tmp[tmpIdx] |= (longs[idx++] 
& mask1) << (remainingBitsPerLong - remainingBitsPerValue); - remainingBitsPerValue = bitsPerValue - remainingBitsPerLong + remainingBitsPerValue; - tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & mask2; - } - } - - for (int i = 0; i < numLongsPerShift; ++i) { - out.writeLong(tmp[i]); - } - } - - private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs) - throws IOException { - final int numLongs = bitsPerValue * BLOCK_SIZE_DIV_64; - in.readLongs(tmp, 0, numLongs); - final long mask = MASKS32[bitsPerValue]; - int longsIdx = 0; - int shift = 32 - bitsPerValue; - for (; shift >= 0; shift -= bitsPerValue) { - shiftLongs(tmp, numLongs, longs, longsIdx, shift, mask); - longsIdx += numLongs; - } - final int remainingBitsPerLong = shift + bitsPerValue; - final long mask32RemainingBitsPerLong = MASKS32[remainingBitsPerLong]; - int tmpIdx = 0; - int remainingBits = remainingBitsPerLong; - for (; longsIdx < BLOCK_SIZE_DIV_2; ++longsIdx) { - int b = bitsPerValue - remainingBits; - long l = (tmp[tmpIdx++] & MASKS32[remainingBits]) << b; - while (b >= remainingBitsPerLong) { - b -= remainingBitsPerLong; - l |= (tmp[tmpIdx++] & mask32RemainingBitsPerLong) << b; - } - if (b > 0) { - l |= (tmp[tmpIdx] >>> (remainingBitsPerLong - b)) & MASKS32[b]; - remainingBits = remainingBitsPerLong - b; - } else { - remainingBits = remainingBitsPerLong; - } - longs[longsIdx] = l; - } - } - - /** - * The pattern that this shiftLongs method applies is recognized by the C2 compiler, which - * generates SIMD instructions for it in order to shift multiple longs at once. 
- */ - private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, long mask) { - for (int i = 0; i < count; ++i) { - b[bi + i] = (a[i] >>> shift) & mask; - } - } - -""" - -def writeRemainderWithSIMDOptimize(bpv, next_primitive, remaining_bits_per_long, o, num_values, f): - iteration = 1 - num_longs = bpv * num_values / remaining_bits_per_long - while num_longs % 2 == 0 and num_values % 2 == 0: - num_longs /= 2 - num_values /= 2 - iteration *= 2 - - f.write(' shiftLongs(tmp, %d, tmp, 0, 0, MASK%d_%d);\n' % (iteration * num_longs, next_primitive, remaining_bits_per_long)) - f.write(' for (int iter = 0, tmpIdx = 0, longsIdx = %d; iter < %d; ++iter, tmpIdx += %d, longsIdx += %d) {\n' %(o, iteration, num_longs, num_values)) - tmp_idx = 0 - b = bpv - b -= remaining_bits_per_long - f.write(' long l0 = tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b)) - tmp_idx += 1 - while b >= remaining_bits_per_long: - b -= remaining_bits_per_long - f.write(' l0 |= tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b)) - tmp_idx += 1 - f.write(' longs[longsIdx + 0] = l0;\n') - f.write(' }\n') - - -def writeRemainder(bpv, next_primitive, remaining_bits_per_long, o, num_values, f): - iteration = 1 - num_longs = bpv * num_values / remaining_bits_per_long - while num_longs % 2 == 0 and num_values % 2 == 0: - num_longs /= 2 - num_values /= 2 - iteration *= 2 - f.write(' for (int iter = 0, tmpIdx = 0, longsIdx = %d; iter < %d; ++iter, tmpIdx += %d, longsIdx += %d) {\n' %(o, iteration, num_longs, num_values)) - i = 0 - remaining_bits = 0 - tmp_idx = 0 - for i in range(num_values): - b = bpv - if remaining_bits == 0: - b -= remaining_bits_per_long - f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b)) - else: - b -= remaining_bits - f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits, b)) - tmp_idx += 1 - while b >= remaining_bits_per_long: - b -= remaining_bits_per_long - 
f.write(' l%d |= (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b)) - tmp_idx += 1 - if b > 0: - f.write(' l%d |= (tmp[tmpIdx + %d] >>> %d) & MASK%d_%d;\n' %(i, tmp_idx, remaining_bits_per_long-b, next_primitive, b)) - remaining_bits = remaining_bits_per_long-b - f.write(' longs[longsIdx + %d] = l%d;\n' %(i, i)) - f.write(' }\n') - - -def writeDecode(bpv, f): - next_primitive = 32 - if bpv <= 8: - next_primitive = 8 - elif bpv <= 16: - next_primitive = 16 - f.write(' private static void decode%d(DataInput in, long[] tmp, long[] longs) throws IOException {\n' %bpv) - num_values_per_long = 64 / next_primitive - if bpv == next_primitive: - f.write(' in.readLongs(longs, 0, %d);\n' %(bpv*(BLOCK_SIZE / 64))) - else: - f.write(' in.readLongs(tmp, 0, %d);\n' %(bpv*(BLOCK_SIZE / 64))) - shift = next_primitive - bpv - o = 0 - while shift >= 0: - f.write(' shiftLongs(tmp, %d, longs, %d, %d, MASK%d_%d);\n' %(bpv*(BLOCK_SIZE / 64), o, shift, next_primitive, bpv)) - o += bpv*(BLOCK_SIZE / 64) - shift -= bpv - if shift + bpv > 0: - if bpv % (next_primitive % bpv) == 0: - writeRemainderWithSIMDOptimize(bpv, next_primitive, shift + bpv, o, BLOCK_SIZE/num_values_per_long - o, f) - else: - writeRemainder(bpv, next_primitive, shift + bpv, o, BLOCK_SIZE/num_values_per_long - o, f) - f.write(' }\n') - - -if __name__ == '__main__': - f = open(OUTPUT_FILE, 'w') - f.write(HEADER) - for primitive_size in PRIMITIVE_SIZE: - f.write(' private static final long[] MASKS%d = new long[%d];\n' %(primitive_size, primitive_size)) - f.write('\n') - f.write(' static {\n') - for primitive_size in PRIMITIVE_SIZE: - f.write(' for (int i = 0; i < %d; ++i) {\n' %primitive_size) - f.write(' MASKS%d[i] = mask%d(i);\n' %(primitive_size, primitive_size)) - f.write(' }\n') - f.write(' }') - f.write(""" - // mark values in array as final longs to avoid the cost of reading array, arrays should only be - // used when the idx is a variable -""") - for primitive_size 
in PRIMITIVE_SIZE: - for bpv in range(1, min(MAX_SPECIALIZED_BITS_PER_VALUE + 1, primitive_size)): - if bpv * 2 != primitive_size or primitive_size == 8: - f.write(' private static final long MASK%d_%d = MASKS%d[%d];\n' %(primitive_size, bpv, primitive_size, bpv)) - - f.write(""" - /** Decode 128 integers into {@code longs}. */ - void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException { - switch (bitsPerValue) { -""") - for bpv in SPECIALIZED_BITS_PER_VALUES: - next_primitive = 32 - if bpv <= 8: - next_primitive = 8 - elif bpv <= 16: - next_primitive = 16 - f.write(' case %d:\n' %bpv) - f.write(' decode%d(in, tmp, longs);\n' %bpv) - f.write(' expand%d(longs);\n' %next_primitive) - f.write(' break;\n') - f.write(' default:\n') - f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n') - f.write(' expand32(longs);\n') - f.write(' break;\n') - f.write(' }\n') - f.write(' }\n') - - f.write('\n') - for i in SPECIALIZED_BITS_PER_VALUES: - writeDecode(i, f) - if i < MAX_SPECIALIZED_BITS_PER_VALUE: - f.write('\n') - - f.write('}\n') \ No newline at end of file From 634e56e56eb3bb8969a63d2a15bd52471d901bcf Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 6 Jan 2022 22:51:41 +0800 Subject: [PATCH 11/34] make diff a bit more beautiful --- .../apache/lucene/util/bkd/DocIdsWriter.java | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index b648b6b489df..45abff44284e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -198,24 +198,24 @@ void readInts(IndexInput in, int count, long[] docIDs) throws IOException { case BITSET_IDS: readBitSet(in, count, docIDs); break; - case DELTA_VINT: - readDeltaVInts(in, count, docIDs); - break; case DELTA_FOR_UTIL: readBKDForUtilDelta(in, count, 
docIDs); break; - case BPV_32: - readInts32(in, count, docIDs); - break; case BPV_32_FOR_UTIL: readBKDForUtil32(in, count, docIDs); break; - case BPV_24: - readInts24(in, count, docIDs); - break; case BPV_24_FOR_UTIL: readBKDForUtil24(in, count, docIDs); break; + case DELTA_VINT: + readDeltaVInts(in, count, docIDs); + break; + case BPV_32: + readInts32(in, count, docIDs); + break; + case BPV_24: + readInts24(in, count, docIDs); + break; default: throw new IOException("Unsupported number of bits per value: " + bpv); } @@ -311,24 +311,24 @@ void readInts(IndexInput in, int count, IntersectVisitor visitor) throws IOExcep case BITSET_IDS: readBitSet(in, count, visitor); break; - case DELTA_VINT: - readDeltaVInts(in, count, visitor); - break; case DELTA_FOR_UTIL: readBKDForUtilDelta(in, count, visitor); break; - case BPV_32: - readInts32(in, count, visitor); - break; case BPV_32_FOR_UTIL: readBKDForUtil32(in, count, visitor); break; - case BPV_24: - readInts24(in, count, visitor); - break; case BPV_24_FOR_UTIL: readBKDForUtil24(in, count, visitor); break; + case DELTA_VINT: + readDeltaVInts(in, count, visitor); + break; + case BPV_32: + readInts32(in, count, visitor); + break; + case BPV_24: + readInts24(in, count, visitor); + break; default: throw new IOException("Unsupported number of bits per value: " + bpv); } From ffdfb26404b7b30460fda4bc0a8f6a8e2cee19bc Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 6 Jan 2022 23:49:47 +0800 Subject: [PATCH 12/34] iter --- .../src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 45abff44284e..8cbde00683d6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -35,11 +35,13 @@ class DocIdsWriter { private static final 
byte BPV_32 = (byte) 32; private static final byte BPV_32_FOR_UTIL = (byte) 32 + 32; + private final boolean usingDefaultBlockSize; private final BKDForUtil forUtil = new BKDForUtil(); private final long[] scratch; DocIdsWriter(int maxPointsInLeaf) { scratch = new long[maxPointsInLeaf]; + usingDefaultBlockSize = maxPointsInLeaf == BKDForUtil.BLOCK_SIZE; } void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { @@ -77,7 +79,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) - if (count == BKDForUtil.BLOCK_SIZE) { + if (usingDefaultBlockSize && count == BKDForUtil.BLOCK_SIZE) { if (sorted && Integer.toUnsignedLong(min2max) <= 0xFFFFL) { out.writeByte(DELTA_FOR_UTIL); long[] delta = new long[count]; From 92e67109c0b66941a4bc362f8c64ac519677e5e6 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 6 Jan 2022 23:52:10 +0800 Subject: [PATCH 13/34] assert count --- .../org/apache/lucene/util/bkd/DocIdsWriter.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 8cbde00683d6..5d072c5a7e81 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -224,19 +224,19 @@ void readInts(IndexInput in, int count, long[] docIDs) throws IOException { } private void readBKDForUtilDelta(IndexInput in, int count, long[] docIDs) throws IOException { - assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; + assert count == scratch.length; final long min = in.readVInt(); forUtil.decode16(in, docIDs); plus(docIDs, min, count); } private void readBKDForUtil24(IndexInput in, int count, long[] docIDs) throws IOException { - assert count == 
BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; + assert count == scratch.length; forUtil.decode24(in, docIDs); } private void readBKDForUtil32(IndexInput in, int count, long[] docIDs) throws IOException { - assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; + assert count == scratch.length; forUtil.decode32(in, docIDs); } @@ -392,7 +392,7 @@ private static void readContinuousIds(IndexInput in, int count, IntersectVisitor private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; + assert count == scratch.length; final int min = in.readVInt(); forUtil.decode16(in, scratch); plus(scratch, min, count); @@ -403,7 +403,7 @@ private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visi private void readBKDForUtil24(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; + assert count == scratch.length; forUtil.decode24(in, scratch); for (long l : scratch) { visitor.visit((int) l); @@ -412,7 +412,7 @@ private void readBKDForUtil24(IndexInput in, int count, IntersectVisitor visitor private void readBKDForUtil32(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; + assert count == scratch.length; forUtil.decode32(in, scratch); for (long l : scratch) { visitor.visit((int) l); From 8c70d9c48fedbb3b156b9c44e9f96244fa2b26ad Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 6 Jan 2022 23:53:38 +0800 Subject: [PATCH 14/34] make writer final --- .../core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 5d072c5a7e81..1aebeb3e7afb 100644 --- 
a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -24,7 +24,7 @@ import org.apache.lucene.util.DocBaseBitSetIterator; import org.apache.lucene.util.FixedBitSet; -class DocIdsWriter { +final class DocIdsWriter { private static final byte CONTINUOUS_IDS = (byte) -2; private static final byte BITSET_IDS = (byte) -1; From cafe4fc82148c0cf904347fbababd46a794d8c1c Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Fri, 7 Jan 2022 00:08:23 +0800 Subject: [PATCH 15/34] iter --- .../apache/lucene/util/bkd/DocIdsWriter.java | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 1aebeb3e7afb..9177654b775a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -35,13 +35,11 @@ final class DocIdsWriter { private static final byte BPV_32 = (byte) 32; private static final byte BPV_32_FOR_UTIL = (byte) 32 + 32; - private final boolean usingDefaultBlockSize; private final BKDForUtil forUtil = new BKDForUtil(); private final long[] scratch; DocIdsWriter(int maxPointsInLeaf) { scratch = new long[maxPointsInLeaf]; - usingDefaultBlockSize = maxPointsInLeaf == BKDForUtil.BLOCK_SIZE; } void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { @@ -79,7 +77,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) - if (usingDefaultBlockSize && count == BKDForUtil.BLOCK_SIZE) { + if (count == BKDForUtil.BLOCK_SIZE) { if (sorted && Integer.toUnsignedLong(min2max) <= 0xFFFFL) { out.writeByte(DELTA_FOR_UTIL); long[] delta = new long[count]; @@ -224,19 +222,19 @@ void 
readInts(IndexInput in, int count, long[] docIDs) throws IOException { } private void readBKDForUtilDelta(IndexInput in, int count, long[] docIDs) throws IOException { - assert count == scratch.length; + assert count == BKDForUtil.BLOCK_SIZE; final long min = in.readVInt(); forUtil.decode16(in, docIDs); plus(docIDs, min, count); } private void readBKDForUtil24(IndexInput in, int count, long[] docIDs) throws IOException { - assert count == scratch.length; + assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode24(in, docIDs); } private void readBKDForUtil32(IndexInput in, int count, long[] docIDs) throws IOException { - assert count == scratch.length; + assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode32(in, docIDs); } @@ -392,30 +390,30 @@ private static void readContinuousIds(IndexInput in, int count, IntersectVisitor private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == scratch.length; + assert count == BKDForUtil.BLOCK_SIZE; final int min = in.readVInt(); forUtil.decode16(in, scratch); plus(scratch, min, count); - for (long l : scratch) { - visitor.visit((int) l); + for (int i = 0; i < count; i++) { + visitor.visit((int) scratch[i]); } } private void readBKDForUtil24(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == scratch.length; + assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode24(in, scratch); - for (long l : scratch) { - visitor.visit((int) l); + for (int i = 0; i < count; i++) { + visitor.visit((int) scratch[i]); } } private void readBKDForUtil32(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == scratch.length; + assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode32(in, scratch); - for (long l : scratch) { - visitor.visit((int) l); + for (int i = 0; i < count; i++) { + visitor.visit((int) scratch[i]); } } From a38794980ca9233451443fe03f25a06f49f0092c Mon Sep 17 00:00:00 2001 From: "guofeng.my" 
Date: Fri, 7 Jan 2022 02:58:44 +0800 Subject: [PATCH 16/34] try to make remainder also SIMD --- .../apache/lucene/util/bkd/BKDForUtil.java | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index 15ceacd476e5..382e51a2646f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -91,27 +91,17 @@ void encode24(long[] longs, DataOutput out) throws IOException { for (int i = 0; i < 192; ++i) { tmp[i] = longs[i] << 8; } - int tmpIdx = 0; - for (int i = 192; i < 256; i++) { - tmp[tmpIdx++] |= (longs[i] >>> 16) & MASK32_8; - tmp[tmpIdx++] |= (longs[i] >>> 8) & MASK32_8; - tmp[tmpIdx++] |= longs[i] & MASK32_8; + for (int i = 0; i < 64; i++) { + final int longIdx = i + 192; + tmp[i] |= (longs[longIdx] >>> 16) & MASK32_8; + tmp[i + 64] |= (longs[longIdx] >>> 8) & MASK32_8; + tmp[i + 128] |= longs[longIdx] & MASK32_8; } for (int i = 0; i < 192; ++i) { out.writeLong(tmp[i]); } } - /** - * The pattern that this shiftLongs method applies is recognized by the C2 compiler, which - * generates SIMD instructions for it in order to shift multiple longs at once. 
- */ - private static void shiftLongs(long[] a, int count, long[] b, int shift, long mask) { - for (int i = 0; i < count; ++i) { - b[i] = (a[i] >>> shift) & mask; - } - } - void decode16(DataInput in, long[] longs) throws IOException { in.readLongs(longs, 0, 128); expand16(longs); @@ -119,13 +109,12 @@ void decode16(DataInput in, long[] longs) throws IOException { void decode24(DataInput in, long[] longs) throws IOException { in.readLongs(tmp, 0, 192); - shiftLongs(tmp, 192, longs, 8, MASK32_24); - shiftLongs(tmp, 192, tmp, 0, MASK32_8); - for (int iter = 0, tmpIdx = 0, longsIdx = 192; iter < 64; ++iter, tmpIdx += 3, longsIdx += 1) { - long l0 = tmp[tmpIdx] << 16; - l0 |= tmp[tmpIdx + 1] << 8; - l0 |= tmp[tmpIdx + 2]; - longs[longsIdx] = l0; + for (int i = 0; i < 192; ++i) { + longs[i] = (tmp[i] >>> 8) & MASK32_24; + } + for (int i = 0; i < 64; i++) { + longs[i + 192] = + ((tmp[i] & MASK32_8) << 16) | ((tmp[i + 64] & MASK32_8) << 8) | (tmp[i + 128] & MASK32_8); } expand32(longs); } From 7aa92ea763e207eef7fcc887a571966efc68c83e Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Fri, 7 Jan 2022 03:43:03 +0800 Subject: [PATCH 17/34] plus when expand --- .../org/apache/lucene/util/bkd/BKDForUtil.java | 14 +++++++------- .../org/apache/lucene/util/bkd/DocIdsWriter.java | 12 ++---------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index 382e51a2646f..aeafbbd0d028 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -40,13 +40,13 @@ private static long mask32(int bitsPerValue) { return expandMask32((1L << bitsPerValue) - 1); } - private static void expand16(long[] arr) { + private static void expand16(long[] arr, final long base) { for (int i = 0; i < 128; ++i) { long l = arr[i]; - arr[i] = (l >>> 48) & 0xFFFFL; - arr[128 + i] = (l 
>>> 32) & 0xFFFFL; - arr[256 + i] = (l >>> 16) & 0xFFFFL; - arr[384 + i] = l & 0xFFFFL; + arr[i] = ((l >>> 48) & 0xFFFFL) + base; + arr[128 + i] = ((l >>> 32) & 0xFFFFL) + base; + arr[256 + i] = ((l >>> 16) & 0xFFFFL) + base; + arr[384 + i] = (l & 0xFFFFL) + base; } } @@ -102,9 +102,9 @@ void encode24(long[] longs, DataOutput out) throws IOException { } } - void decode16(DataInput in, long[] longs) throws IOException { + void decode16(DataInput in, long[] longs, long base) throws IOException { in.readLongs(longs, 0, 128); - expand16(longs); + expand16(longs, base); } void decode24(DataInput in, long[] longs) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 9177654b775a..acc0a1ac9d2f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -224,8 +224,7 @@ void readInts(IndexInput in, int count, long[] docIDs) throws IOException { private void readBKDForUtilDelta(IndexInput in, int count, long[] docIDs) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; final long min = in.readVInt(); - forUtil.decode16(in, docIDs); - plus(docIDs, min, count); + forUtil.decode16(in, docIDs, min); } private void readBKDForUtil24(IndexInput in, int count, long[] docIDs) throws IOException { @@ -392,8 +391,7 @@ private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visi throws IOException { assert count == BKDForUtil.BLOCK_SIZE; final int min = in.readVInt(); - forUtil.decode16(in, scratch); - plus(scratch, min, count); + forUtil.decode16(in, scratch, min); for (int i = 0; i < count; i++) { visitor.visit((int) scratch[i]); } @@ -416,10 +414,4 @@ private void readBKDForUtil32(IndexInput in, int count, IntersectVisitor visitor visitor.visit((int) scratch[i]); } } - - private static void plus(long[] a, long min, int count) { - for (int i = 
0; i < count; i++) { - a[i] = a[i] + min; - } - } } From 6ff0cec5705ea7325094f40e08da80e89be011f7 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Fri, 7 Jan 2022 13:07:58 +0800 Subject: [PATCH 18/34] judge cluster should not rely on sorted --- .../apache/lucene/util/bkd/DocIdsWriter.java | 22 ++++++++----------- .../lucene/util/bkd/TestDocIdsWriter.java | 20 +++++++++++++++++ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index acc0a1ac9d2f..72dfde8753c1 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -47,18 +47,21 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx // or when a segment is sorted boolean sorted = true; boolean strictlySorted = true; + int min = docIds[0]; + int max = docIds[0]; for (int i = 1; i < count; ++i) { int last = docIds[start + i - 1]; int current = docIds[start + i]; if (last > current) { sorted = strictlySorted = false; - break; } else if (last == current) { strictlySorted = false; } + min = Math.min(min, current); + max = Math.max(max, current); } - int min2max = docIds[start + count - 1] - docIds[start] + 1; + int min2max = max - min + 1; if (strictlySorted) { if (min2max == count) { // continuous ids, typically happens when segment is sorted @@ -78,23 +81,20 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) if (count == BKDForUtil.BLOCK_SIZE) { - if (sorted && Integer.toUnsignedLong(min2max) <= 0xFFFFL) { + if (Integer.toUnsignedLong(min2max) <= 0xFFFFL) { out.writeByte(DELTA_FOR_UTIL); long[] delta = new long[count]; - final int min = docIds[start]; for (int i = 0; i < count; i++) { delta[i] = docIds[i] - min; } - 
out.writeVInt(docIds[start]); + out.writeVInt(min); forUtil.encode16(delta, out); } else { long[] longs = new long[count]; - long max = 0; for (int i = 0; i < count; i++) { longs[i] = docIds[i]; - max |= longs[i] & 0xffffffffL; } - if (max <= 0xffffff) { + if (Integer.toUnsignedLong(max) <= 0xFFFFFFL) { out.writeByte(BPV_24_FOR_UTIL); forUtil.encode24(longs, out); } else { @@ -114,11 +114,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx previous = doc; } } else { - long max = 0; - for (int i = 0; i < count; ++i) { - max |= Integer.toUnsignedLong(docIds[start + i]); - } - if (max <= 0xffffff) { + if (Integer.toUnsignedLong(max) <= 0xffffff) { out.writeByte(BPV_24); // write them the same way we are reading them. int i; diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java index 27c4309c0136..1606bc298b88 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java @@ -71,6 +71,26 @@ private void sorted(IntSupplier size) throws Exception { } } + public void testCluster() throws Exception { + cluster(() -> BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE); + cluster(() -> 1 + random().nextInt(5000)); + } + + private void cluster(IntSupplier size) throws Exception { + int numIters = atLeast(100); + try (Directory dir = newDirectory()) { + for (int iter = 0; iter < numIters; ++iter) { + int[] docIDs = new int[size.getAsInt()]; + int min = random().nextInt(1000); + final int bpv = TestUtil.nextInt(random(), 1, 16); + for (int i = 0; i < docIDs.length; ++i) { + docIDs[i] = min + TestUtil.nextInt(random(), 0, (1 << bpv) - 1); + } + test(dir, docIDs); + } + } + } + public void testBitSet() throws Exception { int numIters = atLeast(100); try (Directory dir = newDirectory()) { From 1f09b3a14ddba99bede843b43a9ebd31fa51534c Mon Sep 17 00:00:00 2001 
From: "guofeng.my" Date: Fri, 7 Jan 2022 13:09:44 +0800 Subject: [PATCH 19/34] add an assert --- .../core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 72dfde8753c1..c42e5eeeb81b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -39,6 +39,7 @@ final class DocIdsWriter { private final long[] scratch; DocIdsWriter(int maxPointsInLeaf) { + assert BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE == BKDForUtil.BLOCK_SIZE; scratch = new long[maxPointsInLeaf]; } From 2708036cdc46fa7e22376f2b485b2801e6193a08 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Fri, 7 Jan 2022 13:17:17 +0800 Subject: [PATCH 20/34] assert is making CI angry, remove --- .../core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index c42e5eeeb81b..72dfde8753c1 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -39,7 +39,6 @@ final class DocIdsWriter { private final long[] scratch; DocIdsWriter(int maxPointsInLeaf) { - assert BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE == BKDForUtil.BLOCK_SIZE; scratch = new long[maxPointsInLeaf]; } From f44f260dd3d11ccc77b85e2dc2362bf4f8030c0d Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 13 Jan 2022 16:33:48 +0800 Subject: [PATCH 21/34] use int --- .../store/EndiannessReverserIndexInput.java | 8 ++ .../apache/lucene/store/ByteBufferGuard.java | 6 + .../lucene/store/ByteBufferIndexInput.java | 34 ++++++ .../org/apache/lucene/store/DataInput.java | 7 ++ 
.../apache/lucene/util/bkd/BKDForUtil.java | 104 ++++++------------ .../org/apache/lucene/util/bkd/BKDReader.java | 12 +- .../apache/lucene/util/bkd/DocIdsWriter.java | 42 ++++--- .../lucene/util/bkd/TestDocIdsWriter.java | 4 +- 8 files changed, 115 insertions(+), 102 deletions(-) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java index d7d0b76853e0..4a1c0566ffc6 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java @@ -80,6 +80,14 @@ public void readLongs(long[] dst, int offset, int length) throws IOException { } } + @Override + public void readInts(int[] dst, int offset, int length) throws IOException { + in.readInts(dst, offset, length); + for (int i = 0; i < length; ++i) { + dst[offset + i] = Integer.reverseBytes(dst[offset + i]); + } + } + @Override public void readFloats(float[] dst, int offset, int length) throws IOException { in.readFloats(dst, offset, length); diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBufferGuard.java b/lucene/core/src/java/org/apache/lucene/store/ByteBufferGuard.java index 68658d64ee8e..2d75597f9deb 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBufferGuard.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBufferGuard.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.FloatBuffer; +import java.nio.IntBuffer; import java.nio.LongBuffer; import java.util.concurrent.atomic.AtomicInteger; @@ -138,6 +139,11 @@ public void getLongs(LongBuffer receiver, long[] dst, int offset, int length) { receiver.get(dst, offset, length); } + public void getInts(IntBuffer receiver, int[] dst, int offset, int length) { + 
ensureValid(); + receiver.get(dst, offset, length); + } + public void getFloats(FloatBuffer receiver, float[] dst, int offset, int length) { ensureValid(); receiver.get(dst, offset, length); diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java index 8469b987010a..4623c4b14aaf 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java @@ -22,6 +22,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.FloatBuffer; +import java.nio.IntBuffer; import java.nio.LongBuffer; /** @@ -36,6 +37,7 @@ public abstract class ByteBufferIndexInput extends IndexInput implements RandomAccessInput { private static final FloatBuffer EMPTY_FLOATBUFFER = FloatBuffer.allocate(0); private static final LongBuffer EMPTY_LONGBUFFER = LongBuffer.allocate(0); + private static final IntBuffer EMPTY_INTBUFFER = IntBuffer.allocate(0); protected final long length; protected final long chunkSizeMask; @@ -46,6 +48,7 @@ public abstract class ByteBufferIndexInput extends IndexInput implements RandomA protected int curBufIndex = -1; protected ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex] private LongBuffer[] curLongBufferViews; + private IntBuffer[] curIntBufferViews; private FloatBuffer[] curFloatBufferViews; protected boolean isClone = false; @@ -176,6 +179,37 @@ public void readLongs(long[] dst, int offset, int length) throws IOException { } } + @Override + public void readInts(int[] dst, int offset, int length) throws IOException { + // See notes about readLongs above + if (curIntBufferViews == null) { + curIntBufferViews = new IntBuffer[Integer.BYTES]; + for (int i = 0; i < Integer.BYTES; ++i) { + if (i < curBuf.limit()) { + curIntBufferViews[i] = + curBuf.duplicate().position(i).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer(); + } else { + 
curIntBufferViews[i] = EMPTY_INTBUFFER; + } + } + } + try { + final int position = curBuf.position(); + guard.getInts( + curIntBufferViews[position & 0x03].position(position >>> 2), dst, offset, length); + // if the above call succeeded, then we know the below sum cannot overflow + curBuf.position(position + (length << 2)); + } catch ( + @SuppressWarnings("unused") + BufferUnderflowException e) { + super.readInts(dst, offset, length); + } catch ( + @SuppressWarnings("unused") + NullPointerException npe) { + throw new AlreadyClosedException("Already closed: " + this); + } + } + @Override public final void readFloats(float[] floats, int offset, int len) throws IOException { // See notes about readLongs above diff --git a/lucene/core/src/java/org/apache/lucene/store/DataInput.java b/lucene/core/src/java/org/apache/lucene/store/DataInput.java index d935cb3930d1..ba8677d45349 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/DataInput.java @@ -169,6 +169,13 @@ public void readLongs(long[] dst, int offset, int length) throws IOException { } } + public void readInts(int[] dst, int offset, int length) throws IOException { + Objects.checkFromIndexSize(offset, length, dst.length); + for (int i = 0; i < length; ++i) { + dst[offset + i] = readInt(); + } + } + /** * Reads a specified number of floats into an array at the specified offset. 
* diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index aeafbbd0d028..ea08e843648e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -29,98 +29,60 @@ final class BKDForUtil { static final int BLOCK_SIZE = 512; - private static final long MASK32_8 = mask32(8); - private static final long MASK32_24 = mask32(24); - private static long expandMask32(long mask32) { - return mask32 | (mask32 << 32); - } - - private static long mask32(int bitsPerValue) { - return expandMask32((1L << bitsPerValue) - 1); - } + private final int[] tmp = new int[384]; - private static void expand16(long[] arr, final long base) { - for (int i = 0; i < 128; ++i) { - long l = arr[i]; - arr[i] = ((l >>> 48) & 0xFFFFL) + base; - arr[128 + i] = ((l >>> 32) & 0xFFFFL) + base; - arr[256 + i] = ((l >>> 16) & 0xFFFFL) + base; - arr[384 + i] = (l & 0xFFFFL) + base; + void encode16(int[] ints, DataOutput out) throws IOException { + for (int i = 0; i < 256; ++i) { + ints[i] = ints[256 + i] | (ints[i] << 16); } - } - - private static void collapse16(long[] arr) { - for (int i = 0; i < 128; ++i) { - arr[i] = (arr[i] << 48) | (arr[128 + i] << 32) | (arr[256 + i] << 16) | arr[384 + i]; + for (int i = 0; i < 256; i++) { + out.writeInt(ints[i]); } } - private static void expand32(long[] arr) { - for (int i = 0; i < 256; ++i) { - long l = arr[i]; - arr[i] = l >>> 32; - arr[256 + i] = l & 0xFFFFFFFFL; + void encode32(int off, int[] ints, DataOutput out) throws IOException { + for (int i = 0; i < 512; i++) { + out.writeInt(ints[off + i]); } } - private static void collapse32(long[] arr) { - for (int i = 0; i < 256; ++i) { - arr[i] = (arr[i] << 32) | arr[256 + i]; + void encode24(int off, int[] ints, DataOutput out) throws IOException { + for (int i = 0; i < 384; ++i) { + tmp[i] = ints[off + i] << 8; } - } - - private final 
long[] tmp = new long[256]; - - void encode16(long[] longs, DataOutput out) throws IOException { - collapse16(longs); for (int i = 0; i < 128; i++) { - out.writeLong(longs[i]); + final int longIdx = off + i + 384; + tmp[i] |= (ints[longIdx] >>> 16) & 0xFF; + tmp[i + 128] |= (ints[longIdx] >>> 8) & 0xFF; + tmp[i + 256] |= ints[longIdx] & 0xFF; } - } - - void encode32(long[] longs, DataOutput out) throws IOException { - collapse32(longs); - for (int i = 0; i < 256; i++) { - out.writeLong(longs[i]); + for (int i = 0; i < 384; ++i) { + out.writeInt(tmp[i]); } } - void encode24(long[] longs, DataOutput out) throws IOException { - collapse32(longs); - for (int i = 0; i < 192; ++i) { - tmp[i] = longs[i] << 8; - } - for (int i = 0; i < 64; i++) { - final int longIdx = i + 192; - tmp[i] |= (longs[longIdx] >>> 16) & MASK32_8; - tmp[i + 64] |= (longs[longIdx] >>> 8) & MASK32_8; - tmp[i + 128] |= longs[longIdx] & MASK32_8; - } - for (int i = 0; i < 192; ++i) { - out.writeLong(tmp[i]); + void decode16(DataInput in, int[] ints, final int base) throws IOException { + in.readInts(ints, 0, 256); + for (int i = 0; i < 256; ++i) { + int l = ints[i]; + ints[i] = (l >>> 16) + base; + ints[256 + i] = (l & 0xFFFF) + base; } } - void decode16(DataInput in, long[] longs, long base) throws IOException { - in.readLongs(longs, 0, 128); - expand16(longs, base); - } - - void decode24(DataInput in, long[] longs) throws IOException { - in.readLongs(tmp, 0, 192); - for (int i = 0; i < 192; ++i) { - longs[i] = (tmp[i] >>> 8) & MASK32_24; + void decode24(DataInput in, int[] ints) throws IOException { + in.readInts(tmp, 0, 384); + for (int i = 0; i < 384; ++i) { + ints[i] = tmp[i] >>> 8; } - for (int i = 0; i < 64; i++) { - longs[i + 192] = - ((tmp[i] & MASK32_8) << 16) | ((tmp[i + 64] & MASK32_8) << 8) | (tmp[i + 128] & MASK32_8); + for (int i = 0; i < 128; i++) { + ints[i + 384] = + ((tmp[i] & 0xFF) << 16) | ((tmp[i + 128] & 0xFF) << 8) | (tmp[i + 256] & 0xFF); } - expand32(longs); } - void 
decode32(DataInput in, long[] longs) throws IOException { - in.readLongs(longs, 0, 256); - expand32(longs); + void decode32(DataInput in, int[] ints) throws IOException { + in.readInts(ints, 0, 512); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 97741c6ed1d0..42823f0f3551 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -702,7 +702,7 @@ private void visitDocValuesNoCardinality( visitor.grow(count); if (r == PointValues.Relation.CELL_INSIDE_QUERY) { for (int i = 0; i < count; ++i) { - visitor.visit((int) scratchIterator.docIDs[i]); + visitor.visit(scratchIterator.docIDs[i]); } return; } @@ -767,7 +767,7 @@ private void visitDocValuesWithCardinality( if (r == PointValues.Relation.CELL_INSIDE_QUERY) { for (int i = 0; i < count; ++i) { - visitor.visit((int) scratchIterator.docIDs[i]); + visitor.visit(scratchIterator.docIDs[i]); } return; } @@ -867,7 +867,7 @@ private void visitCompressedDocValues( in.readBytes( scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix); } - visitor.visit((int) scratchIterator.docIDs[i + j], scratchPackedValue); + visitor.visit(scratchIterator.docIDs[i + j], scratchPackedValue); } i += runLen; } @@ -947,11 +947,11 @@ private static class BKDReaderDocIDSetIterator extends DocIdSetIterator { private int length; private int offset; private int docID; - final long[] docIDs; + final int[] docIDs; private final DocIdsWriter docIdsWriter; public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) { - this.docIDs = new long[maxPointsInLeafNode]; + this.docIDs = new int[maxPointsInLeafNode]; this.docIdsWriter = new DocIdsWriter(maxPointsInLeafNode); } @@ -973,7 +973,7 @@ public int nextDoc() throws IOException { if (idx == length) { docID = DocIdSetIterator.NO_MORE_DOCS; } else { - docID = (int) docIDs[offset + idx]; + 
docID = docIDs[offset + idx]; idx++; } return docID; diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 72dfde8753c1..e995e598c435 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -36,10 +36,10 @@ final class DocIdsWriter { private static final byte BPV_32_FOR_UTIL = (byte) 32 + 32; private final BKDForUtil forUtil = new BKDForUtil(); - private final long[] scratch; + private final int[] scratch; DocIdsWriter(int maxPointsInLeaf) { - scratch = new long[maxPointsInLeaf]; + scratch = new int[maxPointsInLeaf]; } void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { @@ -83,23 +83,19 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx if (count == BKDForUtil.BLOCK_SIZE) { if (Integer.toUnsignedLong(min2max) <= 0xFFFFL) { out.writeByte(DELTA_FOR_UTIL); - long[] delta = new long[count]; + int[] delta = new int[count]; for (int i = 0; i < count; i++) { - delta[i] = docIds[i] - min; + delta[i] = docIds[start + i] - min; } out.writeVInt(min); forUtil.encode16(delta, out); } else { - long[] longs = new long[count]; - for (int i = 0; i < count; i++) { - longs[i] = docIds[i]; - } if (Integer.toUnsignedLong(max) <= 0xFFFFFFL) { out.writeByte(BPV_24_FOR_UTIL); - forUtil.encode24(longs, out); + forUtil.encode24(start, docIds, out); } else { out.writeByte(BPV_32_FOR_UTIL); - forUtil.encode32(longs, out); + forUtil.encode32(start, docIds, out); } } return; @@ -185,7 +181,7 @@ private static void writeIdsAsBitSet(int[] docIds, int start, int count, DataOut } /** Read {@code count} integers into {@code docIDs}. 
*/ - void readInts(IndexInput in, int count, long[] docIDs) throws IOException { + void readInts(IndexInput in, int count, int[] docIDs) throws IOException { final int bpv = in.readByte(); switch (bpv) { case CONTINUOUS_IDS: @@ -217,18 +213,18 @@ void readInts(IndexInput in, int count, long[] docIDs) throws IOException { } } - private void readBKDForUtilDelta(IndexInput in, int count, long[] docIDs) throws IOException { + private void readBKDForUtilDelta(IndexInput in, int count, int[] docIDs) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; - final long min = in.readVInt(); + final int min = in.readVInt(); forUtil.decode16(in, docIDs, min); } - private void readBKDForUtil24(IndexInput in, int count, long[] docIDs) throws IOException { + private void readBKDForUtil24(IndexInput in, int count, int[] docIDs) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode24(in, docIDs); } - private void readBKDForUtil32(IndexInput in, int count, long[] docIDs) throws IOException { + private void readBKDForUtil32(IndexInput in, int count, int[] docIDs) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode32(in, docIDs); } @@ -242,7 +238,7 @@ private static DocIdSetIterator readBitSetIterator(IndexInput in, int count) thr return new DocBaseBitSetIterator(bitSet, count, offsetWords << 6); } - private static void readContinuousIds(IndexInput in, int count, long[] docIDs) + private static void readContinuousIds(IndexInput in, int count, int[] docIDs) throws IOException { int start = in.readVInt(); for (int i = 0; i < count; i++) { @@ -250,7 +246,7 @@ private static void readContinuousIds(IndexInput in, int count, long[] docIDs) } } - private static void readBitSet(IndexInput in, int count, long[] docIDs) throws IOException { + private static void readBitSet(IndexInput in, int count, int[] docIDs) throws IOException { DocIdSetIterator iterator = readBitSetIterator(in, count); int docId, pos = 0; while ((docId = iterator.nextDoc()) 
!= DocIdSetIterator.NO_MORE_DOCS) { @@ -259,7 +255,7 @@ private static void readBitSet(IndexInput in, int count, long[] docIDs) throws I assert pos == count : "pos: " + pos + "count: " + count; } - private static void readDeltaVInts(IndexInput in, int count, long[] docIDs) throws IOException { + private static void readDeltaVInts(IndexInput in, int count, int[] docIDs) throws IOException { int doc = 0; for (int i = 0; i < count; i++) { doc += in.readVInt(); @@ -267,13 +263,13 @@ private static void readDeltaVInts(IndexInput in, int count, long[] docIDs) thro } } - private static void readInts32(IndexInput in, int count, long[] docIDs) throws IOException { + private static void readInts32(IndexInput in, int count, int[] docIDs) throws IOException { for (int i = 0; i < count; i++) { docIDs[i] = in.readInt(); } } - private static void readInts24(IndexInput in, int count, long[] docIDs) throws IOException { + private static void readInts24(IndexInput in, int count, int[] docIDs) throws IOException { int i; for (i = 0; i < count - 7; i += 8) { long l1 = in.readLong(); @@ -389,7 +385,7 @@ private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visi final int min = in.readVInt(); forUtil.decode16(in, scratch, min); for (int i = 0; i < count; i++) { - visitor.visit((int) scratch[i]); + visitor.visit(scratch[i]); } } @@ -398,7 +394,7 @@ private void readBKDForUtil24(IndexInput in, int count, IntersectVisitor visitor assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode24(in, scratch); for (int i = 0; i < count; i++) { - visitor.visit((int) scratch[i]); + visitor.visit(scratch[i]); } } @@ -407,7 +403,7 @@ private void readBKDForUtil32(IndexInput in, int count, IntersectVisitor visitor assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode32(in, scratch); for (int i = 0; i < count; i++) { - visitor.visit((int) scratch[i]); + visitor.visit(scratch[i]); } } } diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java 
b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java index 1606bc298b88..dbf80503bc47 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java @@ -133,9 +133,9 @@ private void test(Directory dir, int[] ints) throws Exception { } } try (IndexInput in = dir.openInput("tmp", IOContext.READONCE)) { - long[] read = new long[ints.length]; + int[] read = new int[ints.length]; docIdsWriter.readInts(in, ints.length, read); - assertArrayEquals(ints, Arrays.stream(read).mapToInt(i -> (int) i).toArray()); + assertArrayEquals(ints, read); assertEquals(len, in.getFilePointer()); } try (IndexInput in = dir.openInput("tmp", IOContext.READONCE)) { From 0b452daf2538ef507133382ae409c97561142443 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 13 Jan 2022 16:54:49 +0800 Subject: [PATCH 22/34] spotless --- .../apache/lucene/store/ByteBufferIndexInput.java | 12 ++++++------ .../org/apache/lucene/util/bkd/DocIdsWriter.java | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java index 4623c4b14aaf..eca04df0996d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java @@ -187,7 +187,7 @@ public void readInts(int[] dst, int offset, int length) throws IOException { for (int i = 0; i < Integer.BYTES; ++i) { if (i < curBuf.limit()) { curIntBufferViews[i] = - curBuf.duplicate().position(i).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer(); + curBuf.duplicate().position(i).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer(); } else { curIntBufferViews[i] = EMPTY_INTBUFFER; } @@ -196,16 +196,16 @@ public void readInts(int[] dst, int offset, int length) throws IOException { try { final int position = curBuf.position(); 
guard.getInts( - curIntBufferViews[position & 0x03].position(position >>> 2), dst, offset, length); + curIntBufferViews[position & 0x03].position(position >>> 2), dst, offset, length); // if the above call succeeded, then we know the below sum cannot overflow curBuf.position(position + (length << 2)); } catch ( - @SuppressWarnings("unused") - BufferUnderflowException e) { + @SuppressWarnings("unused") + BufferUnderflowException e) { super.readInts(dst, offset, length); } catch ( - @SuppressWarnings("unused") - NullPointerException npe) { + @SuppressWarnings("unused") + NullPointerException npe) { throw new AlreadyClosedException("Already closed: " + this); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index e995e598c435..1681df9447ad 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -238,8 +238,7 @@ private static DocIdSetIterator readBitSetIterator(IndexInput in, int count) thr return new DocBaseBitSetIterator(bitSet, count, offsetWords << 6); } - private static void readContinuousIds(IndexInput in, int count, int[] docIDs) - throws IOException { + private static void readContinuousIds(IndexInput in, int count, int[] docIDs) throws IOException { int start = in.readVInt(); for (int i = 0; i < count; i++) { docIDs[i] = start + i; From bf78353ee2cf18e00b5291001721ad4d51194f8b Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 13 Jan 2022 18:29:09 +0800 Subject: [PATCH 23/34] iter on review advice --- .../apache/lucene/util/bkd/BKDForUtil.java | 10 -- .../apache/lucene/util/bkd/DocIdsWriter.java | 104 ++++++++---------- 2 files changed, 44 insertions(+), 70 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index ea08e843648e..1deba664d853 100644 --- 
a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -1,4 +1,3 @@ -// This file has been automatically generated, DO NOT EDIT /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -21,11 +20,6 @@ import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -// Inspired from https://fulmicoton.com/posts/bitpacking/ -// Encodes multiple integers in a long to get SIMD-like speedups. -// If bitsPerValue <= 8 then we pack 8 ints per long -// else if bitsPerValue <= 16 we pack 4 ints per long -// else we pack 2 ints per long final class BKDForUtil { static final int BLOCK_SIZE = 512; @@ -81,8 +75,4 @@ void decode24(DataInput in, int[] ints) throws IOException { ((tmp[i] & 0xFF) << 16) | ((tmp[i + 128] & 0xFF) << 8) | (tmp[i + 256] & 0xFF); } } - - void decode32(DataInput in, int[] ints) throws IOException { - in.readInts(ints, 0, 512); - } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 1681df9447ad..1ab99f61dd36 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -28,12 +28,12 @@ final class DocIdsWriter { private static final byte CONTINUOUS_IDS = (byte) -2; private static final byte BITSET_IDS = (byte) -1; - private static final byte DELTA_VINT = (byte) 0; private static final byte DELTA_FOR_UTIL = (byte) 32 + 16; - private static final byte BPV_24 = (byte) 24; - private static final byte BPV_24_FOR_UTIL = (byte) 32 + 24; + private static final byte BPV_24 = (byte) 32 + 24; private static final byte BPV_32 = (byte) 32; - private static final byte BPV_32_FOR_UTIL = (byte) 32 + 32; + // These signs are legacy, should no longer be used in the writing side. 
+ private static final byte LEGACY_DELTA_VINT = (byte) 0; + private static final byte LEGACY_BPV_24 = (byte) 24; private final BKDForUtil forUtil = new BKDForUtil(); private final int[] scratch; @@ -91,18 +91,25 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx forUtil.encode16(delta, out); } else { if (Integer.toUnsignedLong(max) <= 0xFFFFFFL) { - out.writeByte(BPV_24_FOR_UTIL); + out.writeByte(BPV_24); forUtil.encode24(start, docIds, out); } else { - out.writeByte(BPV_32_FOR_UTIL); + out.writeByte(BPV_32); forUtil.encode32(start, docIds, out); } } return; } + legacyWrite(sorted, docIds, start, count, out, max); + } + + @Deprecated + private void legacyWrite( + boolean sorted, int[] docIds, int start, int count, DataOutput out, int max) + throws IOException { if (sorted) { - out.writeByte(DELTA_VINT); + out.writeByte(LEGACY_DELTA_VINT); int previous = 0; for (int i = 0; i < count; ++i) { int doc = docIds[start + i]; @@ -111,7 +118,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } } else { if (Integer.toUnsignedLong(max) <= 0xffffff) { - out.writeByte(BPV_24); + out.writeByte(LEGACY_BPV_24); // write them the same way we are reading them. 
int i; for (i = 0; i < count - 7; i += 8) { @@ -191,42 +198,38 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException { readBitSet(in, count, docIDs); break; case DELTA_FOR_UTIL: - readBKDForUtilDelta(in, count, docIDs); - break; - case BPV_32_FOR_UTIL: - readBKDForUtil32(in, count, docIDs); + readDelta16(in, count, docIDs); break; - case BPV_24_FOR_UTIL: - readBKDForUtil24(in, count, docIDs); - break; - case DELTA_VINT: - readDeltaVInts(in, count, docIDs); + case BPV_24: + readInts24(in, count, docIDs); break; case BPV_32: readInts32(in, count, docIDs); break; - case BPV_24: - readInts24(in, count, docIDs); + case LEGACY_DELTA_VINT: + readLegacyDeltaVInts(in, count, docIDs); + break; + case LEGACY_BPV_24: + readLegacyInts24(in, count, docIDs); break; default: throw new IOException("Unsupported number of bits per value: " + bpv); } } - private void readBKDForUtilDelta(IndexInput in, int count, int[] docIDs) throws IOException { + private void readDelta16(IndexInput in, int count, int[] docIDs) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; final int min = in.readVInt(); forUtil.decode16(in, docIDs, min); } - private void readBKDForUtil24(IndexInput in, int count, int[] docIDs) throws IOException { + private void readInts24(IndexInput in, int count, int[] docIDs) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode24(in, docIDs); } - private void readBKDForUtil32(IndexInput in, int count, int[] docIDs) throws IOException { - assert count == BKDForUtil.BLOCK_SIZE; - forUtil.decode32(in, docIDs); + private void readInts32(IndexInput in, int count, int[] docIDs) throws IOException { + in.readInts(docIDs, 0, count); } private static DocIdSetIterator readBitSetIterator(IndexInput in, int count) throws IOException { @@ -254,7 +257,8 @@ private static void readBitSet(IndexInput in, int count, int[] docIDs) throws IO assert pos == count : "pos: " + pos + "count: " + count; } - private static void 
readDeltaVInts(IndexInput in, int count, int[] docIDs) throws IOException { + private static void readLegacyDeltaVInts(IndexInput in, int count, int[] docIDs) + throws IOException { int doc = 0; for (int i = 0; i < count; i++) { doc += in.readVInt(); @@ -262,13 +266,7 @@ private static void readDeltaVInts(IndexInput in, int count, int[] docIDs) throw } } - private static void readInts32(IndexInput in, int count, int[] docIDs) throws IOException { - for (int i = 0; i < count; i++) { - docIDs[i] = in.readInt(); - } - } - - private static void readInts24(IndexInput in, int count, int[] docIDs) throws IOException { + private static void readLegacyInts24(IndexInput in, int count, int[] docIDs) throws IOException { int i; for (i = 0; i < count - 7; i += 8) { long l1 = in.readLong(); @@ -302,29 +300,26 @@ void readInts(IndexInput in, int count, IntersectVisitor visitor) throws IOExcep readBitSet(in, count, visitor); break; case DELTA_FOR_UTIL: - readBKDForUtilDelta(in, count, visitor); + readDelta16(in, count, visitor); break; - case BPV_32_FOR_UTIL: - readBKDForUtil32(in, count, visitor); - break; - case BPV_24_FOR_UTIL: - readBKDForUtil24(in, count, visitor); - break; - case DELTA_VINT: - readDeltaVInts(in, count, visitor); + case BPV_24: + readInts24(in, count, visitor); break; case BPV_32: readInts32(in, count, visitor); break; - case BPV_24: - readInts24(in, count, visitor); + case LEGACY_DELTA_VINT: + readLegacyDeltaVInts(in, count, visitor); + break; + case LEGACY_BPV_24: + readLegacyInts24(in, count, visitor); break; default: throw new IOException("Unsupported number of bits per value: " + bpv); } } - private static void readDeltaVInts(IndexInput in, int count, IntersectVisitor visitor) + private static void readLegacyDeltaVInts(IndexInput in, int count, IntersectVisitor visitor) throws IOException { int doc = 0; for (int i = 0; i < count; i++) { @@ -333,14 +328,7 @@ private static void readDeltaVInts(IndexInput in, int count, IntersectVisitor vi } } - private 
static void readInts32(IndexInput in, int count, IntersectVisitor visitor) - throws IOException { - for (int i = 0; i < count; i++) { - visitor.visit(in.readInt()); - } - } - - private static void readInts24(IndexInput in, int count, IntersectVisitor visitor) + private static void readLegacyInts24(IndexInput in, int count, IntersectVisitor visitor) throws IOException { int i; for (i = 0; i < count - 7; i += 8) { @@ -378,8 +366,7 @@ private static void readContinuousIds(IndexInput in, int count, IntersectVisitor visitor.visit(new DocBaseBitSetIterator(bitSet, count, offset)); } - private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visitor) - throws IOException { + private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; final int min = in.readVInt(); forUtil.decode16(in, scratch, min); @@ -388,8 +375,7 @@ private void readBKDForUtilDelta(IndexInput in, int count, IntersectVisitor visi } } - private void readBKDForUtil24(IndexInput in, int count, IntersectVisitor visitor) - throws IOException { + private void readInts24(IndexInput in, int count, IntersectVisitor visitor) throws IOException { assert count == BKDForUtil.BLOCK_SIZE; forUtil.decode24(in, scratch); for (int i = 0; i < count; i++) { @@ -397,10 +383,8 @@ private void readBKDForUtil24(IndexInput in, int count, IntersectVisitor visitor } } - private void readBKDForUtil32(IndexInput in, int count, IntersectVisitor visitor) - throws IOException { - assert count == BKDForUtil.BLOCK_SIZE; - forUtil.decode32(in, scratch); + private void readInts32(IndexInput in, int count, IntersectVisitor visitor) throws IOException { + in.readInts(scratch, 0, count); for (int i = 0; i < count; i++) { visitor.visit(scratch[i]); } From ccdae2f6920abff52abf6aae42d65b33861d4cf6 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 13 Jan 2022 21:51:58 +0800 Subject: [PATCH 24/34] make bkd foruti flexible --- 
.../apache/lucene/util/bkd/BKDForUtil.java | 72 +++++++++----- .../apache/lucene/util/bkd/DocIdsWriter.java | 98 ++++--------------- 2 files changed, 66 insertions(+), 104 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index 1deba664d853..078280e20c6b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -24,55 +24,79 @@ final class BKDForUtil { static final int BLOCK_SIZE = 512; - private final int[] tmp = new int[384]; + private final int[] tmp; - void encode16(int[] ints, DataOutput out) throws IOException { - for (int i = 0; i < 256; ++i) { - ints[i] = ints[256 + i] | (ints[i] << 16); + BKDForUtil(int maxPointsInLeaf) { + tmp = new int[maxPointsInLeaf * 3 / 4]; + } + + void encode16(int len, int[] ints, DataOutput out) throws IOException { + final int halfLen = len >>> 1; + for (int i = 0; i < halfLen; ++i) { + ints[i] = ints[halfLen + i] | (ints[i] << 16); } - for (int i = 0; i < 256; i++) { + for (int i = 0; i < halfLen; i++) { out.writeInt(ints[i]); } + if ((len & 1) == 1) { + out.writeShort((short) ints[len - 1]); + } } - void encode32(int off, int[] ints, DataOutput out) throws IOException { - for (int i = 0; i < 512; i++) { + void encode32(int off, int len, int[] ints, DataOutput out) throws IOException { + for (int i = 0; i < len; i++) { out.writeInt(ints[off + i]); } } - void encode24(int off, int[] ints, DataOutput out) throws IOException { - for (int i = 0; i < 384; ++i) { + void encode24(int off, int len, int[] ints, DataOutput out) throws IOException { + final int quarterLen = len >>> 2; + final int quarterLen3 = quarterLen * 3; + for (int i = 0; i < quarterLen3; ++i) { tmp[i] = ints[off + i] << 8; } - for (int i = 0; i < 128; i++) { - final int longIdx = off + i + 384; + for (int i = 0; i < quarterLen; i++) { + final int longIdx = off + i + 
quarterLen3; tmp[i] |= (ints[longIdx] >>> 16) & 0xFF; - tmp[i + 128] |= (ints[longIdx] >>> 8) & 0xFF; - tmp[i + 256] |= ints[longIdx] & 0xFF; + tmp[i + quarterLen] |= (ints[longIdx] >>> 8) & 0xFF; + tmp[i + quarterLen * 2] |= ints[longIdx] & 0xFF; } - for (int i = 0; i < 384; ++i) { + for (int i = 0; i < quarterLen3; ++i) { out.writeInt(tmp[i]); } + + final int remainder = len & 0x3; + for (int i = 0; i < remainder; i++) { + out.writeInt(ints[quarterLen * 4 + i]); + } } - void decode16(DataInput in, int[] ints, final int base) throws IOException { - in.readInts(ints, 0, 256); - for (int i = 0; i < 256; ++i) { + void decode16(DataInput in, int[] ints, int len, final int base) throws IOException { + final int halfLen = len >>> 1; + in.readInts(ints, 0, halfLen); + for (int i = 0; i < halfLen; ++i) { int l = ints[i]; ints[i] = (l >>> 16) + base; - ints[256 + i] = (l & 0xFFFF) + base; + ints[halfLen + i] = (l & 0xFFFF) + base; + } + if ((len & 1) == 1) { + ints[len - 1] = Short.toUnsignedInt(in.readShort()) + base; } } - void decode24(DataInput in, int[] ints) throws IOException { - in.readInts(tmp, 0, 384); - for (int i = 0; i < 384; ++i) { + void decode24(DataInput in, int[] ints, int len) throws IOException { + final int quarterLen = len >>> 2; + final int quarterLen3 = quarterLen * 3; + in.readInts(tmp, 0, quarterLen3); + for (int i = 0; i < quarterLen3; ++i) { ints[i] = tmp[i] >>> 8; } - for (int i = 0; i < 128; i++) { - ints[i + 384] = - ((tmp[i] & 0xFF) << 16) | ((tmp[i + 128] & 0xFF) << 8) | (tmp[i + 256] & 0xFF); + for (int i = 0; i < quarterLen; i++) { + ints[i + quarterLen3] = + ((tmp[i] & 0xFF) << 16) + | ((tmp[i + quarterLen] & 0xFF) << 8) + | (tmp[i + quarterLen * 2] & 0xFF); } + in.readInts(ints, quarterLen << 2, len & 0x3); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 1ab99f61dd36..9186890a8b21 100644 --- 
a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -35,26 +35,24 @@ final class DocIdsWriter { private static final byte LEGACY_DELTA_VINT = (byte) 0; private static final byte LEGACY_BPV_24 = (byte) 24; - private final BKDForUtil forUtil = new BKDForUtil(); + private final BKDForUtil forUtil; private final int[] scratch; DocIdsWriter(int maxPointsInLeaf) { scratch = new int[maxPointsInLeaf]; + forUtil = new BKDForUtil(maxPointsInLeaf); } void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { // docs can be sorted either when all docs in a block have the same value // or when a segment is sorted - boolean sorted = true; boolean strictlySorted = true; int min = docIds[0]; int max = docIds[0]; for (int i = 1; i < count; ++i) { int last = docIds[start + i - 1]; int current = docIds[start + i]; - if (last > current) { - sorted = strictlySorted = false; - } else if (last == current) { + if (last >= current) { strictlySorted = false; } min = Math.min(min, current); @@ -79,77 +77,21 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } } - // special optimization when count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE (common case) - if (count == BKDForUtil.BLOCK_SIZE) { - if (Integer.toUnsignedLong(min2max) <= 0xFFFFL) { - out.writeByte(DELTA_FOR_UTIL); - int[] delta = new int[count]; - for (int i = 0; i < count; i++) { - delta[i] = docIds[start + i] - min; - } - out.writeVInt(min); - forUtil.encode16(delta, out); - } else { - if (Integer.toUnsignedLong(max) <= 0xFFFFFFL) { - out.writeByte(BPV_24); - forUtil.encode24(start, docIds, out); - } else { - out.writeByte(BPV_32); - forUtil.encode32(start, docIds, out); - } - } - return; - } - - legacyWrite(sorted, docIds, start, count, out, max); - } - - @Deprecated - private void legacyWrite( - boolean sorted, int[] docIds, int start, int count, DataOutput out, int max) 
- throws IOException { - if (sorted) { - out.writeByte(LEGACY_DELTA_VINT); - int previous = 0; - for (int i = 0; i < count; ++i) { - int doc = docIds[start + i]; - out.writeVInt(doc - previous); - previous = doc; + if (Integer.toUnsignedLong(min2max) <= 0xFFFFL) { + out.writeByte(DELTA_FOR_UTIL); + int[] delta = new int[count]; + for (int i = 0; i < count; i++) { + delta[i] = docIds[start + i] - min; } + out.writeVInt(min); + forUtil.encode16(count, delta, out); } else { - if (Integer.toUnsignedLong(max) <= 0xffffff) { - out.writeByte(LEGACY_BPV_24); - // write them the same way we are reading them. - int i; - for (i = 0; i < count - 7; i += 8) { - int doc1 = docIds[start + i]; - int doc2 = docIds[start + i + 1]; - int doc3 = docIds[start + i + 2]; - int doc4 = docIds[start + i + 3]; - int doc5 = docIds[start + i + 4]; - int doc6 = docIds[start + i + 5]; - int doc7 = docIds[start + i + 6]; - int doc8 = docIds[start + i + 7]; - long l1 = (doc1 & 0xffffffL) << 40 | (doc2 & 0xffffffL) << 16 | ((doc3 >>> 8) & 0xffffL); - long l2 = - (doc3 & 0xffL) << 56 - | (doc4 & 0xffffffL) << 32 - | (doc5 & 0xffffffL) << 8 - | ((doc6 >> 16) & 0xffL); - long l3 = (doc6 & 0xffffL) << 48 | (doc7 & 0xffffffL) << 24 | (doc8 & 0xffffffL); - out.writeLong(l1); - out.writeLong(l2); - out.writeLong(l3); - } - for (; i < count; ++i) { - out.writeShort((short) (docIds[start + i] >>> 8)); - out.writeByte((byte) docIds[start + i]); - } + if (Integer.toUnsignedLong(max) <= 0xFFFFFFL) { + out.writeByte(BPV_24); + forUtil.encode24(start, count, docIds, out); } else { out.writeByte(BPV_32); - for (int i = 0; i < count; ++i) { - out.writeInt(docIds[start + i]); - } + forUtil.encode32(start, count, docIds, out); } } } @@ -218,14 +160,12 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException { } private void readDelta16(IndexInput in, int count, int[] docIDs) throws IOException { - assert count == BKDForUtil.BLOCK_SIZE; final int min = in.readVInt(); - forUtil.decode16(in, docIDs, 
min); + forUtil.decode16(in, docIDs, count, min); } private void readInts24(IndexInput in, int count, int[] docIDs) throws IOException { - assert count == BKDForUtil.BLOCK_SIZE; - forUtil.decode24(in, docIDs); + forUtil.decode24(in, docIDs, count); } private void readInts32(IndexInput in, int count, int[] docIDs) throws IOException { @@ -367,17 +307,15 @@ private static void readContinuousIds(IndexInput in, int count, IntersectVisitor } private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == BKDForUtil.BLOCK_SIZE; final int min = in.readVInt(); - forUtil.decode16(in, scratch, min); + forUtil.decode16(in, scratch, count, min); for (int i = 0; i < count; i++) { visitor.visit(scratch[i]); } } private void readInts24(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - assert count == BKDForUtil.BLOCK_SIZE; - forUtil.decode24(in, scratch); + forUtil.decode24(in, scratch, count); for (int i = 0; i < count; i++) { visitor.visit(scratch[i]); } From 6a84978f32a73336db071f8a830f1f7135dce800 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 23 Jan 2022 17:35:09 +0800 Subject: [PATCH 25/34] unset int buffer --- .../src/java/org/apache/lucene/store/ByteBufferIndexInput.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java index eca04df0996d..c8a2e58ff73d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java @@ -86,6 +86,7 @@ protected void setCurBuf(ByteBuffer curBuf) { this.curBuf = curBuf; curLongBufferViews = null; curFloatBufferViews = null; + curIntBufferViews = null; } @Override @@ -537,6 +538,8 @@ private void unsetBuffers() { curBuf = null; curBufIndex = 0; curLongBufferViews = null; + curFloatBufferViews = null; + curIntBufferViews 
= null; } /** Optimization of ByteBufferIndexInput for when there is only one buffer */ From 09f2999c38ac2683e7a927722a38abad0cababf7 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 23 Jan 2022 18:15:01 +0800 Subject: [PATCH 26/34] add some tests for read Ints --- .../org/apache/lucene/util/bkd/BKDReader.java | 1 - .../apache/lucene/store/TestMultiMMap.java | 48 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 09cd01792fa7..d2d326b3a156 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -221,7 +221,6 @@ private static class BKDPointTree implements PointTree { // if true the tree is balanced, otherwise unbalanced private final boolean isTreeBalanced; - private BKDPointTree( IndexInput innerNodes, IndexInput leafNodes, diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java index b1275a279116..fbbd89856dfa 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java @@ -18,6 +18,8 @@ import java.io.IOException; import java.nio.file.Path; + +import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import org.apache.lucene.tests.store.BaseChunkedDirectoryTestCase; import org.apache.lucene.util.BytesRef; import org.junit.BeforeClass; @@ -117,6 +119,52 @@ public void testCloneSliceSafety() throws Exception { mmapDir.close(); } + public void testReadInts() throws Exception { + int chunkSize = 1 << 10; + Directory mmapDir = getDirectory(createTempDir("testImplementations"), chunkSize); + IndexOutput io = mmapDir.createOutput("bytes", newIOContext(random())); + int size = 1 << 14; // add some buffer of 3 for slice tests + 
byte[] bytes = new byte[size]; + random().nextBytes(bytes); + io.writeBytes(bytes, bytes.length); + io.close(); + IndexInput ii = mmapDir.openInput("bytes", newIOContext(random())); + + ByteArrayDataInput bytesInput = new ByteArrayDataInput(); + int[] actual = new int[size >>> 2]; + int[] expected = new int[size >>> 2]; + { + { + //test single impl + int len = RandomNumbers.randomIntBetween(random(), 20, chunkSize - 1); + IndexInput single = ii.slice("single", 0, len); + assertTrue(single instanceof ByteBufferIndexInput.SingleBufferImpl); + for (int iter=0; iter < 100; iter++) { + int fp = RandomNumbers.randomIntBetween(random(), 0, len - 20); + single.seek(fp); + bytesInput.reset(bytes, fp, size); + single.readInts(actual, 0, 4); + bytesInput.readInts(expected, 0, 4); + assertArrayEquals(expected, actual); + } + } + } + { + //test single impl + int len = RandomNumbers.randomIntBetween(random(), chunkSize + 1, size); + IndexInput multi = ii.slice("single", 0, len); + assertTrue(multi instanceof ByteBufferIndexInput.MultiBufferImpl); + for (int iter=0; iter < 100; iter++) { + int fp = RandomNumbers.randomIntBetween(random(), 0, len - 20); + multi.seek(fp); + bytesInput.reset(bytes, fp, size); + multi.readInts(actual, 0, 4); + bytesInput.readInts(expected, 0, 4); + assertArrayEquals(expected, actual); + } + } + } + // test has asserts specific to mmap impl... 
public void testImplementations() throws Exception { for (int i = 2; i < 12; i++) { From 51056c26c5d88c51d8d1f3ba5f4a3d136530912f Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 23 Jan 2022 18:17:26 +0800 Subject: [PATCH 27/34] spotless --- .../test/org/apache/lucene/store/TestMultiMMap.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java index fbbd89856dfa..94c8f8132e8a 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java @@ -16,10 +16,9 @@ */ package org.apache.lucene.store; +import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; import java.nio.file.Path; - -import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import org.apache.lucene.tests.store.BaseChunkedDirectoryTestCase; import org.apache.lucene.util.BytesRef; import org.junit.BeforeClass; @@ -135,11 +134,11 @@ public void testReadInts() throws Exception { int[] expected = new int[size >>> 2]; { { - //test single impl + // test multi impl int len = RandomNumbers.randomIntBetween(random(), 20, chunkSize - 1); IndexInput single = ii.slice("single", 0, len); assertTrue(single instanceof ByteBufferIndexInput.SingleBufferImpl); - for (int iter=0; iter < 100; iter++) { + for (int iter = 0; iter < 100; iter++) { int fp = RandomNumbers.randomIntBetween(random(), 0, len - 20); single.seek(fp); bytesInput.reset(bytes, fp, size); @@ -150,11 +149,11 @@ public void testReadInts() throws Exception { } } { - //test single impl + // test single impl int len = RandomNumbers.randomIntBetween(random(), chunkSize + 1, size); IndexInput multi = ii.slice("single", 0, len); assertTrue(multi instanceof ByteBufferIndexInput.MultiBufferImpl); - for (int iter=0; iter < 100; iter++) { + for (int iter = 0; iter < 100; 
iter++) { int fp = RandomNumbers.randomIntBetween(random(), 0, len - 20); multi.seek(fp); bytesInput.reset(bytes, fp, size); From aa73d07387b6be893787a05c3992d9f22cef418c Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 23 Jan 2022 19:54:41 +0800 Subject: [PATCH 28/34] iter --- .../apache/lucene/util/bkd/BKDForUtil.java | 7 ++++-- .../apache/lucene/util/bkd/DocIdsWriter.java | 5 ++--- .../apache/lucene/store/TestMultiMMap.java | 4 ++-- .../lucene/util/bkd/TestDocIdsWriter.java | 22 +++---------------- 4 files changed, 12 insertions(+), 26 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index 078280e20c6b..bb9e276feb91 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -27,7 +27,7 @@ final class BKDForUtil { private final int[] tmp; BKDForUtil(int maxPointsInLeaf) { - tmp = new int[maxPointsInLeaf * 3 / 4]; + tmp = new int[maxPointsInLeaf / 4 * 3]; } void encode16(int len, int[] ints, DataOutput out) throws IOException { @@ -97,6 +97,9 @@ void decode24(DataInput in, int[] ints, int len) throws IOException { | ((tmp[i + quarterLen] & 0xFF) << 8) | (tmp[i + quarterLen * 2] & 0xFF); } - in.readInts(ints, quarterLen << 2, len & 0x3); + int remainder = len & 0x3; + if (remainder > 0) { + in.readInts(ints, quarterLen << 2, remainder); + } } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 9186890a8b21..be41956d34f3 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -79,12 +79,11 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx if (Integer.toUnsignedLong(min2max) <= 0xFFFFL) { out.writeByte(DELTA_FOR_UTIL); - int[] delta = 
new int[count]; for (int i = 0; i < count; i++) { - delta[i] = docIds[start + i] - min; + scratch[i] = docIds[start + i] - min; } out.writeVInt(min); - forUtil.encode16(count, delta, out); + forUtil.encode16(count, scratch, out); } else { if (Integer.toUnsignedLong(max) <= 0xFFFFFFL) { out.writeByte(BPV_24); diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java index 94c8f8132e8a..d029a2e29205 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java @@ -134,7 +134,7 @@ public void testReadInts() throws Exception { int[] expected = new int[size >>> 2]; { { - // test multi impl + // test single impl int len = RandomNumbers.randomIntBetween(random(), 20, chunkSize - 1); IndexInput single = ii.slice("single", 0, len); assertTrue(single instanceof ByteBufferIndexInput.SingleBufferImpl); @@ -149,7 +149,7 @@ public void testReadInts() throws Exception { } } { - // test single impl + // test multi impl int len = RandomNumbers.randomIntBetween(random(), chunkSize + 1, size); IndexInput multi = ii.slice("single", 0, len); assertTrue(multi instanceof ByteBufferIndexInput.MultiBufferImpl); diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java index 94dd91c23dab..7409c71821ca 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java @@ -20,7 +20,6 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; -import java.util.function.IntSupplier; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.store.Directory; @@ -33,15 +32,10 @@ public class TestDocIdsWriter extends LuceneTestCase { public void 
testRandom() throws Exception { - rand(() -> BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE); - rand(() -> 1 + random().nextInt(5000)); - } - - private void rand(IntSupplier size) throws Exception { int numIters = atLeast(100); try (Directory dir = newDirectory()) { for (int iter = 0; iter < numIters; ++iter) { - int[] docIDs = new int[size.getAsInt()]; + int[] docIDs = new int[1 + random().nextInt(5000)]; final int bpv = TestUtil.nextInt(random(), 1, 32); for (int i = 0; i < docIDs.length; ++i) { docIDs[i] = TestUtil.nextInt(random(), 0, (1 << bpv) - 1); @@ -52,15 +46,10 @@ private void rand(IntSupplier size) throws Exception { } public void testSorted() throws Exception { - sorted(() -> BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE); - sorted(() -> 1 + random().nextInt(5000)); - } - - private void sorted(IntSupplier size) throws Exception { int numIters = atLeast(100); try (Directory dir = newDirectory()) { for (int iter = 0; iter < numIters; ++iter) { - int[] docIDs = new int[size.getAsInt()]; + int[] docIDs = new int[1 + random().nextInt(5000)]; final int bpv = TestUtil.nextInt(random(), 1, 32); for (int i = 0; i < docIDs.length; ++i) { docIDs[i] = TestUtil.nextInt(random(), 0, (1 << bpv) - 1); @@ -72,15 +61,10 @@ private void sorted(IntSupplier size) throws Exception { } public void testCluster() throws Exception { - cluster(() -> BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE); - cluster(() -> 1 + random().nextInt(5000)); - } - - private void cluster(IntSupplier size) throws Exception { int numIters = atLeast(100); try (Directory dir = newDirectory()) { for (int iter = 0; iter < numIters; ++iter) { - int[] docIDs = new int[size.getAsInt()]; + int[] docIDs = new int[1 + random().nextInt(5000)]; int min = random().nextInt(1000); final int bpv = TestUtil.nextInt(random(), 1, 16); for (int i = 0; i < docIDs.length; ++i) { From 9e084b881add12b5707679a46e4f8779439735df Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 24 Jan 2022 18:07:23 +0800 Subject: [PATCH 29/34] fix 
tests and add some notes for tmp length --- .../lucene/store/ByteBufferIndexInput.java | 1 - .../apache/lucene/util/bkd/BKDForUtil.java | 9 ++- .../apache/lucene/store/TestMultiMMap.java | 47 --------------- .../tests/store/BaseDirectoryTestCase.java | 59 +++++++++++++++++++ 4 files changed, 65 insertions(+), 51 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java index c8a2e58ff73d..3229cde0845e 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java @@ -538,7 +538,6 @@ private void unsetBuffers() { curBuf = null; curBufIndex = 0; curLongBufferViews = null; - curFloatBufferViews = null; curIntBufferViews = null; } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index bb9e276feb91..08404157dffc 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -22,12 +22,15 @@ final class BKDForUtil { - static final int BLOCK_SIZE = 512; - private final int[] tmp; BKDForUtil(int maxPointsInLeaf) { - tmp = new int[maxPointsInLeaf / 4 * 3]; + // For encode16/decode16, we do not need to use temp. + // For encode24/decode24, we need a (3/4 * maxPointsInLeaf) length tmp array. + // For encode32/decode32, we reuse the scratch in DocIdsWriter. + // So (3/4 * maxPointsInLeaf) is enough here. 
+ final int len = (maxPointsInLeaf >>> 2) * 3; + tmp = new int[len]; } void encode16(int len, int[] ints, DataOutput out) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java index d029a2e29205..b1275a279116 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.store; -import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; import java.nio.file.Path; import org.apache.lucene.tests.store.BaseChunkedDirectoryTestCase; @@ -118,52 +117,6 @@ public void testCloneSliceSafety() throws Exception { mmapDir.close(); } - public void testReadInts() throws Exception { - int chunkSize = 1 << 10; - Directory mmapDir = getDirectory(createTempDir("testImplementations"), chunkSize); - IndexOutput io = mmapDir.createOutput("bytes", newIOContext(random())); - int size = 1 << 14; // add some buffer of 3 for slice tests - byte[] bytes = new byte[size]; - random().nextBytes(bytes); - io.writeBytes(bytes, bytes.length); - io.close(); - IndexInput ii = mmapDir.openInput("bytes", newIOContext(random())); - - ByteArrayDataInput bytesInput = new ByteArrayDataInput(); - int[] actual = new int[size >>> 2]; - int[] expected = new int[size >>> 2]; - { - { - // test single impl - int len = RandomNumbers.randomIntBetween(random(), 20, chunkSize - 1); - IndexInput single = ii.slice("single", 0, len); - assertTrue(single instanceof ByteBufferIndexInput.SingleBufferImpl); - for (int iter = 0; iter < 100; iter++) { - int fp = RandomNumbers.randomIntBetween(random(), 0, len - 20); - single.seek(fp); - bytesInput.reset(bytes, fp, size); - single.readInts(actual, 0, 4); - bytesInput.readInts(expected, 0, 4); - assertArrayEquals(expected, actual); - } - } - } - { - // test multi impl - int len = 
RandomNumbers.randomIntBetween(random(), chunkSize + 1, size); - IndexInput multi = ii.slice("single", 0, len); - assertTrue(multi instanceof ByteBufferIndexInput.MultiBufferImpl); - for (int iter = 0; iter < 100; iter++) { - int fp = RandomNumbers.randomIntBetween(random(), 0, len - 20); - multi.seek(fp); - bytesInput.reset(bytes, fp, size); - multi.readInts(actual, 0, 4); - bytesInput.readInts(expected, 0, 4); - assertArrayEquals(expected, actual); - } - } - } - // test has asserts specific to mmap impl... public void testImplementations() throws Exception { for (int i = 2; i < 12; i++) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java index 81b39f712bf5..fcf3d153cd5b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java @@ -243,6 +243,65 @@ public void testLittleEndianLongsUnderflow() throws Exception { } } + + public void testAlignedInts() throws Exception { + try (Directory dir = getDirectory(createTempDir("testAlignedInts"))) { + try (IndexOutput out = dir.createOutput("Ints", newIOContext(random()))) { + out.writeInt(3); + out.writeInt(Integer.MAX_VALUE); + out.writeInt(-3); + } + try (IndexInput input = dir.openInput("Ints", newIOContext(random()))) { + assertEquals(12, input.length()); + int[] i = new int[4]; + input.readInts(i, 1, 3); + assertArrayEquals(new int[] {0, 3, Integer.MAX_VALUE, -3}, i); + assertEquals(12, input.getFilePointer()); + } + } + } + + public void testUnalignedInts() throws Exception { + int padding = random().nextInt(3) + 1; + try (Directory dir = getDirectory(createTempDir("testUnalignedInts"))) { + try (IndexOutput out = dir.createOutput("Ints", newIOContext(random()))) { + for (int i = 0; i < padding; i++) { + out.writeByte((byte) 2); + } + 
out.writeInt(3); + out.writeInt(Integer.MAX_VALUE); + out.writeInt(-3); + } + try (IndexInput input = dir.openInput("Ints", newIOContext(random()))) { + assertEquals(12 + padding, input.length()); + for (int i = 0; i < padding; i++) { + assertEquals(2, input.readByte()); + } + int[] i = new int[4]; + input.readInts(i, 1, 3); + assertArrayEquals(new int[] {0, 3, Integer.MAX_VALUE, -3}, i); + assertEquals(12 + padding, input.getFilePointer()); + } + } + } + + public void testIntsUnderflow() throws Exception { + try (Directory dir = getDirectory(createTempDir("testIntsUnderflow"))) { + final int offset = random().nextInt(4); + final int length = TestUtil.nextInt(random(), 1, 16); + try (IndexOutput out = dir.createOutput("Ints", newIOContext(random()))) { + byte[] b = + new byte[offset + length * Integer.BYTES - TestUtil.nextInt(random(), 1, Integer.BYTES)]; + random().nextBytes(b); + out.writeBytes(b, b.length); + } + try (IndexInput input = dir.openInput("Ints", newIOContext(random()))) { + input.seek(offset); + expectThrows(EOFException.class, () -> input.readInts(new int[length], 0, length)); + } + } + } + public void testAlignedFloats() throws Exception { try (Directory dir = getDirectory(createTempDir("testAlignedFloats"))) { try (IndexOutput out = dir.createOutput("Floats", newIOContext(random()))) { From 38abc4c4ca292b7d68f771d41439a25e32cc2caf Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 24 Jan 2022 18:10:00 +0800 Subject: [PATCH 30/34] fix typo --- lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index 08404157dffc..a0aaabf090f6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -25,7 +25,7 @@ final class BKDForUtil { private final int[] tmp; 
BKDForUtil(int maxPointsInLeaf) { - // For encode16/decode16, we do not need to use temp. + // For encode16/decode16, we do not need to use tmp array. // For encode24/decode24, we need a (3/4 * maxPointsInLeaf) length tmp array. // For encode32/decode32, we reuse the scratch in DocIdsWriter. // So (3/4 * maxPointsInLeaf) is enough here. From 4db27a2c948dfd84e35741ffbddc7e1d5fd32440 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 24 Jan 2022 18:42:16 +0800 Subject: [PATCH 31/34] spotless --- .../org/apache/lucene/tests/store/BaseDirectoryTestCase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java index fcf3d153cd5b..d91671bb9291 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java @@ -243,7 +243,6 @@ public void testLittleEndianLongsUnderflow() throws Exception { } } - public void testAlignedInts() throws Exception { try (Directory dir = getDirectory(createTempDir("testAlignedInts"))) { try (IndexOutput out = dir.createOutput("Ints", newIOContext(random()))) { @@ -291,7 +290,8 @@ public void testIntsUnderflow() throws Exception { final int length = TestUtil.nextInt(random(), 1, 16); try (IndexOutput out = dir.createOutput("Ints", newIOContext(random()))) { byte[] b = - new byte[offset + length * Integer.BYTES - TestUtil.nextInt(random(), 1, Integer.BYTES)]; + new byte + [offset + length * Integer.BYTES - TestUtil.nextInt(random(), 1, Integer.BYTES)]; random().nextBytes(b); out.writeBytes(b, b.length); } From 8608db0d5a67b71f0273abbc34743e6ebca28a37 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sat, 29 Jan 2022 14:31:03 +0800 Subject: [PATCH 32/34] iter on feed back --- 
.../core/src/java/org/apache/lucene/store/DataInput.java | 7 +++++++ .../src/java/org/apache/lucene/util/bkd/BKDForUtil.java | 2 +- .../src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 4 ++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/store/DataInput.java b/lucene/core/src/java/org/apache/lucene/store/DataInput.java index ba8677d45349..cfb39e7593f3 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/DataInput.java @@ -169,6 +169,13 @@ public void readLongs(long[] dst, int offset, int length) throws IOException { } } + /** + * Reads a specified number of ints into an array at the specified offset. + * + * @param dst the array to read ints into + * @param offset the offset in the array to start storing ints + * @param length the number of ints to read + */ public void readInts(int[] dst, int offset, int length) throws IOException { Objects.checkFromIndexSize(offset, length, dst.length); for (int i = 0; i < length; ++i) { diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java index a0aaabf090f6..422540a15522 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDForUtil.java @@ -60,7 +60,7 @@ void encode24(int off, int len, int[] ints, DataOutput out) throws IOException { } for (int i = 0; i < quarterLen; i++) { final int longIdx = off + i + quarterLen3; - tmp[i] |= (ints[longIdx] >>> 16) & 0xFF; + tmp[i] |= ints[longIdx] >>> 16; tmp[i + quarterLen] |= (ints[longIdx] >>> 8) & 0xFF; tmp[i + quarterLen * 2] |= ints[longIdx] & 0xFF; } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index be41956d34f3..07dca9ed3417 100644 ---
a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -77,7 +77,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } } - if (Integer.toUnsignedLong(min2max) <= 0xFFFFL) { + if (min2max <= 0xFFFFL) { out.writeByte(DELTA_FOR_UTIL); for (int i = 0; i < count; i++) { scratch[i] = docIds[start + i] - min; @@ -85,7 +85,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx out.writeVInt(min); forUtil.encode16(count, scratch, out); } else { - if (Integer.toUnsignedLong(max) <= 0xFFFFFFL) { + if (max <= 0xFFFFFFL) { out.writeByte(BPV_24); forUtil.encode24(start, count, docIds, out); } else { From 4893afc063cbcbedef8547e36ff9142ec34ac12f Mon Sep 17 00:00:00 2001 From: gf2121 <52390227+gf2121@users.noreply.github.com> Date: Mon, 7 Feb 2022 14:34:24 +0800 Subject: [PATCH 33/34] Update lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java Co-authored-by: Adrien Grand --- .../core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 07dca9ed3417..8eefeabb7032 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -77,7 +77,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } } - if (min2max <= 0xFFFFL) { + if (min2max <= 0xFFFF) { out.writeByte(DELTA_FOR_UTIL); for (int i = 0; i < count; i++) { scratch[i] = docIds[start + i] - min; From be14afa165a327f81cf320ac7544f20520e9acdc Mon Sep 17 00:00:00 2001 From: gf2121 <52390227+gf2121@users.noreply.github.com> Date: Mon, 7 Feb 2022 14:34:35 +0800 Subject: [PATCH 34/34] Update 
lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java Co-authored-by: Adrien Grand --- .../core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 8eefeabb7032..8898b8beddfb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -85,7 +85,7 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx out.writeVInt(min); forUtil.encode16(count, scratch, out); } else { - if (max <= 0xFFFFFFL) { + if (max <= 0xFFFFFF) { out.writeByte(BPV_24); forUtil.encode24(start, count, docIds, out); } else {