Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/astc-encoder/.gitrepo
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/ARM-software/astc-encoder.git
branch = main
commit = f48cc27b2528286126c116f42f2792ed2fa13755
parent = 7c24a986d1f48e5cb08b62a6fc55ae50522c4efb
commit = e83b0c8f67e7e704adb3f85c02d8cd547558fa31
parent = 8bae6674b5fb1be507a73382f482934d25fbb68d
method = merge
cmdver = 0.4.3
2 changes: 1 addition & 1 deletion lib/astc-encoder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ if(MSVC)
add_compile_options("/wd4324") # Disable structure was padded due to alignment specifier
endif()

project(astcencoder VERSION 3.7.0)
project(astcencoder VERSION 4.0.0)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand Down
46 changes: 46 additions & 0 deletions lib/astc-encoder/Docs/ChangeLog-4x.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# 4.x series change log

This page summarizes the major functional and performance changes in each
release of the 4.x series.

All performance data on this page is measured on an Intel Core i5-9600K
clocked at 4.2 GHz, running `astcenc` using AVX2 and 6 threads.

<!-- ---------------------------------------------------------------------- -->
## 4.0

**Status:** In development

The 4.0 release introduces some major performance enhancements, and a number
of larger changes to the heuristics used in the codec to find a more effective
cost:quality trade-off.

* **General:**
* **Feature:** The `-a` alpha weighting option has been re-enabled in the
backend, and now again applies alpha scaling to the RGB error metrics when
encoding. This is based on the maximum alpha in each block, not the
individual texel alpha values used in the earlier implementation.
* **Feature:** The command line tool now has `-repeats <count>` for testing,
which will iterate around compression and decompression `count` times.
Reported performance metrics also now separate compression and
decompression scores.
* **Optimization:** Angular endpoint min/max weight selection is restricted
to weight `QUANT_11` or lower. Higher quantization levels assume the default
0-1 range, which is less accurate but much faster.
* **Optimization:** Maximum weight quantization for later trials is selected
based on the weight quantization of the best encoding from the 1 plane 1
partition trial. This significantly reduces the search space for the later
trials with more planes or partitions.
* **Optimization:** Weight scrambling has been moved into the physical layer;
the rest of the codec now uses linear order weights.
* **Optimization:** Weight packing has been moved into the physical layer;
the rest of the codec now uses unpacked weights in the 0-64 range.
* **Optimization:** Consistently vectorize the creation of unquantized weight
grids when they are needed.
* **Optimization:** Remove redundant per-decimation mode copies of endpoint
and weight structures, which were really read-only.
* **Optimization:** Early-out the same endpoint mode color calculation if it
cannot be applied.
- - -

_Copyright © 2022, Arm Limited and contributors. All rights reserved._
3 changes: 3 additions & 0 deletions lib/astc-encoder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ from 0.89 bits/pixel up to 8 bits/pixel.
Release build binaries for the `astcenc` stable releases are provided in the
[GitHub Releases page][3].

**Latest 4.x stable release:** TBD
* Change log: [4.x series](./Docs/ChangeLog-4x.md)

**Latest 3.x stable release:** 3.7
* Change log: [3.x series](./Docs/ChangeLog-3x.md)

Expand Down
272 changes: 272 additions & 0 deletions lib/astc-encoder/Source/UnitTest/test_simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1753,6 +1753,72 @@ TEST(vint4, store_nbytes)
EXPECT_EQ(out, 42);
}

/** @brief Test vint4 store_lanes_masked. */
TEST(vint4, store_lanes_masked)
{
    // All three phases write into the same array, so each expectation below
    // depends on the contents left behind by the previous phase
    int resulta[4] { 0 };

    // Store nothing: the mask comes from a comparison of 0 against 1, and the
    // array is expected to keep its initial zeros
    vmask4 mask1 = vint4(0) == vint4(1);
    vint4 data1 = vint4(1);

    store_lanes_masked(resulta, data1, mask1);
    vint4 result1v(resulta);
    vint4 expect1v = vint4::zero();
    EXPECT_TRUE(all(result1v == expect1v));

    // Store half: only the first two lanes compare equal to 1, so only those
    // are expected to be overwritten; the last two keep their zeros
    vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1);
    vint4 data2 = vint4(2);

    store_lanes_masked(resulta, data2, mask2);
    vint4 result2v(resulta);
    vint4 expect2v = vint4(2, 2, 0, 0);
    EXPECT_TRUE(all(result2v == expect2v));

    // Store all: the mask comes from comparing 1 against 1, and every lane is
    // expected to be replaced
    vmask4 mask3 = vint4(1) == vint4(1);
    vint4 data3 = vint4(3);

    store_lanes_masked(resulta, data3, mask3);
    vint4 result3v(resulta);
    vint4 expect3v = vint4(3);
    EXPECT_TRUE(all(result3v == expect3v));
}

/** @brief Test vint4 store_lanes_masked to unaligned address. */
TEST(vint4, store_lanes_masked_unaligned)
{
    // Byte storage with one spare element so the int view can start at offset 1
    // NOTE(review): presumably sized as four 32-bit lanes plus the one byte
    // offset - confirm against the vint4 lane width
    int8_t resulta[17] { 0 };

    // Every phase stores to and reloads from the same one-byte-offset address;
    // the phases are cumulative, so later expectations rely on earlier stores
    int* unaligned = reinterpret_cast<int*>(resulta + 1);

    // Store nothing: the mask comes from a comparison of 0 against 1, and the
    // storage is expected to keep its initial zeros
    vmask4 mask1 = vint4(0) == vint4(1);
    vint4 data1 = vint4(1);

    store_lanes_masked(unaligned, data1, mask1);
    vint4 result1v(unaligned);
    vint4 expect1v = vint4::zero();
    EXPECT_TRUE(all(result1v == expect1v));

    // Store half: only the first two lanes compare equal to 1
    vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1);
    vint4 data2 = vint4(2);

    store_lanes_masked(unaligned, data2, mask2);
    vint4 result2v(unaligned);
    vint4 expect2v = vint4(2, 2, 0, 0);
    EXPECT_TRUE(all(result2v == expect2v));

    // Store all: the mask comes from comparing 1 against 1
    vmask4 mask3 = vint4(1) == vint4(1);
    vint4 data3 = vint4(3);

    store_lanes_masked(unaligned, data3, mask3);
    vint4 result3v(unaligned);
    vint4 expect3v = vint4(3);
    EXPECT_TRUE(all(result3v == expect3v));
}

/** @brief Test vint4 gatheri. */
TEST(vint4, gatheri)
{
Expand Down Expand Up @@ -1888,6 +1954,62 @@ TEST(vmask4, not)
EXPECT_EQ(mask(r), 0x5);
}

/** @brief Test vint4 table permute with a two register (32 entry) table. */
TEST(vint4, vtable_8bt_32bi_32entry)
{
    // Raw table contents; the expectations below treat the least significant
    // byte of lane 0 of the first register as table entry 0
    vint4 raw_lo(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
    vint4 raw_hi(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // Prepared forms are written through the output arguments
    vint4 prep_lo;
    vint4 prep_hi;
    vtable_prepare(raw_lo, raw_hi, prep_lo, prep_hi);

    // One lookup index per lane, including the first and last table entries
    vint4 lookup(0, 7, 4, 31);

    vint4 fetched = vtable_8bt_32bi(prep_lo, prep_hi, lookup);

    EXPECT_EQ(fetched.lane<0>(), 3);
    EXPECT_EQ(fetched.lane<1>(), 4);
    EXPECT_EQ(fetched.lane<2>(), 7);
    EXPECT_EQ(fetched.lane<3>(), 28);
}

/** @brief Test vint4 table permute with a four register (64 entry) table. */
TEST(vint4, vtable_8bt_32bi_64entry)
{
    // Raw table contents; the expectations below treat the least significant
    // byte of lane 0 of the first register as table entry 0
    vint4 raw0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
    vint4 raw1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
    vint4 raw2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
    vint4 raw3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);

    // Prepared forms are written through the output arguments
    vint4 prep0;
    vint4 prep1;
    vint4 prep2;
    vint4 prep3;
    vtable_prepare(raw0, raw1, raw2, raw3, prep0, prep1, prep2, prep3);

    // One lookup index per lane, including the first and last table entries
    vint4 lookup(0, 7, 38, 63);

    vint4 fetched = vtable_8bt_32bi(prep0, prep1, prep2, prep3, lookup);

    EXPECT_EQ(fetched.lane<0>(), 3);
    EXPECT_EQ(fetched.lane<1>(), 4);
    EXPECT_EQ(fetched.lane<2>(), 37);
    EXPECT_EQ(fetched.lane<3>(), 60);
}

/** @brief Test vint4 interleave of four byte-valued channels into RGBA lanes. */
TEST(vint4, interleave_rgba8)
{
    // The high nibble of each value identifies the lane, the low nibble the channel
    vint4 red(0x01, 0x11, 0x21, 0x31);
    vint4 green(0x02, 0x12, 0x22, 0x32);
    vint4 blue(0x03, 0x13, 0x23, 0x33);
    vint4 alpha(0x04, 0x14, 0x24, 0x34);

    vint4 packed = interleave_rgba8(red, green, blue, alpha);

    // Each output lane is expected to hold that lane's four channel values,
    // with red in the lowest byte and alpha in the highest
    EXPECT_EQ(packed.lane<0>(), 0x04030201);
    EXPECT_EQ(packed.lane<1>(), 0x14131211);
    EXPECT_EQ(packed.lane<2>(), 0x24232221);
    EXPECT_EQ(packed.lane<3>(), 0x34333231);
}

# if ASTCENC_SIMD_WIDTH == 8

// VFLOAT8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Expand Down Expand Up @@ -2973,6 +3095,42 @@ TEST(vint8, max)
EXPECT_EQ(r.lane<7>(), 5);
}

/** @brief Test vint8 logical shift left by compile-time shift counts. */
TEST(vint8, lsl)
{
    vint8 input(1, 2, 4, -4, 1, 2, 4, -4);

    // A shift of zero is expected to leave every lane unchanged
    vint8 stage0 = lsl<0>(input);
    EXPECT_EQ(stage0.lane<0>(), 1);
    EXPECT_EQ(stage0.lane<1>(), 2);
    EXPECT_EQ(stage0.lane<2>(), 4);
    EXPECT_EQ(stage0.lane<3>(), 0xFFFFFFFC);
    EXPECT_EQ(stage0.lane<4>(), 1);
    EXPECT_EQ(stage0.lane<5>(), 2);
    EXPECT_EQ(stage0.lane<6>(), 4);
    EXPECT_EQ(stage0.lane<7>(), 0xFFFFFFFC);

    // The shifts are chained: this one operates on the previous result
    vint8 stage1 = lsl<1>(stage0);
    EXPECT_EQ(stage1.lane<0>(), 2);
    EXPECT_EQ(stage1.lane<1>(), 4);
    EXPECT_EQ(stage1.lane<2>(), 8);
    EXPECT_EQ(stage1.lane<3>(), 0xFFFFFFF8);
    EXPECT_EQ(stage1.lane<4>(), 2);
    EXPECT_EQ(stage1.lane<5>(), 4);
    EXPECT_EQ(stage1.lane<6>(), 8);
    EXPECT_EQ(stage1.lane<7>(), 0xFFFFFFF8);

    // Shifting the already-doubled values by two gives eight times the input
    vint8 stage2 = lsl<2>(stage1);
    EXPECT_EQ(stage2.lane<0>(), 8);
    EXPECT_EQ(stage2.lane<1>(), 16);
    EXPECT_EQ(stage2.lane<2>(), 32);
    EXPECT_EQ(stage2.lane<3>(), 0xFFFFFFE0);
    EXPECT_EQ(stage2.lane<4>(), 8);
    EXPECT_EQ(stage2.lane<5>(), 16);
    EXPECT_EQ(stage2.lane<6>(), 32);
    EXPECT_EQ(stage2.lane<7>(), 0xFFFFFFE0);
}

/** @brief Test vint8 lsr. */
TEST(vint8, lsr)
{
Expand Down Expand Up @@ -3139,6 +3297,72 @@ TEST(vint8, store_nbytes)
EXPECT_EQ(out[1], 314);
}

/** @brief Test vint8 store_lanes_masked with empty, half, and full masks. */
TEST(vint8, store_lanes_masked)
{
    // Shared destination; each phase builds on what the previous one stored
    int storage[8] { 0 };

    // Phase 1 - mask built from 0 == 1; storage is expected to stay all zero
    vmask8 none_mask = vint8(0) == vint8(1);
    vint8 ones(1);

    store_lanes_masked(storage, ones, none_mask);
    vint8 after_none(storage);
    EXPECT_TRUE(all(after_none == vint8::zero()));

    // Phase 2 - only the first four lanes compare equal to 1, so only those
    // are expected to be overwritten
    vmask8 half_mask = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
    vint8 twos(2);

    store_lanes_masked(storage, twos, half_mask);
    vint8 after_half(storage);
    EXPECT_TRUE(all(after_half == vint8(2, 2, 2, 2, 0, 0, 0, 0)));

    // Phase 3 - mask built from 1 == 1; every lane is expected to be replaced
    vmask8 full_mask = vint8(1) == vint8(1);
    vint8 threes(3);

    store_lanes_masked(storage, threes, full_mask);
    vint8 after_full(storage);
    EXPECT_TRUE(all(after_full == vint8(3)));
}

/** @brief Test vint8 store_lanes_masked when the destination is offset by one byte. */
TEST(vint8, store_lanes_masked_unaligned)
{
    // Byte storage with one spare element so the int view can start at offset 1
    int8_t storage[33] { 0 };

    // Destination shared by every phase; each phase builds on the previous one
    int* offset_dst = reinterpret_cast<int*>(storage + 1);

    // Phase 1 - mask built from 0 == 1; storage is expected to stay all zero
    vmask8 none_mask = vint8(0) == vint8(1);
    vint8 ones(1);

    store_lanes_masked(offset_dst, ones, none_mask);
    vint8 after_none(offset_dst);
    EXPECT_TRUE(all(after_none == vint8::zero()));

    // Phase 2 - only the first four lanes compare equal to 1, so only those
    // are expected to be overwritten
    vmask8 half_mask = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
    vint8 twos(2);

    store_lanes_masked(offset_dst, twos, half_mask);
    vint8 after_half(offset_dst);
    EXPECT_TRUE(all(after_half == vint8(2, 2, 2, 2, 0, 0, 0, 0)));

    // Phase 3 - mask built from 1 == 1; every lane is expected to be replaced
    vmask8 full_mask = vint8(1) == vint8(1);
    vint8 threes(3);

    store_lanes_masked(offset_dst, threes, full_mask);
    vint8 after_full(offset_dst);
    EXPECT_TRUE(all(after_full == vint8(3)));
}

/** @brief Test vint8 gatheri. */
TEST(vint8, gatheri)
{
Expand Down Expand Up @@ -3268,6 +3492,54 @@ TEST(vmask8, not)
EXPECT_EQ(mask(r), 0x55);
}

/** @brief Test vint8 table permute with a 32 entry table. */
TEST(vint8, vtable_8bt_32bi_32entry)
{
    // Raw table contents are supplied as vint4 registers; the expectations
    // below treat the least significant byte of lane 0 of the first register
    // as table entry 0
    vint4 raw_lo(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
    vint4 raw_hi(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // Prepared forms are vint8 and are written through the output arguments
    vint8 prep_lo;
    vint8 prep_hi;
    vtable_prepare(raw_lo, raw_hi, prep_lo, prep_hi);

    // One lookup index per lane, drawn from both source registers
    vint8 lookup(0, 7, 4, 15, 16, 20, 23, 31);

    vint8 fetched = vtable_8bt_32bi(prep_lo, prep_hi, lookup);

    EXPECT_EQ(fetched.lane<0>(), 3);
    EXPECT_EQ(fetched.lane<1>(), 4);
    EXPECT_EQ(fetched.lane<2>(), 7);
    EXPECT_EQ(fetched.lane<3>(), 12);
    EXPECT_EQ(fetched.lane<4>(), 19);
    EXPECT_EQ(fetched.lane<5>(), 23);
    EXPECT_EQ(fetched.lane<6>(), 20);
    EXPECT_EQ(fetched.lane<7>(), 28);
}

/** @brief Test vint8 table permute. */
TEST(vint8, vtable_8bt_32bi_64entry)
{
    // Raw table contents are supplied as vint4 registers; the expectations
    // below treat the least significant byte of lane 0 of table0 as entry 0
    vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
    vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
    vint4 table2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
    vint4 table3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);

    // Prepared forms are vint8 and are written through the output arguments
    vint8 table0p, table1p, table2p, table3p;
    vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p);

    // One lookup index per lane, including the first and last table entries
    vint8 index(0, 7, 4, 15, 16, 20, 38, 63);

    vint8 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index);

    EXPECT_EQ(result.lane<0>(), 3);
    EXPECT_EQ(result.lane<1>(), 4);
    EXPECT_EQ(result.lane<2>(), 7);
    EXPECT_EQ(result.lane<3>(), 12);
    EXPECT_EQ(result.lane<4>(), 19);
    EXPECT_EQ(result.lane<5>(), 23);
    EXPECT_EQ(result.lane<6>(), 37);
    EXPECT_EQ(result.lane<7>(), 60);
}

#endif

}
Loading