Skip to content

Commit 1a2d962

Browse files
GH-48206 Fix Statistics logic to enable Parquet DB support on s390x
1 parent 2fb2f79 commit 1a2d962

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

cpp/src/parquet/statistics.cc

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
#include "arrow/type_traits.h"
3131
#include "arrow/util/bit_run_reader.h"
3232
#include "arrow/util/checked_cast.h"
33+
#include "arrow/util/endian.h"
3334
#include "arrow/util/float16.h"
3435
#include "arrow/util/logging_internal.h"
3536
#include "arrow/util/ubsan.h"
@@ -925,6 +926,38 @@ void TypedStatisticsImpl<DType>::UpdateSpaced(const T* values, const uint8_t* va
925926

926927
template <typename DType>
927928
void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) const {
929+
#if !ARROW_LITTLE_ENDIAN
930+
// For fixed-width numeric types, write explicit little-endian bytes per spec
931+
if constexpr (std::is_same_v<DType, Int32Type>) {
932+
uint32_t u;
933+
std::memcpy(&u, &src, sizeof(u));
934+
u = ::arrow::bit_util::ToLittleEndian(u);
935+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
936+
return;
937+
} else if constexpr (std::is_same_v<DType, Int64Type>) {
938+
uint64_t u;
939+
std::memcpy(&u, &src, sizeof(u));
940+
u = ::arrow::bit_util::ToLittleEndian(u);
941+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
942+
return;
943+
} else if constexpr (std::is_same_v<DType, FloatType>) {
944+
uint32_t u;
945+
static_assert(sizeof(u) == sizeof(float), "size");
946+
std::memcpy(&u, &src, sizeof(u));
947+
u = ::arrow::bit_util::ToLittleEndian(u);
948+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
949+
return;
950+
} else if constexpr (std::is_same_v<DType, DoubleType>) {
951+
uint64_t u;
952+
static_assert(sizeof(u) == sizeof(double), "size");
953+
std::memcpy(&u, &src, sizeof(u));
954+
u = ::arrow::bit_util::ToLittleEndian(u);
955+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
956+
return;
957+
}
958+
#endif
959+
960+
// Fallback: use encoder for other types
928961
auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_);
929962
encoder->Put(&src, 1);
930963
auto buffer = encoder->FlushValues();
@@ -934,6 +967,35 @@ void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) con
934967

935968
template <typename DType>
936969
void TypedStatisticsImpl<DType>::PlainDecode(const std::string& src, T* dst) const {
970+
#if !ARROW_LITTLE_ENDIAN
971+
if constexpr (std::is_same_v<DType, Int32Type>) {
972+
uint32_t u = 0;
973+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
974+
u = ::arrow::bit_util::FromLittleEndian(u);
975+
std::memcpy(dst, &u, sizeof(u));
976+
return;
977+
} else if constexpr (std::is_same_v<DType, Int64Type>) {
978+
uint64_t u = 0;
979+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
980+
u = ::arrow::bit_util::FromLittleEndian(u);
981+
std::memcpy(dst, &u, sizeof(u));
982+
return;
983+
} else if constexpr (std::is_same_v<DType, FloatType>) {
984+
uint32_t u = 0;
985+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
986+
u = ::arrow::bit_util::FromLittleEndian(u);
987+
std::memcpy(dst, &u, sizeof(u));
988+
return;
989+
} else if constexpr (std::is_same_v<DType, DoubleType>) {
990+
uint64_t u = 0;
991+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
992+
u = ::arrow::bit_util::FromLittleEndian(u);
993+
std::memcpy(dst, &u, sizeof(u));
994+
return;
995+
}
996+
#endif
997+
998+
// Fallback: use decoder for other types
937999
auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
9381000
decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()),
9391001
static_cast<int>(src.size()));

0 commit comments

Comments
 (0)