From 242c4e56d1e129fe208970e8fb67f4b38134a67b Mon Sep 17 00:00:00 2001 From: Vishwanatha-HD Date: Fri, 21 Nov 2025 20:50:13 +0530 Subject: [PATCH] GH-48208 Fix Types logic to enable Parquet DB support on s390x --- cpp/src/parquet/types.cc | 19 ++++++++++++++++--- cpp/src/parquet/types.h | 11 +++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index fb4eb92a7544..96754270ae43 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -27,6 +27,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/compression.h" #include "arrow/util/decimal.h" +#include "arrow/util/endian.h" #include "arrow/util/float16.h" #include "arrow/util/logging_internal.h" @@ -112,6 +113,15 @@ std::enable_if_t, std::string> FormatNumericValue( std::stringstream result; T value{}; std::memcpy(&value, val.data(), sizeof(T)); + +#if !ARROW_LITTLE_ENDIAN + // Parquet stores numeric stats in little-endian. Convert to native-endian + // before printing to avoid endianness mismatches on big-endian platforms. + if constexpr (std::is_integral_v && sizeof(T) > 1) { + value = ::arrow::bit_util::FromLittleEndian(value); + } +#endif + result << value; return result.str(); } @@ -212,9 +222,12 @@ std::string FormatStatValue(Type::type parquet_type, ::std::string_view val, return FormatNumericValue(val); } case Type::INT96: { - std::array values{}; - std::memcpy(values.data(), bytes, 3 * sizeof(int32_t)); - result << values[0] << " " << values[1] << " " << values[2]; + std::array values{}; + std::memcpy(values.data(), bytes, 3 * sizeof(uint32_t)); + // INT96 values are stored in little-endian format + result << ::arrow::bit_util::FromLittleEndian(values[0]) << " " + << ::arrow::bit_util::FromLittleEndian(values[1]) << " " + << ::arrow::bit_util::FromLittleEndian(values[2]); break; } case Type::BYTE_ARRAY: diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 7e8a18fc94d6..942787cca189 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -26,6 +26,7 @@ #include #include +#include "arrow/util/endian.h" #include "parquet/platform.h" #include "parquet/type_fwd.h" #include "parquet/windows_fixup.h" // for OPTIONAL @@ -705,7 +706,12 @@ static inline std::string ByteArrayToString(const ByteArray& a) { } static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds) { +#if ARROW_LITTLE_ENDIAN std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds)); +#else + i96.value[0] = static_cast(nanoseconds); + i96.value[1] = static_cast(nanoseconds >> 32); +#endif } struct DecodedInt96 { @@ -720,7 +726,12 @@ static inline DecodedInt96 DecodeInt96Timestamp(const parquet::Int96& i96) { result.days_since_epoch = i96.value[2] - static_cast(kJulianToUnixEpochDays); result.nanoseconds = 0; +#if ARROW_LITTLE_ENDIAN memcpy(&result.nanoseconds, &i96.value, sizeof(uint64_t)); +#else + result.nanoseconds = + static_cast(i96.value[0]) | (static_cast(i96.value[1]) << 32); +#endif return result; }