2222#include < cstring>
2323#include < functional>
2424#include < limits>
25+ #include < new>
2526#include < sstream>
2627#include < string_view>
2728
3233#include " arrow/compare.h"
3334#include " arrow/io/memory.h"
3435#include " arrow/pretty_print.h"
36+ #include " arrow/stl_allocator.h"
3537#include " arrow/type.h"
3638#include " arrow/util/fuzz_internal.h"
3739#include " arrow/util/logging.h"
@@ -153,6 +155,19 @@ namespace {
153155// Just to use std::vector<T> while avoiding std::vector<bool>
// BooleanSlot is a byte array with the same size as `bool`; being a distinct
// element type, std::vector<BooleanSlot> is an ordinary (non-specialized) vector.
154156using BooleanSlot = std::array<uint8_t , sizeof (bool )>;
155157
// Allocator alias: PoolAllocator<T> is ::arrow::stl::allocator<T>.
// NOTE(review): presumably this allocates from an Arrow MemoryPool and signals
// failure by throwing std::bad_alloc -- confirm against arrow/stl_allocator.h.
158+ template <typename T>
159+ using PoolAllocator = ::arrow::stl::allocator<T>;
160+
// std::vector whose storage is obtained through PoolAllocator<T> rather than
// the default std::allocator<T>.
161+ template <typename T>
162+ using PoolVector = std::vector<T, PoolAllocator<T>>;
163+
// Bracket a group of statements so that a std::bad_alloc thrown inside them is
// converted into a Status::OutOfMemory return carrying the exception message.
//
// Usage:
//   BEGIN_CATCH_BAD_ALLOC
//   <statements that may throw std::bad_alloc>
//   END_CATCH_BAD_ALLOC
//
// The expansion contains a `return Status::OutOfMemory(...)`, so the enclosing
// function or lambda must have a return type constructible from Status.
// The statements end up inside a `try { ... }` block, so any variable declared
// between the two macros is not visible after END_CATCH_BAD_ALLOC.
164+ #define BEGIN_CATCH_BAD_ALLOC try {
165+ #define END_CATCH_BAD_ALLOC \
166+ } \
167+ catch (const std::bad_alloc& e) { \
168+ return Status::OutOfMemory (e.what ()); \
169+ }
170+
156171template <typename DType>
157172struct TypedFuzzEncoding {
158173 static constexpr Type::type kType = DType::type_num;
@@ -176,9 +191,9 @@ struct TypedFuzzEncoding {
176191 // decoder's internal scratch space, which get invalidated on the
177192 // following decoder call. We circumvent the issue by executing a
178193 // functor on each decoded chunk before moving to the next one.
179- Status RunOnDecodedChunks (Encoding::type encoding,
180- std::span<const uint8_t > encoded_data, int chunk_size,
181- std::function<Status(int offset, std::vector< c_type>)> func) {
194+ Status RunOnDecodedChunks (
195+ Encoding::type encoding, std::span<const uint8_t > encoded_data, int chunk_size,
196+ std::function<Status(int offset, std::span< const c_type>)> func) {
182197 BEGIN_PARQUET_CATCH_EXCEPTIONS
183198 int total_values = 0 ;
184199 auto decoder = MakeDecoder (encoding);
@@ -191,8 +206,10 @@ struct TypedFuzzEncoding {
191206 static_cast <int >(encoded_data.size ()));
192207 while (total_values < num_values_) {
193208 const int read_size = std::min (num_values_ - total_values, chunk_size);
194- // ARROW_ASSIGN_OR_RAISE(auto chunk_values, DecodeChunk(read_size));
195- std::vector<c_type> chunk_values (read_size);
209+ PoolVector<c_type> chunk_values (pool ());
210+ BEGIN_CATCH_BAD_ALLOC
211+ chunk_values.resize (read_size);
212+ END_CATCH_BAD_ALLOC
196213 int values_read;
197214 if constexpr (kType == Type::BOOLEAN) {
198215 values_read =
@@ -201,8 +218,7 @@ struct TypedFuzzEncoding {
201218 values_read = decoder->Decode (chunk_values.data (), read_size);
202219 }
203220 ARROW_CHECK_LE (values_read, read_size);
204- chunk_values.resize (values_read);
205- RETURN_NOT_OK (func (total_values, std::move (chunk_values)));
221+ RETURN_NOT_OK (func (total_values, std::span (chunk_values).first (values_read)));
206222 total_values += values_read;
207223 if (values_read < chunk_size) {
208224 break ;
@@ -215,14 +231,16 @@ struct TypedFuzzEncoding {
215231 return Status::OK ();
216232 }
217233
218- Result<std::vector <c_type>> Decode (Encoding::type encoding,
219- std::span<const uint8_t > encoded_data,
220- int chunk_size) {
234+ Result<PoolVector <c_type>> Decode (Encoding::type encoding,
235+ std::span<const uint8_t > encoded_data,
236+ int chunk_size) {
221237 // Decoded chunk values shouldn't embed pointers to decoder scratch space.
222238 static_assert (decoded_values_can_be_persisted ());
223- std::vector<c_type> values;
224- auto accumulate_chunk = [&](int offset, std::vector<c_type> chunk_values) {
239+ PoolVector<c_type> values (pool ());
240+ auto accumulate_chunk = [&](int offset, std::span<const c_type> chunk_values) {
241+ BEGIN_CATCH_BAD_ALLOC
225242 values.insert (values.end (), chunk_values.begin (), chunk_values.end ());
243+ END_CATCH_BAD_ALLOC
226244 return Status::OK ();
227245 };
228246 RETURN_NOT_OK (
@@ -273,7 +291,7 @@ struct TypedFuzzEncoding {
273291
274292 // Re-encode and re-decode using roundtrip encoding
275293 {
276- auto compare_chunk = [&](int offset, std::vector< c_type> chunk_values) {
294+ auto compare_chunk = [&](int offset, std::span< const c_type> chunk_values) {
277295 return CompareChunkAgainstReference (offset, chunk_values);
278296 };
279297 auto encoder = MakeEncoder (roundtrip_encoding_);
@@ -291,7 +309,8 @@ struct TypedFuzzEncoding {
291309 chunk_size, compare_chunk));
292310 }
293311 } else {
294- encoder->Put (reference_values_);
312+ encoder->Put (reference_values_.data (),
313+ static_cast <int >(reference_values_.size ()));
295314 auto reencoded_buffer = encoder->FlushValues ();
296315 auto reencoded_data = reencoded_buffer->template span_as <uint8_t >();
297316 // Vary chunk sizes
@@ -462,7 +481,7 @@ struct TypedFuzzEncoding {
462481
463482 std::shared_ptr<Array> reference_array_;
464483 // Only for INT96 as there is no strictly equivalent Arrow type
465- std::vector <c_type> reference_values_;
484+ PoolVector <c_type> reference_values_;
466485};
467486
468487} // namespace
0 commit comments