Skip to content

Commit ccdae3d

Browse files
authored
Merge branch 'apache:main' into Pyarrow_WoA
2 parents fc9ff54 + ba273ca commit ccdae3d

File tree

36 files changed

+601
-56
lines changed

36 files changed

+601
-56
lines changed

.github/workflows/ruby.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ jobs:
124124
run: archery docker push ubuntu-ruby
125125

126126
macos:
127-
name: ARM64 macOS 14 GLib & Ruby
127+
name: ARM64 macOS GLib & Ruby
128128
runs-on: macos-latest
129129
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
130130
timeout-minutes: 60

c_glib/arrow-glib/reader.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,35 @@ garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *r
668668
}
669669
}
670670

671+
/**
672+
* garrow_record_batch_file_reader_get_metadata:
673+
* @reader: A #GArrowRecordBatchFileReader.
674+
*
675+
* Returns: (nullable) (element-type utf8 utf8) (transfer full):
676+
* The metadata in the footer.
677+
*
678+
* Since: 24.0.0
679+
*/
680+
GHashTable *
681+
garrow_record_batch_file_reader_get_metadata(GArrowRecordBatchFileReader *reader)
682+
{
683+
auto arrow_reader = garrow_record_batch_file_reader_get_raw(reader);
684+
auto arrow_metadata = arrow_reader->metadata();
685+
686+
if (!arrow_metadata) {
687+
return nullptr;
688+
}
689+
690+
auto metadata = g_hash_table_new(g_str_hash, g_str_equal);
691+
const auto n = arrow_metadata->size();
692+
for (int64_t i = 0; i < n; ++i) {
693+
g_hash_table_insert(metadata,
694+
const_cast<gchar *>(arrow_metadata->key(i).c_str()),
695+
const_cast<gchar *>(arrow_metadata->value(i).c_str()));
696+
}
697+
return metadata;
698+
}
699+
671700
struct GArrowFeatherFileReaderPrivate
672701
{
673702
std::shared_ptr<arrow::ipc::feather::Reader> feather_reader;

c_glib/arrow-glib/reader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *r
166166
guint i,
167167
GError **error);
168168

169+
GARROW_AVAILABLE_IN_24_0
170+
GHashTable *
171+
garrow_record_batch_file_reader_get_metadata(GArrowRecordBatchFileReader *reader);
172+
169173
#define GARROW_TYPE_FEATHER_FILE_READER (garrow_feather_file_reader_get_type())
170174
GARROW_AVAILABLE_IN_ALL
171175
G_DECLARE_DERIVABLE_TYPE(GArrowFeatherFileReader,

c_glib/arrow-glib/writer.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include <arrow-glib/array.hpp>
2121
#include <arrow-glib/enums.h>
2222
#include <arrow-glib/error.hpp>
23+
#include <arrow-glib/internal-hash-table.hpp>
24+
#include <arrow-glib/ipc-options.hpp>
2325
#include <arrow-glib/record-batch.hpp>
2426
#include <arrow-glib/schema.hpp>
2527
#include <arrow-glib/table.hpp>
@@ -288,16 +290,50 @@ GArrowRecordBatchFileWriter *
288290
garrow_record_batch_file_writer_new(GArrowOutputStream *sink,
289291
GArrowSchema *schema,
290292
GError **error)
293+
{
294+
return garrow_record_batch_file_writer_new_full(sink, schema, nullptr, nullptr, error);
295+
}
296+
297+
/**
298+
* garrow_record_batch_file_writer_new_full:
299+
* @sink: The output of the writer.
300+
* @schema: The schema of the writer.
301+
* @options: (nullable): The options for serialization.
302+
* @metadata: (nullable) (element-type utf8 utf8): The custom metadata in
303+
* the footer.
304+
* @error: (nullable): Return location for a #GError or %NULL.
305+
*
306+
* Returns: (nullable): A newly created #GArrowRecordBatchFileWriter
307+
* or %NULL on error.
308+
*
309+
* Since: 24.0.0
310+
*/
311+
GArrowRecordBatchFileWriter *
312+
garrow_record_batch_file_writer_new_full(GArrowOutputStream *sink,
313+
GArrowSchema *schema,
314+
GArrowWriteOptions *options,
315+
GHashTable *metadata,
316+
GError **error)
291317
{
292318
auto arrow_sink = garrow_output_stream_get_raw(sink);
293319
auto arrow_schema = garrow_schema_get_raw(schema);
320+
arrow::ipc::IpcWriteOptions arrow_options = arrow::ipc::IpcWriteOptions::Defaults();
321+
if (options) {
322+
arrow_options = *garrow_write_options_get_raw(options);
323+
}
324+
std::shared_ptr<arrow::KeyValueMetadata> arrow_metadata;
325+
if (metadata) {
326+
arrow_metadata = garrow_internal_hash_table_to_metadata(metadata);
327+
}
328+
294329
std::shared_ptr<arrow::ipc::RecordBatchWriter> arrow_writer;
295-
auto arrow_writer_result = arrow::ipc::MakeFileWriter(arrow_sink, arrow_schema);
296-
if (garrow::check(error, arrow_writer_result, "[record-batch-file-writer][open]")) {
330+
auto arrow_writer_result =
331+
arrow::ipc::MakeFileWriter(arrow_sink, arrow_schema, arrow_options, arrow_metadata);
332+
if (garrow::check(error, arrow_writer_result, "[record-batch-file-writer][new]")) {
297333
auto arrow_writer = *arrow_writer_result;
298334
return garrow_record_batch_file_writer_new_raw(&arrow_writer);
299335
} else {
300-
return NULL;
336+
return nullptr;
301337
}
302338
}
303339

c_glib/arrow-glib/writer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#pragma once
2121

2222
#include <arrow-glib/array.h>
23+
#include <arrow-glib/ipc-options.h>
2324
#include <arrow-glib/record-batch.h>
2425
#include <arrow-glib/schema.h>
2526

@@ -94,6 +95,14 @@ garrow_record_batch_file_writer_new(GArrowOutputStream *sink,
9495
GArrowSchema *schema,
9596
GError **error);
9697

98+
GARROW_AVAILABLE_IN_24_0
99+
GArrowRecordBatchFileWriter *
100+
garrow_record_batch_file_writer_new_full(GArrowOutputStream *sink,
101+
GArrowSchema *schema,
102+
GArrowWriteOptions *options,
103+
GHashTable *metadata,
104+
GError **error);
105+
97106
/**
98107
* GArrowCSVQuotingStyle:
99108
* @GARROW_CSV_QUOTING_STYLE_NEEDED: Only enclose values in quotes which need them.

c_glib/test/test-file-writer.rb

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,36 @@ def test_write_table
8888
input.close
8989
end
9090
end
91+
92+
def test_footer_custom_metadata
93+
tempfile = Tempfile.open("arrow-ipc-file-writer")
94+
output = Arrow::FileOutputStream.new(tempfile.path, false)
95+
96+
array = build_boolean_array([true, false, true])
97+
field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
98+
schema = Arrow::Schema.new([field])
99+
100+
options = Arrow::WriteOptions.new
101+
metadata = {"key1" => "value1", "key2" => "value2"}
102+
begin
103+
file_writer = Arrow::RecordBatchFileWriter.new(output,
104+
schema,
105+
options,
106+
metadata)
107+
file_writer.close
108+
assert do
109+
file_writer.closed?
110+
end
111+
ensure
112+
output.close
113+
end
114+
115+
input = Arrow::MemoryMappedInputStream.new(tempfile.path)
116+
begin
117+
file_reader = Arrow::RecordBatchFileReader.new(input)
118+
assert_equal(metadata, file_reader.metadata)
119+
ensure
120+
input.close
121+
end
122+
end
91123
end

cpp/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
115115
# vcpkg (ab)uses CMAKE_TOOLCHAIN_FILE to inject its logic into CMake
116116
if(ARROW_DEPENDENCY_SOURCE STREQUAL "VCPKG")
117117
include(Usevcpkg)
118+
elseif(ARROW_PACKAGE_KIND STREQUAL "vcpkg")
119+
set(ARROW_VCPKG ON)
118120
endif()
119121

120122
project(arrow VERSION "${ARROW_BASE_VERSION}")

cpp/cmake_modules/FindSnappyAlt.cmake

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,12 @@ if(Snappy_FOUND)
5252
set(SnappyAlt_FOUND TRUE)
5353
return()
5454
else()
55-
# The Conan's Snappy package always uses Snappy::snappy and it's
56-
# an INTERFACE_LIBRARY.
57-
get_target_property(Snappy Snappy::snappy TYPE)
55+
# The Snappy packages in Conan and vcpkg always use
56+
# Snappy::snappy.
57+
#
58+
# The Snappy package in Conan uses an INTERFACE_LIBRARY for
59+
# Snappy::snappy.
60+
get_target_property(Snappy_TYPE Snappy::snappy TYPE)
5861
if(Snappy_TYPE STREQUAL "STATIC_LIBRARY" OR Snappy_TYPE STREQUAL
5962
"INTERFACE_LIBRARY")
6063
set(Snappy_TARGET Snappy::snappy)

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1683,7 +1683,8 @@ endif()
16831683
if(ARROW_BUILD_TESTS
16841684
OR ARROW_BUILD_BENCHMARKS
16851685
OR ARROW_BUILD_INTEGRATION
1686-
OR ARROW_USE_GLOG)
1686+
OR ARROW_USE_GLOG
1687+
OR (ARROW_FLIGHT_SQL AND ARROW_BUILD_EXAMPLES))
16871688
set(ARROW_NEED_GFLAGS TRUE)
16881689
else()
16891690
set(ARROW_NEED_GFLAGS FALSE)

cpp/src/arrow/util/align_util.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t bit_of
4646
int64_t length) {
4747
// TODO: We can remove this condition once CRAN upgrades its macOS
4848
// SDK from 11.3.
49-
#if defined(__clang__) && !defined(__cpp_lib_bitops) && !defined(__EMSCRIPTEN__)
49+
// __apple_build_version__ should be defined only on Apple clang
50+
#if defined(__apple_build_version__) && !defined(__cpp_lib_bitops)
5051
static_assert((ALIGN_IN_BYTES != 0) && ((ALIGN_IN_BYTES & (ALIGN_IN_BYTES - 1)) == 0),
5152
"ALIGN_IN_BYTES should be a positive power of two");
5253
#else

0 commit comments

Comments
 (0)