Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/Arrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ import Base: ==

const DEBUG_LEVEL = Ref(0)

# Default number of bytes that buffers are aligned to when written in messages:
# 16 on Apple silicon (aarch64 macOS), 8 on every other host.
#
# Detection uses `Sys.isapple()` and `Sys.ARCH` instead of comparing
# `Base.BinaryPlatforms.HostPlatform()` with `==` against a hand-built `Platform`.
# The host platform carries extra tags (e.g. `julia_version`), so an exact
# equality test against a platform with only a couple of tags does not match
# and would silently fall back to 8 even on Apple silicon.
const DEFAULT_BYTE_ALIGNMENT = (Sys.isapple() && Sys.ARCH === :aarch64) ? 16 : 8

function setdebug!(level::Int)
DEBUG_LEVEL[] = level
return
Expand Down
4 changes: 2 additions & 2 deletions src/append.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ record batches simultaneously (e.g. if julia is started with `julia -t 8`
or the `JULIA_NUM_THREADS` environment variable is set).

Supported keyword arguments to `Arrow.append` include:
* `alignment::Int=8`: specify the number of bytes to align buffers to when written in messages; strongly recommended to only use alignment values of 8 or 64 for modern memory cache line optimization
* `alignment::Int=$DEFAULT_BYTE_ALIGNMENT`: specify the number of bytes to align buffers to when written in messages; strongly recommended to only use alignment values of 8 or 64 for modern memory cache line optimization, or 16 on Apple silicon
* `colmetadata=nothing`: the metadata that should be written as the table's columns' `custom_metadata` fields; must either be `nothing` or an `AbstractDict` of `column_name::Symbol => column_metadata` where `column_metadata` is an iterable of `<:AbstractString` pairs.
* `dictencode::Bool=false`: whether all columns should use dictionary encoding when being written; to dict encode specific columns, wrap the column/array in `Arrow.DictEncode(col)`
* `dictencodenested::Bool=false`: whether nested data type columns should also dict encode nested arrays/buffers; other language implementations [may not support this](https://arrow.apache.org/docs/status.html)
Expand Down Expand Up @@ -74,7 +74,7 @@ function append(io::IO, tbl;
denseunions::Bool=true,
dictencode::Bool=false,
dictencodenested::Bool=false,
alignment::Int=8,
alignment::Int=DEFAULT_BYTE_ALIGNMENT,
maxdepth::Int=DEFAULT_MAX_DEPTH,
ntasks=Inf,
convert::Bool=true,
Expand Down
4 changes: 2 additions & 2 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ record batches simultaneously (e.g. if julia is started with `julia -t 8` or the
Supported keyword arguments to `Arrow.write` include:
* `colmetadata=nothing`: the metadata that should be written as the table's columns' `custom_metadata` fields; must either be `nothing` or an `AbstractDict` of `column_name::Symbol => column_metadata` where `column_metadata` is an iterable of `<:AbstractString` pairs.
* `compress`: possible values include `:lz4`, `:zstd`, or your own initialized `LZ4FrameCompressor` or `ZstdCompressor` objects; will cause all buffers in each record batch to use the respective compression encoding
* `alignment::Int=8`: specify the number of bytes to align buffers to when written in messages; strongly recommended to only use alignment values of 8 or 64 for modern memory cache line optimization
* `alignment::Int=$DEFAULT_BYTE_ALIGNMENT`: specify the number of bytes to align buffers to when written in messages; strongly recommended to only use alignment values of 8 or 64 for modern memory cache line optimization, or 16 on Apple silicon
* `dictencode::Bool=false`: whether all columns should use dictionary encoding when being written; to dict encode specific columns, wrap the column/array in `Arrow.DictEncode(col)`
* `dictencodenested::Bool=false`: whether nested data type columns should also dict encode nested arrays/buffers; other language implementations [may not support this](https://arrow.apache.org/docs/status.html)
* `denseunions::Bool=true`: whether Julia `Vector{<:Union}` arrays should be written using the dense union layout; passing `false` will result in the sparse union layout
Expand Down Expand Up @@ -167,7 +167,7 @@ function Base.open(::Type{Writer}, io::IO, compress::Symbol, args...)
open(Writer, io, compressor, args...)
end

function Base.open(::Type{Writer}, io::IO; compress::Union{Nothing,Symbol,LZ4FrameCompressor,<:AbstractVector{LZ4FrameCompressor},ZstdCompressor,<:AbstractVector{ZstdCompressor}}=nothing, file::Bool=true, largelists::Bool=false, denseunions::Bool=true, dictencode::Bool=false, dictencodenested::Bool=false, alignment::Integer=8, maxdepth::Integer=DEFAULT_MAX_DEPTH, ntasks::Integer=typemax(Int32), metadata::Union{Nothing,Any}=nothing, colmetadata::Union{Nothing,Any}=nothing, closeio::Bool=false)
function Base.open(::Type{Writer}, io::IO; compress::Union{Nothing,Symbol,LZ4FrameCompressor,<:AbstractVector{LZ4FrameCompressor},ZstdCompressor,<:AbstractVector{ZstdCompressor}}=nothing, file::Bool=true, largelists::Bool=false, denseunions::Bool=true, dictencode::Bool=false, dictencodenested::Bool=false, alignment::Integer=DEFAULT_BYTE_ALIGNMENT, maxdepth::Integer=DEFAULT_MAX_DEPTH, ntasks::Integer=typemax(Int32), metadata::Union{Nothing,Any}=nothing, colmetadata::Union{Nothing,Any}=nothing, closeio::Bool=false)
open(Writer, io, compress, file, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata, closeio)
end

Expand Down