forked from apache/arrow-julia
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmap.jl
More file actions
121 lines (110 loc) · 5.41 KB
/
map.jl
File metadata and controls
121 lines (110 loc) · 5.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Arrow.Map
An `ArrowVector` where each element is a "map" of some kind, like a `Dict`.
"""
struct Map{T, O, A} <: ArrowVector{T}
    # per-element validity flags; `getindex` consults them when `T` admits `missing`
    validity::ValidityBitmap
    # `offsets[i]` yields the (lo, hi) bounds of element `i` inside `data`
    offsets::Offsets{O}
    # flattened child vector whose entries expose `.key` and `.value`
    data::A
    # number of map elements; this is what `size` reports
    ℓ::Int
    # optional string-to-string metadata, or `nothing`
    metadata::Union{Nothing, Base.ImmutableDict{String, String}}
end
# A `Map` is one-dimensional; its length is the stored element count `ℓ`.
function Base.size(l::Map)
    return (l.ℓ,)
end
# Materialize element `i` as a `Dict` built from the `.key`/`.value` entries of the
# child data in the range given by `offsets[i]`, then hand it to
# `ArrowTypes.fromarrow(T, ...)`. When `T` admits `missing`, an element whose
# validity flag is false is returned as `missing` instead.
@propagate_inbounds function Base.getindex(l::Map{T}, i::Integer) where {T}
    @boundscheck checkbounds(l, i)
    @inbounds firstidx, lastidx = l.offsets[i]
    # The validity bitmap is only consulted for element types that allow `missing`.
    if Base.nonmissingtype(T) !== T && !l.validity[i]
        return missing
    end
    entries = view(l.data, firstidx:lastidx)
    return ArrowTypes.fromarrow(T, Dict(kv.key => kv.value for kv in entries))
end
# Convert one map-like element into a vector of `KT(key, value)` entries.
# A `missing` element is passed through unchanged.
keyvalues(KT, ::Missing) = missing
function keyvalues(KT, x::AbstractDict)
    return [KT(key, value) for (key, value) in pairs(x)]
end
# Extract the key and value types from a `(key, value)` named-tuple type.
function keyvaluetypes(::Type{NamedTuple{(:key, :value), Tuple{K, V}}}) where {K, V}
    return (K, V)
end
# An input that is already a `Map` is returned as-is; all other arguments are ignored.
function arrowvector(::MapKind, x::Map, i, nl, fi, de, ded, meta; kw...)
    return x
end
# Build a `Map` arrow vector from a collection `x` of map-like elements.
#
# Arguments:
# - `x`: collection whose (non-missing) element type supports `keytype`/`valtype`.
# - `i, nl, fi, de, ded`: forwarded to the child `arrowvector` call; `nl` is
#   incremented by one for the child.
# - `meta`: stored as the resulting `Map`'s metadata.
# - `largelists`: forwarded to `ToList` and to the child `arrowvector` call.
# - `kw...`: any remaining keywords, forwarded to the child `arrowvector` call.
#
# Throws an `ArgumentError` when the key type or value type is rejected by
# `ArrowTypes.concrete_or_concreteunion`.
function arrowvector(::MapKind, x, i, nl, fi, de, ded, meta; largelists::Bool=false, kw...)
    len = length(x)
    validity = ValidityBitmap(x)
    ET = eltype(x)
    DT = Base.nonmissingtype(ET)
    KDT, VDT = keytype(DT), valtype(DT)
    ArrowTypes.concrete_or_concreteunion(KDT) || throw(ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == $KDT`"))
    ArrowTypes.concrete_or_concreteunion(VDT) || throw(ArgumentError("`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == $VDT`"))
    KT = KeyValue{KDT, VDT}
    VT = Vector{KT}
    # Keep `Missing` in the per-element type only when the input eltype allows it.
    T = DT !== ET ? Union{Missing, VT} : VT
    flat = ToList(T[keyvalues(KT, y) for y in x]; largelists=largelists)
    offsets = Offsets(UInt8[], flat.inds)
    # The keyword must be spelled `largelists`: a misspelled name is silently
    # absorbed by `kw...` downstream and the caller's setting would be lost.
    data = arrowvector(flat, i, nl + 1, fi, de, ded, nothing; largelists=largelists, kw...)
    K, V = keyvaluetypes(eltype(data))
    return Map{withmissing(ET, Dict{K, V}), eltype(flat.inds), typeof(data)}(validity, offsets, data, len, meta)
end
# Compress a `Map` column: the validity bitmap and the raw offsets become the two
# buffers, and the compressed child data becomes the single child entry.
function compress(Z::Meta.CompressionType, comp, x::A) where {A <: Map}
    len = length(x)
    nc = nullcount(x)
    buffers = [compress(Z, comp, x.validity), compress(Z, comp, x.offsets.offsets)]
    children = Compressed[compress(Z, comp, x.data)]
    return Compressed{Z, A}(x, buffers, len, nc, children)
end
# Append the field node and buffer descriptors for a `Map` or `List` column to
# `fieldnodes` / `fieldbuffers`, starting at `bufferoffset`, and return the buffer
# offset that follows everything this column (and its child data) occupies.
function makenodesbuffers!(col::Union{Map{T, O, A}, List{T, O, A}}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, O, A}
    nrows = length(col)
    nnull = nullcount(col)
    push!(fieldnodes, FieldNode(nrows, nnull))
    @debugv 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"

    # validity bitmap: recorded with zero length when the column has no nulls
    bitmapbytes = nnull == 0 ? 0 : bitpackedbytes(nrows, alignment)
    push!(fieldbuffers, Buffer(bufferoffset, bitmapbytes))
    @debugv 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    bufferoffset += bitmapbytes

    # offsets buffer: one entry more than the number of elements
    offsetbytes = sizeof(O) * (nrows + 1)
    push!(fieldbuffers, Buffer(bufferoffset, offsetbytes))
    @debugv 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    bufferoffset += padding(offsetbytes, alignment)

    # child data that is not raw bytes describes its own nodes and buffers
    if eltype(A) != UInt8
        return makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
    end

    # raw byte data is recorded directly as a single buffer
    databytes = length(col.data)
    push!(fieldbuffers, Buffer(bufferoffset, databytes))
    @debugv 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    return bufferoffset + padding(databytes, alignment)
end
# Write a `Map` or `List` column to `io`: first the validity bitmap, then the
# offsets array, then the child data. Each directly written array is followed by
# zero padding sized by `paddinglength(n, alignment)`. Returns `nothing`.
function writebuffer(io, col::Union{Map{T, O, A}, List{T, O, A}}, alignment) where {T, O, A}
    @debugv 1 "writebuffer: col = $(typeof(col))"
    @debugv 2 col
    writebitmap(io, col, alignment)

    # offsets array, followed by its padding
    n = writearray(io, O, col.offsets.offsets)
    @debugv 1 "writing array: col = $(typeof(col.offsets.offsets)), n = $n, padded = $(padding(n, alignment))"
    writezeros(io, paddinglength(n, alignment))

    # child data that is not raw bytes writes itself recursively
    if eltype(A) != UInt8
        writebuffer(io, col.data, alignment)
        return
    end

    # raw byte data is written directly, followed by its padding
    n = writearray(io, UInt8, col.data)
    @debugv 1 "writing array: col = $(typeof(col.data)), n = $n, padded = $(padding(n, alignment))"
    writezeros(io, paddinglength(n, alignment))
    return
end