Skip to content

Commit 9efe04b

Browse files
authored
fix(arrow/array): optional struct array with required field (#359)
### Rationale for this change apache/iceberg-go#398 discovered that the current `NewStructArrayWithFields` fails if any child is marked as non-nullable but has nulls (as would be the case in an optional struct array full of nulls with a required field). So we need to allow constructing such an array. ### What changes are included in this PR? A new function is created, `NewStructArrayWithFieldsAndNulls` which takes in the top level struct null bitmap, the number of nulls, offset columns and list of fields. ### Are these changes tested? Yes, a unit test was created for it. ### Are there any user-facing changes? The above case that would error, now will no longer error. Co-authored-by: Matt Topol <[email protected]>
1 parent a62d36f commit 9efe04b

File tree

2 files changed

+47
-5
lines changed

2 files changed

+47
-5
lines changed

arrow/array/struct.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ func NewStructArray(cols []arrow.Array, names []string) (*Struct, error) {
4646
// and provided fields. As opposed to NewStructArray, this allows you to provide
4747
// the full fields to utilize for the struct column instead of just the names.
4848
func NewStructArrayWithFields(cols []arrow.Array, fields []arrow.Field) (*Struct, error) {
49+
return NewStructArrayWithFieldsAndNulls(cols, fields, nil, 0, 0)
50+
}
51+
52+
// NewStructArrayWithFieldsAndNulls is like NewStructArrayWithFields as a convenience function,
53+
// but also takes in a null bitmap, the number of nulls, and an optional offset
54+
// to use for creating the Struct Array.
55+
func NewStructArrayWithFieldsAndNulls(cols []arrow.Array, fields []arrow.Field, nullBitmap *memory.Buffer, nullCount int, offset int) (*Struct, error) {
4956
if len(cols) != len(fields) {
5057
return nil, fmt.Errorf("%w: mismatching number of fields and child arrays", arrow.ErrInvalid)
5158
}
@@ -63,15 +70,18 @@ func NewStructArrayWithFields(cols []arrow.Array, fields []arrow.Field) (*Struct
6370
return nil, fmt.Errorf("%w: mismatching data type for child #%d, field says '%s', got '%s'",
6471
arrow.ErrInvalid, i, fields[i].Type, c.DataType())
6572
}
66-
if !fields[i].Nullable && c.NullN() > 0 {
67-
return nil, fmt.Errorf("%w: field says not-nullable, child #%d has nulls",
68-
arrow.ErrInvalid, i)
69-
}
7073

7174
children[i] = c.Data()
7275
}
7376

74-
data := NewData(arrow.StructOf(fields...), length, []*memory.Buffer{nil}, children, 0, 0)
77+
if nullBitmap == nil {
78+
if nullCount > 0 {
79+
return nil, fmt.Errorf("%w: null count is greater than 0 but null bitmap is nil", arrow.ErrInvalid)
80+
}
81+
nullCount = 0
82+
}
83+
84+
data := NewData(arrow.StructOf(fields...), length-offset, []*memory.Buffer{nullBitmap}, children, nullCount, offset)
7585
defer data.Release()
7686
return NewStructData(data), nil
7787
}

arrow/array/struct_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"github.com/apache/arrow-go/v18/arrow/array"
2525
"github.com/apache/arrow-go/v18/arrow/memory"
2626
"github.com/stretchr/testify/assert"
27+
"github.com/stretchr/testify/require"
2728
)
2829

2930
func TestStructArray(t *testing.T) {
@@ -530,3 +531,34 @@ func TestStructArrayUnmarshalJSONMissingFields(t *testing.T) {
530531
)
531532
}
532533
}
534+
535+
func TestCreateStructWithNulls(t *testing.T) {
536+
pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
537+
defer pool.AssertSize(t, 0)
538+
539+
var (
540+
fields = []arrow.Field{
541+
{Name: "f1", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
542+
{Name: "f2", Type: arrow.PrimitiveTypes.Int32, Nullable: false},
543+
}
544+
dtype = arrow.StructOf(fields...)
545+
)
546+
547+
sb := array.NewStructBuilder(pool, dtype)
548+
defer sb.Release()
549+
550+
sb.AppendNulls(100)
551+
552+
arr := sb.NewArray().(*array.Struct)
553+
defer arr.Release()
554+
555+
assert.EqualValues(t, 100, arr.Len())
556+
assert.EqualValues(t, 100, arr.NullN())
557+
558+
arr2, err := array.NewStructArrayWithFieldsAndNulls(
559+
[]arrow.Array{arr.Field(0), arr.Field(1)}, fields, arr.Data().Buffers()[0], arr.NullN(), 0)
560+
require.NoError(t, err)
561+
defer arr2.Release()
562+
563+
assert.True(t, array.Equal(arr, arr2))
564+
}

0 commit comments

Comments
 (0)