Skip to content

Commit 44d8ad3

Browse files
committed
feat: (GH-56) Add arrayApproxEqualString to handle null characters in strings
Signed-off-by: Saurabh Kumar Singh <[email protected]>
1 parent 460f500 commit 44d8ad3

File tree

2 files changed

+81
-3
lines changed

2 files changed

+81
-3
lines changed

arrow/array/compare.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package array
1919
import (
2020
"fmt"
2121
"math"
22+
"strings"
2223

2324
"github.com/apache/arrow-go/v18/arrow"
2425
"github.com/apache/arrow-go/v18/arrow/float16"
@@ -487,13 +488,13 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool {
487488
return arrayEqualBinary(l, r)
488489
case *String:
489490
r := right.(*String)
490-
return arrayEqualString(l, r)
491+
return arrayApproxEqualString(l, r)
491492
case *LargeBinary:
492493
r := right.(*LargeBinary)
493494
return arrayEqualLargeBinary(l, r)
494495
case *LargeString:
495496
r := right.(*LargeString)
496-
return arrayEqualLargeString(l, r)
497+
return arrayApproxEqualLargeString(l, r)
497498
case *BinaryView:
498499
r := right.(*BinaryView)
499500
return arrayEqualBinaryView(l, r)
@@ -644,6 +645,34 @@ func validityBitmapEqual(left, right arrow.Array) bool {
644645
return true
645646
}
646647

648+
func arrayApproxEqualString(left, right *String) bool {
649+
for i := 0; i < left.Len(); i++ {
650+
if left.IsNull(i) {
651+
continue
652+
}
653+
if stripNulls(left.Value(i)) != stripNulls(right.Value(i)) {
654+
return false
655+
}
656+
}
657+
return true
658+
}
659+
660+
func arrayApproxEqualLargeString(left, right *LargeString) bool {
661+
for i := 0; i < left.Len(); i++ {
662+
if left.IsNull(i) {
663+
continue
664+
}
665+
if stripNulls(left.Value(i)) != stripNulls(right.Value(i)) {
666+
return false
667+
}
668+
}
669+
return true
670+
}
671+
672+
// stripNulls returns s with every trailing NUL ("\x00") byte removed.
// NUL bytes at the start or in the middle of s are left untouched.
func stripNulls(s string) string {
	const nul = "\x00"
	// Peel one trailing NUL per iteration until none remain.
	for strings.HasSuffix(s, nul) {
		s = strings.TrimSuffix(s, nul)
	}
	return s
}
675+
647676
func arrayApproxEqualFloat16(left, right *Float16, opt equalOption) bool {
648677
for i := 0; i < left.Len(); i++ {
649678
if left.IsNull(i) {

arrow/array/compare_test.go

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,48 @@ func TestArrayApproxEqual(t *testing.T) {
111111
}
112112
}
113113

114+
func TestArrayApproxEqualStrings(t *testing.T) {
115+
for _, tc := range []struct {
116+
name string
117+
a1 interface{}
118+
a2 interface{}
119+
want bool
120+
}{
121+
{
122+
name: "string",
123+
a1: []string{"a", "b", "c", "d", "e", "f"},
124+
a2: []string{"a", "b", "c", "d", "e", "f"},
125+
want: true,
126+
},
127+
{
128+
name: "string",
129+
a1: []string{"a", "b\x00"},
130+
a2: []string{"a", "b"},
131+
want: true,
132+
},
133+
{
134+
name: "string",
135+
a1: []string{"a", "b\x00"},
136+
a2: []string{"a\x00", "b"},
137+
want: true,
138+
},
139+
}{
140+
t.Run(tc.name, func(t *testing.T) {
141+
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
142+
defer mem.AssertSize(t, 0)
143+
144+
a1 := arrayOf(mem, tc.a1, nil)
145+
defer a1.Release()
146+
a2 := arrayOf(mem, tc.a2, nil)
147+
defer a2.Release()
148+
149+
if got, want := array.ApproxEqual(a1, a2), tc.want; got != want {
150+
t.Fatalf("invalid comparison: got=%v, want=%v\na1: %v\na2: %v\n", got, want, a1, a2)
151+
}
152+
})
153+
}
154+
}
155+
114156
func TestArrayApproxEqualFloats(t *testing.T) {
115157
f16sFrom := func(vs []float64) []float16.Num {
116158
o := make([]float16.Num, len(vs))
@@ -445,6 +487,13 @@ func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array {
445487
bldr.AppendValues(a, valids)
446488
return bldr.NewFloat64Array()
447489

490+
case []string:
491+
bldr := array.NewStringBuilder(mem)
492+
defer bldr.Release()
493+
494+
bldr.AppendValues(a, valids)
495+
return bldr.NewStringArray()
496+
448497
default:
449498
panic(fmt.Errorf("arrdata: invalid data slice type %T", a))
450499
}
@@ -725,4 +774,4 @@ func TestTableEqual(t *testing.T) {
725774
}
726775
})
727776
}
728-
}
777+
}

0 commit comments

Comments
 (0)