Commit c191420
fix(arrow/memory): Align allocations always (#289)
### Rationale for this change

We pay a bit of up-front memory to make sure allocations are always aligned, making in-process FFI easier.

### What changes are included in this PR?

Always align in Buffer.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Maybe: the default Go allocator already aligned allocations. Other allocators may now have a bit more overhead. We will now effectively never use `realloc` with the mallocator, instead always preferring a fresh `malloc` + `memcpy` + `free`.

Closes #282.
1 parent fa62889 commit c191420
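The cost model described above is simple: every allocation now reserves `alignment` extra bytes from malloc so that an aligned pointer is guaranteed to exist inside the block. A minimal sketch of that arithmetic (illustrative only, not code from this commit):

package main

import "fmt"

func main() {
    const alignment = 64 // the Mallocator default in this commit
    for _, size := range []int{1, 64, 4096} {
        // Each allocation is padded by the full alignment up front.
        padded := size + alignment
        fmt.Printf("requested %d B -> malloc'd %d B (overhead %d B)\n",
            size, padded, alignment)
    }
}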

File tree: 2 files changed, +96 −27 lines

arrow/memory/mallocator/mallocator.go

Lines changed: 51 additions & 27 deletions
@@ -19,21 +19,20 @@ package mallocator
 
 // #include <stdlib.h>
 // #include <string.h>
-//
-// void* realloc_and_initialize(void* ptr, size_t old_len, size_t new_len) {
-//     void* new_ptr = realloc(ptr, new_len);
-//     if (new_ptr && new_len > old_len) {
-//         memset(new_ptr + old_len, 0, new_len - old_len);
-//     }
-//     return new_ptr;
-// }
 import "C"
 
 import (
+    "sync"
     "sync/atomic"
     "unsafe"
 )
 
+func roundToPowerOf2(v, round uintptr) uintptr {
+    forceCarry := round - 1
+    truncateMask := ^forceCarry
+    return (v + forceCarry) & truncateMask
+}
+
 // Mallocator is an allocator which defers to libc malloc.
 //
 // The primary reason to use this is when exporting data across the C Data
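The new `roundToPowerOf2` helper rounds an address up to the next multiple of `round` with bit arithmetic: adding `round - 1` forces a carry out of the low bits whenever the address is misaligned, and masking with `^(round - 1)` clears those low bits. This only works when `round` is a power of two, which the constructor below enforces. A standalone worked example (values chosen for illustration):

package main

import "fmt"

func roundToPowerOf2(v, round uintptr) uintptr {
    forceCarry := round - 1     // e.g. 63 = 0b111111 for round = 64
    truncateMask := ^forceCarry // clears the low log2(round) bits
    return (v + forceCarry) & truncateMask
}

func main() {
    fmt.Printf("%#x\n", roundToPowerOf2(0x1001, 64)) // 0x1040: rounded up
    fmt.Printf("%#x\n", roundToPowerOf2(0x1040, 64)) // 0x1040: already aligned
}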
@@ -45,9 +44,22 @@ import (
 // The build tag 'mallocator' will also make this the default allocator.
 type Mallocator struct {
     allocatedBytes uint64
+    // We want to align allocations, but since we only get/return []byte,
+    // we need to remember the "real" address for Free somehow
+    realAllocations sync.Map
+    alignment       int
 }
 
-func NewMallocator() *Mallocator { return &Mallocator{} }
+func NewMallocator() *Mallocator { return &Mallocator{alignment: 64} }
+
+func NewMallocatorWithAlignment(alignment int) *Mallocator {
+    if alignment < 1 {
+        panic("mallocator: invalid alignment (must be positive)")
+    } else if alignment > 1 && (alignment&(alignment-1)) != 0 {
+        panic("mallocator: invalid alignment (must be power of 2)")
+    }
+    return &Mallocator{alignment: alignment}
+}
 
 func (alloc *Mallocator) Allocate(size int) []byte {
     // Use calloc to zero-initialize memory.
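`NewMallocatorWithAlignment` validates its argument with the classic `v & (v-1)` trick: a power of two has exactly one set bit, so subtracting one flips that bit and sets every bit below it, making the AND zero. Shown in isolation (the helper name is hypothetical, not part of this commit):

package main

import "fmt"

// isPowerOfTwo mirrors the constructor's validity check.
func isPowerOfTwo(v int) bool {
    return v > 0 && v&(v-1) == 0
}

func main() {
    for _, v := range []int{1, 2, 48, 64} {
        fmt.Println(v, isPowerOfTwo(v)) // 1 true, 2 true, 48 false, 64 true
    }
}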
@@ -58,28 +70,44 @@ func (alloc *Mallocator) Allocate(size int) []byte {
     if size < 0 {
         panic("mallocator: negative size")
     }
-    ptr, err := C.calloc(C.size_t(size), 1)
+    paddedSize := C.size_t(size + alloc.alignment)
+    ptr, err := C.calloc(paddedSize, 1)
     if err != nil {
         // under some circumstances and allocation patterns, we can end up in a scenario
         // where for some reason calloc return ENOMEM even though there is definitely memory
         // available for use. So we attempt to fallback to simply doing malloc + memset in
         // this case. If malloc returns a nil pointer, then we know we're out of memory
         // and will surface the error.
-        if ptr = C.malloc(C.size_t(size)); ptr == nil {
+        if ptr = C.malloc(paddedSize); ptr == nil {
             panic(err)
         }
-        C.memset(ptr, 0, C.size_t(size))
+        C.memset(ptr, 0, paddedSize)
     } else if ptr == nil {
         panic("mallocator: out of memory")
     }
 
+    buf := unsafe.Slice((*byte)(ptr), paddedSize)
+    aligned := roundToPowerOf2(uintptr(ptr), uintptr(alloc.alignment))
+    alloc.realAllocations.Store(aligned, uintptr(ptr))
     atomic.AddUint64(&alloc.allocatedBytes, uint64(size))
-    return unsafe.Slice((*byte)(ptr), size)
+
+    if uintptr(ptr) != aligned {
+        shift := aligned - uintptr(ptr)
+        return buf[shift : uintptr(size)+shift : uintptr(size)+shift]
+    }
+    return buf[:size:size]
 }
 
 func (alloc *Mallocator) Free(b []byte) {
     sz := len(b)
-    C.free(getPtr(b))
+    ptr := getPtr(b)
+    realAddr, loaded := alloc.realAllocations.LoadAndDelete(uintptr(ptr))
+    if !loaded {
+        // double-free?
+        return
+    }
+    realPtr := unsafe.Pointer(realAddr.(uintptr))
+    C.free(realPtr)
     // Subtract sh.Len via two's complement (since atomic doesn't offer subtract)
     atomic.AddUint64(&alloc.allocatedBytes, ^(uint64(sz) - 1))
 }
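The scheme in Allocate/Free above: over-allocate by `alignment` bytes, round the malloc'd base address up to an aligned address, hand the caller a full-slice expression (len and cap pinned to `size`) starting there, and record aligned → real in a `sync.Map` so Free can release the original pointer. A cgo-free sketch of the same idea, using Go-heap bytes in place of malloc'd memory (demo code, not the Mallocator itself):

package main

import (
    "fmt"
    "sync"
    "unsafe"
)

// aligned address -> backing slice. Storing the slice also keeps the backing
// memory alive here; malloc'd memory, as in the real Mallocator, would not
// need that caveat.
var realAllocations sync.Map

func allocateAligned(size, alignment int) []byte {
    raw := make([]byte, size+alignment) // over-allocate by the alignment
    base := uintptr(unsafe.Pointer(&raw[0]))
    aligned := (base + uintptr(alignment) - 1) &^ (uintptr(alignment) - 1)
    realAllocations.Store(aligned, raw) // remember the real allocation
    shift := int(aligned - base)
    return raw[shift : shift+size : shift+size] // pin len and cap to size
}

func free(b []byte) {
    // Look up (and forget) the real allocation by its aligned address.
    realAllocations.Delete(uintptr(unsafe.Pointer(&b[0])))
}

func main() {
    buf := allocateAligned(10, 64)
    fmt.Println(uintptr(unsafe.Pointer(&buf[0]))%64 == 0) // true
    free(buf)
}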
@@ -88,20 +116,16 @@ func (alloc *Mallocator) Reallocate(size int, b []byte) []byte {
     if size < 0 {
         panic("mallocator: negative size")
     }
-    cp := cap(b)
-    ptr, err := C.realloc_and_initialize(getPtr(b), C.size_t(cp), C.size_t(size))
-    if err != nil {
-        panic(err)
-    } else if ptr == nil && size != 0 {
-        panic("mallocator: out of memory")
-    }
-    delta := size - len(b)
-    if delta >= 0 {
-        atomic.AddUint64(&alloc.allocatedBytes, uint64(delta))
-    } else {
-        atomic.AddUint64(&alloc.allocatedBytes, ^(uint64(-delta) - 1))
+
+    if cap(b) >= size {
+        diff := size - len(b)
+        atomic.AddUint64(&alloc.allocatedBytes, uint64(diff))
+        return b[:size]
     }
-    return unsafe.Slice((*byte)(ptr), size)
+    newBuf := alloc.Allocate(size)
+    copy(newBuf, b)
+    alloc.Free(b)
+    return newBuf
 }
 
 func (alloc *Mallocator) AllocatedBytes() int64 {
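With `realloc_and_initialize` gone, Reallocate has two paths: if the request fits within the slice's existing capacity it simply re-slices and adjusts the byte accounting, otherwise it takes the fresh `Allocate` + `copy` + `Free` route the commit message mentions. A usage sketch (buffer contents assumed for illustration):

package main

import (
    "fmt"

    "github.com/apache/arrow-go/v18/arrow/memory/mallocator"
)

func main() {
    a := mallocator.NewMallocator()
    buf := a.Allocate(64)
    copy(buf, "hello")

    // Beyond cap: a new aligned block is allocated, contents copied over,
    // and the old block freed.
    buf = a.Reallocate(128, buf)
    fmt.Println(string(buf[:5])) // "hello"

    // Within cap: the same block is re-sliced in place.
    buf = a.Reallocate(16, buf)
    a.Free(buf)
}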

arrow/memory/mallocator/mallocator_test.go

Lines changed: 45 additions & 0 deletions
@@ -22,6 +22,7 @@ package mallocator_test
 import (
     "fmt"
     "testing"
+    "unsafe"
 
     "github.com/apache/arrow-go/v18/arrow/memory/mallocator"
     "github.com/stretchr/testify/assert"
@@ -41,6 +42,32 @@ func TestMallocatorAllocate(t *testing.T) {
             for idx, c := range buf {
                 assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
             }
+            // check aligned
+            if size > 0 {
+                assert.Equal(t, uintptr(0), uintptr(unsafe.Pointer(&buf[0]))%64)
+            }
+        })
+    }
+}
+
+func TestMallocatorAllocateAligned(t *testing.T) {
+    sizes := []int{0, 1, 4, 33, 65, 4095, 4096, 8193}
+    for _, size := range sizes {
+        t.Run(fmt.Sprint(size), func(t *testing.T) {
+            a := mallocator.NewMallocatorWithAlignment(16)
+            buf := a.Allocate(size)
+            defer a.Free(buf)
+
+            assert.Equal(t, size, len(buf))
+            assert.LessOrEqual(t, size, cap(buf))
+            // check 0-initialized
+            for idx, c := range buf {
+                assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
+            }
+            // check aligned
+            if size > 0 {
+                assert.Equal(t, uintptr(0), uintptr(unsafe.Pointer(&buf[0]))%16)
+            }
         })
     }
 }
@@ -68,6 +95,7 @@ func TestMallocatorReallocate(t *testing.T) {
             for idx, c := range buf {
                 assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
             }
+            a.AssertSize(t, test.before)
 
             buf = a.Reallocate(test.after, buf)
             defer a.Free(buf)
@@ -77,10 +105,27 @@ func TestMallocatorReallocate(t *testing.T) {
             for idx, c := range buf {
                 assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
             }
+            a.AssertSize(t, test.after)
         })
     }
 }
 
+func TestMallocatorReallocateInPlace(t *testing.T) {
+    a := mallocator.NewMallocator()
+    buf := a.Allocate(80)
+    assert.Equal(t, 80, len(buf))
+    assert.LessOrEqual(t, 80, cap(buf))
+    a.AssertSize(t, 80)
+    addr := uintptr(unsafe.Pointer(&buf[0]))
+
+    buf2 := a.Reallocate(81, buf)
+    assert.Equal(t, 81, len(buf2))
+    assert.LessOrEqual(t, 81, cap(buf2))
+    a.AssertSize(t, 81)
+    addr2 := uintptr(unsafe.Pointer(&buf[0]))
+    assert.Equal(t, addr, addr2)
+}
+
 func TestMallocatorAssertSize(t *testing.T) {
     a := mallocator.NewMallocator()
     assert.Equal(t, int64(0), a.AllocatedBytes())
