Skip to content
Closed
4 changes: 1 addition & 3 deletions numba_dpex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,15 @@
import llvmlite.binding as ll
import numba
from numba.core import ir_utils
from numba.np import arrayobj
from numba.np.ufunc import array_exprs
from numba.np.ufunc.decorators import Vectorize

from numba_dpex._patches import _empty_nd_impl, _is_ufunc, _mk_alloc
from numba_dpex._patches import _is_ufunc, _mk_alloc
from numba_dpex.vectorizers import Vectorize as DpexVectorize

# Monkey patches
array_exprs._is_ufunc = _is_ufunc
ir_utils.mk_alloc = _mk_alloc
arrayobj._empty_nd_impl = _empty_nd_impl


def load_dpctl_sycl_interface():
Expand Down
178 changes: 0 additions & 178 deletions numba_dpex/_patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,14 @@
# SPDX-License-Identifier: Apache-2.0

import numpy
from llvmlite import ir as llvmir
from llvmlite.ir import Constant
from numba.core import cgutils
from numba.core import config as numba_config
from numba.core import ir, types
from numba.core.ir_utils import (
convert_size_to_var,
get_np_ufunc_typ,
mk_unique_var,
)
from numba.core.typing import signature
from numba.extending import intrinsic, overload_classmethod
from numba.np.arrayobj import (
_call_allocator,
get_itemsize,
make_array,
populate_array,
)
from numba.np.ufunc.dufunc import DUFunc

from numba_dpex.core.runtime import context as dpexrt
from numba_dpex.core.types import DpnpNdArray

# Numpy array constructors


def _is_ufunc(func):
return isinstance(func, (numpy.ufunc, DUFunc)) or hasattr(
Expand Down Expand Up @@ -170,164 +153,3 @@ def _mk_alloc(
out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign])

return out


def _empty_nd_impl(context, builder, arrtype, shapes):
"""Utility function used for allocating a new array during LLVM code
generation (lowering). Given a target context, builder, array
type, and a tuple or list of lowered dimension sizes, returns a
LLVM value pointing at a Numba runtime allocated array.
"""

arycls = make_array(arrtype)
ary = arycls(context, builder)

datatype = context.get_data_type(arrtype.dtype)
itemsize = context.get_constant(types.intp, get_itemsize(context, arrtype))

# compute array length
arrlen = context.get_constant(types.intp, 1)
overflow = Constant(llvmir.IntType(1), 0)
for s in shapes:
arrlen_mult = builder.smul_with_overflow(arrlen, s)
arrlen = builder.extract_value(arrlen_mult, 0)
overflow = builder.or_(overflow, builder.extract_value(arrlen_mult, 1))

if arrtype.ndim == 0:
strides = ()
elif arrtype.layout == "C":
strides = [itemsize]
for dimension_size in reversed(shapes[1:]):
strides.append(builder.mul(strides[-1], dimension_size))
strides = tuple(reversed(strides))
elif arrtype.layout == "F":
strides = [itemsize]
for dimension_size in shapes[:-1]:
strides.append(builder.mul(strides[-1], dimension_size))
strides = tuple(strides)
else:
raise NotImplementedError(
"Don't know how to allocate array with layout '{0}'.".format(
arrtype.layout
)
)

# Check overflow, numpy also does this after checking order
allocsize_mult = builder.smul_with_overflow(arrlen, itemsize)
allocsize = builder.extract_value(allocsize_mult, 0)
overflow = builder.or_(overflow, builder.extract_value(allocsize_mult, 1))

with builder.if_then(overflow, likely=False):
# Raise same error as numpy, see:
# https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101 # noqa: E501
context.call_conv.return_user_exc(
builder,
ValueError,
(
"array is too big; `arr.size * arr.dtype.itemsize` is larger than"
" the maximum possible size.",
),
)

if isinstance(arrtype, DpnpNdArray):
usm_ty = arrtype.usm_type
usm_ty_val = 0
if usm_ty == "device":
usm_ty_val = 1
elif usm_ty == "shared":
usm_ty_val = 2
elif usm_ty == "host":
usm_ty_val = 3
usm_type = context.get_constant(types.uint64, usm_ty_val)
device = context.insert_const_string(builder.module, arrtype.device)

args = (
context.get_dummy_value(),
allocsize,
usm_type,
device,
)
mip = types.MemInfoPointer(types.voidptr)
arytypeclass = types.TypeRef(type(arrtype))
sig = signature(
mip,
arytypeclass,
types.intp,
types.uint64,
types.voidptr,
)
from numba_dpex.decorators import dpjit

op = dpjit(_call_usm_allocator)
fnop = context.typing_context.resolve_value_type(op)
# The _call_usm_allocator function will be compiled and added to registry
# when the get_call_type function is invoked.
fnop.get_call_type(context.typing_context, sig.args, {})
eqfn = context.get_function(fnop, sig)
meminfo = eqfn(builder, args)
else:
dtype = arrtype.dtype
align_val = context.get_preferred_array_alignment(dtype)
align = context.get_constant(types.uint32, align_val)
args = (context.get_dummy_value(), allocsize, align)

mip = types.MemInfoPointer(types.voidptr)
arytypeclass = types.TypeRef(type(arrtype))
argtypes = signature(mip, arytypeclass, types.intp, types.uint32)

meminfo = context.compile_internal(
builder, _call_allocator, argtypes, args
)

data = context.nrt.meminfo_data(builder, meminfo)

intp_t = context.get_value_type(types.intp)
shape_array = cgutils.pack_array(builder, shapes, ty=intp_t)
strides_array = cgutils.pack_array(builder, strides, ty=intp_t)

populate_array(
ary,
data=builder.bitcast(data, datatype.as_pointer()),
shape=shape_array,
strides=strides_array,
itemsize=itemsize,
meminfo=meminfo,
)

return ary


@overload_classmethod(DpnpNdArray, "_usm_allocate")
def _ol_array_allocate(cls, allocsize, usm_type, device):
"""Implements an allocator for dpnp.ndarrays."""

def impl(cls, allocsize, usm_type, device):
return intrin_usm_alloc(allocsize, usm_type, device)

return impl


numba_config.DISABLE_PERFORMANCE_WARNINGS = 0


def _call_usm_allocator(arrtype, size, usm_type, device):
"""Trampoline to call the intrinsic used for allocation"""
return arrtype._usm_allocate(size, usm_type, device)


numba_config.DISABLE_PERFORMANCE_WARNINGS = 1


@intrinsic
def intrin_usm_alloc(typingctx, allocsize, usm_type, device):
"""Intrinsic to call into the allocator for Array"""

def codegen(context, builder, signature, args):
[allocsize, usm_type, device] = args
dpexrtCtx = dpexrt.DpexRTContext(context)
meminfo = dpexrtCtx.meminfo_alloc(builder, allocsize, usm_type, device)
return meminfo

mip = types.MemInfoPointer(types.voidptr) # return untyped pointer
sig = signature(mip, allocsize, usm_type, device)
return sig, codegen
57 changes: 33 additions & 24 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,15 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
bool dest_is_float,
bool value_is_float,
int64_t value,
const char *device);
const DPCTLSyclQueueRef qref);
static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj,
void *data,
npy_intp nitems,
npy_intp itemsize,
DPCTLSyclQueueRef qref);
static NRT_MemInfo *DPEXRT_MemInfo_alloc(npy_intp size,
size_t usm_type,
const DPCTLSyclQueueRef qref);
static void usmndarray_meminfo_dtor(void *ptr, size_t size, void *info);
static PyObject *box_from_arystruct_parent(arystruct_t *arystruct,
int ndim,
Expand Down Expand Up @@ -477,17 +480,23 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj,
* @param size The size of memory (data) owned by the NRT_MemInfo
* object.
* @param usm_type The usm type of the memory.
* @param device The device on which the memory was allocated.
* @param qref The sycl queue on which the memory was allocated. Note
* that the ownership of the qref object is passed to
* the NRT_MemInfo. As such, it is the caller's
* responsibility to ensure the qref is nt owned by any
* other object and is not deallocated. For such cases,
* the caller should copy the DpctlSyclQueueRef and
* pass a copy of the original qref.
* @return {return} A new NRT_MemInfo object, NULL if no NRT_MemInfo
* object could be created.
*/
static NRT_MemInfo *
DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
static NRT_MemInfo *DPEXRT_MemInfo_alloc(npy_intp size,
size_t usm_type,
const DPCTLSyclQueueRef qref)
{
NRT_MemInfo *mi = NULL;
NRT_ExternalAllocator *ext_alloca = NULL;
MemInfoDtorInfo *midtor_info = NULL;
DPCTLSyclQueueRef qref = NULL;

DPEXRT_DEBUG(drt_debug_print(
"DPEXRT-DEBUG: Inside DPEXRT_MemInfo_alloc %s, line %d\n", __FILE__,
Expand All @@ -499,15 +508,6 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
goto error;
}

if (!(qref = (DPCTLSyclQueueRef)DPEXRTQueue_CreateFromFilterString(device)))
{
DPEXRT_DEBUG(
drt_debug_print("DPEXRT-ERROR: Could not create a sycl::queue from "
"filter string: %s at %s %d.\n",
device, __FILE__, __LINE__));
goto error;
}

// Allocate a new NRT_ExternalAllocator
if (!(ext_alloca = NRT_ExternalAllocator_new_for_usm(qref, usm_type)))
goto error;
Expand All @@ -520,15 +520,22 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
mi->dtor_info = midtor_info;
mi->data = ext_alloca->malloc(size, qref);

DPEXRT_DEBUG(
DPCTLSyclDeviceRef device_ref; device_ref = DPCTLQueue_GetDevice(qref);
drt_debug_print(
"DPEXRT-DEBUG: DPEXRT_MemInfo_alloc, device info in %s at %d:\n%s",
__FILE__, __LINE__, DPCTLDeviceMgr_GetDeviceInfoStr(device_ref));
DPCTLDevice_Delete(device_ref););

if (mi->data == NULL)
goto error;

mi->size = size;
mi->external_allocator = ext_alloca;
DPEXRT_DEBUG(drt_debug_print(
"DPEXRT-DEBUG: DPEXRT_MemInfo_alloc mi=%p "
"external_allocator=%p for usm_type %zu on device %s, %s at %d\n",
mi, ext_alloca, usm_type, device, __FILE__, __LINE__));
"external_allocator=%p for usm_type=%zu on queue=%p, %s at %d\n",
mi, ext_alloca, usm_type, DPCTLQueue_Hash(qref), __FILE__, __LINE__));

return mi;

Expand All @@ -551,7 +558,7 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
* @param dest_is_float True if the destination array's dtype is float.
* @param value_is_float True if the value to be filled is float.
* @param value The value to be used to fill an array.
* @param device The device on which the memory was allocated.
* @param qref The queue on which the memory was allocated.
* @return NRT_MemInfo* A new NRT_MemInfo object, NULL if no NRT_MemInfo
* object could be created.
*/
Expand All @@ -560,9 +567,8 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
bool dest_is_float,
bool value_is_float,
int64_t value,
const char *device)
const DPCTLSyclQueueRef qref)
{
DPCTLSyclQueueRef qref = NULL;
DPCTLSyclEventRef eref = NULL;
size_t count = 0, size = 0, exp = 0;

Expand Down Expand Up @@ -603,9 +609,6 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
goto error;
}

if (!(qref = (DPCTLSyclQueueRef)DPEXRTQueue_CreateFromFilterString(device)))
goto error;

switch (exp) {
case 3:
{
Expand Down Expand Up @@ -694,8 +697,6 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
}

DPCTLEvent_Wait(eref);

DPCTLQueue_Delete(qref);
DPCTLEvent_Delete(eref);

return mi;
Expand Down Expand Up @@ -1198,6 +1199,14 @@ static int DPEXRT_sycl_queue_from_python(PyObject *obj,
goto error;
}

DPEXRT_DEBUG(DPCTLSyclDeviceRef device_ref;
device_ref = DPCTLQueue_GetDevice(queue_ref);
drt_debug_print("DPEXRT-DEBUG: DPEXRT_sycl_queue_from_python, "
"device info in %s at %d:\n%s",
__FILE__, __LINE__,
DPCTLDeviceMgr_GetDeviceInfoStr(device_ref));
DPCTLDevice_Delete(device_ref););

queue_struct->parent = obj;
queue_struct->queue_ref = queue_ref;

Expand Down
Loading