This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 6d6e46b

reminisce authored and haojin2 committed
Comparison ops implemented using mshadow (#16414)
* boolean op without tvm working
* Fix astype for boolean arrays
* More tests
* Revert
* Fix
* Fix preprocessor in .cu
* Fix logical_not
* Print compilation flags
* Fix transpose taking negative indices
* Fix transpose negative axes
* Fix transpose
* Fix
* Try to fix USE_TVM_OP not understood in .cu
* Fix squeeze
* Finally
* Fix
* Try to fix invalid ptx
* Implement API to get cuda compute capability
* Fix test_utils.py
* Fix pylint
1 parent 858a52e commit 6d6e46b

31 files changed (+871, -312 lines)
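The headline change is that comparison operators now have an mshadow (non-TVM) implementation and produce boolean ndarrays. A minimal sketch of how that surfaces in the NumPy front end, assuming a build at or after this commit (the printed values are what NumPy semantics predict, not output from the commit itself):

from mxnet import np, npx

npx.set_np()                      # enable NumPy shape/array semantics

a = np.array([1, 2, 3])
mask = a > 1                      # comparison op, now backed by mshadow
print(mask.dtype)                 # expected: bool
print(mask)                       # expected: [False  True  True]
print(mask.astype('float32'))     # astype on boolean arrays, also fixed in this commit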

CMakeLists.txt

Lines changed: 5 additions & 1 deletion
@@ -58,6 +58,11 @@ message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")
 message(STATUS "CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}")

 message(STATUS "CMAKE_SYSTEM_NAME ${CMAKE_SYSTEM_NAME}")
+
+if(USE_TVM_OP)
+  add_definitions(-DMXNET_USE_TVM_OP=1)
+endif()
+
 if(USE_CUDA AND NOT USE_OLDCMAKECUDA)
   message(STATUS "CMake version '${CMAKE_VERSION}' using generator '${CMAKE_GENERATOR}'")
   if(
@@ -743,7 +748,6 @@ if(USE_DIST_KVSTORE)
 endif()

 if(USE_TVM_OP)
-  add_definitions(-DMXNET_USE_TVM_OP=1)
   list(APPEND mxnet_LINKER_LIBS ${CMAKE_CURRENT_BINARY_DIR}/3rdparty/tvm/libtvm_runtime.so)
   include(cmake/BuildTVM.cmake)
   add_subdirectory("3rdparty/tvm")
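The CMake change above defines MXNET_USE_TVM_OP globally instead of only inside the later USE_TVM_OP block. A small sketch of how to confirm from Python which way a given binary was built, using the same runtime-feature check that test_utils.py relies on further down:

from mxnet.runtime import Features

features = Features()
# True only if the library was configured with USE_TVM_OP=1
print(features.is_enabled("TVM_OP"))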

python/mxnet/_numpy_op_doc.py

Lines changed: 51 additions & 0 deletions
@@ -653,3 +653,54 @@ def _np_trace(a, offset=0, axis1=0, axis2=1, out=None):
     (2, 3)
     """
     pass
+
+
+def _np_squeeze(a, axis=None, out=None):
+    """
+    Remove single-dimensional entries from the shape of an array.
+
+    Parameters
+    ----------
+    a : ndarray
+        Input data.
+    axis : None or int or tuple of ints, optional
+        Selects a subset of the single-dimensional entries in the
+        shape. If an axis is selected with shape entry greater than
+        one, an error is raised.
+    out : ndarray, optional
+        Array into which the output is placed. It must have the same size
+        and dtype as the input array.
+
+    Returns
+    -------
+    squeezed : ndarray
+        The input array, but with all or a subset of the
+        dimensions of length 1 removed. It always returns a copy of `a`.
+
+    Raises
+    ------
+    MXNetError
+        If `axis` is not `None`, and an axis being squeezed is not of length 1.
+
+    See Also
+    --------
+    expand_dims : The inverse operation, adding singleton dimensions
+    reshape : Insert, remove, and combine dimensions, and resize existing ones
+
+    Examples
+    --------
+    >>> x = np.array([[[0], [1], [2]]])
+    >>> x.shape
+    (1, 3, 1)
+    >>> np.squeeze(x).shape
+    (3,)
+    >>> np.squeeze(x, axis=0).shape
+    (3, 1)
+    >>> np.squeeze(x, axis=1).shape
+    Traceback (most recent call last):
+    ...
+    mxnet.base.MXNetError: cannot select an axis to squeeze out which has size=3 not equal to one
+    >>> np.squeeze(x, axis=2).shape
+    (1, 3)
+    """
+    pass

python/mxnet/ndarray/numpy/_op.py

Lines changed: 2 additions & 2 deletions
@@ -2110,11 +2110,11 @@ def logical_not(x, out=None, **kwargs):
     --------
     >>> x= np.array([True, False, 0, 1])
     >>> np.logical_not(x)
-    array([0., 1., 1., 0.])
+    array([False, True, True, False])

     >>> x = np.arange(5)
     >>> np.logical_not(x<3)
-    array([0., 0., 0., 1., 1.])
+    array([False, False, False, True, True])
     """
     return _unary_func_helper(x, _npi.logical_not, _np.logical_not, out=out, **kwargs)

python/mxnet/numpy/multiarray.py

Lines changed: 14 additions & 5 deletions
@@ -302,7 +302,7 @@ def __getitem__(self, key):
         except Exception as err:
             raise TypeError('{}'.format(str(err)))
         if isinstance(key, _np.ndarray) and key.dtype == _np.bool_:
-            key = array(key, dtype='bool')
+            key = array(key, dtype='bool', ctx=self.ctx)
         if isinstance(key, ndarray) and key.dtype == _np.bool_:  # boolean indexing
             key_shape = key.shape
             key_ndim = len(key_shape)
@@ -364,6 +364,8 @@ def __setitem__(self, key, value):
         """
         if isinstance(value, NDArray) and not isinstance(value, ndarray):
            raise TypeError('Cannot assign mx.nd.NDArray to mxnet.numpy.ndarray')
+
+        # handle basic and advanced indexing
        if self.ndim == 0:
            if not isinstance(key, tuple) or len(key) != 0:
                raise IndexError('scalar tensor can only accept `()` as index')
@@ -753,7 +755,7 @@ def detach(self):
        check_call(_LIB.MXNDArrayDetach(self.handle, ctypes.byref(hdl)))
        return _np_ndarray_cls(hdl)

-    def astype(self, dtype, *args, **kwargs):  # pylint: disable=arguments-differ,unused-argument
+    def astype(self, dtype, **kwargs):  # pylint: disable=arguments-differ,unused-argument
        """
        Copy of the array, cast to a specified type.

@@ -1237,7 +1239,14 @@ def tile(self, *args, **kwargs):

     def transpose(self, *axes):  # pylint: disable=arguments-differ
         """Permute the dimensions of an array."""
-        return _mx_np_op.transpose(self, axes=axes if len(axes) != 0 else None)
+        if len(axes) == 0:
+            axes = None
+        elif len(axes) == 1:
+            if isinstance(axes[0], (tuple, list)):
+                axes = axes[0]
+            elif axes[0] is None:
+                axes = None
+        return _mx_np_op.transpose(self, axes=axes)

     def flip(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`flip`.
@@ -3401,11 +3410,11 @@ def logical_not(x, out=None, **kwargs):
     --------
     >>> x= np.array([True, False, 0, 1])
     >>> np.logical_not(x)
-    array([0., 1., 1., 0.])
+    array([False, True, True, False])

     >>> x = np.arange(5)
     >>> np.logical_not(x<3)
-    array([0., 0., 0., 1., 1.])
+    array([False, False, False, True, True])
     """
     return _mx_nd_np.logical_not(x, out=out, **kwargs)
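A short sketch of what the transpose normalization above means for callers: axes may be given as separate integers, as one tuple or list, as None, or omitted entirely, and (together with CanonicalizeAxes in src/common/utils.h further down) negative axes should now be accepted as well. The shapes in the comments are what NumPy semantics predict:

from mxnet import np, npx

npx.set_np()

x = np.zeros((2, 3, 4))
print(x.transpose().shape)           # (4, 3, 2)  no axes: full reversal
print(x.transpose(1, 0, 2).shape)    # (3, 2, 4)  axes as separate ints
print(x.transpose((1, 0, 2)).shape)  # (3, 2, 4)  axes as a single tuple
print(x.transpose(0, 2, -2).shape)   # (2, 4, 3)  negative axis, fixed in this commit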

python/mxnet/numpy_extension/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 from . import _register
 from ._op import *  # pylint: disable=wildcard-import
 from ..context import *  # pylint: disable=wildcard-import
-from ..util import is_np_shape, is_np_array, set_np, reset_np
+from ..util import is_np_shape, is_np_array, set_np, reset_np, get_cuda_compute_capability
 from ..ndarray import waitall
 from .utils import *  # pylint: disable=wildcard-import
 from . import random  # pylint: disable=wildcard-import

python/mxnet/symbol/numpy/_symbol.py

Lines changed: 32 additions & 4 deletions
@@ -23,7 +23,7 @@
 import numpy as _np
 from . import _op as _mx_np_op
 from ...base import _LIB, SymbolHandle, numeric_types, mx_uint
-from ...util import check_call, set_module
+from ...util import check_call, set_module, _sanity_check_params
 from ...context import current_context
 from ..symbol import Symbol
 from .._internal import _set_np_symbol_class
@@ -181,8 +181,29 @@ def T(self):
         return self.transpose()
     # pylint: enable= invalid-name, undefined-variable

-    def astype(self, dtype, **kwargs):  # pylint: disable=arguments-differ
-        raise NotImplementedError
+    def astype(self, dtype, **kwargs):  # pylint: disable=arguments-differ,unused-argument
+        """
+        Copy of the array, cast to a specified type.
+
+        Parameters
+        ----------
+        dtype : str or dtype
+            Typecode or data-type to which the array is cast.
+        copy : bool, optional
+            Default `True`. By default, astype always returns a newly
+            allocated ndarray on the same context. If this is set to
+            `False`, and the dtype requested is the same as the ndarray's
+            dtype, the ndarray is returned instead of a copy.
+
+        Returns
+        -------
+        arr_t : ndarray
+            Unless `copy` is False and the other conditions for returning the input
+            array are satisfied (see description for `copy` input parameter), `arr_t`
+            is a new array of the same shape as the input array with `dtype`.
+        """
+        _sanity_check_params('astype', ['order', 'casting', 'subok'], kwargs)
+        return _npi.cast(self, dtype=dtype)

     def dot(self, b, out=None):
         """Dot product of two arrays.
@@ -438,7 +459,14 @@ def transpose(self, *axes):  # pylint: disable=arguments-differ
         """The arguments are the same as for :py:func:`transpose`, with
         this array as data.
         """
-        return _mx_np_op.transpose(self, axes=axes if len(axes) != 0 else None)
+        if len(axes) == 0:
+            axes = None
+        elif len(axes) == 1:
+            if isinstance(axes[0], (tuple, list)):
+                axes = axes[0]
+            elif axes[0] is None:
+                axes = None
+        return _mx_np_op.transpose(self, axes=axes)

     def flip(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`flip`.

python/mxnet/test_utils.py

Lines changed: 29 additions & 4 deletions
@@ -51,6 +51,7 @@
 from .symbol.numpy import _Symbol as np_symbol
 from .util import use_np  # pylint: disable=unused-import
 from .runtime import Features
+from .numpy_extension import get_cuda_compute_capability


 def default_context():
@@ -2235,10 +2236,34 @@ def has_tvm_ops():
     """Returns True if MXNet is compiled with TVM generated operators. If current ctx
     is GPU, it only returns True for CUDA compute capability > 52 where FP16 is supported."""
     built_with_tvm_op = _features.is_enabled("TVM_OP")
-    if current_context().device_type == 'gpu':
+    ctx = current_context()
+    if ctx.device_type == 'gpu':
         try:
-            import tvm
-        except ImportError:
+            cc = get_cuda_compute_capability(ctx)
+        except:  # pylint: disable=bare-except
+            print('Failed to get CUDA compute capability for context {}. The operators '
+                  'built with USE_TVM_OP=1 will not be run in unit tests.'.format(ctx))
             return False
-        return built_with_tvm_op and (int("".join(tvm.nd.gpu(0).compute_version.split('.'))) >= 53)
+        print('Cuda arch compute capability: sm_{}'.format(str(cc)))
+        return built_with_tvm_op and cc >= 53
     return built_with_tvm_op
+
+
+def is_op_runnable():
+    """Returns True for all CPU tests. Returns True for GPU tests that are either of the following.
+    1. Built with USE_TVM_OP=0.
+    2. Built with USE_TVM_OP=1, but with compute capability >= 53."""
+    ctx = current_context()
+    if ctx.device_type == 'gpu':
+        if not _features.is_enabled("TVM_OP"):
+            return True
+        else:
+            try:
+                cc = get_cuda_compute_capability(ctx)
+            except:  # pylint: disable=bare-except
+                print('Failed to get CUDA compute capability for context {}. The operators '
+                      'built with USE_TVM_OP=1 will not be run in unit tests.'.format(ctx))
+                return False
+            print('Cuda arch compute capability: sm_{}'.format(str(cc)))
+            return cc >= 53
+    return True
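A sketch of how a unit test might consult the new helper before exercising the TVM-dependent operators; the test body and the comparison against plain NumPy's bool_ are illustrative, not taken from the commit:

import unittest
import numpy as onp
from mxnet import np, npx
from mxnet.test_utils import is_op_runnable

npx.set_np()

@unittest.skipUnless(is_op_runnable(), 'TVM-built GPU ops need compute capability >= sm_53')
def test_comparison_returns_bool():
    a = np.array([1, 2, 3])
    assert (a > 2).dtype == onp.bool_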

python/mxnet/util.py

Lines changed: 61 additions & 0 deletions
@@ -602,3 +602,64 @@ def set_np(shape=True, array=True):
 def reset_np():
     """Deactivate NumPy shape and array semantics at the same time."""
     set_np(shape=False, array=False)
+
+
+_CUDA_SUCCESS = 0
+
+
+def get_cuda_compute_capability(ctx):
+    """Returns the cuda compute capability of the input `ctx`.
+
+    Parameters
+    ----------
+    ctx : Context
+        GPU context whose corresponding cuda compute capability is to be retrieved.
+
+    Returns
+    -------
+    cuda_compute_capability : int
+        CUDA compute capability. For example, it returns 70 for CUDA arch equal to `sm_70`.
+
+    References
+    ----------
+    https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549#file-cuda_check-py
+    """
+    if ctx.device_type != 'gpu':
+        raise ValueError('Expecting a gpu context to get cuda compute capability, '
+                         'while received ctx {}'.format(str(ctx)))
+
+    libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
+    for libname in libnames:
+        try:
+            cuda = ctypes.CDLL(libname)
+        except OSError:
+            continue
+        else:
+            break
+    else:
+        raise OSError("could not load any of: " + ' '.join(libnames))
+
+    # Some constants taken from cuda.h
+
+    cc_major = ctypes.c_int()
+    cc_minor = ctypes.c_int()
+    device = ctypes.c_int()
+    error_str = ctypes.c_char_p()
+
+    ret = cuda.cuInit(0)
+    if ret != _CUDA_SUCCESS:
+        cuda.cuGetErrorString(ret, ctypes.byref(error_str))
+        raise RuntimeError('cuInit failed with error code {}: {}'
+                           .format(ret, error_str.value.decode()))
+
+    ret = cuda.cuDeviceGet(ctypes.byref(device), ctx.device_id)
+    if ret != _CUDA_SUCCESS:
+        cuda.cuGetErrorString(ret, ctypes.byref(error_str))
+        raise RuntimeError('cuDeviceGet failed with error code {}: {}'
+                           .format(ret, error_str.value.decode()))
+    ret = cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device)
+    if ret != _CUDA_SUCCESS:
+        cuda.cuGetErrorString(ret, ctypes.byref(error_str))
+        raise RuntimeError('cuDeviceComputeCapability failed with error code {}: {}'
+                           .format(ret, error_str.value.decode()))
+    return cc_major.value * 10 + cc_minor.value
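A quick usage sketch for the new helper, which is also re-exported through mxnet.numpy_extension per the __init__.py change above. It only succeeds on a machine where the CUDA driver library can be loaded, and the device id must refer to an actual GPU:

import mxnet as mx
from mxnet.util import get_cuda_compute_capability

cc = get_cuda_compute_capability(mx.gpu(0))
print('sm_{}'.format(cc))   # e.g. 70 on a Volta card (sm_70)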

src/common/utils.h

Lines changed: 15 additions & 0 deletions
@@ -823,6 +823,21 @@ static inline std::string GetOutputName(const nnvm::NodeEntry& e) {
   return sym.ListOutputNames()[0];
 }

+inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {
+  // convert negative axes to positive values
+  const int ndim = src.ndim();
+  mxnet::TShape axes = src;
+  for (int i = 0; i < ndim; ++i) {
+    if (axes[i] < 0) {
+      axes[i] += ndim;
+    }
+    CHECK(axes[i] >= 0 && axes[i] < ndim) << "axes[" << i << "]="
+                                          << axes[i] << " exceeds the range ["
+                                          << 0 << ", " << ndim << ")";
+  }
+  return axes;
+}
+
 }  // namespace common
 }  // namespace mxnet
 #endif  // MXNET_COMMON_UTILS_H_
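For readers who prefer not to trace the C++ side, here is a Python mirror of what CanonicalizeAxes does (a sketch, not code from the commit): each negative axis is shifted by ndim, and anything still outside [0, ndim) is rejected.

def canonicalize_axes(axes):
    ndim = len(axes)
    out = []
    for i, ax in enumerate(axes):
        if ax < 0:
            ax += ndim          # convert negative axis to its positive form
        if not 0 <= ax < ndim:
            raise ValueError('axes[{}]={} exceeds the range [0, {})'.format(i, axes[i], ndim))
        out.append(ax)
    return out

print(canonicalize_axes([-1, 0, 1]))  # [2, 0, 1]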

src/ndarray/ndarray_function.cc

Lines changed: 2 additions & 11 deletions
@@ -36,23 +36,14 @@ template<>
 void Copy<cpu, cpu>(const TBlob &from, TBlob *to,
                     Context from_ctx, Context to_ctx,
                     RunContext ctx) {
-  if (from.type_flag_ == mshadow::kBool || to->type_flag_ == mshadow::kBool) {
-    CHECK_EQ(from.type_flag_, to->type_flag_) << "Only supports copying data between"
-                                                 " two boolean tensors.";
-    const index_t size = from.Size();
-    CHECK_EQ(size, to->Size()) << "copying size mismatch, from: " << size * sizeof(bool)
-                               << " bytes, to: " << to->Size() * sizeof(bool) << " bytes.";
-    common::ParallelCopy(to->dptr<bool>(), from.dptr<bool>(), size);
-    return;
-  }
-  MSHADOW_TYPE_SWITCH(to->type_flag_, DType, {
+  MSHADOW_TYPE_SWITCH_WITH_BOOL(to->type_flag_, DType, {
     if (to->type_flag_ == from.type_flag_) {
       const index_t size = static_cast<index_t>(from.Size());
       CHECK_EQ(size, to->Size()) << "copying size mismatch, from: " << size * sizeof(DType)
                                  << " bytes, to: " << to->Size() * sizeof(DType) << " bytes.";
       common::ParallelCopy(to->dptr<DType>(), from.dptr<DType>(), size);
     } else {
-      MSHADOW_TYPE_SWITCH(from.type_flag_, SrcDType, {
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(from.type_flag_, SrcDType, {
        to->FlatTo1D<cpu, DType>() =
            mshadow::expr::tcast<DType>(from.FlatTo1D<cpu, SrcDType>());
       })
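With the boolean special case folded into MSHADOW_TYPE_SWITCH_WITH_BOOL, the CPU Copy kernel handles bool like any other dtype, including cross-dtype copies. A hedged sketch of what that enables at the Python level, assuming the astype fixes elsewhere in this commit:

from mxnet import np, npx

npx.set_np()

b = np.array([True, False, True])   # boolean ndarray
f = b.astype('float32')             # bool -> float copy/cast
print(f)                            # expected: [1. 0. 1.]
print(f.astype('bool'))             # float -> bool copy/cast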

0 commit comments
