
Commit 0f63659

apeforest authored and eric-haibin-lin committed
add a compiler flag to use int64 as tensor size (#14570)
* use a compile flag to use int64 tensor size
* use personal mshadow repo
* update data type
* update make config
* change size_t to index_t and add documentation
* update mshadow submodule to master
* fix compilation warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler error
* change nnvm::Tuple to mxnet::Tuple
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix lint
* update CI runtime_functions
* update runtime function
* correct runtime_functions
* update runtime functions
* add nightly test for large tensor
* update Jenkins files to test new compiler flag
* fix CI
* add runtime feature detect for the compiler flag
* change build from make to cmake
* fix CI
* move tests to nightly
1 parent a1b0a3a commit 0f63659


41 files changed: +282 −97 lines
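Context for the change (a sketch, not part of the commit): with a 32-bit index_t, the total element count of a tensor is capped at 2^31 − 1 ≈ 2.1 billion. A minimal C++ illustration of a shape that overflows that limit:

#include <cstdint>
#include <iostream>
#include <limits>

int main() {
  // Element count of a 50000 x 50000 tensor (shape chosen for illustration).
  int64_t n = 50000LL * 50000LL;  // 2,500,000,000 elements
  bool fits = n <= std::numeric_limits<int32_t>::max();  // max is 2147483647
  std::cout << "elements: " << n << ", fits in int32: "
            << std::boolalpha << fits << "\n";  // fits = false
  return 0;
}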

CMakeLists.txt

Lines changed: 8 additions & 0 deletions
@@ -50,6 +50,7 @@ mxnet_option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." OFF)
 mxnet_option(USE_TENSORRT "Enable infeference optimization with TensorRT." OFF)
 mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
 mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
+mxnet_option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)

 message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}")
 message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")

@@ -295,6 +296,13 @@ else()
   add_definitions(-DMXNET_USE_NCCL=0)
 endif()

+if (USE_INT64_TENSOR_SIZE)
+  message(STATUS "Using 64-bit integer for tensor size")
+  add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=1)
+else()
+  add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=0)
+endif()
+
 include(cmake/ChooseBlas.cmake)
 if(USE_CUDA AND FIRST_CUDA)
   include(3rdparty/mshadow/cmake/Utils.cmake)
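For orientation (an assumption about mshadow, not shown in this diff): MSHADOW_INT64_TENSOR_SIZE gates the width of mshadow's index_t, roughly as sketched below; check 3rdparty/mshadow for the authoritative definition. Configuring with cmake -DUSE_INT64_TENSOR_SIZE=ON then selects the 64-bit branch.

// Paraphrase of the typedef the macro controls in mshadow (sketch only).
#include <cstdint>

#if MSHADOW_INT64_TENSOR_SIZE == 1
typedef int64_t index_t;   // large-tensor builds: 64-bit element indexing
#else
typedef int32_t index_t;   // default builds: 32-bit element indexing
#endif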

Makefile

Lines changed: 5 additions & 0 deletions
@@ -189,6 +189,11 @@ ifeq ($(USE_OPERATOR_TUNING), 1)
 	CFLAGS += -DMXNET_USE_OPERATOR_TUNING=1
 endif

+ifeq ($(USE_INT64_TENSOR_SIZE), 1)
+	CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=1
+else
+	CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=0
+endif
 # verify existence of separate lapack library when using blas/openblas/atlas
 # switch off lapack support in case it can't be found
 # issue covered with this
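The Make path mirrors the CMake logic above: both branches define MSHADOW_INT64_TENSOR_SIZE explicitly, so downstream #if checks never see an undefined macro. Judging from the ifeq test (the invocation itself is not shown in the diff), the flag would be enabled with make USE_INT64_TENSOR_SIZE=1.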

ci/docker/runtime_functions.sh

Lines changed: 54 additions & 0 deletions
@@ -755,6 +755,53 @@ build_ubuntu_gpu_cmake() {
     ninja -v
 }

+build_ubuntu_cpu_large_tensor() {
+    set -ex
+    cd /work/build
+    build_ccache_wrappers
+    cmake \
+        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+        -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+        -DUSE_SIGNAL_HANDLER=ON \
+        -DENABLE_TESTCOVERAGE=ON \
+        -DUSE_CUDA=OFF \
+        -DUSE_CUDNN=OFF \
+        -DUSE_MKLDNN=OFF \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DUSE_INT64_TENSOR_SIZE=ON \
+        -G Ninja \
+        /work/mxnet
+
+    ninja -v
+}
+
+build_ubuntu_gpu_large_tensor() {
+    set -ex
+    cd /work/build
+    build_ccache_wrappers
+    cmake \
+        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+        -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+        -DUSE_SIGNAL_HANDLER=ON \
+        -DENABLE_TESTCOVERAGE=ON \
+        -DUSE_CUDA=ON \
+        -DUSE_CUDNN=ON \
+        -DUSE_MKL_IF_AVAILABLE=OFF \
+        -DUSE_MKLML_MKL=OFF \
+        -DUSE_MKLDNN=OFF \
+        -DUSE_DIST_KVSTORE=ON \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCUDA_ARCH_NAME=Manual \
+        -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
+        -DUSE_INT64_TENSOR_SIZE=ON \
+        -G Ninja \
+        /work/mxnet
+
+    ninja -v
+}
+
 build_ubuntu_blc() {
     echo "pass"
 }

@@ -1183,6 +1230,13 @@ nightly_test_KVStore_singleNode() {
     python tests/nightly/test_kvstore.py
 }

+#Test Large Tensor Size
+nightly_test_large_tensor() {
+    set -ex
+    export PYTHONPATH=./python/
+    nosetests-3.4 tests/nightly/test_large_array.py
+}
+
 #Tests Amalgamation Build with 5 different sets of flags
 nightly_test_amalgamation() {
     set -ex
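Taken together, the two new build functions compile MXNet with -DUSE_INT64_TENSOR_SIZE=ON on CPU and GPU, and nightly_test_large_tensor drives tests/nightly/test_large_array.py, which per the commit message exercises tensors beyond the 32-bit element limit. The test sits in the nightly suite rather than the per-PR pipeline, presumably because allocating multi-gigabyte arrays is too slow and memory-hungry for regular CI.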

ci/jenkins/Jenkins_steps.groovy

Lines changed: 28 additions & 0 deletions
@@ -119,6 +119,34 @@ def compile_unix_openblas_debug_cpu() {
   }]
 }

+def compile_unix_int64_cpu() {
+  return ['CPU: USE_INT64_TENSOR_SIZE': {
+    node(NODE_LINUX_CPU) {
+      ws('workspace/build-cpu-int64') {
+        timeout(time: max_time, unit: 'MINUTES') {
+          utils.init_git()
+          utils.docker_run('ubuntu_cpu', 'build_ubuntu_cpu_large_tensor', false)
+          utils.pack_lib('ubuntu_cpu_int64', mx_cmake_lib, true)
+        }
+      }
+    }
+  }]
+}
+
+def compile_unix_int64_gpu() {
+  return ['GPU: USE_INT64_TENSOR_SIZE': {
+    node(NODE_LINUX_GPU) {
+      ws('workspace/build-gpu-int64') {
+        timeout(time: max_time, unit: 'MINUTES') {
+          utils.init_git()
+          utils.docker_run('ubuntu_gpu', 'build_ubuntu_gpu_large_tensor', false)
+          utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib, true)
+        }
+      }
+    }
+  }]
+}
+
 def compile_unix_mkl_cpu() {
   return ['CPU: MKL': {
     node(NODE_LINUX_CPU) {

ci/jenkins/Jenkinsfile_unix_cpu

Lines changed: 2 additions & 1 deletion
@@ -38,7 +38,8 @@ core_logic: {
     custom_steps.compile_unix_openblas_debug_cpu(),
     custom_steps.compile_unix_mkl_cpu(),
     custom_steps.compile_unix_mkldnn_cpu(),
-    custom_steps.compile_unix_mkldnn_mkl_cpu()
+    custom_steps.compile_unix_mkldnn_mkl_cpu(),
+    custom_steps.compile_unix_int64_cpu()
   ])

   utils.parallel_stage('Tests', [

ci/jenkins/Jenkinsfile_unix_gpu

Lines changed: 1 addition & 0 deletions
@@ -40,6 +40,7 @@ core_logic: {
     custom_steps.compile_unix_cmake_mkldnn_gpu(),
     custom_steps.compile_unix_cmake_gpu(),
     custom_steps.compile_unix_tensorrt_gpu(),
+    custom_steps.compile_unix_int64_gpu()
   ])

   utils.parallel_stage('Tests', [

include/mxnet/libinfo.h

Lines changed: 5 additions & 1 deletion
@@ -123,7 +123,9 @@
 #define MXNET_USE_SIGNAL_HANDLER 0
 #endif

-
+#ifndef MXNET_USE_INT64_TENSOR_SIZE
+#define MXNET_USE_INT64_TENSOR_SIZE MSHADOW_INT64_TENSOR_SIZE
+#endif

 namespace mxnet {
 namespace features {

@@ -177,6 +179,8 @@ enum : unsigned {
   PROFILER,
   DIST_KVSTORE,
   CXX14,
+  INT64_TENSOR_SIZE,
+
   // Signal handler to print stack traces on exceptions
   SIGNAL_HANDLER,
   DEBUG,
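A hedged sketch of how the new enum value could be queried at run time — this assumes libinfo.h exposes a feature-check helper along the lines of mxnet::features::is_enabled; verify the exact API against the header before relying on it:

#include <mxnet/libinfo.h>
#include <iostream>

int main() {
  // Assumption: is_enabled() takes a value from the features enum above.
  bool has_int64 =
      mxnet::features::is_enabled(mxnet::features::INT64_TENSOR_SIZE);
  std::cout << "INT64_TENSOR_SIZE: "
            << (has_int64 ? "enabled" : "disabled") << "\n";
  return 0;
}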

include/mxnet/tensor_blob.h

Lines changed: 7 additions & 6 deletions
@@ -218,15 +218,16 @@ class TBlob {
     return shape_.ndim();
   }
   /*!
-   * \brief return size of i-th dimension, start counting from highest dimension
+   * \brief return size of i-th dimension, start counting from highest dimension.
+   * return type needs to be a signed integer.
    * \param idx the dimension count from the highest dimensin
-   * \return the size
+   * \return the size. -1 means unknown size to support zero-size tensor.
    */
   inline index_t size(index_t idx) const {
     return shape_[idx];
   }
   /*! \brief total number of elements in the tensor */
-  inline index_t Size(void) const {
+  inline size_t Size(void) const {
     return shape_.Size();
   }
   /*! \brief get pointer in dtype */

@@ -443,7 +444,7 @@ class FieldEntry<mxnet::TShape>
       throw dmlc::ParamError(os.str());
     }
     if (enforce_nonzero_) {
-      for (mxnet::index_t i = 0; i < v.ndim(); ++i) {
+      for (int i = 0; i < v.ndim(); ++i) {
         if (v[i] == 0U) {
           std::ostringstream os;
           os << "value " << v << "for Parameter " << this->key_

@@ -457,7 +458,7 @@ class FieldEntry<mxnet::TShape>
     this->enforce_nonzero_ = true;
     return this->self();
   }
-  inline FieldEntry<mxnet::TShape> &set_expect_ndim(mxnet::index_t ndim) {
+  inline FieldEntry<mxnet::TShape> &set_expect_ndim(int ndim) {
     expect_ndim_ = ndim;
     return this->self();
   }

@@ -466,7 +467,7 @@ class FieldEntry<mxnet::TShape>
   // whether all the entries need to be nonzero
   bool enforce_nonzero_;
   // expected number of dimension, default = 0 means no restriction.
-  mxnet::index_t expect_ndim_;
+  int expect_ndim_;
 };

 }  // namespace parameter
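The convention this diff documents, restated as a standalone sketch (TotalSize and the vector-based shape are illustrative, not MXNet API): per-axis sizes stay signed so −1 can mark an unknown dimension, while the total element count is an unsigned size_t.

#include <cstddef>
#include <cstdint>
#include <vector>

using index_t = int64_t;  // assumption: a large-tensor (int64) build

// Illustrative helper: total element count from a shape whose per-axis
// sizes are signed (-1 = unknown dimension).
size_t TotalSize(const std::vector<index_t>& shape) {
  size_t total = 1;
  for (index_t d : shape) {
    if (d < 0) return 0;                 // unknown dim: no meaningful total
    total *= static_cast<size_t>(d);
  }
  return total;
}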

include/mxnet/tuple.h

Lines changed: 8 additions & 8 deletions
@@ -569,7 +569,7 @@ class TShape : public Tuple<dim_t> {
    * \param axis_end The ending axis specified.
    * \return the flat 3d shape
    */
-  inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const {
+  inline mshadow::Shape<3> FlatTo3D(int axis_begin, int axis_end) const {
     CHECK(axis_end >= axis_begin);
     mshadow::Shape<3> s;
     CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim";

@@ -579,10 +579,10 @@ class TShape : public Tuple<dim_t> {
     s.shape_[1] = 1;
     s.shape_[2] = 1;

-    for (size_t i = 0; i < axis_begin; ++i) {
+    for (int i = 0; i < axis_begin; ++i) {
       s.shape_[0] *= d[i];
     }
-    for (size_t i = axis_begin; i <= axis_end; ++i) {
+    for (int i = axis_begin; i <= axis_end; ++i) {
       s.shape_[1] *= d[i];
     }
     for (int i = axis_end + 1; i < ndim(); ++i) {

@@ -595,7 +595,7 @@ class TShape : public Tuple<dim_t> {
    * \param axis The axis specified.
    * \return the flat 3d shape
    */
-  inline mshadow::Shape<3> FlatTo3D(size_t axis) const {
+  inline mshadow::Shape<3> FlatTo3D(int axis) const {
     return FlatTo3D(axis, axis);
   }
   inline bool operator==(const TShape &s) const {

@@ -712,8 +712,8 @@ template<typename T>
 struct hash<mxnet::Tuple<T> > {
   /*! \brief hash a Tuple into unsigned int */
   size_t operator()(const mxnet::Tuple<T>& val) const {
-    std::hash<uint32_t> hash_uint;
-    size_t res = hash_uint(val.ndim());
+    std::hash<int> hash_int;
+    size_t res = hash_int(val.ndim());
     for (int i = 0; i < val.ndim(); ++i) {
       res = dmlc::HashCombine(res, val[i]);
     }

@@ -726,8 +726,8 @@ template<>
 struct hash<mxnet::TShape> {
   /*! \brief hash a TShape into unsigned int */
   size_t operator()(const mxnet::TShape& val) const {
-    std::hash<uint32_t> hash_uint;
-    size_t res = hash_uint(val.ndim());
+    std::hash<int> hash_int;
+    size_t res = hash_int(val.ndim());
     for (int i = 0; i < val.ndim(); ++i) {
       res = dmlc::HashCombine(res, val[i]);
     }
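Why the hash change: ndim() now returns a signed int rather than uint32_t, so the seed hash must match the new type. A self-contained sketch of the same pattern (HashShape is illustrative, and the boost-style mixer stands in for dmlc::HashCombine):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Illustrative stand-in for dmlc::HashCombine (boost-style mixing).
inline size_t HashCombine(size_t seed, int64_t v) {
  return seed ^ (std::hash<int64_t>()(v) + 0x9e3779b9 +
                 (seed << 6) + (seed >> 2));
}

// Hash a shape the way the diff does: seed from the signed ndim,
// then mix in each dimension.
inline size_t HashShape(const std::vector<int64_t>& shape) {
  std::hash<int> hash_int;
  size_t res = hash_int(static_cast<int>(shape.size()));
  for (int64_t d : shape) res = HashCombine(res, d);
  return res;
}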
