Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 6 additions & 29 deletions source/op/pt/comm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class Border : public torch::autograd::Function<Border> {
#ifdef USE_MPI
int mpi_init = 0;
MPI_Initialized(&mpi_init);
int cuda_aware = 1;
int cuda_aware = 0;
int me = 0;
Comment thread
caic99 marked this conversation as resolved.
MPI_Comm world;
int world_size = 0;
Expand All @@ -99,17 +99,9 @@ class Border : public torch::autograd::Function<Border> {
MPI_Request request;
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
if (world_size >= 1) {
int version, subversion;
MPI_Get_version(&version, &subversion);
if (version >= 4) {
#ifdef NO_CUDA_AWARE
cuda_aware = 0;
#else
cuda_aware = MPIX_Query_cuda_support();
#ifndef NO_CUDA_AWARE
cuda_aware = MPIX_Query_cuda_support();
#endif
} else {
cuda_aware = 0;
}
if (cuda_aware == 0) {
Comment thread
caic99 marked this conversation as resolved.
recv_g1_tensor = torch::empty_like(g1).to(torch::kCPU);
recv_g1_tensor.copy_(g1);
Expand Down Expand Up @@ -193,10 +185,6 @@ class Border : public torch::autograd::Function<Border> {
static torch::autograd::variable_list backward_t(
torch::autograd::AutogradContext* ctx,
torch::autograd::variable_list grad_output) {
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
gpuDeviceSynchronize();
#endif

torch::autograd::variable_list saved_variables = ctx->get_saved_variables();
torch::Tensor sendlist_tensor = saved_variables[0];
torch::Tensor sendproc_tensor = saved_variables[1];
Expand All @@ -212,7 +200,7 @@ class Border : public torch::autograd::Function<Border> {
int mpi_init = 0;
MPI_Initialized(&mpi_init);
int world_size = 0;
int cuda_aware = 1;
int cuda_aware = 0;
int me = 0;
MPI_Comm world;
if (mpi_init) {
Expand All @@ -224,17 +212,9 @@ class Border : public torch::autograd::Function<Border> {
MPI_Request request;
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
if (world_size >= 1) {
int version, subversion;
MPI_Get_version(&version, &subversion);
if (version >= 4) {
#ifdef NO_CUDA_AWARE
cuda_aware = 0;
#else
cuda_aware = MPIX_Query_cuda_support();
#ifndef NO_CUDA_AWARE
cuda_aware = MPIX_Query_cuda_support();
#endif
} else {
cuda_aware = 0;
}
if (cuda_aware == 0) {
d_local_g1_tensor = torch::empty_like(grad_output[0]).to(torch::kCPU);
d_local_g1_tensor.copy_(grad_output[0]);
Expand Down Expand Up @@ -329,9 +309,6 @@ class Border : public torch::autograd::Function<Border> {
recv_g1_tensor.slice(0, 0, nrecv));
}
}
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
gpuDeviceSynchronize();
#endif
#ifdef USE_MPI
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
if (cuda_aware == 0) {
Expand Down