Skip to content

Commit 4978db2

Browse files
author
chengduo
authored
Remove nccl dep when the number of GPU is 1 (#18158)
* remove nccl dep when the number of GPU is 1 test=develop
1 parent 25ab23b commit 4978db2

File tree

4 files changed: 8 additions (+8) and 5 deletions (-5)

paddle/fluid/framework/parallel_executor.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -369,8 +369,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
 "Execution which can get better performance,"
 << "you can force it off by env FLAGS_enable_parallel_graph=0";
 
-if (member_->use_cuda_) {
-// Bcast Parameters to all GPUs
+if (member_->use_cuda_ && member_->nranks_ > 1) {
 #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 member_->InitOrGetNCCLCommunicator(scope, build_strategy);
 

@@ -405,10 +404,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
 }
 return false;
 };
-
+// Bcast Parameters to all GPUs
 if (need_broadcast()) {
 BCastParamsToDevices(bcast_vars, build_strategy.trainer_id_);
 }
+
 // Startup Program has been run. All local scopes has correct parameters.
 
 // Step 2. Convert main_program to SSA form and dependency graph. Also, insert

paddle/fluid/platform/device_context.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,9 @@ CUDADeviceContext::~CUDADeviceContext() {
 eigen_device_.reset();
 PADDLE_ENFORCE(cudaStreamDestroy(stream_));
 #if !defined(_WIN32)
-PADDLE_ENFORCE(dynload::ncclCommDestroy(nccl_comm_));
+if (nccl_comm_) {
+PADDLE_ENFORCE(dynload::ncclCommDestroy(nccl_comm_));
+}
 #endif
 }
 

python/paddle/fluid/tests/unittests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,5 +223,5 @@ if(WITH_DISTRIBUTE)
 endif()
 
 set_tests_properties(test_recordio_reader test_parallel_executor_test_while_train test_parallel_executor_mnist
-test_parallel_executor_seresnext test_parallel_executor_crf
+test_parallel_executor_seresnext test_parallel_executor_crf test_sync_batch_norm_op
 PROPERTIES LABELS "RUN_TYPE=DIST")

python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def compare(self, place, layout, only_forward):
 
 #####################################################################
 # Multi-GPUs, self.N / core.get_cuda_device_count() per GPU
+assert core.get_cuda_device_count() > 1
 main, startup, outs = self.build_program(place, layout, seed, True,
 only_forward)
 exe = fluid.Executor(place)

0 commit comments

Comments
 (0)