Commit 453a49b

Author: wopeizl

Make ParallelExecutor support Windows GPU (#17787)
* fix the ParallelExecutor on Windows test=develop
* restrict to use one GPU only under windows
1 parent 39bc8a5 commit 453a49b

File tree: 2 files changed, +31 -9 lines changed


paddle/fluid/framework/parallel_executor.cc (+19 -8)

@@ -296,6 +296,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
   member_->use_all_reduce_ =
       build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce;
   member_->nranks_ = build_strategy.num_trainers_ * places.size();
+#if defined(PADDLE_WITH_CUDA) && defined(_WIN32)
+  if (member_->use_cuda_) {
+    PADDLE_ENFORCE(places.size() == 1, "Windows can support Single GPU only.");
+  }
+#endif
   if (!member_->use_all_reduce_) {
     PADDLE_ENFORCE(places.size() > 1,
                    "If you set build_strategy.reduce with 'Reduce',"
@@ -361,8 +366,6 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
           member_->nccl_ctxs_.DefaultFlatCtx()->at(member_->places_[dev_id]);
       dev_ctx->set_nccl_comm(nccl_ctx.comm());
     }
-#else
-    PADDLE_THROW("Not compiled with CUDA");
 #endif
   }
   // broadcast parameters from the 0th device to others:

@@ -544,8 +547,6 @@ void ParallelExecutor::BCastParamsToDevices(
       }
       nccl_ctxs->WaitAll();
     }
-#else
-    PADDLE_THROW("Not compiled with CUDA");
 #endif
   } else {
     platform::CPUPlace cpu;
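
Both hunks above make the same change: the #else arm of a PADDLE_WITH_CUDA guard is dropped, so a binary built without CUDA now compiles past the NCCL block instead of throwing at runtime. A compilable before/after sketch, with hypothetical stand-ins for the Paddle pieces:

#include <stdexcept>

// Hypothetical stand-ins for the Paddle pieces involved.
#define PADDLE_THROW(msg) throw std::runtime_error(msg)
void SetUpNcclComms() {}  // placeholder for the CUDA-only NCCL setup

void BeforeThisCommit() {
#if defined(PADDLE_WITH_CUDA)
  SetUpNcclComms();
#else
  PADDLE_THROW("Not compiled with CUDA");  // non-CUDA builds threw here
#endif
}

void AfterThisCommit() {
#if defined(PADDLE_WITH_CUDA)
  SetUpNcclComms();
#endif
  // Non-CUDA builds now fall through and continue on the CPU path.
}
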
@@ -650,7 +651,9 @@ ParallelExecutor::~ParallelExecutor() {
 bool ParallelExecutor::EnableParallelGraphExecution(
     const ir::Graph &graph, const ExecutionStrategy &exec_strategy,
     const BuildStrategy &build_strategy) const {
-  if (!FLAGS_enable_parallel_graph) return false;
+  if (!FLAGS_enable_parallel_graph) {
+    return false;
+  }

   bool enable_parallel_graph = true;

@@ -670,11 +673,19 @@ bool ParallelExecutor::EnableParallelGraphExecution(
     }
   }

-  if (!member_->use_all_reduce_ || !member_->use_cuda_)
-
+  if (!member_->use_all_reduce_ || !member_->use_cuda_) {
     if (build_strategy.enable_sequential_execution_ ||
-        exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental)
+        exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental) {
       enable_parallel_graph = false;
+    }
+  }
+
+#ifdef WIN32
+  VLOG(1) << "Windows has no support to parallel graph, enable_parallel_graph "
+             "would be forced to false.";
+  enable_parallel_graph = false;
+#endif
+
   return enable_parallel_graph;
 }
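
The final hunk forces the parallel graph executor off on Windows regardless of what the preceding heuristics decided. Note the guard is WIN32 rather than the compiler-defined _WIN32, so it relies on the build configuration defining that macro. A hypothetical reduction of the decision logic, with std::clog standing in for VLOG(1):

#include <iostream>

// Hypothetical reduction of EnableParallelGraphExecution: whatever the
// strategy heuristics conclude, Windows builds force the feature off.
bool DecideParallelGraph(bool heuristics_allow_it) {
  bool enable_parallel_graph = heuristics_allow_it;
#ifdef WIN32
  std::clog << "Windows has no support to parallel graph, "
               "enable_parallel_graph would be forced to false.\n";
  enable_parallel_graph = false;
#endif
  return enable_parallel_graph;
}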

paddle/fluid/pybind/pybind.cc (+12 -1)

@@ -1341,6 +1341,9 @@ All parameter, weight, gradient are variables in Paddle.
           "num_trainers",
           [](const BuildStrategy &self) { return self.num_trainers_; },
           [](BuildStrategy &self, int num_trainers) {
+#ifdef WIN32
+            PADDLE_THROW("Windows has NO support to distribute mode.");
+#endif
             self.num_trainers_ = num_trainers;
           })
       .def_property(
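
The setter above rejects distributed configuration on Windows: under WIN32, PADDLE_THROW raises before num_trainers_ is ever assigned. A self-contained sketch of the same pybind11 pattern, using a hypothetical trimmed-down BuildStrategy and a plain std::runtime_error in place of PADDLE_THROW:

#include <pybind11/pybind11.h>
#include <stdexcept>

namespace py = pybind11;

// Hypothetical reduction of BuildStrategy to the one field used here.
struct BuildStrategy {
  int num_trainers_{1};
};

PYBIND11_MODULE(example, m) {
  py::class_<BuildStrategy>(m, "BuildStrategy")
      .def(py::init<>())
      .def_property(
          "num_trainers",
          [](const BuildStrategy &self) { return self.num_trainers_; },
          [](BuildStrategy &self, int num_trainers) {
#ifdef WIN32
            // Mirrors the commit: any attempt to set num_trainers on
            // Windows raises (as a Python RuntimeError) before the
            // field is touched.
            throw std::runtime_error(
                "Windows has NO support to distribute mode.");
#endif
            self.num_trainers_ = num_trainers;
          });
}
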
@@ -1486,7 +1489,15 @@ All parameter, weight, gradient are variables in Paddle.
       .def_property(
           "is_distribution",
           [](const BuildStrategy &self) { return self.is_distribution_; },
-          [](BuildStrategy &self, bool b) { self.is_distribution_ = b; })
+          [](BuildStrategy &self, bool b) {
+#ifdef WIN32
+            if (b) {
+              PADDLE_THROW("Windows has NO support to distribute mode.");
+            }
+#else
+            self.is_distribution_ = b;
+#endif
+          })
       .def_property("async_mode",
                     [](const BuildStrategy &self) { return self.async_mode_; },
                     [](BuildStrategy &self, bool b) { self.async_mode_ = b; })
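
Unlike num_trainers, the is_distribution setter splits on #ifdef WIN32 / #else: on Windows the flag can only remain false (setting it to false is a silent no-op), while other platforms store the value as before. A small sketch of that behavior as a plain function; SetIsDistribution is a hypothetical free-function version of the bound setter:

#include <cstdio>
#include <stdexcept>

struct BuildStrategy { bool is_distribution_{false}; };

// Hypothetical free-function version of the pybind setter above.
void SetIsDistribution(BuildStrategy &self, bool b) {
#ifdef WIN32
  (void)self;  // the flag is left untouched on Windows
  if (b) {
    throw std::runtime_error("Windows has NO support to distribute mode.");
  }
#else
  self.is_distribution_ = b;
#endif
}

int main() {
  BuildStrategy s;
  try {
    SetIsDistribution(s, true);
    std::puts("is_distribution set");         // taken on non-Windows builds
  } catch (const std::exception &e) {
    std::printf("rejected: %s\n", e.what());  // taken on Windows builds
  }
  return 0;
}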

0 commit comments
