Make ParallelExecutor support Windows GPU (#17787)

wopeizl · web-flow · commit 453a49b1bc92 · 2019-06-06T12:08:36.000+08:00
* fix the ParallelExecutor on Windows
test=develop
* restrict to use one GPU only under windows
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
@@ -296,6 +296,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
   member_->use_all_reduce_ =
       build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce;
   member_->nranks_ = build_strategy.num_trainers_ * places.size();
+#if defined(PADDLE_WITH_CUDA) && defined(_WIN32)
+  if (member_->use_cuda_) {
+    PADDLE_ENFORCE(places.size() == 1, "Windows can support Single GPU only.");
+  }
+#endif
   if (!member_->use_all_reduce_) {
     PADDLE_ENFORCE(places.size() > 1,
                    "If you set build_strategy.reduce with 'Reduce',"
@@ -361,8 +366,6 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
           member_->nccl_ctxs_.DefaultFlatCtx()->at(member_->places_[dev_id]);
       dev_ctx->set_nccl_comm(nccl_ctx.comm());
     }
-#else
-    PADDLE_THROW("Not compiled with CUDA");
 #endif
   }
   // broadcast parameters from the 0th device to others:
@@ -544,8 +547,6 @@ void ParallelExecutor::BCastParamsToDevices(
         }
         nccl_ctxs->WaitAll();
       }
-#else
-      PADDLE_THROW("Not compiled with CUDA");
 #endif
     } else {
       platform::CPUPlace cpu;
@@ -650,7 +651,9 @@ ParallelExecutor::~ParallelExecutor() {
 bool ParallelExecutor::EnableParallelGraphExecution(
     const ir::Graph &graph, const ExecutionStrategy &exec_strategy,
     const BuildStrategy &build_strategy) const {
-  if (!FLAGS_enable_parallel_graph) return false;
+  if (!FLAGS_enable_parallel_graph) {
+    return false;
+  }
 
   bool enable_parallel_graph = true;
 
@@ -670,11 +673,19 @@ bool ParallelExecutor::EnableParallelGraphExecution(
     }
   }
 
-  if (!member_->use_all_reduce_ || !member_->use_cuda_)
-
+  if (!member_->use_all_reduce_ || !member_->use_cuda_) {
     if (build_strategy.enable_sequential_execution_ ||
-        exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental)
+        exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental) {
       enable_parallel_graph = false;
+    }
+  }
+
+#ifdef WIN32
+  VLOG(1) << "Windows has no support to parallel graph, enable_parallel_graph "
+             "would be forced to false.";
+  enable_parallel_graph = false;
+#endif
+
   return enable_parallel_graph;
 }
 
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
@@ -1341,6 +1341,9 @@ All parameter, weight, gradient are variables in Paddle.
           "num_trainers",
           [](const BuildStrategy &self) { return self.num_trainers_; },
           [](BuildStrategy &self, int num_trainers) {
+#ifdef WIN32
+            PADDLE_THROW("Windows has NO support to distribute mode.");
+#endif
             self.num_trainers_ = num_trainers;
           })
       .def_property(
@@ -1486,7 +1489,15 @@ All parameter, weight, gradient are variables in Paddle.
       .def_property(
           "is_distribution",
           [](const BuildStrategy &self) { return self.is_distribution_; },
-          [](BuildStrategy &self, bool b) { self.is_distribution_ = b; })
+          [](BuildStrategy &self, bool b) {
+#ifdef WIN32
+            if (b) {
+              PADDLE_THROW("Windows has NO support to distribute mode.");
+            }
+#else
+            self.is_distribution_ = b;
+#endif
+          })
       .def_property("async_mode",
                     [](const BuildStrategy &self) { return self.async_mode_; },
                     [](BuildStrategy &self, bool b) { self.async_mode_ = b; })