【npu】release the GIL when the predictor runs (PaddlePaddle#72485)

xiaoguoguo626807 · web-flow · commit 9275517b145a · 2025-04-27T18:54:28.000+08:00
* fix NPU hang

* fix NPU hang

* remove std::cout
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
@@ -744,7 +744,7 @@ void BindPaddlePredictor(py::module *m) {
   paddle_predictor
       .def("run",
            [](PaddlePredictor &self, const std::vector<PaddleTensor> &inputs) {
-#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
+#if !defined(PADDLE_NO_PYTHON)
              pybind11::gil_scoped_release release;
 #endif
              std::vector<PaddleTensor> outputs;
@@ -755,9 +755,15 @@ void BindPaddlePredictor(py::module *m) {
       .def("get_output_tensor", &PaddlePredictor::GetOutputTensor)
       .def("get_input_names", &PaddlePredictor::GetInputNames)
       .def("get_output_names", &PaddlePredictor::GetOutputNames)
-      .def("zero_copy_run",
-           &PaddlePredictor::ZeroCopyRun,
-           py::arg("switch_stream") = false)
+      .def(
+          "zero_copy_run",
+          [](PaddlePredictor &self, bool switch_stream) {
+#if !defined(PADDLE_NO_PYTHON)
+            pybind11::gil_scoped_release release;
+#endif
+            return self.ZeroCopyRun(switch_stream);
+          },
+          py::arg("switch_stream") = false)
       .def("clone", [](PaddlePredictor &self) { return self.Clone(nullptr); })
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
       .def("clone",
@@ -797,7 +803,7 @@ void BindNativePredictor(py::module *m) {
       .def("run",
            [](NativePaddlePredictor &self,
               const std::vector<PaddleTensor> &inputs) {
-#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
+#if !defined(PADDLE_NO_PYTHON)
              pybind11::gil_scoped_release release;
 #endif
              std::vector<PaddleTensor> outputs;
@@ -806,9 +812,15 @@ void BindNativePredictor(py::module *m) {
            })
       .def("get_input_tensor", &NativePaddlePredictor::GetInputTensor)
       .def("get_output_tensor", &NativePaddlePredictor::GetOutputTensor)
-      .def("zero_copy_run",
-           &NativePaddlePredictor::ZeroCopyRun,
-           py::arg("switch_stream") = false)
+      .def(
+          "zero_copy_run",
+          [](NativePaddlePredictor &self, bool switch_stream) {
+#if !defined(PADDLE_NO_PYTHON)
+            pybind11::gil_scoped_release release;
+#endif
+            return self.ZeroCopyRun(switch_stream);
+          },
+          py::arg("switch_stream") = false)
       .def("clone",
            [](NativePaddlePredictor &self) { return self.Clone(nullptr); })
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -1132,7 +1144,7 @@ void BindAnalysisPredictor(py::module *m) {
       .def(
           "run",
           [](AnalysisPredictor &self, const std::vector<PaddleTensor> &inputs) {
-#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
+#if !defined(PADDLE_NO_PYTHON)
             pybind11::gil_scoped_release release;
 #endif
             std::vector<PaddleTensor> outputs;
@@ -1144,9 +1156,15 @@ void BindAnalysisPredictor(py::module *m) {
       .def("get_input_names", &AnalysisPredictor::GetInputNames)
       .def("get_output_names", &AnalysisPredictor::GetOutputNames)
       .def("get_input_tensor_shape", &AnalysisPredictor::GetInputTensorShape)
-      .def("zero_copy_run",
-           &AnalysisPredictor::ZeroCopyRun,
-           py::arg("switch_stream") = false)
+      .def(
+          "zero_copy_run",
+          [](AnalysisPredictor &self, bool switch_stream) {
+#if !defined(PADDLE_NO_PYTHON)
+            pybind11::gil_scoped_release release;
+#endif
+            return self.ZeroCopyRun(switch_stream);
+          },
+          py::arg("switch_stream") = false)
       .def("clear_intermediate_tensor",
            &AnalysisPredictor::ClearIntermediateTensor)
       .def("try_shrink_memory", &AnalysisPredictor::TryShrinkMemory)
@@ -1185,7 +1203,7 @@ void BindPaddleInferPredictor(py::module *m) {
           "run",
           [](paddle_infer::Predictor &self,
              const std::vector<paddle::Tensor> &in_tensor_list) {
-#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
+#if !defined(PADDLE_NO_PYTHON)
             pybind11::gil_scoped_release release;
 #endif
             std::vector<paddle::Tensor> outputs;
@@ -1195,7 +1213,7 @@ void BindPaddleInferPredictor(py::module *m) {
           py::arg("inputs"))
       .def("run",
            [](paddle_infer::Predictor &self) {
-#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
+#if !defined(PADDLE_NO_PYTHON)
              pybind11::gil_scoped_release release;
 #endif
              self.Run();