Skip to content

Commit 5422a44

Browse files
authored
[CustomDevice] add share_external_data support (#57253)
* [CustomDevice] add share_external_data support * update
1 parent 25d7198 commit 5422a44

File tree

5 files changed

+38
-27
lines changed

5 files changed

+38
-27
lines changed

paddle/fluid/distributed/fleet_executor/carrier.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,14 @@ static std::shared_ptr<framework::GarbageCollector> GetGC(
284284
max_memory_size));
285285
}
286286
}
287+
#endif
288+
#ifdef PADDLE_WITH_CUSTOM_DEVICE
289+
if (platform::is_custom_place(place)) {
290+
if (framework::IsFastEagerDeletionModeEnabled()) {
291+
gc.reset(new framework::CustomDeviceUnsafeFastGarbageCollector(
292+
place, max_memory_size));
293+
}
294+
}
287295
#endif
288296
} // max_memory_size >= 0
289297

paddle/fluid/framework/custom_operator.cc

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -947,12 +947,10 @@ static void RegisterOperatorKernel(
947947
#ifdef PADDLE_WITH_CUSTOM_DEVICE
948948
auto device_types = phi::DeviceManager::GetAllCustomDeviceTypes();
949949
for (const auto& dev_type : device_types) {
950-
for (auto& dev_id : phi::DeviceManager::GetSelectedDeviceList(dev_type)) {
951-
RegisterOperatorKernelWithPlace(name,
952-
op_kernel_func,
953-
proto::VarType::RAW,
954-
platform::CustomPlace(dev_type, dev_id));
955-
}
950+
RegisterOperatorKernelWithPlace(name,
951+
op_kernel_func,
952+
proto::VarType::RAW,
953+
platform::CustomPlace(dev_type));
956954
}
957955
#endif
958956
}

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,12 +2010,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
20102010
}
20112011
} else if (platform::is_custom_place(place_)) {
20122012
auto custom_place = place_;
2013-
auto paddleplace = static_cast<PaddlePlace>(
2014-
static_cast<size_t>(PaddlePlace::kCUSTOM) +
2015-
phi::CustomRegisteredDeviceMap::Instance()
2016-
.GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
2017-
res->SetPlace(
2018-
paddleplace, custom_place.GetDeviceId(), place_.GetDeviceType());
2013+
res->SetPlace(PaddlePlace::kCUSTOM,
2014+
custom_place.GetDeviceId(),
2015+
custom_place.GetDeviceType());
20192016
} else {
20202017
auto gpu_place = place_;
20212018
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
@@ -2064,12 +2061,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
20642061
}
20652062
} else if (platform::is_custom_place(place_)) {
20662063
auto custom_place = place_;
2067-
auto paddleplace = static_cast<PaddlePlace>(
2068-
static_cast<size_t>(PaddlePlace::kCUSTOM) +
2069-
phi::CustomRegisteredDeviceMap::Instance()
2070-
.GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
2071-
res->SetPlace(
2072-
paddleplace, custom_place.GetDeviceId(), place_.GetDeviceType());
2064+
res->SetPlace(PaddlePlace::kCUSTOM,
2065+
custom_place.GetDeviceId(),
2066+
custom_place.GetDeviceType());
20732067
} else {
20742068
auto gpu_place = place_;
20752069
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());

paddle/fluid/inference/api/details/zero_copy_tensor.cc

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -244,16 +244,11 @@ void Tensor::CopyFromCpu(const T *data) {
244244
"Can not create tensor with XPU place because paddle is not compiled "
245245
"with XPU."));
246246
#endif
247-
} else {
247+
} else if (place_ == PlaceType::kCUSTOM) {
248248
#ifdef PADDLE_WITH_CUSTOM_DEVICE
249-
auto device_type_id =
250-
static_cast<size_t>(place_) - static_cast<size_t>(PlaceType::kCUSTOM);
251249
paddle::platform::DeviceContextPool &pool =
252250
paddle::platform::DeviceContextPool::Instance();
253-
paddle::platform::CustomPlace custom_place(
254-
phi::CustomRegisteredDeviceMap::Instance().GetGlobalDeviceType(
255-
device_type_id),
256-
device_);
251+
paddle::platform::CustomPlace custom_place(device_type_, device_);
257252
auto *t_data = tensor->mutable_data<T>(custom_place);
258253
auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
259254
pool.Get(custom_place));
@@ -264,9 +259,15 @@ void Tensor::CopyFromCpu(const T *data) {
264259
ele_size,
265260
dev_ctx->stream());
266261
#else
267-
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
268-
"The analysis predictor supports CPU, GPU and XPU now."));
262+
PADDLE_THROW(paddle::platform::errors::Unavailable(
263+
"Can not create tensor with Custom place because paddle is not "
264+
"compiled "
265+
"with CUSTOM_DEVICE."));
269266
#endif
267+
} else {
268+
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
269+
"The analysis predictor supports CPU, GPU, XPU and CUSTOM_DEVICE "
270+
"now."));
270271
}
271272
}
272273

@@ -355,6 +356,14 @@ void Tensor::ShareExternalData(const T *data,
355356
const_cast<T *>(data), size, paddle::platform::XPUPlace(device_)),
356357
meta);
357358
*tensor = std::move(dtensor);
359+
} else if (place == PlaceType::kCUSTOM) {
360+
phi::DenseTensor dtensor(
361+
std::make_shared<phi::Allocation>(
362+
const_cast<T *>(data),
363+
size,
364+
paddle::platform::CustomPlace(device_type_, device_)),
365+
meta);
366+
*tensor = std::move(dtensor);
358367
} else {
359368
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
360369
"PlaceType must be one of [PlaceType::kCPU, PlaceType::kGPU, "

paddle/fluid/pybind/inference_api.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,8 @@ paddle_infer::PlaceType ToPaddleInferPlace(
245245
return paddle_infer::PlaceType::kGPU;
246246
} else if (allocation_type == phi::AllocationType::XPU) {
247247
return paddle_infer::PlaceType::kXPU;
248+
} else if (allocation_type == phi::AllocationType::CUSTOM) {
249+
return paddle_infer::PlaceType::kCUSTOM;
248250
} else {
249251
return paddle_infer::PlaceType::kCPU;
250252
}

0 commit comments

Comments (0)