[XPU] Fix L3 autotune (#66323)

cmcamdy · web-flow · commit 736a253dec08 · 2024-08-06T20:48:36.000+08:00
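* Fix XPU L3 cache autotune bug: call L3CacheAutotune() before the run instead of after it, only when l3_autotune_size > 0, and return early when l3_blocks_ is empty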

* Fix multi-predictor handling

* Fix multi-predictor handling
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2278,6 +2278,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
                              config_.xpu_config_.l3_autotune_size,
                              place_);
   }
+
+  if (config_.use_xpu_ && infer_xpu_ctx != nullptr &&
+      config_.xpu_config_.l3_autotune_size > 0) {
+    infer_xpu_ctx->L3CacheAutotune();
+  }
 #endif
 
   if (config_.new_executor_enabled()) {
@@ -2287,12 +2292,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
   }
   inference::DisplayMemoryInfo(place_, "after run");
 
-#ifdef PADDLE_WITH_XPU
-  if (config_.use_xpu_ && !config_.use_lite_ && infer_xpu_ctx != nullptr) {
-    infer_xpu_ctx->L3CacheAutotune();
-  }
-#endif
-
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
   tensor_array_batch_cleaner_.ResetTensorArray();
diff --git a/paddle/fluid/inference/api/infer_context.cc b/paddle/fluid/inference/api/infer_context.cc
@@ -225,7 +225,7 @@ void InferXPUContext::SetFcAutotuneInfo(std::string fc_autotune_file,
 }
 
 void InferXPUContext::L3CacheAutotune() {
-  if (l3_autotune_size_ == 0) return;
+  if (l3_autotune_size_ == 0 || l3_blocks_.size() == 0) return;
   if (holder_map_.empty()) {
     l3_plan_.RunAutotune(l3_blocks_, l3_size_);
     auto* plan = l3_plan_.plan();
@@ -262,6 +262,7 @@ void InferXPUContext::L3CacheAutotune() {
     }
   }
 }
+
 #endif
 
 }  // namespace paddle

Original file line number	Diff line number	Diff line change
`@@ -225,7 +225,7 @@ void InferXPUContext::SetFcAutotuneInfo(std::string fc_autotune_file,`
`225`	`225`	`}`
`226`	`226`
`227`	`227`	`void InferXPUContext::L3CacheAutotune() {`
`228`		`- if (l3_autotune_size_ == 0) return;`
	`228`	`+ if (l3_autotune_size_ == 0 \|\| l3_blocks_.size() == 0) return;`
`229`	`229`	`if (holder_map_.empty()) {`
`230`	`230`	`l3_plan_.RunAutotune(l3_blocks_, l3_size_);`
`231`	`231`	`auto* plan = l3_plan_.plan();`
`@@ -262,6 +262,7 @@ void InferXPUContext::L3CacheAutotune() {`
`262`	`262`	`}`
`263`	`263`	`}`
`264`	`264`	`}`
	`265`	`+`
`265`	`266`	`#endif`
`266`	`267`
`267`	`268`	`} // namespace paddle`