Skip to content

Commit 736a253

Browse files
authored
[XPU] Fix L3 autotune (#66323)
* fix xpu l3 cache bug
* fix multi predictor
* fix multi predictor
1 parent 1319992 commit 736a253

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

paddle/fluid/inference/api/analysis_predictor.cc

+5 -6
Original file line numberDiff line numberDiff line change
@@ -2278,6 +2278,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
22782278
config_.xpu_config_.l3_autotune_size,
22792279
place_);
22802280
}
2281+
2282+
if (config_.use_xpu_ && infer_xpu_ctx != nullptr &&
2283+
config_.xpu_config_.l3_autotune_size > 0) {
2284+
infer_xpu_ctx->L3CacheAutotune();
2285+
}
22812286
#endif
22822287

22832288
if (config_.new_executor_enabled()) {
@@ -2287,12 +2292,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
22872292
}
22882293
inference::DisplayMemoryInfo(place_, "after run");
22892294

2290-
#ifdef PADDLE_WITH_XPU
2291-
if (config_.use_xpu_ && !config_.use_lite_ && infer_xpu_ctx != nullptr) {
2292-
infer_xpu_ctx->L3CacheAutotune();
2293-
}
2294-
#endif
2295-
22962295
// Fix TensorArray reuse not cleaned bug.
22972296
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
22982297
tensor_array_batch_cleaner_.ResetTensorArray();

paddle/fluid/inference/api/infer_context.cc

+2 -1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ void InferXPUContext::SetFcAutotuneInfo(std::string fc_autotune_file,
225225
}
226226

227227
void InferXPUContext::L3CacheAutotune() {
228-
if (l3_autotune_size_ == 0) return;
228+
if (l3_autotune_size_ == 0 || l3_blocks_.size() == 0) return;
229229
if (holder_map_.empty()) {
230230
l3_plan_.RunAutotune(l3_blocks_, l3_size_);
231231
auto* plan = l3_plan_.plan();
@@ -262,6 +262,7 @@ void InferXPUContext::L3CacheAutotune() {
262262
}
263263
}
264264
}
265+
265266
#endif
266267

267268
} // namespace paddle

0 commit comments

Comments
 (0)