
Commit 71b2ed6

support MLU nums, test=develop (#19372)
1 parent e2c6bad commit 71b2ed6

File tree: 3 files changed (+31, -24 lines)

paddle/fluid/inference/api/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -70,9 +70,9 @@ cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_
 if(ANAKIN_FOUND)
   # Do not turn warnings into errors.
   set_source_files_properties(api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS "-Wno-error")
-  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
+  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
   target_link_libraries(inference_anakin_api anakin anakin_saber_common)
-  cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
+  cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
   target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
   function(anakin_target target_name)
     target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})

paddle/fluid/inference/api/api_anakin_engine.cc

Lines changed: 25 additions & 18 deletions
@@ -42,6 +42,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitEnv() {
 template <typename T, Precision P, OpRunType R>
 void PaddleInferenceAnakinPredictor<T, P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<T, P, R>(*this->graph_p_, true);
 }
 template <typename T, Precision P, OpRunType R>
@@ -89,7 +90,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitPredictor() {
   this->InitNet();
 }
 template <typename T, Precision P, OpRunType R>
-void PaddleInferenceAnakinPredictor<T, P, R>::Predict() {
+void PaddleInferenceAnakinPredictor<T, P, R>::Predict(int batch_size) {
   anakin::TargetWrapper<T>::device_sync();
   this->executor_p_->prediction();
   anakin::TargetWrapper<T>::device_sync();
@@ -99,7 +100,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
     const std::vector<PaddleTensor> &inputs,
     std::vector<PaddleTensor> *output_data, int batch_size) {
   if (this->config_.re_allocable) {
-    return this->RunImpl(inputs, output_data);
+    return this->RunImpl(inputs, output_data, batch_size);
   } else {
     // Run inputs data that exceeds batch size in batches.
     // 1. Reassign the batch size.
@@ -194,7 +195,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
 template <typename T, Precision P, OpRunType R>
 bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     const std::vector<PaddleTensor> &inputs,
-    std::vector<PaddleTensor> *output_data) {
+    std::vector<PaddleTensor> *output_data, int batch_size) {
   anakin::TargetWrapper<T>::set_device(this->config_.device_id);
   for (const auto &input : inputs) {
     if (input.dtype != PaddleDType::FLOAT32) {
@@ -207,12 +208,12 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
       LOG(FATAL) << " input " << input.name
                  << "'s shape size should be equal to that of net";
     }
+#ifndef ANAKIN_MLU_PLACE
     int sum = 1;
     for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
     if (sum > net_shape.count()) {
       if (this->config_.re_allocable) {
         this->graph_p_->Reshape(input.name, input.shape);
-        delete this->executor_p_;
         this->InitNet();
         d_tensor_p = this->executor_p_->get_in(input.name);
       } else {
@@ -221,6 +222,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
             "memory.";
       }
     }
+#endif
     std::vector<int> tmp_shape;
     for (auto s : input.shape) {
       tmp_shape.push_back(s);
@@ -229,8 +231,9 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
         h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
                  tmp_shape);
+#ifndef ANAKIN_MLU_PLACE
     d_tensor_p->reshape(tmp_shape);
-
+#endif
     if (input.lod.size() > 0) {
       if (input.lod.size() > 1) {
         LOG(FATAL) << " input lod first dim should <=1, but you set "
@@ -246,24 +249,28 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     }
     d_tensor_p->copy_from(h_tensor);
   }
-  this->Predict();
+  this->Predict(batch_size);
   if (output_data->empty()) {
-    LOG(FATAL) << "At least one output should be set with tensors' names.";
+    LOG(FATAL) << "The output param in the Run function is incorrect.";
   }
   for (auto &output : *output_data) {
     if (std::find(this->output_names_.begin(), this->output_names_.end(),
                   output.name) == this->output_names_.end()) {
       LOG(FATAL) << output.name << " is not in the outputs of the graph.";
     }
     auto *d_tensor_p = this->executor_p_->get_out(output.name);
-    output.shape = d_tensor_p->valid_shape();
-    if (output.data.length() < d_tensor_p->valid_size() * sizeof(float)) {
-      output.data.Resize(d_tensor_p->valid_size() * sizeof(float));
+    auto tmp_shape = d_tensor_p->valid_shape();
+#ifdef ANAKIN_MLU_PLACE
+    tmp_shape.set_num(batch_size);
+#endif
+    output.shape = tmp_shape;
+    if (output.data.length() < tmp_shape.count() * sizeof(float)) {
+      output.data.Resize(tmp_shape.count() * sizeof(float));
     }
     auto *data = static_cast<float *>(output.data.data());
     anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
         h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
-                 d_tensor_p->valid_shape());
+                 tmp_shape);
     h_tensor.copy_from(*d_tensor_p);
   }
   return true;
@@ -317,6 +324,8 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
       this->config_.compute_stream_id);
   this->ctx_p_->set_model_parallel(this->config_.model_parallel);
   this->ctx_p_->set_fusion(this->config_.op_fuse);
+  this->ctx_p_->enable_batch_changable();
+  this->ctx_p_->enable_channel_duplicate();
 }
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
@@ -327,14 +336,13 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinMLUPredictor<P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<anakin::MLU, P, R>();
   this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
 }
 template <Precision P, OpRunType R>
-void PaddleInferenceAnakinMLUPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
-  this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
+void PaddleInferenceAnakinMLUPredictor<P, R>::Predict(int batch_size) {
+  this->executor_p_->fusion_prediction(batch_size);
 }
 #endif
 
@@ -353,14 +361,13 @@ void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinBMPredictor<P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<anakin::BM, P, R>();
   this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
 }
 template <Precision P, OpRunType R>
-void PaddleInferenceAnakinBMPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::BM>::device_sync();
+void PaddleInferenceAnakinBMPredictor<P, R>::Predict(int batch_size) {
   this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::BM>::device_sync();
 }
 #endif
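
Note on the MLU output path above: on MLU builds the device tensor's valid_shape() can describe the graph's maximum batch, so RunImpl now overrides its num dimension with the batch_size the caller passed to Run before sizing the host output buffer. Below is a minimal standalone sketch of that sizing rule only; the helper name is hypothetical and this is not the Anakin API.

// Sketch, assuming shape = {num, channel, height, width} and that a
// non-positive batch_size means "keep the reported shape", mirroring the
// RunImpl default of batch_size = -1.
#include <cassert>
#include <cstddef>
#include <vector>

std::size_t HostBufferElements(std::vector<int> shape, int batch_size) {
  assert(!shape.empty());
  if (batch_size > 0) {
    shape[0] = batch_size;  // analogous to tmp_shape.set_num(batch_size)
  }
  std::size_t count = 1;
  for (int d : shape) count *= static_cast<std::size_t>(d);
  return count;  // output.data is resized to count * sizeof(float)
}

int main() {
  // Device reports a max-batch shape of {8, 3, 224, 224}; the caller ran with
  // batch_size = 2, so only 2 * 3 * 224 * 224 floats are copied back out.
  assert(HostBufferElements({8, 3, 224, 224}, 2) == 2u * 3 * 224 * 224);
  assert(HostBufferElements({8, 3, 224, 224}, -1) == 8u * 3 * 224 * 224);
  return 0;
}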

paddle/fluid/inference/api/api_anakin_engine.h

Lines changed: 4 additions & 4 deletions
@@ -73,7 +73,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
   virtual void OptimizeGraph();
   virtual void InitNet();
   virtual void SetContext();
-  virtual void Predict();
+  virtual void Predict(int batch_size);
   virtual std::unique_ptr<PaddlePredictor> New();
   static std::mutex mutex_;
   AnakinConfig config_;
@@ -85,7 +85,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
 
  private:
   bool RunImpl(const std::vector<PaddleTensor>& inputs,
-               std::vector<PaddleTensor>* output_data);
+               std::vector<PaddleTensor>* output_data, int batch_size = -1);
   static std::once_flag init_anakin_;
 };
 
@@ -103,7 +103,7 @@ class PaddleInferenceAnakinMLUPredictor final
   void SetContext() override;
   void OptimizeGraph() override;
   void InitNet() override;
-  void Predict() override;
+  void Predict(int batch_size) override;
 };
 #endif
 
@@ -120,7 +120,7 @@ class PaddleInferenceAnakinBMPredictor final
   std::unique_ptr<PaddlePredictor> New() override;
   void OptimizeGraph() override;
   void InitNet() override;
-  void Predict() override;
+  void Predict(int batch_size) override;
 };
 #endif
 }  // namespace paddle
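
The header change threads the new int batch_size parameter from the public Run entry point through the private RunImpl (defaulting to -1) into the virtual Predict, which each device-specific predictor overrides. Below is a toy sketch of that flow using hypothetical stand-in classes, not the real PaddlePredictor hierarchy.

// Sketch only: BasePredictor/MLUPredictor are illustrative names.
#include <iostream>

class BasePredictor {
 public:
  virtual ~BasePredictor() = default;
  bool Run(int batch_size = -1) { return RunImpl(batch_size); }

 protected:
  // Mirrors "virtual void Predict(int batch_size);" in the real header.
  virtual void Predict(int batch_size) {
    std::cout << "generic prediction, batch_size=" << batch_size << "\n";
  }

 private:
  // Mirrors "RunImpl(..., int batch_size = -1)".
  bool RunImpl(int batch_size) {
    Predict(batch_size);
    return true;
  }
};

class MLUPredictor final : public BasePredictor {
 protected:
  void Predict(int batch_size) override {
    // The real MLU override forwards to executor_p_->fusion_prediction(batch_size).
    std::cout << "MLU fusion prediction, batch_size=" << batch_size << "\n";
  }
};

int main() {
  MLUPredictor predictor;
  predictor.Run(4);  // the caller-chosen batch size reaches the device-specific override
  return 0;
}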
