@@ -42,6 +42,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitEnv() {
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitNet() {
  std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
  this->executor_p_ = new anakin::Net<T, P, R>(*this->graph_p_, true);
}
template <typename T, Precision P, OpRunType R>
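The `delete this->executor_p_;` added above turns `InitNet()` into a safe re-initialization point (it is also called after `graph_p_->Reshape()` later in this diff), relying on `delete` of a null pointer being a no-op. A minimal standalone sketch of the idiom, assuming `executor_p_` starts out as `nullptr`; the `Executor`/`Predictor` types below are illustrative stand-ins, not Paddle classes:

#include <mutex>

struct Executor {};  // stand-in for anakin::Net

class Predictor {
 public:
  // Safe to call any number of times: the first delete hits nullptr (a no-op),
  // later calls free the previous executor before building a new one.
  void InitNet() {
    std::unique_lock<std::mutex> lock(mutex_);
    delete executor_p_;
    executor_p_ = new Executor();
  }
  ~Predictor() { delete executor_p_; }

 private:
  std::mutex mutex_;
  Executor* executor_p_{nullptr};  // must start as nullptr for the idiom to hold
};

int main() {
  Predictor p;
  p.InitNet();  // first build
  p.InitNet();  // rebuild without leaking the previous executor
}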
@@ -89,7 +90,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitPredictor() {
  this->InitNet();
}
template <typename T, Precision P, OpRunType R>
-void PaddleInferenceAnakinPredictor<T, P, R>::Predict() {
+void PaddleInferenceAnakinPredictor<T, P, R>::Predict(int batch_size) {
  anakin::TargetWrapper<T>::device_sync();
  this->executor_p_->prediction();
  anakin::TargetWrapper<T>::device_sync();
@@ -99,7 +100,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
    const std::vector<PaddleTensor> &inputs,
    std::vector<PaddleTensor> *output_data, int batch_size) {
  if (this->config_.re_allocable) {
-    return this->RunImpl(inputs, output_data);
+    return this->RunImpl(inputs, output_data, batch_size);
  } else {
    // Run inputs data that exceeds batch size in batches.
    // 1. Reassign the batch size.
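For the non-`re_allocable` branch, the comments above describe splitting a request that exceeds the batch size into smaller batches before running each one. A standalone sketch of just that chunking step (the tensor slicing and the per-chunk `RunImpl` calls are omitted; `SplitIntoBatches` is an illustrative helper, not part of this diff):

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Returns {offset, length} pairs covering `total` samples in chunks of at most
// `batch_size`.
std::vector<std::pair<int, int>> SplitIntoBatches(int total, int batch_size) {
  std::vector<std::pair<int, int>> chunks;
  for (int start = 0; start < total; start += batch_size) {
    chunks.emplace_back(start, std::min(batch_size, total - start));
  }
  return chunks;
}

int main() {
  // 10 samples with batch size 4 -> [0,4) [4,8) [8,10)
  for (auto [off, len] : SplitIntoBatches(10, 4)) {
    std::printf("[%d,%d) ", off, off + len);
  }
}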
@@ -194,7 +195,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
template <typename T, Precision P, OpRunType R>
bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
    const std::vector<PaddleTensor> &inputs,
-    std::vector<PaddleTensor> *output_data) {
+    std::vector<PaddleTensor> *output_data, int batch_size) {
  anakin::TargetWrapper<T>::set_device(this->config_.device_id);
  for (const auto &input : inputs) {
    if (input.dtype != PaddleDType::FLOAT32) {
@@ -207,12 +208,12 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
      LOG(FATAL) << "input " << input.name
                 << "'s shape size should be equal to that of net";
    }
+#ifndef ANAKIN_MLU_PLACE
    int sum = 1;
    for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
    if (sum > net_shape.count()) {
      if (this->config_.re_allocable) {
        this->graph_p_->Reshape(input.name, input.shape);
-        delete this->executor_p_;
        this->InitNet();
        d_tensor_p = this->executor_p_->get_in(input.name);
      } else {
@@ -221,6 +222,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
                      "memory.";
      }
    }
+#endif
    std::vector<int> tmp_shape;
    for (auto s : input.shape) {
      tmp_shape.push_back(s);
@@ -229,8 +231,9 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
    anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
        h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
                 tmp_shape);
+#ifndef ANAKIN_MLU_PLACE
    d_tensor_p->reshape(tmp_shape);
-
+#endif
    if (input.lod.size() > 0) {
      if (input.lod.size() > 1) {
        LOG(FATAL) << "input lod first dim should <=1, but you set "
@@ -246,24 +249,28 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
    }
    d_tensor_p->copy_from(h_tensor);
  }
-  this->Predict();
+  this->Predict(batch_size);
  if (output_data->empty()) {
-    LOG(FATAL) << "At least one output should be set with tensors' names.";
+    LOG(FATAL) << "The output param in the Run function is incorrect.";
  }
  for (auto &output : *output_data) {
    if (std::find(this->output_names_.begin(), this->output_names_.end(),
                  output.name) == this->output_names_.end()) {
      LOG(FATAL) << output.name << " is not in the outputs of the graph.";
    }
    auto *d_tensor_p = this->executor_p_->get_out(output.name);
-    output.shape = d_tensor_p->valid_shape();
-    if (output.data.length() < d_tensor_p->valid_size() * sizeof(float)) {
-      output.data.Resize(d_tensor_p->valid_size() * sizeof(float));
+    auto tmp_shape = d_tensor_p->valid_shape();
+#ifdef ANAKIN_MLU_PLACE
+    tmp_shape.set_num(batch_size);
+#endif
+    output.shape = tmp_shape;
+    if (output.data.length() < tmp_shape.count() * sizeof(float)) {
+      output.data.Resize(tmp_shape.count() * sizeof(float));
    }
    auto *data = static_cast<float *>(output.data.data());
    anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
        h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
-                 d_tensor_p->valid_shape());
+                 tmp_shape);
    h_tensor.copy_from(*d_tensor_p);
  }
  return true;
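On the MLU path the shape returned by `valid_shape()` presumably still carries the batch dimension the graph was compiled with, so the hunk above overwrites its `num` with the runtime `batch_size` before sizing the output buffer. A standalone sketch of that sizing logic only; `Shape4D` is an illustrative stand-in for `anakin::saber::Shape`, not the real type:

#include <cstddef>
#include <cstdio>

struct Shape4D {  // stand-in for anakin::saber::Shape (NCHW)
  int num, channel, height, width;
  void set_num(int n) { num = n; }
  size_t count() const {
    return static_cast<size_t>(num) * channel * height * width;
  }
};

// Mirrors the #ifdef ANAKIN_MLU_PLACE branch: override the batch dimension,
// then size the host buffer from the adjusted element count.
size_t OutputBytes(Shape4D shape, int batch_size, bool mlu_place) {
  if (mlu_place) shape.set_num(batch_size);
  return shape.count() * sizeof(float);
}

int main() {
  Shape4D s{/*num=*/8, /*channel=*/3, /*height=*/224, /*width=*/224};
  std::printf("%zu bytes\n", OutputBytes(s, /*batch_size=*/2, /*mlu_place=*/true));
}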
@@ -317,6 +324,8 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
      this->config_.compute_stream_id);
  this->ctx_p_->set_model_parallel(this->config_.model_parallel);
  this->ctx_p_->set_fusion(this->config_.op_fuse);
+  this->ctx_p_->enable_batch_changable();
+  this->ctx_p_->enable_channel_duplicate();
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
@@ -327,14 +336,13 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::InitNet() {
  std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
  this->executor_p_ = new anakin::Net<anakin::MLU, P, R>();
  this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
}
template <Precision P, OpRunType R>
-void PaddleInferenceAnakinMLUPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
-  this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
+void PaddleInferenceAnakinMLUPredictor<P, R>::Predict(int batch_size) {
+  this->executor_p_->fusion_prediction(batch_size);
}
#endif
@@ -353,14 +361,13 @@ void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::InitNet() {
  std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
  this->executor_p_ = new anakin::Net<anakin::BM, P, R>();
  this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
}
template <Precision P, OpRunType R>
-void PaddleInferenceAnakinBMPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::BM>::device_sync();
+void PaddleInferenceAnakinBMPredictor<P, R>::Predict(int batch_size) {
  this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::BM>::device_sync();
}
#endif