Skip to content

Commit 6ef7c9e

Browse files
authored
[MLU] add mlu_ops_api (PaddlePaddle#126)
1 parent 22b38d9 commit 6ef7c9e

File tree

6 files changed

+389
-1
lines changed

6 files changed

+389
-1
lines changed

backends/mlu/cmake/external/neuware.cmake

100644 → 100755
+2 -1
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so)
2828
set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so)
2929
set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so)
3030
set(CNCL_LIB ${NEUWARE_LIB_DIR}/libcncl.so)
31+
set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so)
3132

32-
set(NEUWARE_LIBS ${CNNL_LIB} ${CNRT_LIB} ${CNPAPI_LIB} ${CNCL_LIB})
33+
set(NEUWARE_LIBS ${CNNL_LIB} ${CNRT_LIB} ${CNPAPI_LIB} ${CNCL_LIB} ${MLUOP_LIB})
3334

backends/mlu/kernels/abs_kernel.cc

100644 → 100755
+1
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ void AbsKernel(const Context& dev_ctx,
3131
GetBasePtr(out));
3232
}
3333

34+
3435
template <typename T, typename Context>
3536
void AbsGradKernel(const Context& dev_ctx,
3637
const phi::DenseTensor& x,

backends/mlu/kernels/funcs/mlu_baseop.cc

100644 → 100755
+220
Original file line number | Diff line number | Diff line change
@@ -244,6 +244,184 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
244244
}
245245
}
246246

247+
248+
class MLUOpTensorDescPool {
249+
public:
250+
mluOpTensorDescriptor_t Pop() {
251+
mluOpTensorDescriptor_t raw_desc;
252+
if (q_.try_dequeue(raw_desc)) {
253+
return raw_desc;
254+
} else {
255+
mluOpCreateTensorDescriptor(&raw_desc);
256+
return raw_desc;
257+
}
258+
}
259+
260+
void Recycle(mluOpTensorDescriptor_t desc) {
261+
mluOpResetTensorDescriptor(desc);
262+
q_.enqueue(desc);
263+
}
264+
265+
~MLUOpTensorDescPool() {
266+
auto size = q_.size_approx();
267+
if (size > 0) {
268+
std::vector<mluOpTensorDescriptor_t> vec(size);
269+
q_.try_dequeue_bulk(vec.data(), size);
270+
for (auto desc : vec) {
271+
mluOpDestroyTensorDescriptor(desc);
272+
}
273+
}
274+
}
275+
276+
private:
277+
moodycamel::ConcurrentQueue<mluOpTensorDescriptor_t> q_;
278+
};
279+
280+
// File-local descriptor pool. MLUOpTensorDesc constructors Pop() from it, and
// the move assignment operator and destructor Recycle() into it.
static MLUOpTensorDescPool g_mluop_tensor_desc_pool;
281+
282+
// Move assignment: hands the currently held descriptor (if any) back to the
// pool, then takes over the descriptor owned by `rhs`, leaving `rhs` empty.
// Self-assignment is a no-op; without the guard the descriptor would be
// recycled and this object left holding nullptr.
MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) {
  if (this == &rhs) {
    return *this;
  }
  if (raw_tensor_desc) {
    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
  }
  raw_tensor_desc = rhs.raw_tensor_desc;
  rhs.raw_tensor_desc = nullptr;
  return *this;
}
290+
291+
// Builds a descriptor for `tensor_dim` int-sized dimensions of type
// `tensor_dtype` using the MLUOP_LAYOUT_ARRAY layout. Forwards to the
// overload that takes an explicit layout, which performs the same
// Pop + mluOpSetTensorDescriptor sequence.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype)
    : MLUOpTensorDesc(
          tensor_dim, dim_sizes, tensor_dtype, MLUOP_LAYOUT_ARRAY) {}
298+
299+
// Builds a descriptor for `tensor_dim` int-sized dimensions with an explicit
// layout. The descriptor comes from the file-local pool and is configured via
// mluOpSetTensorDescriptor, whose status is checked.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptor(raw_tensor_desc,
                               layout,
                               tensor_dtype,
                               tensor_dim,
                               dim_sizes));
}
307+
308+
// Builds an MLUOP_LAYOUT_ARRAY descriptor through the
// (tensor_dim, dim_sizes, tensor_dtype) overload and then applies `position`
// with mluOpSetTensorDescriptorPosition.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
316+
317+
// Builds an MLUOP_LAYOUT_ARRAY descriptor from int64_t dimension sizes.
// Each size is converted to int through CheckedNarrowing<int64_t, int>
// (presumably rejecting values that do not fit - confirm against its
// definition) before being passed to mluOpSetTensorDescriptor.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype) {
  // Raw pointers are valid iterators; wrapping them in
  // std::vector<int64_t>::const_iterator relies on a standard-library
  // implementation detail and is not portable.
  std::vector<int> dim_sizes_int32(tensor_dim);
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  // Narrow first, then take a descriptor, so a failed conversion does not
  // leave a popped descriptor behind.
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
                                                      MLUOP_LAYOUT_ARRAY,
                                                      tensor_dtype,
                                                      tensor_dim,
                                                      dim_sizes_int32.data()));
}
334+
335+
// Builds a descriptor with an explicit layout from int64_t dimension sizes.
// Each size is converted to int through CheckedNarrowing<int64_t, int>
// (presumably rejecting values that do not fit - confirm against its
// definition) before being passed to mluOpSetTensorDescriptor.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  // Raw pointers are valid iterators; wrapping them in
  // std::vector<int64_t>::const_iterator relies on a standard-library
  // implementation detail and is not portable.
  std::vector<int> dim_sizes_int32(tensor_dim);
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  // Narrow first, then take a descriptor, so a failed conversion does not
  // leave a popped descriptor behind.
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
                                                      layout,
                                                      tensor_dtype,
                                                      tensor_dim,
                                                      dim_sizes_int32.data()));
}
353+
354+
// Builds an MLUOP_LAYOUT_ARRAY descriptor from int64_t dimension sizes and
// then applies `position` with mluOpSetTensorDescriptorPosition.
//
// The narrowing and descriptor setup are delegated to the
// (tensor_dim, int64_t dim_sizes[], tensor_dtype) overload, mirroring how the
// int-based position overload is written. Because the object counts as
// constructed once the delegated constructor returns, the destructor runs
// (and recycles the descriptor) if setting the position raises.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
374+
375+
// Builds a descriptor describing `tensor`'s dimensions with the given layout
// and data type. A tensor with zero dimensions is described as a
// one-dimensional tensor of size 1.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 const mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype) {
  // phi::vectorize<int> already yields int-sized dims, so the vector is used
  // directly instead of being copied into a second one.
  auto dims = phi::vectorize<int>(tensor.dims());
  if (dims.empty()) {
    dims.push_back(1);  // 0-D tensor: describe it as shape {1}.
  }
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptor(raw_tensor_desc,
                               layout,
                               tensor_dtype,
                               static_cast<int>(dims.size()),
                               dims.data()));
}
395+
396+
// Convenience constructor: describes `tensor` using MLUOP_LAYOUT_ARRAY and
// the mluOp data type obtained from ToMluOpDataType(tensor.dtype()).
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor)
    : MLUOpTensorDesc(tensor,
                      MLUOP_LAYOUT_ARRAY,
                      ToMluOpDataType(tensor.dtype())) {}
399+
400+
// Describes `tensor` via the (tensor, layout, tensor_dtype) overload and then
// applies `position` with mluOpSetTensorDescriptorPosition.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
408+
409+
// Describes `tensor` via the (tensor, layout, tensor_dtype) overload and then
// applies both `position` and `scale` with
// mluOpSetTensorDescriptorPositionAndScale.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position,
                                 float scale)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPositionAndScale(raw_tensor_desc,
                                               position,
                                               scale));
}
418+
419+
// Hands the held descriptor back to the file-local pool. An object holding
// no descriptor (for example after being moved from) has nothing to release.
MLUOpTensorDesc::~MLUOpTensorDesc() {
  if (!raw_tensor_desc) {
    return;
  }
  g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
}
424+
247425
MLUCnnlActivationDesc::MLUCnnlActivationDesc(
248426
const cnnlActivationMode_t act_mode, const float ceof) {
249427
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateActivationDescriptor(&active_desc_));
@@ -5035,4 +5213,46 @@ MLURNNDesc::~MLURNNDesc() {
50355213
diff_x));
50365214
}
50375215

5216+
5217+
// /* static */ void MLUOp::OpYoloBox(
5218+
// const ExecutionContext& ctx,
5219+
// const mluOpTensorDescriptor_t x_desc, const void *x,
5220+
// const mluOpTensorDescriptor_t img_size_desc, const void *img_size,
5221+
// const mluOpTensorDescriptor_t anchors_desc, const void *anchors,
5222+
// const int class_num,
5223+
// const float conf_thresh,
5224+
// const int downsample_ratio,
5225+
// const bool clip_bbox,
5226+
// const float scale,
5227+
// const bool iou_aware,
5228+
// const float iou_aware_factor,
5229+
// const mluOpTensorDescriptor_t boxes_desc,
5230+
// void *boxes,
5231+
// const mluOpTensorDescriptor_t scores_desc,
5232+
// void *scores){
5233+
// mluOpHandle_t handle = GetMluOpHandleFromCTX(ctx);
5234+
5235+
// PADDLE_ENFORCE_MLU_SUCCESS(mluOpYoloBox(
5236+
// handle,
5237+
// x_desc, x,
5238+
// img_size_desc, img_size,
5239+
// anchors_desc, anchors,
5240+
// class_num,
5241+
// conf_thresh,
5242+
// downsample_ratio,
5243+
// clip_bbox,
5244+
// scale,
5245+
// iou_aware,
5246+
// iou_aware_factor,
5247+
// boxes_desc,
5248+
// boxes,
5249+
// scores_desc,
5250+
// scores));
5251+
5252+
// }
5253+
5254+
5255+
5256+
5257+
50385258
} // namespace custom_kernel

0 commit comments

Comments
 (0)