tianyuzhou668
diff --git a/‎backends/mlu/cmake/external/neuware.cmake
100644100755
+2-1 b/‎backends/mlu/cmake/external/neuware.cmake
100644100755
+2-1
diff --git a/‎backends/mlu/kernels/abs_kernel.cc
100644100755
+1 b/‎backends/mlu/kernels/abs_kernel.cc
100644100755
+1
diff --git a/‎backends/mlu/kernels/funcs/mlu_baseop.cc
100644100755
+220 b/‎backends/mlu/kernels/funcs/mlu_baseop.cc
100644100755
+220
@@ -28,6 +28,7 @@ set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so)
 set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so)
 set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so)
 set(CNCL_LIB ${NEUWARE_LIB_DIR}/libcncl.so)
+set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so)
 
-set(NEUWARE_LIBS ${CNNL_LIB} ${CNRT_LIB} ${CNPAPI_LIB} ${CNCL_LIB})
+set(NEUWARE_LIBS ${CNNL_LIB} ${CNRT_LIB} ${CNPAPI_LIB} ${CNCL_LIB} ${MLUOP_LIB})
 
@@ -31,6 +31,7 @@ void AbsKernel(const Context& dev_ctx,
                GetBasePtr(out));
 }
 
+
 template <typename T, typename Context>
 void AbsGradKernel(const Context& dev_ctx,
                    const phi::DenseTensor& x,
 
@@ -244,6 +244,184 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
   }
 }
 
+
+// Pool of reusable mluOpTensorDescriptor_t handles backed by a
+// moodycamel::ConcurrentQueue. Pop() hands out a pooled descriptor or creates
+// a new one, Recycle() resets a descriptor and puts it back, and the
+// destructor destroys whatever is still queued.
+class MLUOpTensorDescPool {
+ public:
+  // Returns a descriptor taken from the pool, or a freshly created one when
+  // the pool is empty.
+  mluOpTensorDescriptor_t Pop() {
+    mluOpTensorDescriptor_t raw_desc = nullptr;
+    if (!q_.try_dequeue(raw_desc)) {
+      // Check the creation status so that a failed allocation is reported
+      // here instead of handing an unusable handle to the caller.
+      PADDLE_ENFORCE_MLU_SUCCESS(mluOpCreateTensorDescriptor(&raw_desc));
+    }
+    return raw_desc;
+  }
+
+  // Resets `desc` and queues it for reuse. The reset status is deliberately
+  // not enforced: this is called from ~MLUOpTensorDesc.
+  void Recycle(mluOpTensorDescriptor_t desc) {
+    mluOpResetTensorDescriptor(desc);
+    q_.enqueue(desc);
+  }
+
+  // Destroys every descriptor still held by the pool.
+  ~MLUOpTensorDescPool() {
+    // Drain element by element. size_approx() is only an estimate, so sizing
+    // a buffer from it and ignoring how many items were really dequeued can
+    // pass null slots to mluOpDestroyTensorDescriptor or leave descriptors
+    // behind.
+    mluOpTensorDescriptor_t desc = nullptr;
+    while (q_.try_dequeue(desc)) {
+      mluOpDestroyTensorDescriptor(desc);
+    }
+  }
+
+ private:
+  moodycamel::ConcurrentQueue<mluOpTensorDescriptor_t> q_;
+};
+
+// File-local pool shared by the MLUOpTensorDesc constructors, move assignment
+// and destructor defined below.
+static MLUOpTensorDescPool g_mluop_tensor_desc_pool;
+
+// Move assignment: recycles the descriptor currently held (if any), takes
+// over the descriptor of `rhs`, and leaves `rhs` holding nullptr.
+MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) {
+  // Self-move guard: without it the held descriptor would be recycled into
+  // the pool and this object would end up holding nullptr.
+  if (this == &rhs) {
+    return *this;
+  }
+  if (raw_tensor_desc) {
+    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
+  }
+  raw_tensor_desc = rhs.raw_tensor_desc;
+  rhs.raw_tensor_desc = nullptr;
+  return *this;
+}
+
+// Builds a descriptor for `tensor_dim` dimensions whose sizes are given as
+// 32-bit ints, using MLUOP_LAYOUT_ARRAY as the layout. All work is done by
+// the overload that takes an explicit layout.
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype)
+    : MLUOpTensorDesc(
+          tensor_dim, dim_sizes, tensor_dtype, MLUOP_LAYOUT_ARRAY) {}
+
+// Builds a descriptor for `tensor_dim` dimensions whose sizes are given as
+// 32-bit ints, with the caller-supplied `layout` and `tensor_dtype`.
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 const mluOpTensorLayout_t layout) {
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  try {
+    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
+        raw_tensor_desc, layout, tensor_dtype, tensor_dim, dim_sizes));
+  } catch (...) {
+    // A constructor that exits by exception never runs its destructor, so
+    // hand the descriptor back to the pool before propagating.
+    // NOTE(review): assumes PADDLE_ENFORCE_MLU_SUCCESS throws on failure.
+    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
+    raw_tensor_desc = nullptr;
+    throw;
+  }
+}
+
+// Builds the descriptor through the three-argument int overload (which uses
+// MLUOP_LAYOUT_ARRAY), then forwards `position` unchanged to
+// mluOpSetTensorDescriptorPosition.
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position)
+    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
+  PADDLE_ENFORCE_MLU_SUCCESS(
+      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
+}
+
+// Builds a descriptor for `tensor_dim` dimensions whose sizes are given as
+// 64-bit ints, using MLUOP_LAYOUT_ARRAY as the layout. The narrowing to int
+// and the descriptor setup are done by the overload that takes an explicit
+// layout, which avoids a second copy of that logic (and of its
+// vector-iterator-from-pointer construction) here.
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int64_t dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype)
+    : MLUOpTensorDesc(
+          tensor_dim, dim_sizes, tensor_dtype, MLUOP_LAYOUT_ARRAY) {}
+
+// Builds a descriptor for `tensor_dim` dimensions whose sizes are given as
+// 64-bit ints, with the caller-supplied `layout` and `tensor_dtype`. Each
+// size is converted to int through CheckedNarrowing<int64_t, int> before the
+// descriptor is set.
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int64_t dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 const mluOpTensorLayout_t layout) {
+  std::vector<int> dim_sizes_int32(tensor_dim);
+  // Raw pointers are already valid input iterators; constructing a
+  // std::vector<int64_t>::const_iterator from a pointer is not portable
+  // across standard library implementations.
+  std::transform(dim_sizes,
+                 dim_sizes + tensor_dim,
+                 dim_sizes_int32.begin(),
+                 &CheckedNarrowing<int64_t, int>);
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  try {
+    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
+                                                        layout,
+                                                        tensor_dtype,
+                                                        tensor_dim,
+                                                        dim_sizes_int32.data()));
+  } catch (...) {
+    // A constructor that exits by exception never runs its destructor, so
+    // hand the descriptor back to the pool before propagating.
+    // NOTE(review): assumes PADDLE_ENFORCE_MLU_SUCCESS throws on failure.
+    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
+    raw_tensor_desc = nullptr;
+    throw;
+  }
+}
+
+// Builds the descriptor through the three-argument int64 overload (which
+// narrows the sizes to int and uses MLUOP_LAYOUT_ARRAY), then forwards
+// `position` unchanged to mluOpSetTensorDescriptorPosition. This mirrors the
+// int-sized overload with the same trailing parameter. Because the delegated
+// constructor has completed before the body runs, the destructor recycles
+// the descriptor if setting the position fails with an exception.
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int64_t dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position)
+    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
+  PADDLE_ENFORCE_MLU_SUCCESS(
+      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
+}
+
+// Builds a descriptor from the dimensions of `tensor`, with the
+// caller-supplied `layout` and `tensor_dtype`. A tensor with zero dimensions
+// is described as a one-dimensional tensor of size 1.
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
+                                 const mluOpTensorLayout_t layout,
+                                 const mluOpDataType_t tensor_dtype) {
+  // phi::vectorize<int> already yields int sizes, so they are used directly
+  // instead of being copied into a second vector.
+  auto dims = phi::vectorize<int>(tensor.dims());
+  if (dims.empty()) {
+    dims.push_back(1);
+  }
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  try {
+    PADDLE_ENFORCE_MLU_SUCCESS(
+        mluOpSetTensorDescriptor(raw_tensor_desc,
+                                 layout,
+                                 tensor_dtype,
+                                 static_cast<int>(dims.size()),
+                                 dims.data()));
+  } catch (...) {
+    // A constructor that exits by exception never runs its destructor, so
+    // hand the descriptor back to the pool before propagating.
+    // NOTE(review): assumes PADDLE_ENFORCE_MLU_SUCCESS throws on failure.
+    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
+    raw_tensor_desc = nullptr;
+    throw;
+  }
+}
+
+// Describes `tensor` with MLUOP_LAYOUT_ARRAY and the mluOp data type that
+// ToMluOpDataType returns for tensor.dtype().
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor)
+    : MLUOpTensorDesc(
+          tensor, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(tensor.dtype())) {}
+
+// Builds the descriptor through the (tensor, layout, dtype) overload, then
+// forwards `position` unchanged to mluOpSetTensorDescriptorPosition.
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
+                                 mluOpTensorLayout_t layout,
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position)
+    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
+  PADDLE_ENFORCE_MLU_SUCCESS(
+      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
+}
+
+// Builds the descriptor through the (tensor, layout, dtype) overload, then
+// forwards `position` and `scale` unchanged to
+// mluOpSetTensorDescriptorPositionAndScale.
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
+                                 mluOpTensorLayout_t layout,
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position,
+                                 float scale)
+    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptorPositionAndScale(
+      raw_tensor_desc, position, scale));
+}
+
+// Hands the held descriptor, if any, back to the file-local pool for reuse.
+MLUOpTensorDesc::~MLUOpTensorDesc() {
+  if (!raw_tensor_desc) {
+    return;
+  }
+  g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
+}
+
 MLUCnnlActivationDesc::MLUCnnlActivationDesc(
     const cnnlActivationMode_t act_mode, const float ceof) {
   PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateActivationDescriptor(&active_desc_));
@@ -5035,4 +5213,46 @@ MLURNNDesc::~MLURNNDesc() {
                                                               diff_x));
 }
 
+
+// /* static */ void MLUOp::OpYoloBox(
+//     const ExecutionContext& ctx,
+//     const mluOpTensorDescriptor_t x_desc, const void *x,
+//     const mluOpTensorDescriptor_t img_size_desc, const void *img_size,
+//     const mluOpTensorDescriptor_t anchors_desc, const void *anchors,
+//     const int class_num, 
+//     const float conf_thresh, 
+//     const int downsample_ratio,
+//     const bool clip_bbox, 
+//     const float scale, 
+//     const bool iou_aware,
+//     const float iou_aware_factor, 
+//     const mluOpTensorDescriptor_t boxes_desc,
+//     void *boxes, 
+//     const mluOpTensorDescriptor_t scores_desc, 
+//     void *scores){
+//         mluOpHandle_t handle =  GetMluOpHandleFromCTX(ctx);
+
+//     PADDLE_ENFORCE_MLU_SUCCESS(mluOpYoloBox(
+//                    handle, 
+//                    x_desc, x,
+//                    img_size_desc, img_size,
+//                    anchors_desc, anchors,
+//                    class_num, 
+//                    conf_thresh, 
+//                    downsample_ratio,
+//                    clip_bbox, 
+//                    scale, 
+//                    iou_aware,
+//                    iou_aware_factor, 
+//                    boxes_desc,
+//                    boxes, 
+//                    scores_desc, 
+//                    scores));
+
+//     }
+
+
+
+
+
 }  // namespace custom_kernel
Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@ void AbsKernel(const Context& dev_ctx,`
`31`	`31`	`GetBasePtr(out));`
`32`	`32`	`}`
`33`	`33`
	`34`	`+`
`34`	`35`	`template <typename T, typename Context>`
`35`	`36`	`void AbsGradKernel(const Context& dev_ctx,`
`36`	`37`	`const phi::DenseTensor& x,`