|
| 1 | +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. |
| 2 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 3 | +you may not use this file except in compliance with the License. |
| 4 | +You may obtain a copy of the License at |
| 5 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 6 | +Unless required by applicable law or agreed to in writing, software |
| 7 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 8 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 9 | +See the License for the specific language governing permissions and |
| 10 | +limitations under the License. */ |
| 11 | + |
| 12 | +#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h" |
| 13 | + |
| 14 | +namespace paddle { |
| 15 | +namespace operators { |
| 16 | + |
| 17 | +using LoDTensor = framework::LoDTensor; |
| 18 | + |
| 19 | +class BoxDecoderAndAssignOp : public framework::OperatorWithKernel { |
| 20 | + public: |
| 21 | + using framework::OperatorWithKernel::OperatorWithKernel; |
| 22 | + |
| 23 | + protected: |
| 24 | + void InferShape(framework::InferShapeContext *ctx) const override { |
| 25 | + PADDLE_ENFORCE( |
| 26 | + ctx->HasInput("PriorBox"), |
| 27 | + "Input(PriorBox) of BoxDecoderAndAssignOp should not be null."); |
| 28 | + PADDLE_ENFORCE( |
| 29 | + ctx->HasInput("PriorBoxVar"), |
| 30 | + "Input(PriorBoxVar) of BoxDecoderAndAssignOp should not be null."); |
| 31 | + PADDLE_ENFORCE( |
| 32 | + ctx->HasInput("TargetBox"), |
| 33 | + "Input(TargetBox) of BoxDecoderAndAssignOp should not be null."); |
| 34 | + PADDLE_ENFORCE( |
| 35 | + ctx->HasInput("BoxScore"), |
| 36 | + "Input(BoxScore) of BoxDecoderAndAssignOp should not be null."); |
| 37 | + PADDLE_ENFORCE( |
| 38 | + ctx->HasOutput("DecodeBox"), |
| 39 | + "Output(DecodeBox) of BoxDecoderAndAssignOp should not be null."); |
| 40 | + PADDLE_ENFORCE( |
| 41 | + ctx->HasOutput("OutputAssignBox"), |
| 42 | + "Output(OutputAssignBox) of BoxDecoderAndAssignOp should not be null."); |
| 43 | + |
| 44 | + auto prior_box_dims = ctx->GetInputDim("PriorBox"); |
| 45 | + auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar"); |
| 46 | + auto target_box_dims = ctx->GetInputDim("TargetBox"); |
| 47 | + auto box_score_dims = ctx->GetInputDim("BoxScore"); |
| 48 | + |
| 49 | + PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2, |
| 50 | + "The rank of Input of PriorBox must be 2"); |
| 51 | + PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]"); |
| 52 | + PADDLE_ENFORCE_EQ(prior_box_var_dims.size(), 1, |
| 53 | + "The rank of Input of PriorBoxVar must be 1"); |
| 54 | + PADDLE_ENFORCE_EQ(prior_box_var_dims[0], 4, |
| 55 | + "The shape of PriorBoxVar is [4]"); |
| 56 | + PADDLE_ENFORCE_EQ(target_box_dims.size(), 2, |
| 57 | + "The rank of Input of TargetBox must be 2"); |
| 58 | + PADDLE_ENFORCE_EQ(box_score_dims.size(), 2, |
| 59 | + "The rank of Input of BoxScore must be 2"); |
| 60 | + PADDLE_ENFORCE_EQ(prior_box_dims[0], target_box_dims[0], |
| 61 | + "The first dim of prior_box and target_box is roi nums " |
| 62 | + "and should be same!"); |
| 63 | + PADDLE_ENFORCE_EQ(prior_box_dims[0], box_score_dims[0], |
| 64 | + "The first dim of prior_box and box_score is roi nums " |
| 65 | + "and should be same!"); |
| 66 | + PADDLE_ENFORCE_EQ(target_box_dims[1], box_score_dims[1] * prior_box_dims[1], |
| 67 | + "The shape of target_box is [N, classnum * 4], The shape " |
| 68 | + "of box_score is [N, classnum], The shape of prior_box " |
| 69 | + "is [N, 4]"); |
| 70 | + |
| 71 | + ctx->SetOutputDim("DecodeBox", framework::make_ddim({target_box_dims[0], |
| 72 | + target_box_dims[1]})); |
| 73 | + ctx->ShareLoD("TargetBox", /*->*/ "DecodeBox"); |
| 74 | + ctx->SetOutputDim( |
| 75 | + "OutputAssignBox", |
| 76 | + framework::make_ddim({prior_box_dims[0], prior_box_dims[1]})); |
| 77 | + ctx->ShareLoD("PriorBox", /*->*/ "OutputAssignBox"); |
| 78 | + } |
| 79 | +}; |
| 80 | + |
| 81 | +class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker { |
| 82 | + public: |
| 83 | + void Make() override { |
| 84 | + AddInput( |
| 85 | + "PriorBox", |
| 86 | + "(Tensor, default Tensor<float>) " |
| 87 | + "Box list PriorBox is a 2-D Tensor with shape [N, 4] which holds N " |
| 88 | + "boxes and each box is represented as [xmin, ymin, xmax, ymax], " |
| 89 | + "[xmin, ymin] is the left top coordinate of the anchor box, " |
| 90 | + "if the input is image feature map, they are close to the origin " |
| 91 | + "of the coordinate system. [xmax, ymax] is the right bottom " |
| 92 | + "coordinate of the anchor box."); |
| 93 | + AddInput("PriorBoxVar", |
| 94 | + "(Tensor, default Tensor<float>, optional) " |
| 95 | + "PriorBoxVar is a 2-D Tensor with shape [N, 4] which holds N " |
| 96 | + "group of variance. PriorBoxVar will set all elements to 1 by " |
| 97 | + "default.") |
| 98 | + .AsDispensable(); |
| 99 | + AddInput("TargetBox", |
| 100 | + "(LoDTensor or Tensor) " |
| 101 | + "This input can be a 2-D LoDTensor with shape " |
| 102 | + "[N, classnum*4]. It holds N targets for N boxes."); |
| 103 | + AddInput("BoxScore", |
| 104 | + "(LoDTensor or Tensor) " |
| 105 | + "This input can be a 2-D LoDTensor with shape " |
| 106 | + "[N, classnum], each box is represented as [classnum] which is " |
| 107 | + "the classification probabilities."); |
| 108 | + AddAttr<float>("box_clip", |
| 109 | + "(float, default 4.135, np.log(1000. / 16.)) " |
| 110 | + "clip box to prevent overflowing") |
| 111 | + .SetDefault(4.135f); |
| 112 | + AddOutput("DecodeBox", |
| 113 | + "(LoDTensor or Tensor) " |
| 114 | + "the output tensor of op with shape [N, classnum * 4] " |
| 115 | + "representing the result of N target boxes decoded with " |
| 116 | + "M Prior boxes and variances for each class."); |
| 117 | + AddOutput("OutputAssignBox", |
| 118 | + "(LoDTensor or Tensor) " |
| 119 | + "the output tensor of op with shape [N, 4] " |
| 120 | + "representing the result of N target boxes decoded with " |
| 121 | + "M Prior boxes and variances with the best non-background class " |
| 122 | + "by BoxScore."); |
| 123 | + AddComment(R"DOC( |
| 124 | +
|
| 125 | +Bounding Box Coder. |
| 126 | +
|
| 127 | +Decode the target bounding box with the prior_box information. |
| 128 | +
|
| 129 | +The Decoding schema is described below: |
| 130 | +
|
| 131 | + $$ |
| 132 | + ox = (pw \\times pxv \\times tx + px) - \\frac{tw}{2} |
| 133 | + $$ |
| 134 | + $$ |
| 135 | + oy = (ph \\times pyv \\times ty + py) - \\frac{th}{2} |
| 136 | + $$ |
| 137 | + $$ |
| 138 | + ow = \\exp (pwv \\times tw) \\times pw + \\frac{tw}{2} |
| 139 | + $$ |
| 140 | + $$ |
| 141 | + oh = \\exp (phv \\times th) \\times ph + \\frac{th}{2} |
| 142 | + $$ |
| 143 | +
|
| 144 | +where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width |
| 145 | +and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the |
| 146 | +prior_box's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`, |
| 147 | +`phv` denote the variance of the prior_box and `ox`, `oy`, `ow`, `oh` denote the |
| 148 | +decoded coordinates, width and height in decode_box. |
| 149 | +
|
| 150 | +decode_box is obtained after box decode, then assigning schema is described below: |
| 151 | +
|
| 152 | +For each prior_box, use the best non-background class's decoded values to |
| 153 | +update the prior_box locations and get output_assign_box. So, the shape of |
| 154 | +output_assign_box is the same as PriorBox. |
| 155 | +)DOC"); |
| 156 | + } |
| 157 | +}; |
| 158 | + |
| 159 | +} // namespace operators |
| 160 | +} // namespace paddle |
| 161 | + |
// Short alias for the operator namespace, used by the registration macros
// below.
| 162 | +namespace ops = paddle::operators; |
// Registers the box_decoder_and_assign operator with its operator class and
// its proto maker. EmptyGradOpMaker is passed as the grad-op maker
// (presumably meaning no backward op is generated -- confirm against the
// framework).
| 163 | +REGISTER_OPERATOR(box_decoder_and_assign, ops::BoxDecoderAndAssignOp, |
| 164 | + ops::BoxDecoderAndAssignOpMaker, |
| 165 | + paddle::framework::EmptyGradOpMaker); |
// Registers the CPU kernels for this operator, instantiated for float and
// double element types.
| 166 | +REGISTER_OP_CPU_KERNEL( |
| 167 | + box_decoder_and_assign, |
| 168 | + ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, float>, |
| 169 | + ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, double>); |
0 commit comments