Skip to content

Commit c50fb58

Browse files
authored
test=release/1.5 (PaddlePaddle#18134)
cherry pick for deform roi pooling
1 parent 1810bfb commit c50fb58

File tree

7 files changed

+1786
-0
lines changed

7 files changed

+1786
-0
lines changed

paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_c
238238
paddle.fluid.layers.where (ArgSpec(args=['condition'], varargs=None, keywords=None, defaults=None), ('document', '3126e3039e752ce26077f1efaca355c6'))
239239
paddle.fluid.layers.sign (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'ccf6bb7912afd2818d24bc45461e807a'))
240240
paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'deformable_groups', 'im2col_step', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, None, None, None)), ('document', 'c896b66265a60bd3c5510f66e6e02919'))
241+
paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '65b8dbe13e00c4dc8224652f6ff89540'))
241242
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '9e87163ba32003f21d2c9d8c6a605ada'))
242243
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'dce69a78638da8f7ad80b1fc00ed2029'))
243244
paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '32181f6037e387fb6e68a5beaafe33b6'))
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/operators/deformable_psroi_pooling_op.h"
16+
#include <iostream>
17+
#include <memory>
18+
#include <vector>
19+
#include "paddle/fluid/operators/math/blas.h"
20+
21+
namespace paddle {
22+
namespace operators {
23+
// Declares the proto (inputs, outputs, attributes and user-facing
// documentation) of the deformable_psroi_pooling operator.
//
// Inputs:  Input, ROIs, Trans
// Outputs: TopCount, Output
// Attrs:   no_trans, spatial_scale, output_dim, group_size, pooled_height,
//          pooled_width, part_size, sample_per_part, trans_std
//
// Only the human-readable description strings differ from the previous
// revision (typos, missing separators between concatenated literals, and the
// self-contradictory output_dim text); names, types and declaration order are
// unchanged.
class DeformablePSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Input",
             "(Tensor), "
             "the input of Deformable PSROIPooling. "
             "The shape of input tensor is [N,C,H,W]. Where N is batch size, "
             "C is number of input channels, "
             "H is height of the feature, and "
             "W is the width of the feature.");
    AddInput("ROIs",
             "(LoDTensor), "
             "ROIs (Regions of Interest) to pool over. "
             "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) "
             "given as [[x1, y1, x2, y2], ...]. "
             "(x1, y1) is the top left coordinates, and "
             "(x2, y2) is the bottom right coordinates.");
    // Adjacent literals are concatenated verbatim, so each fragment must
    // carry its own trailing space.
    AddInput("Trans",
             "(Tensor), "
             "offset of features on ROIs while pooling. "
             "The format is NCHW, where N is number of ROIs, "
             "C is number of channels, which indicate the offset distance "
             "in the x and y directions, "
             "H is pooled height, and "
             "W is pooled width.");
    AddAttr<bool>("no_trans",
                  "(bool), "
                  "whether add offset to get new value or not while roi "
                  "pooling, which value is True or False");
    AddAttr<float>("spatial_scale",
                   "(float), "
                   "ratio of input feature map height (or width) to "
                   "raw image height (or width). Equals the reciprocal "
                   "of total stride in convolutional layers.");
    // InferShape enforces input_channels >= output_dim, so the description
    // must not claim that output channels exceed input channels.
    AddAttr<int>("output_dim",
                 "(int), "
                 "the number of output channels, which should not be greater "
                 "than input channels. Deformable roi_pooling requires "
                 "output_channels = input_channels, while deformable "
                 "psroi_pooling requires output_channels = input_channels "
                 "/ (pooled_height * pooled_width)");
    AddAttr<std::vector<int>>(
        "group_size",
        "(vector<int>), "
        "the number of groups which input channels are divided. "
        "(e.g. number of input channels is k1*k2*(C+1), where k1 and k2 "
        "are group width and height and C+1 is number of output "
        "channels.) e.g. (4, 6), where 4 is height of group and 6 is "
        "width of group");
    AddAttr<int>("pooled_height",
                 "(int), "
                 "the pooled output height.");
    AddAttr<int>("pooled_width",
                 "(int), "
                 "the pooled output width.");
    AddAttr<std::vector<int>>(
        "part_size",
        "(vector<int>), "
        "the height and width of offset, e.g. (4, 6), where height is 4 "
        "and width is 6");
    AddAttr<int>("sample_per_part",
                 "(int), "
                 "the number of samples in each bin");
    AddAttr<float>("trans_std",
                   "(float), "
                   "Coefficient of offset");
    AddOutput("TopCount",
              "(Tensor), "
              "record the number of pixels used in average pooling in each "
              "bin. "
              "The format is NCHW, where N is the number of ROIs, "
              "C is the number of output channels, "
              "H is the height of output, and "
              "W is the width of output.");
    AddOutput("Output",
              "(Tensor), "
              "the output of Deformable PSROIPooling. "
              "The format is NCHW, where N is the number of ROIs, "
              "C is the number of output channels, "
              "H is the height of output, and "
              "W is the width of output.");
    AddComment(R"DOC(
**DeformablePSROIPooling Operator**
DeformablePSROIPooling is a new method based on Region of interest pooling
(also known as RoI pooling).
The operator has four steps:

1. Dividing each region proposal into equal-sized sections with
   the pooled_width and pooled_height.

2. Add offset to pixel in ROI to get new location and the new value which are
   computed directly through bilinear interpolation with four nearest pixel.

3. Sample several points to get average values in each bin.

4. Copying these average values to the output buffer.

DeformablePSROIPooling is part of Deformable Convolutional Networks,
please refer to https://arxiv.org/abs/1703.06211 for more details.
    )DOC");
  }
};
124+
125+
class DeformablePSROIPoolOp : public framework::OperatorWithKernel {
126+
public:
127+
using framework::OperatorWithKernel::OperatorWithKernel;
128+
void InferShape(framework::InferShapeContext *ctx) const override {
129+
PADDLE_ENFORCE(ctx->HasInput("Input"),
130+
"Input(Input) of DeformablePSROIPoolOp"
131+
"should not be null.");
132+
PADDLE_ENFORCE(ctx->HasInput("ROIs"),
133+
"Input(ROIs) of DeformablePSROIPoolOp "
134+
"should not be null.");
135+
PADDLE_ENFORCE(ctx->HasInput("Trans"),
136+
"Input(Trans) of DeformablePSROIPoolOp "
137+
"should not be null.");
138+
PADDLE_ENFORCE(ctx->HasOutput("Output"),
139+
"Output(Output) of DeformablePSROIPoolOp "
140+
"should not be null.");
141+
PADDLE_ENFORCE(ctx->HasOutput("TopCount"),
142+
"Output(TopCount) of DeformablePSROIPoolOp "
143+
"should not be null.");
144+
auto input_dims = ctx->GetInputDim("Input");
145+
auto rois_dims = ctx->GetInputDim("ROIs");
146+
auto trans_dims = ctx->GetInputDim("Trans");
147+
PADDLE_ENFORCE(rois_dims.size() == 2,
148+
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
149+
"given as [[ x1, y1, x2, y2], ...].");
150+
PADDLE_ENFORCE(trans_dims.size() == 4,
151+
"The format of Input Trans is (N, 2, H, W).");
152+
auto pooled_height = ctx->Attrs().Get<int>("pooled_height");
153+
auto pooled_width = ctx->Attrs().Get<int>("pooled_width");
154+
auto spatial_scale = ctx->Attrs().Get<float>("spatial_scale");
155+
auto output_channels = ctx->Attrs().Get<int>("output_dim");
156+
auto group_size = ctx->Attrs().Get<std::vector<int>>("group_size");
157+
auto group_height = group_size[0];
158+
auto group_width = group_size[1];
159+
auto part_size = ctx->Attrs().Get<std::vector<int>>("part_size");
160+
auto part_height = part_size[0];
161+
auto part_width = part_size[1];
162+
auto sample_per_part = ctx->Attrs().Get<int>("sample_per_part");
163+
auto trans_std = ctx->Attrs().Get<float>("trans_std");
164+
PADDLE_ENFORCE(trans_std >= 0.0f, "trans_std must greater than 0.0");
165+
PADDLE_ENFORCE(input_dims[1] >= output_channels,
166+
"input channels must greater than out_channels");
167+
PADDLE_ENFORCE_GT(pooled_height, 0,
168+
"The pooled height must greater than 0");
169+
PADDLE_ENFORCE_GT(pooled_width, 0, "The pooled width must greater than 0");
170+
PADDLE_ENFORCE_GT(spatial_scale, 0.0f,
171+
"The spatial scale must greater than 0");
172+
PADDLE_ENFORCE_EQ(group_size.size(), 2,
173+
"The size of group_size should be 2.");
174+
PADDLE_ENFORCE_GT(group_height, 0,
175+
"The group_height in group_size must greater than 0");
176+
PADDLE_ENFORCE_GT(group_width, 0,
177+
"The group_width in group_size must greater than 0");
178+
PADDLE_ENFORCE_EQ(part_size.size(), 2,
179+
"The size of part_size should be 2.");
180+
PADDLE_ENFORCE_GT(part_height, 0,
181+
"The part_height in part_size must greater than 0");
182+
PADDLE_ENFORCE_GT(part_width, 0,
183+
"The part_width in part_size must greater than 0");
184+
PADDLE_ENFORCE(part_height <= trans_dims[2],
185+
"The height of trans must greater than part_height");
186+
PADDLE_ENFORCE(part_width <= trans_dims[3],
187+
"The width of trans must greater than part_width");
188+
PADDLE_ENFORCE_GT(sample_per_part, 0,
189+
"The sample_per_part must greater than 0");
190+
auto out_dims = input_dims;
191+
out_dims[0] = rois_dims[0];
192+
out_dims[1] = output_channels;
193+
out_dims[2] = pooled_height;
194+
out_dims[3] = pooled_width;
195+
ctx->SetOutputDim("Output", out_dims);
196+
ctx->SetOutputDim("TopCount", out_dims);
197+
}
198+
199+
protected:
200+
framework::OpKernelType GetExpectedKernelType(
201+
const framework::ExecutionContext &ctx) const override {
202+
return framework::OpKernelType(ctx.Input<Tensor>("Input")->type(),
203+
ctx.device_context());
204+
}
205+
};
206+
207+
class DeformablePSROIPoolGradOpDescMaker
208+
: public framework::SingleGradOpDescMaker {
209+
public:
210+
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
211+
212+
protected:
213+
std::unique_ptr<framework::OpDesc> Apply() const override {
214+
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
215+
216+
op->SetType("deformable_psroi_pooling_grad");
217+
op->SetInput("Input", Input("Input"));
218+
op->SetInput("Trans", Input("Trans"));
219+
op->SetInput("ROIs", Input("ROIs"));
220+
op->SetInput("TopCount", Output("TopCount"));
221+
op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));
222+
223+
op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
224+
op->SetOutput(framework::GradVarName("Trans"), InputGrad("Trans"));
225+
226+
op->SetAttrMap(Attrs());
227+
return op;
228+
}
229+
};
230+
231+
class DeformablePSROIPoolGradOp : public framework::OperatorWithKernel {
232+
public:
233+
using framework::OperatorWithKernel::OperatorWithKernel;
234+
void InferShape(framework::InferShapeContext *ctx) const override {
235+
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Output")),
236+
"The gradient of Output should not be null.");
237+
if (ctx->HasOutput(framework::GradVarName("Input"))) {
238+
ctx->SetOutputDim(framework::GradVarName("Input"),
239+
ctx->GetInputDim("Input"));
240+
}
241+
if (ctx->HasOutput(framework::GradVarName("Trans"))) {
242+
ctx->SetOutputDim(framework::GradVarName("Trans"),
243+
ctx->GetInputDim("Trans"));
244+
}
245+
}
246+
247+
protected:
248+
framework::OpKernelType GetExpectedKernelType(
249+
const framework::ExecutionContext &ctx) const override {
250+
return framework::OpKernelType(ctx.Input<Tensor>("Trans")->type(),
251+
ctx.device_context());
252+
}
253+
};
254+
255+
} // namespace operators
256+
} // namespace paddle
257+
258+
namespace ops = paddle::operators;
259+
using CPU = paddle::platform::CPUDeviceContext;
260+
REGISTER_OPERATOR(deformable_psroi_pooling, ops::DeformablePSROIPoolOp,
261+
ops::DeformablePSROIPoolOpMaker,
262+
ops::DeformablePSROIPoolGradOpDescMaker);
263+
REGISTER_OPERATOR(deformable_psroi_pooling_grad,
264+
ops::DeformablePSROIPoolGradOp);
265+
REGISTER_OP_CPU_KERNEL(deformable_psroi_pooling,
266+
ops::DeformablePSROIPoolCPUKernel<CPU, float>,
267+
ops::DeformablePSROIPoolCPUKernel<CPU, double>);
268+
REGISTER_OP_CPU_KERNEL(deformable_psroi_pooling_grad,
269+
ops::DeformablePSROIPoolGradCPUKernel<CPU, float>,
270+
ops::DeformablePSROIPoolGradCPUKernel<CPU, double>);

0 commit comments

Comments
 (0)