@@ -19,21 +19,19 @@
 #include "paddle/extension.h"
 
 template <typename data_t>
-void add_forward_kernel(const data_t* x_data,
-                        const data_t* y_data,
-                        data_t* out_data,
-                        int64_t numel) {
+void add_one_pointer(const data_t* x_data, data_t* out_data, int64_t numel) {
   for (size_t i = 0; i < numel; ++i) {
-    out_data[i] = x_data[i] + y_data[i];
+    out_data[i] += x_data[i];
   }
 }
 
 template <typename data_t>
-void add_backward_kernel(data_t* x_grad_data,
-                         const data_t* out_grad_data,
-                         int64_t numel) {
+void add_two_pointers(const data_t* x_data,
+                      const data_t* y_data,
+                      data_t* out_data,
+                      int64_t numel) {
   for (size_t i = 0; i < numel; ++i) {
-    x_grad_data[i] += out_grad_data[i];
+    out_data[i] = x_data[i] + y_data[i];
   }
 }
 
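The refactor above collapses the forward- and backward-specific loops into two reusable helpers: add_one_pointer accumulates one buffer into another (out += x), while add_two_pointers overwrites the output with an elementwise sum (out = x + y). A minimal standalone sketch of the same semantics, with no Paddle dependency and made-up buffer contents, just to illustrate what the helpers compute:

#include <cstdint>
#include <cstdio>

template <typename data_t>
void add_one_pointer(const data_t* x_data, data_t* out_data, int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) out_data[i] += x_data[i];  // out += x
}

template <typename data_t>
void add_two_pointers(const data_t* x_data, const data_t* y_data,
                      data_t* out_data, int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) out_data[i] = x_data[i] + y_data[i];  // out = x + y
}

int main() {
  float x[3] = {1.f, 2.f, 3.f}, y[3] = {10.f, 20.f, 30.f}, out[3] = {};
  add_two_pointers(x, y, out, 3);  // out == {11, 22, 33}
  add_one_pointer(x, out, 3);      // out == {12, 24, 36}
  std::printf("%g %g %g\n", out[0], out[1], out[2]);
}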
@@ -53,12 +51,12 @@ std::vector<paddle::Tensor> AddForward(
   PD_DISPATCH_FLOATING_TYPES(
       x.type(), "AddForward", ([&] {
         if (y) {
-          add_forward_kernel<data_t>(x.data<data_t>(),
-                                     y->data<data_t>(),
-                                     out.data<data_t>(),
-                                     x.size());
+          add_two_pointers<data_t>(x.data<data_t>(),
+                                   y->data<data_t>(),
+                                   out.data<data_t>(),
+                                   x.size());
         } else {
-          add_forward_kernel<data_t>(
+          add_two_pointers<data_t>(
               x.data<data_t>(), x.data<data_t>(), out.data<data_t>(), x.size());
         }
       }));
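In AddForward both branches now go through add_two_pointers: when the optional input Y is present the op computes out = x + y, and when it is absent x is passed as both operands so the op falls back to out = x + x. A rough reference version of that optional-input fallback, sketched with std::optional and std::vector in place of Paddle's types (the name add_forward_ref is made up for illustration):

#include <optional>
#include <vector>

std::vector<float> add_forward_ref(const std::vector<float>& x,
                                   const std::optional<std::vector<float>>& y) {
  const std::vector<float>& rhs = y ? *y : x;  // fall back to x when Y is absent
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] + rhs[i];
  return out;
}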
@@ -69,7 +67,6 @@ std::vector<paddle::DataType> AddInferDtype(
     const paddle::DataType& x_dtype,
     const paddle::optional<paddle::DataType>& y_dtype) {
   if (y_dtype) {
-    std::cout << "DEBUG AddInferDtype" << *y_dtype << std::endl;
     return {*y_dtype};
   }
   return {x_dtype};
@@ -98,18 +95,14 @@ std::vector<paddle::Tensor> AddBackward(
   PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
 
   paddle::Tensor x_grad = paddle::zeros(x.shape(), x.dtype(), x.place());
-  paddle::Tensor y_grad = paddle::zeros(x.shape(), x.dtype(), x.place());
 
   PD_DISPATCH_FLOATING_TYPES(
       out_grad.type(), "AddBackward", ([&] {
-        add_backward_kernel<data_t>(
-            x_grad.data<data_t>(), out_grad.data<data_t>(), out_grad.size());
-        if (y) {
-          add_backward_kernel<data_t>(
-              y_grad.data<data_t>(), out_grad.data<data_t>(), out_grad.size());
-        } else {
-          add_backward_kernel<data_t>(
-              x_grad.data<data_t>(), out_grad.data<data_t>(), out_grad.size());
+        add_one_pointer<data_t>(
+            out_grad.data<data_t>(), x_grad.data<data_t>(), out_grad.size());
+        if (!y) {
+          add_one_pointer<data_t>(
+              out_grad.data<data_t>(), x_grad.data<data_t>(), out_grad.size());
         }
       }));
 
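The rewritten AddBackward expresses the gradient rule through the accumulate helper instead of a dedicated backward kernel: x_grad starts from zeros, out_grad is added once unconditionally (d(x + y)/dx = 1), and a second time only when Y is absent, since out = x + x then gives x_grad = 2 * out_grad. The unused y_grad buffer from the old code is dropped because the grad op only declares Grad(X) as an output. A small Paddle-free reference of the same rule (add_backward_ref is a hypothetical name):

#include <vector>

std::vector<float> add_backward_ref(const std::vector<float>& out_grad, bool has_y) {
  std::vector<float> x_grad(out_grad.size(), 0.f);
  for (size_t i = 0; i < out_grad.size(); ++i) {
    x_grad[i] += out_grad[i];              // always: gradient of x in x + y
    if (!has_y) x_grad[i] += out_grad[i];  // Y absent: out = x + x, so double it
  }
  return x_grad;
}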
@@ -127,3 +120,91 @@ PD_BUILD_GRAD_OP(custom_add)
     .Inputs({"X", paddle::Optional("Y"), paddle::Grad("Out")})
     .Outputs({paddle::Grad("X")})
     .SetKernelFn(PD_KERNEL(AddBackward));
+
+/*
+if (y) {
+  out = x + y[0] + y[1] + ...;
+} else {
+  out = x + x;
+}
+*/
+std::vector<paddle::Tensor> AddVectorForward(
+    const paddle::Tensor& x,
+    const paddle::optional<std::vector<paddle::Tensor>>& y) {  // NOLINT
+  PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
+  paddle::Tensor out = paddle::zeros(x.shape(), x.dtype(), x.place());
+
+  PD_DISPATCH_FLOATING_TYPES(
+      x.type(), "AddVectorForward", ([&] {
+        if (y) {
+          add_one_pointer<data_t>(
+              x.data<data_t>(), out.data<data_t>(), out.size());
+          for (size_t i = 0; i < y->size(); ++i) {
+            add_one_pointer<data_t>(
+                y->at(i).data<data_t>(), out.data<data_t>(), out.size());
+          }
+        } else {
+          add_two_pointers<data_t>(
+              x.data<data_t>(), x.data<data_t>(), out.data<data_t>(), x.size());
+        }
+      }));
+  return {out};
+}
+
+std::vector<paddle::DataType> AddVectorInferDtype(
+    const paddle::DataType& x_dtype,
+    const paddle::optional<std::vector<paddle::DataType>>& y_dtype) {
+  if (y_dtype) {
+    return {y_dtype->at(0)};
+  }
+  return {x_dtype};
+}
+
+std::vector<std::vector<int64_t>> AddVectorInferShape(
+    const std::vector<int64_t>& x_shape,
+    const paddle::optional<std::vector<std::vector<int64_t>>>& y_shape) {
+  if (y_shape) {
+    return {y_shape->at(0)};
+  }
+  return {x_shape};
+}
+
+/*
+if (y) {
+  x_grad = out_grad;
+} else {
+  x_grad = out_grad + out_grad;
+}
+*/
+std::vector<paddle::Tensor> AddVectorBackward(
+    const paddle::Tensor& x,
+    const paddle::optional<std::vector<paddle::Tensor>>& y,
+    const paddle::Tensor& out_grad) {  // NOLINT
+  PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
+
+  paddle::Tensor x_grad = paddle::zeros(x.shape(), x.dtype(), x.place());
+
+  PD_DISPATCH_FLOATING_TYPES(
+      out_grad.type(), "AddVectorBackward", ([&] {
+        add_one_pointer<data_t>(
+            out_grad.data<data_t>(), x_grad.data<data_t>(), out_grad.size());
+        if (!y) {
+          add_one_pointer<data_t>(
+              out_grad.data<data_t>(), x_grad.data<data_t>(), out_grad.size());
+        }
+      }));
+
+  return {x_grad};
+}
+
+PD_BUILD_OP(custom_add_vec)
+    .Inputs({"X", paddle::Optional(paddle::Vec("Y"))})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(AddVectorForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(AddVectorInferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(AddVectorInferDtype));
+
+PD_BUILD_GRAD_OP(custom_add_vec)
+    .Inputs({"X", paddle::Optional(paddle::Vec("Y")), paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X")})
+    .SetKernelFn(PD_KERNEL(AddVectorBackward));
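For the new custom_add_vec op, the forward adds every tensor in the optional vector input Y onto x (or computes x + x when Y is missing), and the backward mirrors custom_add: x_grad equals out_grad when Y is present and 2 * out_grad otherwise. A rough reference of the forward semantics using std::optional/std::vector stand-ins (add_vector_forward_ref is a made-up name; all shapes are assumed to match):

#include <optional>
#include <vector>

std::vector<float> add_vector_forward_ref(
    const std::vector<float>& x,
    const std::optional<std::vector<std::vector<float>>>& y) {
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = y ? x[i] : x[i] + x[i];
  if (y) {
    for (const auto& yi : *y)  // out = x + y[0] + y[1] + ...
      for (size_t i = 0; i < x.size(); ++i) out[i] += yi[i];
  }
  return out;
}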