
Commit db6f726

[Paddle Tensor Standardization, Phase 2] add, sub, div, mul support 0-size tensors (PaddlePaddle#71089)
* add test
* fix win
* fix
* add test
* fix test
1 parent b9b5a48 commit db6f726

9 files changed: +399 −1 lines
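
In short: every CPU and GPU elementwise kernel for add, subtract, multiply, and divide gains the same early-return guard. If either input has zero elements, the kernel allocates the (already zero-size) output and returns without dispatching any compute. Below is a minimal sketch of the resulting user-facing behavior, assuming a Paddle build that includes this patch; the shapes mirror the new tests:

import numpy as np
import paddle

# A zero-size operand broadcasts to a zero-size result; the patched
# kernels allocate the empty output and skip the compute entirely.
x = paddle.to_tensor(np.random.rand(1, 3, 4).astype('float32'))
y = paddle.to_tensor(np.random.rand(0, 3, 4).astype('float32'))

z = paddle.add(x, y)
print(z.shape)  # [0, 3, 4]
np.testing.assert_allclose(z.numpy(), np.add(x.numpy(), y.numpy()))

The same guard covers paddle.subtract, paddle.multiply, and paddle.divide.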

paddle/phi/kernels/cpu/elementwise_add_kernel.cc

Lines changed: 5 additions & 0 deletions
@@ -51,6 +51,11 @@ void AddKernel(const Context& dev_ctx,
                const DenseTensor& x,
                const DenseTensor& y,
                DenseTensor* out) {
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   AddFunctor<T, Context>(dev_ctx, x, y, -1, out);
 }

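The guard relies on out->dims() already holding the broadcast result when the kernel runs (in PHI, output shapes are computed by InferMeta before kernel dispatch), so out->Resize(out->dims()) simply re-commits the existing metadata while Alloc<T> produces an empty buffer. Zero-size dimensions follow the usual NumPy broadcast rule: a dimension of 1 stretches to match the other operand, including to 0. A quick standalone check of the shapes used in the new tests below; np.broadcast_shapes is a NumPy helper used here only to illustrate the expected output shapes:

import numpy as np

# Zero-size dims broadcast like any other dim: 1 stretches, equal dims match.
print(np.broadcast_shapes((3,), (0, 3)))          # (0, 3)
print(np.broadcast_shapes((1, 3, 4), (0, 3, 4)))  # (0, 3, 4)
print(np.broadcast_shapes((1, 0, 2), (3, 0, 1)))  # (3, 0, 2)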

paddle/phi/kernels/cpu/elementwise_divide_kernel.cc

Lines changed: 5 additions & 1 deletion
@@ -27,7 +27,11 @@ void DivideKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
                   DenseTensor* out) {
-  // allocate memory for out
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   dev_ctx.template Alloc<T>(out);
   if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
     SameDimsElementwiseCompute<SameDimsDivideFunctor<CPUContext, T>>()(

paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc

Lines changed: 5 additions & 0 deletions
@@ -27,6 +27,11 @@ void MultiplyKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     const DenseTensor& y,
                     DenseTensor* out) {
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   dev_ctx.template Alloc<T>(out);
   if (x.dims() == y.dims()) {
     SameDimsElementwiseCompute<SameDimsMultiplyFunctor<CPUContext, T>>()(

paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc

Lines changed: 5 additions & 0 deletions
@@ -27,6 +27,11 @@ void SubtractKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     const DenseTensor& y,
                     DenseTensor* out) {
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   dev_ctx.template Alloc<T>(out);
   if (x.dims() == y.dims()) {
     SameDimsElementwiseCompute<SameDimsSubtractFunctor<CPUContext, T>>()(

paddle/phi/kernels/kps/elementwise_kernel.cu

Lines changed: 20 additions & 0 deletions
@@ -32,6 +32,11 @@ void SubtractKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     const DenseTensor& y,
                     DenseTensor* out) {
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   phi::SubtractRawKernel<T, Context>(dev_ctx, x, y, -1, out);
 }

@@ -40,6 +45,11 @@ void MultiplyKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     const DenseTensor& y,
                     DenseTensor* out) {
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   phi::MultiplyRawKernel<T, Context>(dev_ctx, x, y, -1, out);
 }

@@ -48,6 +58,11 @@ void DivideKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
                   DenseTensor* out) {
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   phi::DivideRawKernel<T, Context>(dev_ctx, x, y, -1, out);
 }

@@ -85,6 +100,11 @@ void AddKernel(const Context& dev_ctx,
                const DenseTensor& x,
                const DenseTensor& y,
                DenseTensor* out) {
+  if (x.numel() == 0 || y.numel() == 0) {
+    out->Resize(out->dims());
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
 #ifdef PADDLE_WITH_CUDA
   if (x.dtype() == phi::DataType::FLOAT32 &&
       (y.dtype() == phi::DataType::BFLOAT16 ||

test/legacy_test/test_elementwise_add_op.py

Lines changed: 91 additions & 0 deletions
@@ -144,6 +144,36 @@ def init_input_output(self):
         self.out = np.add(self.x, self.y)


+class TestElementwiseAddOp_ZeroSize1(TestElementwiseAddOp):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [3]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [0, 3]).astype(self.dtype)
+        self.out = np.add(self.x, self.y)
+
+    def test_check_grad_normal(self):
+        pass
+
+    def test_check_grad_ignore_x(self):
+        pass
+
+    def test_check_grad_ignore_y(self):
+        pass
+
+
+class TestElementwiseAddOp_ZeroSize2(TestElementwiseAddOp_ZeroSize1):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [1, 3, 4]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [0, 3, 4]).astype(self.dtype)
+        self.out = np.add(self.x, self.y)
+
+
+class TestElementwiseAddOp_ZeroSize3(TestElementwiseAddOp_ZeroSize1):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [1, 0, 2]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [3, 0, 1]).astype(self.dtype)
+        self.out = np.add(self.x, self.y)
+
+
 @unittest.skipIf(
     not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
 )

@@ -659,6 +689,67 @@ def test_dygraph(self):
         self.assertEqual((np_z == z_expected).all(), True)


+class TestAddApiZeroSize(unittest.TestCase):
+    def init_data(self):
+        self.x_numpy = np.random.rand(1, 3, 4).astype('float32')
+        self.y_numpy = np.random.rand(0, 3, 4).astype('float32')
+
+    def _executed_api(self, x, y, name=None):
+        return paddle.add(x, y, name)
+
+    def test_declarative(self):
+        self.init_data()
+        with base.program_guard(base.Program()):
+            x = paddle.static.data(
+                name="x", shape=self.x_numpy.shape, dtype=self.x_numpy.dtype
+            )
+            y = paddle.static.data(
+                name="y", shape=self.y_numpy.shape, dtype=self.y_numpy.dtype
+            )
+            z = self._executed_api(x, y)
+
+            place = base.CPUPlace()
+            exe = base.Executor(place)
+            z_value = exe.run(
+                feed={"x": self.x_numpy, "y": self.y_numpy}, fetch_list=[z]
+            )
+            np_z = np.add(self.x_numpy, self.y_numpy)
+            np.testing.assert_allclose(z_value[0], np_z, rtol=1e-05, atol=1e-05)
+
+    def test_dygraph(self):
+        self.init_data()
+        places = (
+            [paddle.CPUPlace(), paddle.CUDAPlace(0)]
+            if core.is_compiled_with_cuda()
+            else [paddle.CPUPlace()]
+        )
+        for place in places:
+            with base.dygraph.guard(place):
+                x = paddle.to_tensor(self.x_numpy)
+                y = paddle.to_tensor(self.y_numpy)
+                z = self._executed_api(x, y)
+                np_z = np.add(self.x_numpy, self.y_numpy)
+                np.testing.assert_allclose(z, np_z, rtol=1e-05, atol=1e-05)
+
+
+class TestAddApiZeroSize2(TestAddApiZeroSize):
+    def init_data(self):
+        self.x_numpy = np.random.rand(3).astype('float32')
+        self.y_numpy = np.random.rand(0, 3).astype('float32')
+
+
+class TestAddApiZeroSize3(TestAddApiZeroSize):
+    def init_data(self):
+        self.x_numpy = np.random.rand(2, 0).astype('float32')
+        self.y_numpy = np.random.rand(1, 0).astype('float32')
+
+
+class TestAddApiZeroSize4(TestAddApiZeroSize):
+    def init_data(self):
+        self.x_numpy = np.random.rand(1, 0, 2).astype('float32')
+        self.y_numpy = np.random.rand(3, 0, 1).astype('float32')
+
+
 class TestAddInplaceApi(TestAddApi):
     def _executed_api(self, x, y, name=None):
         return x.add_(y, name)
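
Note that the new op tests deliberately stub out the gradient checks: with zero elements there is nothing to numerically differentiate, so only the forward result is compared against NumPy. That forward expectation can be sanity-checked standalone; a small sketch using the same shapes as TestElementwiseAddOp_ZeroSize3:

import numpy as np

# Broadcasting [1, 0, 2] against [3, 0, 1] yields an empty (3, 0, 2) result.
x = np.random.uniform(0.1, 1, [1, 0, 2]).astype('float32')
y = np.random.uniform(0.1, 1, [3, 0, 1]).astype('float32')
out = np.add(x, y)
print(out.shape, out.size)  # (3, 0, 2) 0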

test/legacy_test/test_elementwise_div_op.py

Lines changed: 85 additions & 0 deletions
@@ -188,6 +188,30 @@ def compute_gradient_y(self, grad_out, out, y):
         return -1 * grad_out * out / y


+class TestElementwiseDivOp_ZeroSize1(ElementwiseDivOp):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [3]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [0, 3]).astype(self.dtype)
+        self.out = np.divide(self.x, self.y)
+
+    def test_check_gradient(self):
+        pass
+
+
+class TestElementwiseDivOp_ZeroSize2(TestElementwiseDivOp_ZeroSize1):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [1, 3, 4]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [0, 3, 4]).astype(self.dtype)
+        self.out = np.divide(self.x, self.y)
+
+
+class TestElementwiseDivOp_ZeroSize3(TestElementwiseDivOp_ZeroSize1):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [1, 0, 2]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [3, 0, 1]).astype(self.dtype)
+        self.out = np.divide(self.x, self.y)
+
+
 @unittest.skipIf(
     not core.is_compiled_with_cuda()
     or not core.is_bfloat16_supported(core.CUDAPlace(0)),

@@ -697,6 +721,67 @@ def test_pir_div(self):
         np.testing.assert_allclose(actual_res[2], expect_b_grad)


+class TestDivApiZeroSize(unittest.TestCase):
+    def init_data(self):
+        self.x_numpy = np.random.rand(1, 3, 4).astype('float32')
+        self.y_numpy = np.random.rand(0, 3, 4).astype('float32')
+
+    def _executed_api(self, x, y, name=None):
+        return paddle.divide(x, y, name)
+
+    def test_declarative(self):
+        self.init_data()
+        with base.program_guard(base.Program()):
+            x = paddle.static.data(
+                name="x", shape=self.x_numpy.shape, dtype=self.x_numpy.dtype
+            )
+            y = paddle.static.data(
+                name="y", shape=self.y_numpy.shape, dtype=self.y_numpy.dtype
+            )
+            z = self._executed_api(x, y)
+
+            place = base.CPUPlace()
+            exe = base.Executor(place)
+            z_value = exe.run(
+                feed={"x": self.x_numpy, "y": self.y_numpy}, fetch_list=[z]
+            )
+            np_z = np.divide(self.x_numpy, self.y_numpy)
+            np.testing.assert_allclose(z_value[0], np_z, rtol=1e-05, atol=1e-05)
+
+    def test_dygraph(self):
+        self.init_data()
+        places = (
+            [paddle.CPUPlace(), paddle.CUDAPlace(0)]
+            if core.is_compiled_with_cuda()
+            else [paddle.CPUPlace()]
+        )
+        for place in places:
+            with base.dygraph.guard(place):
+                x = paddle.to_tensor(self.x_numpy)
+                y = paddle.to_tensor(self.y_numpy)
+                z = self._executed_api(x, y)
+                np_z = np.divide(self.x_numpy, self.y_numpy)
+                np.testing.assert_allclose(z, np_z, rtol=1e-05, atol=1e-05)
+
+
+class TestDivApiZeroSize2(TestDivApiZeroSize):
+    def init_data(self):
+        self.x_numpy = np.random.rand(3).astype('float32')
+        self.y_numpy = np.random.rand(0, 3).astype('float32')
+
+
+class TestDivApiZeroSize3(TestDivApiZeroSize):
+    def init_data(self):
+        self.x_numpy = np.random.rand(2, 0).astype('float32')
+        self.y_numpy = np.random.rand(1, 0).astype('float32')
+
+
+class TestDivApiZeroSize4(TestDivApiZeroSize):
+    def init_data(self):
+        self.x_numpy = np.random.rand(1, 0, 2).astype('float32')
+        self.y_numpy = np.random.rand(3, 0, 1).astype('float32')
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()
