diff --git a/paddle/fluid/prim/api/api.yaml b/paddle/fluid/prim/api/api.yaml
index e47c7a45713dcb..7c5a827adeb4df 100644
--- a/paddle/fluid/prim/api/api.yaml
+++ b/paddle/fluid/prim/api/api.yaml
@@ -2,6 +2,10 @@
 - subtract
 - multiply
 - divide
+- bitwise_and
+- bitwise_not
+- bitwise_or
+- bitwise_xor
 - unsqueeze
 - exp
 - scale
diff --git a/paddle/fluid/prim/tests/test_eager_prim.cc b/paddle/fluid/prim/tests/test_eager_prim.cc
index a4ab38a6ad137b..63a524a6638de0 100644
--- a/paddle/fluid/prim/tests/test_eager_prim.cc
+++ b/paddle/fluid/prim/tests/test_eager_prim.cc
@@ -35,6 +35,10 @@ PD_DECLARE_KERNEL(tanh_grad, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(pow, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(multiply, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_and, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_or, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_xor, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_not, CPU, ALL_LAYOUT);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(tanh, GPU, ALL_LAYOUT);
@@ -42,6 +46,11 @@ PD_DECLARE_KERNEL(tanh_grad, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(pow, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(multiply, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_and, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_or, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_xor, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_not, KPS, ALL_LAYOUT);
+
 #endif
 
 namespace paddle {
@@ -81,7 +90,7 @@ TEST(EagerPrim, TanhBackwardTest) {
   paddle::experimental::Tensor out1 = tanh_ad_func(tensor1);
   std::vector<paddle::experimental::Tensor> outs1 = {out1};
-  // Disable prim
+  // Enable prim
   PrimCommonUtils::SetBwdPrimEnabled(true);
   ASSERT_TRUE(PrimCommonUtils::IsBwdPrimEnabled());
   // 4. Run Backward
@@ -104,6 +113,44 @@ TEST(EagerPrim, TanhBackwardTest) {
                 ->data<float>()[0]);
 }
 
+TEST(EagerPrim, LogicalOperantsTest) {
+  // 1. Initialized
+  eager_test::InitEnv(paddle::platform::CPUPlace());
+  FLAGS_tensor_operants_mode = "eager";
+  paddle::prim::InitTensorOperants();
+  // 2. pre
+  paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
+  paddle::experimental::Tensor tensor0 =
+      ::egr::egr_utils_api::CreateTensorWithValue(ddim,
+                                                  paddle::platform::CPUPlace(),
+                                                  phi::DataType::INT32,
+                                                  phi::DataLayout::NCHW,
+                                                  1 /*value*/,
+                                                  true /*is_leaf*/);
+  ::egr::egr_utils_api::RetainGradForTensor(tensor0);
+  paddle::experimental::Tensor tensor1 =
+      ::egr::egr_utils_api::CreateTensorWithValue(ddim,
+                                                  paddle::platform::CPUPlace(),
+                                                  phi::DataType::INT32,
+                                                  phi::DataLayout::NCHW,
+                                                  0 /*value*/,
+                                                  true /*is_leaf*/);
+  ::egr::egr_utils_api::RetainGradForTensor(tensor1);
+  // 3. Run Forward once
+  paddle::experimental::Tensor out0 = tensor0 & tensor1;
+  paddle::experimental::Tensor out1 = bitwise_and_ad_func(tensor0, tensor1);
+  EXPECT_EQ(out0.data<int>()[0], out1.data<int>()[0]);
+  out0 = tensor0 | tensor1;
+  out1 = bitwise_or_ad_func(tensor0, tensor1);
+  EXPECT_EQ(out0.data<int>()[0], out1.data<int>()[0]);
+  out0 = tensor0 ^ tensor1;
+  out1 = bitwise_xor_ad_func(tensor0, tensor1);
+  EXPECT_EQ(out0.data<int>()[0], out1.data<int>()[0]);
+  out0 = ~tensor0;
+  out1 = bitwise_not_ad_func(tensor0);
+  EXPECT_EQ(out0.data<int>()[0], out1.data<int>()[0]);
+}
+
 TEST(EagerPrim, TestFlags) {
   PrimCommonUtils::SetBwdPrimEnabled(true);
   ASSERT_TRUE(PrimCommonUtils::IsBwdPrimEnabled());
diff --git a/paddle/fluid/prim/tests/test_static_prim.cc b/paddle/fluid/prim/tests/test_static_prim.cc
index 0125829ef59016..5433378f70e1ac 100644
--- a/paddle/fluid/prim/tests/test_static_prim.cc
+++ b/paddle/fluid/prim/tests/test_static_prim.cc
@@ -38,6 +38,10 @@ PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(subtract, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(multiply, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(concat, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_and, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_or, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_xor, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_not, CPU, ALL_LAYOUT);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(tanh, GPU, ALL_LAYOUT);
@@ -47,6 +51,10 @@ PD_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(subtract, KPS, ALL_LAYOUT);
 PD_DECLARE_KERNEL(multiply, KPS, ALL_LAYOUT);
 PD_DECLARE_KERNEL(concat, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_and, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_or, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_xor, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(bitwise_not, KPS, ALL_LAYOUT);
 #endif
 namespace paddle {
 namespace prim {
@@ -362,6 +370,68 @@ TEST(StaticCompositeGradMaker, TestMutiOutputMethod) {
   ASSERT_EQ(fw_out_name[1], "out2");
 }
 
+TEST(StaticCompositeGradMaker, LogicalOperantsTest) {
+  // Initialized environment
+  FLAGS_tensor_operants_mode = "static";
+  paddle::OperantsManager::Instance().static_operants.reset(
+      new paddle::prim::StaticTensorOperants());
+
+  TestBaseProgram base_program = TestBaseProgram();
+  auto* target_block = base_program.GetBlock(0);
+  std::vector<int64_t> shape = {2, 2};
+  StaticCompositeContext::Instance().SetBlock(target_block);
+  Tensor x0 = prim::empty<prim::DescTensor>(
+      shape, phi::DataType::INT32, phi::CPUPlace());
+  std::string x0_name =
+      std::static_pointer_cast<prim::DescTensor>(x0.impl())->Name();
+  Tensor x1 = prim::empty<prim::DescTensor>(
+      shape, phi::DataType::INT32, phi::CPUPlace());
+  std::string x1_name =
+      std::static_pointer_cast<prim::DescTensor>(x1.impl())->Name();
+  Tensor x2 = prim::empty<prim::DescTensor>(
+      shape, phi::DataType::INT32, phi::CPUPlace());
+  std::string x2_name =
+      std::static_pointer_cast<prim::DescTensor>(x2.impl())->Name();
+  Tensor x3 = prim::empty<prim::DescTensor>(
+      shape, phi::DataType::INT32, phi::CPUPlace());
+  std::string x3_name =
+      std::static_pointer_cast<prim::DescTensor>(x3.impl())->Name();
+
+  Tensor out_not = ~x0;
+  Tensor out_and = out_not & x1;
+  Tensor out_or = out_and | x2;
+  Tensor out_xor = out_or ^ x3;
+
+  ASSERT_EQ(target_block->AllOps().size(), static_cast<std::size_t>(4));
+  ASSERT_EQ(target_block->AllOps()[0]->Type(), "bitwise_not");
+  ASSERT_EQ(target_block->AllOps()[0]->Inputs().at("X").size(),
+            static_cast<std::size_t>(1));
+  ASSERT_EQ(target_block->AllOps()[0]->Inputs().at("X")[0], x0_name);
+  ASSERT_EQ(target_block->AllOps()[0]->Outputs().at("Out").size(),
+            std::size_t(1));
+
+  ASSERT_EQ(target_block->AllOps()[1]->Type(), "bitwise_and");
"bitwise_and"); + ASSERT_EQ(target_block->AllOps()[1]->Inputs().at("Y").size(), + static_cast(1)); + ASSERT_EQ(target_block->AllOps()[1]->Inputs().at("Y")[0], x1_name); + ASSERT_EQ(target_block->AllOps()[1]->Outputs().at("Out").size(), + std::size_t(1)); + + ASSERT_EQ(target_block->AllOps()[2]->Type(), "bitwise_or"); + ASSERT_EQ(target_block->AllOps()[2]->Inputs().at("Y").size(), + static_cast(1)); + ASSERT_EQ(target_block->AllOps()[2]->Inputs().at("Y")[0], x2_name); + ASSERT_EQ(target_block->AllOps()[2]->Outputs().at("Out").size(), + std::size_t(1)); + + ASSERT_EQ(target_block->AllOps()[3]->Type(), "bitwise_xor"); + ASSERT_EQ(target_block->AllOps()[3]->Inputs().at("Y").size(), + static_cast(1)); + ASSERT_EQ(target_block->AllOps()[3]->Inputs().at("Y")[0], x3_name); + ASSERT_EQ(target_block->AllOps()[3]->Outputs().at("Out").size(), + std::size_t(1)); +} + TEST(StaticPrim, TestFlags) { PrimCommonUtils::SetBwdPrimEnabled(true); ASSERT_TRUE(PrimCommonUtils::IsBwdPrimEnabled()); @@ -378,3 +448,7 @@ USE_OP_ITSELF(elementwise_mul); USE_OP_ITSELF(elementwise_sub); USE_OP_ITSELF(elementwise_pow); USE_OP_ITSELF(scale); +USE_OP_ITSELF(bitwise_xor); +USE_OP_ITSELF(bitwise_and); +USE_OP_ITSELF(bitwise_not); +USE_OP_ITSELF(bitwise_or); diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index 4bae72e029ca31..b3ddc03785ae8a 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -550,6 +550,14 @@ class PADDLE_API Tensor final { Tensor operator-() const; + Tensor operator~() const; + + Tensor operator&(const Tensor& other) const; + + Tensor operator|(const Tensor& other) const; + + Tensor operator^(const Tensor& other) const; + /* Part 8: Autograd methods */ /** @@ -669,6 +677,10 @@ class PADDLE_API Tensor final { Tensor divide(const Scalar& y) const; Tensor multiply(const Scalar& y) const; Tensor subtract(const Scalar& y) const; + Tensor bitwise_and(const Tensor& y) const; + Tensor bitwise_or(const Tensor& y) const; + Tensor bitwise_xor(const Tensor& y) const; + Tensor bitwise_not() const; Tensor pow(const Tensor& y) const; Tensor pow(const Scalar& y) const; diff --git a/paddle/phi/api/yaml/generator/tensor_operants_gen.py b/paddle/phi/api/yaml/generator/tensor_operants_gen.py index c2fd879d31042d..a30bde15986ee5 100644 --- a/paddle/phi/api/yaml/generator/tensor_operants_gen.py +++ b/paddle/phi/api/yaml/generator/tensor_operants_gen.py @@ -29,6 +29,7 @@ indent = " " +# E.g.: Prim uses `elementwise_pow + fill_constant` to replace `pow`, so that we use this map to generate the `pow` signature when iterating over `elementwise_pow` API. 
 specific_ops_map = {"elementwise_pow": "pow"}
 
@@ -149,6 +150,22 @@ class TensorOperantsBase {
   return scale(-1.0, 0.0, true);
 }
 
+Tensor Tensor::operator~() const {
+  return bitwise_not();
+}
+
+Tensor Tensor::operator&(const Tensor &other) const {
+  return bitwise_and(other);
+}
+
+Tensor Tensor::operator|(const Tensor &other) const {
+  return bitwise_or(other);
+}
+
+Tensor Tensor::operator^(const Tensor &other) const {
+  return bitwise_xor(other);
+}
+
 Tensor Tensor::pow(const Tensor& y) const {
   return paddle::OperantsManager::Instance().pow(static_cast<const Tensor &>(*this), y);
 }
diff --git a/paddle/phi/api/yaml/tensor_operants.yaml b/paddle/phi/api/yaml/tensor_operants.yaml
index 80eb4d12ffc576..629408c4f40173 100644
--- a/paddle/phi/api/yaml/tensor_operants.yaml
+++ b/paddle/phi/api/yaml/tensor_operants.yaml
@@ -1,8 +1,13 @@
 # Attach operants to Tensor, this file should be consistent with the declaration in `tensor.h`
+# Ensure this file is a subset of `paddle/fluid/prim/api/api.yaml`
 - add
 - subtract
 - multiply
 - divide
+- bitwise_and
+- bitwise_not
+- bitwise_or
+- bitwise_xor
 - unsqueeze
 - exp
 - scale
diff --git a/python/paddle/fluid/tests/custom_op/custom_tensor_operator.cc b/python/paddle/fluid/tests/custom_op/custom_tensor_operator.cc
index f46d0ec8954b28..3036fc89dea5a3 100644
--- a/python/paddle/fluid/tests/custom_op/custom_tensor_operator.cc
+++ b/python/paddle/fluid/tests/custom_op/custom_tensor_operator.cc
@@ -79,6 +79,37 @@ PD_BUILD_GRAD_OP(custom_scalar_add)
     .Outputs({paddle::Grad("X")})
     .SetKernelFn(PD_KERNEL(ScalarAddBackward));
 
+// y = 1 + x
+std::vector<paddle::Tensor> LeftScalarAddForward(const paddle::Tensor& x) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {1 + x};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+// dy / dx = 1 * grad_out
+std::vector<paddle::Tensor> LeftScalarAddBackward(
+    const paddle::Tensor& x,
+    const paddle::Tensor& out,
+    const paddle::Tensor& grad_out) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {1 * grad_out};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_left_scalar_add)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(LeftScalarAddForward));
+
+PD_BUILD_GRAD_OP(custom_left_scalar_add)
+    .Inputs({"X", "Out", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X")})
+    .SetKernelFn(PD_KERNEL(LeftScalarAddBackward));
+
 // y = x - 1
 std::vector<paddle::Tensor> SubtractForward(const paddle::Tensor& x) {
   if (x.is_cpu() || x.is_gpu()) {
@@ -141,6 +172,37 @@ PD_BUILD_GRAD_OP(custom_scalar_subtract)
     .Outputs({paddle::Grad("X")})
     .SetKernelFn(PD_KERNEL(ScalarSubtractBackward));
 
+// y = - 1 + x
+std::vector<paddle::Tensor> LeftScalarSubtractForward(const paddle::Tensor& x) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {-1 + x};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+// dy / dx = 1 * grad_out
+std::vector<paddle::Tensor> LeftScalarSubtractBackward(
+    const paddle::Tensor& x,
+    const paddle::Tensor& out,
+    const paddle::Tensor& grad_out) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {1 * grad_out};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_left_scalar_subtract)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(LeftScalarSubtractForward));
+
+PD_BUILD_GRAD_OP(custom_left_scalar_subtract)
+    .Inputs({"X", "Out", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X")})
+    .SetKernelFn(PD_KERNEL(LeftScalarSubtractBackward));
+
 // y = x * 5
 std::vector<paddle::Tensor> MultiplyForward(const paddle::Tensor& x) {
   if (x.is_cpu() || x.is_gpu()) {
@@ -206,6 +268,37 @@ PD_BUILD_GRAD_OP(custom_scalar_multiply)
     .Outputs({paddle::Grad("X")})
     .SetKernelFn(PD_KERNEL(ScalarMultiplyBackward));
 
+// y = 5 * x
+std::vector<paddle::Tensor> LeftScalarMultiplyForward(const paddle::Tensor& x) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {5 * x};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+// dy / dx = 5 * grad_out
+std::vector<paddle::Tensor> LeftScalarMultiplyBackward(
+    const paddle::Tensor& x,
+    const paddle::Tensor& out,
+    const paddle::Tensor& grad_out) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {5 * grad_out};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_left_scalar_multiply)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(LeftScalarMultiplyForward));
+
+PD_BUILD_GRAD_OP(custom_left_scalar_multiply)
+    .Inputs({"X", "Out", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X")})
+    .SetKernelFn(PD_KERNEL(LeftScalarMultiplyBackward));
+
 // y = 1 / x
 std::vector<paddle::Tensor> DivideForward(const paddle::Tensor& x) {
   if (x.is_cpu() || x.is_gpu()) {
@@ -270,3 +363,93 @@ PD_BUILD_GRAD_OP(custom_scalar_divide)
     .Inputs({"X", "Out", paddle::Grad("Out")})
     .Outputs({paddle::Grad("X")})
     .SetKernelFn(PD_KERNEL(ScalarDivideBackward));
+
+// y = 1 / x
+std::vector<paddle::Tensor> LeftScalarDivideForward(const paddle::Tensor& x) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {1 / x};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+// dy / dx = -grad_out / (x * x)
+std::vector<paddle::Tensor> LeftScalarDivideBackward(
+    const paddle::Tensor& x,
+    const paddle::Tensor& out,
+    const paddle::Tensor& grad_out) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {-grad_out / (x * x)};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_left_scalar_divide)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(LeftScalarDivideForward));
+
+PD_BUILD_GRAD_OP(custom_left_scalar_divide)
+    .Inputs({"X", "Out", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X")})
+    .SetKernelFn(PD_KERNEL(LeftScalarDivideBackward));
+
+// out = x & y
+std::vector<paddle::Tensor> AndForward(const paddle::Tensor& x,
+                                       const paddle::Tensor& y) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {x & y};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_logical_and)
+    .Inputs({"X", "Y"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(AndForward));
+
+// out = x | y
+std::vector<paddle::Tensor> OrForward(const paddle::Tensor& x,
+                                      const paddle::Tensor& y) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {x | y};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_logical_or)
+    .Inputs({"X", "Y"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(OrForward));
+
+// out = x ^ y
+std::vector<paddle::Tensor> XorForward(const paddle::Tensor& x,
+                                       const paddle::Tensor& y) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {x ^ y};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_logical_xor)
+    .Inputs({"X", "Y"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(XorForward));
+
+// out = ~x
+std::vector<paddle::Tensor> NotForward(const paddle::Tensor& x) {
+  if (x.is_cpu() || x.is_gpu()) {
+    return {~x};
+  } else {
+    PD_THROW("Not implemented.");
+  }
+}
+
+PD_BUILD_OP(custom_logical_not)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(NotForward));
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_tensor_operator.py b/python/paddle/fluid/tests/custom_op/test_custom_tensor_operator.py
index 59f047e0e1a184..bad85eb5986321 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_tensor_operator.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_tensor_operator.py
@@ -240,6 +240,13 @@ def test_all(self):
         self.divide = self.custom_module.custom_scalar_divide
         self._test_static()
         self._test_dynamic()
+        self.add = self.custom_module.custom_left_scalar_add
+        self.subtract = self.custom_module.custom_left_scalar_subtract
+        self.multiply = self.custom_module.custom_left_scalar_multiply
+        self.divide = self.custom_module.custom_left_scalar_divide
+        self._test_static()
+        self._test_dynamic()
+        self._test_logical_operants()
 
     def _test_static(self):
         for device in self.devices:
@@ -324,6 +331,30 @@ def _test_dynamic(self):
             )
             np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8)
 
+    def _test_logical_operants(self):
+        for device in self.devices:
+            paddle.set_device(device)
+            np_x = paddle.randint(0, 2, [4, 8])
+            x = paddle.to_tensor(np_x, dtype="int32")
+            np_y = paddle.randint(0, 2, [4, 8])
+            y = paddle.to_tensor(np_y, dtype="int32")
+
+            out = self.custom_module.custom_logical_and(x, y)
+            pd_out = paddle.bitwise_and(x, y)
+            np.testing.assert_equal(out.numpy(), pd_out.numpy())
+
+            out = self.custom_module.custom_logical_or(x, y)
+            pd_out = paddle.bitwise_or(x, y)
+            np.testing.assert_equal(out.numpy(), pd_out.numpy())
+
+            out = self.custom_module.custom_logical_xor(x, y)
+            pd_out = paddle.bitwise_xor(x, y)
+            np.testing.assert_equal(out.numpy(), pd_out.numpy())
+
+            out = self.custom_module.custom_logical_not(x)
+            pd_out = paddle.bitwise_not(x)
+            np.testing.assert_equal(out.numpy(), pd_out.numpy())
+
 
 if __name__ == '__main__':
     unittest.main()