PaddlePaddle · wangchaochaohu · May 15, 2020 · May 13, 2020
diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h
@@ -20,48 +20,26 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/operators/utils.h"
 
 namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
 
-inline framework::DDim GetShape(const framework::ExecutionContext &ctx) {
+inline framework::DDim GetShape(const framework::ExecutionContext &ctx,
+                                std::string op_type) {
   // 1. shape is a Tensor
   if (ctx.HasInput("ShapeTensor")) {
     auto *shape_tensor = ctx.Input<framework::LoDTensor>("ShapeTensor");
-    auto *shape_data = shape_tensor->data<int>();
-    framework::Tensor cpu_shape_tensor;
-    if (platform::is_gpu_place(shape_tensor->place())) {
-      TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor);
-      shape_data = cpu_shape_tensor.data<int>();
-    }
-    auto vec_shape =
-        std::vector<int>(shape_data, shape_data + shape_tensor->numel());
+    auto vec_shape = GetDataFromTensor<int>(shape_tensor);
     return framework::make_ddim(vec_shape);
   }
 
   // 2. shape is a list/tuple containing Tensor
   auto shape_tensor_list = ctx.MultiInput<framework::Tensor>("ShapeTensorList");
   if (shape_tensor_list.size() > 0) {
-    std::vector<int> vec_shape;
-    for (size_t i = 0; i < shape_tensor_list.size(); ++i) {
-      auto tensor = shape_tensor_list[i];
-      PADDLE_ENFORCE_EQ(
-          tensor->dims(), framework::make_ddim({1}),
-          platform::errors::InvalidArgument(
-              "If the element type of 'shape'(tensor_list type) in "
-              "FillConstantOp is Tensor, the shape of this Tensor element must "
-              "be [1]. But received the Tensor element's shape is [%s]",
-              tensor->dims()));
-      if (platform::is_gpu_place(tensor->place())) {
-        framework::Tensor temp;
-        TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-        vec_shape.push_back(*temp.data<int>());
-      } else {
-        vec_shape.push_back(*tensor->data<int>());
-      }
-    }
+    auto vec_shape = GetDataFromTensorList(shape_tensor_list);
     return framework::make_ddim(vec_shape);
   }
 
@@ -115,7 +93,8 @@ class FillConstantKernel : public framework::OpKernel<T> {
       }
       value = tensor_data[0];
     }
-    auto shape = GetShape(ctx);
+    const std::string op_type = "fill_constant";
+    auto shape = GetShape(ctx, op_type);
 
     if (out_var->IsType<framework::LoDTensor>()) {
       tensor = out_var->GetMutable<framework::LoDTensor>();

diff --git a/paddle/fluid/operators/gaussian_random_op.cc b/paddle/fluid/operators/gaussian_random_op.cc
@@ -14,16 +14,45 @@ limitations under the License. */
 
 #include <random>
 #include "paddle/fluid/framework/op_registry.h"
-
+#include "paddle/fluid/operators/fill_constant_op.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
 
 namespace paddle {
 namespace operators {
 
+using Tensor = framework::Tensor;
 template <typename T>
 class CPUGaussianRandomKernel : public framework::OpKernel<T> {
+ public:
+  // Resizes "Out" to the shape resolved by GetShape and fills it with
+  // samples of a normal distribution (attrs "mean", "std", "seed").
+  void Compute(const framework::ExecutionContext& context) const override {
+    const float mean = context.Attr<float>("mean");
+    const float stddev = context.Attr<float>("std");
+    auto* out = context.Output<framework::Tensor>("Out");
+
+    // A seed attribute of 0 is replaced by a value from std::random_device.
+    auto seed = static_cast<unsigned int>(context.Attr<int>("seed"));
+    if (seed == 0) seed = std::random_device()();
+    std::minstd_rand engine;
+    engine.seed(seed);
+    std::normal_distribution<T> dist(mean, stddev);
+
+    // Resize first; the buffer is then requested for the resized tensor.
+    const std::string op_type = "gaussian_random";
+    out->Resize(GetShape(context, op_type));
+    T* buffer = out->mutable_data<T>(context.GetPlace());
+
+    for (int64_t idx = 0, count = out->numel(); idx < count; ++idx) {
+      buffer[idx] = dist(engine);
+    }
+  }
+};
+
+template <typename T>
+class CPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     float mean = context.Attr<float>("mean");
@@ -58,12 +87,26 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
     for (auto dim : shape) {
       temp.push_back(static_cast<int64_t>(dim));
     }
-    PADDLE_ENFORCE_GT(
-        shape.size(), 0UL,
-        platform::errors::InvalidArgument(
-            "Attribute(shape) of GaussianRandomOp must be set "
-            "and shape.size() > 0, but reveived shape.size() is %d",
-            shape.size()));
+    if (shape.empty() && ctx->HasInput("ShapeTensor")) {
+      auto shape_dims = ctx->GetInputDim("ShapeTensor");
+      int num_ele = 1;
+      for (int i = 0; i < shape_dims.size(); ++i) {
+        num_ele *= shape_dims[i];
+      }
+      auto vec_dims = std::vector<int>(num_ele, -1);
+      ctx->SetOutputDim("Out", framework::make_ddim(vec_dims));
+
+      return;
+    }
+    // Attr(shape) is mandatory only when no shape tensor input is given.
+    if (!ctx->HasInput("ShapeTensor") && !ctx->HasInputs("ShapeTensorList")) {
+      PADDLE_ENFORCE_GT(
+          shape.size(), 0UL, platform::errors::InvalidArgument(
+              "Attribute(shape) of GaussianRandomOp must be set "
+              "and shape.size() > 0, but reveived shape.size() is %d",
+              shape.size()));
+    }
+
     ctx->SetOutputDim("Out", framework::make_ddim(temp));
   }
 
@@ -85,6 +128,16 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
         static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
         ctx.device_context(), layout, library);
   }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name, const Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const override {
+    if (var_name != "ShapeTensor" && var_name != "ShapeTensorList") {
+      return framework::OpKernelType(expected_kernel_type.data_type_,
+                                     tensor.place(), tensor.layout());
+    }
+    return expected_kernel_type;  // shape inputs keep the expected type
+  }
 };
 
 class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -94,7 +147,18 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
 
     AddAttr<std::vector<int64_t>>("shape",
                                   "(vector<int64_t>) "
-                                  "The dimension of random tensor.");
+                                  "The dimension of random tensor.")
+        .SetDefault({});
+    AddInput("ShapeTensor",
+             "(Tensor<int>, optional). The shape of the output. "
+             "It has a higher priority than Attr(shape).")
+        .AsDispensable();
+    AddInput("ShapeTensorList",
+             "(vector<Tensor<int>>, optional). The shape of the output. "
+             "It has a higher priority than Attr(shape). "
+             "The shape of the element in vector must be [1].")
+        .AsDuplicable()
+        .AsDispensable();
     AddAttr<float>("mean",
                    "(float, default 0.0) "
                    "mean of random tensor.")
@@ -135,5 +199,5 @@ REGISTER_OP_WITHOUT_GRADIENT(gaussian_random, ops::GaussianRandomOp,
 REGISTER_OP_CPU_KERNEL(gaussian_random, ops::CPUGaussianRandomKernel<float>,
                        ops::CPUGaussianRandomKernel<double>);
 REGISTER_OP_CPU_KERNEL(gaussian_random_batch_size_like,
-                       ops::CPUGaussianRandomKernel<float>,
-                       ops::CPUGaussianRandomKernel<double>);
+                       ops::CPUGaussianRandomBatchSizeLikeKernel<float>,
+                       ops::CPUGaussianRandomBatchSizeLikeKernel<double>);
diff --git a/paddle/fluid/operators/gaussian_random_op.cu b/paddle/fluid/operators/gaussian_random_op.cu
@@ -15,6 +15,7 @@ limitations under the License. */
 #include <thrust/transform.h>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/operators/fill_constant_op.h"
 
 namespace paddle {
 namespace operators {
@@ -41,7 +42,6 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* tensor = context.Output<framework::Tensor>("Out");
-    T* data = tensor->mutable_data<T>(context.GetPlace());
     unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
     if (seed == 0) {
       std::random_device rd;
@@ -50,19 +50,45 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
     T mean = static_cast<T>(context.Attr<float>("mean"));
     T std = static_cast<T>(context.Attr<float>("std"));
     thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    const std::string op_type = "gaussian_random";
+    auto shape = GetShape(context, op_type);
+    tensor->Resize(shape);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+
     int64_t size = tensor->numel();
     thrust::transform(index_sequence_begin, index_sequence_begin + size,
                       thrust::device_ptr<T>(data),
                       GaussianGenerator<T>(mean, std, seed));
   }
 };
 
+// Fills "Out", at its current size, with values from GaussianGenerator.
+template <typename T>
+class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* out = context.Output<framework::Tensor>("Out");
+    T* out_data = out->mutable_data<T>(context.GetPlace());
+
+    // A seed attribute of 0 is replaced by a value from std::random_device.
+    auto seed = static_cast<unsigned int>(context.Attr<int>("seed"));
+    if (seed == 0) seed = std::random_device()();
+
+    const T mean = static_cast<T>(context.Attr<float>("mean"));
+    const T stddev = static_cast<T>(context.Attr<float>("std"));
+    thrust::counting_iterator<unsigned int> first(0);
+    const int64_t count = out->numel();
+    thrust::transform(first, first + count, thrust::device_ptr<T>(out_data),
+                      GaussianGenerator<T>(mean, stddev, seed));
+  }
+};
 }  // namespace operators
 }  // namespace paddle
 
 REGISTER_OP_CUDA_KERNEL(gaussian_random,
                         paddle::operators::GPUGaussianRandomKernel<float>,
                         paddle::operators::GPUGaussianRandomKernel<double>);
-REGISTER_OP_CUDA_KERNEL(gaussian_random_batch_size_like,
-                        paddle::operators::GPUGaussianRandomKernel<float>,
-                        paddle::operators::GPUGaussianRandomKernel<double>);
+REGISTER_OP_CUDA_KERNEL(
+    gaussian_random_batch_size_like,
+    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<float>,
+    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<double>);
diff --git a/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <string>
+#include "paddle/fluid/operators/fill_constant_op.h"
 #include "paddle/fluid/operators/mean_op.h"
 
 namespace paddle {
@@ -26,7 +27,6 @@ class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
     float mean = context.Attr<float>("mean");
     float std = context.Attr<float>("std");
     auto* tensor = context.Output<framework::Tensor>("Out");
-    T* data = tensor->mutable_data<T>(context.GetPlace());
 
     unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
     std::minstd_rand engine;
@@ -35,6 +35,11 @@ class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
     }
     engine.seed(seed);
     std::normal_distribution<T> dist(mean, std);
+
+    const std::string op_type = "gaussian_random";
+    auto shape = GetShape(context, op_type);
+    tensor->Resize(shape);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
     int64_t size = tensor->numel();
     for (int64_t i = 0; i < size; ++i) {
       data[i] = dist(engine);

diff --git a/python/paddle/fluid/layers/distributions.py b/python/paddle/fluid/layers/distributions.py
@@ -357,8 +357,9 @@ def sample(self, shape, seed=0):
             output_shape = shape + batch_shape
             zero_tmp = tensor.fill_constant_batch_size_like(
                 self.loc + self.scale, batch_shape + shape, self.loc.dtype, 0.)
-            normal_random_tmp = nn.gaussian_random_batch_size_like(
-                zero_tmp, zero_tmp.shape, mean=0., std=1., seed=seed)
+            zero_tmp_shape = nn.shape(zero_tmp)
+            normal_random_tmp = nn.gaussian_random(
+                zero_tmp_shape, mean=0., std=1., seed=seed)
             output = normal_random_tmp * (zero_tmp + self.scale) + self.loc
             return nn.reshape(output, output_shape)
         else: