Skip to content

add data layout #6832

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions paddle/framework/data_layout.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

namespace paddle {
namespace framework {

enum DataLayout {
kNHWC = 0,
kNCHW = 1,
kAnyLayout = 2,
};

inline DataLayout StringToDataLayout(const std::string& str) {
if (str == "NHWC" || str == "nhwc") {
return DataLayout::kNHWC;
} else if (str == "NCHW" || str == "nchw") {
return DataLayout::kNCHW;
} else {
PADDLE_THROW("Unknown storage order string: %s", str);
}
}

} // namespace framework
} // namespace paddle
64 changes: 33 additions & 31 deletions paddle/operators/batch_norm_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/batch_norm_op.h"
#include "paddle/framework/data_layout.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DataLayout = framework::DataLayout;

template <typename T>
using EigenArrayMap =
Expand Down Expand Up @@ -60,15 +62,15 @@ class BatchNormOp : public framework::OperatorWithKernel {
"Variance and VarianceOut should share the same memory");

const auto x_dims = ctx->GetInputDim("X");
const TensorFormat tensor_format =
StringToTensorFormat(ctx->Attrs().Get<std::string>("tensor_format"));
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));

PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"Input X must have 2 to 5 dimensions.");

const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);

PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C);
Expand All @@ -90,7 +92,7 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<bool>("is_test", "").SetDefault(false);
AddAttr<float>("momentum", "").SetDefault(0.9);
AddAttr<float>("epsilon", "").SetDefault(1e-5);
AddAttr<std::string>("tensor_format", "").SetDefault("NCHW");
AddAttr<std::string>("data_layout", "").SetDefault("NCHW");
AddInput("X", "The input tensor");
AddInput("Scale",
"Scale is a 1-dimensional tensor of size C "
Expand Down Expand Up @@ -141,18 +143,18 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
const float epsilon = ctx.Attr<float>("epsilon");
const float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test");
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);

const auto *x = ctx.Input<Tensor>("X");
const auto &x_dims = x->dims();
PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 2 and 5");
const int N = x_dims[0];
const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;

auto *y = ctx.Output<Tensor>("Y");
Expand All @@ -177,8 +179,8 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
saved_mean_e.setZero();
saved_variance_e.setZero();

switch (tensor_format) {
case TensorFormat::NCHW: {
switch (data_layout) {
case DataLayout::kNCHW: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
saved_mean_e(nc % C) += x_arr.col(nc).sum();
Expand All @@ -191,7 +193,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
saved_variance_e /= N * sample_size;
break;
}
case TensorFormat::NHWC: {
case DataLayout::kNHWC: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), C, N * sample_size);
for (int i = 0; i < N * sample_size; ++i) {
saved_mean_e += x_arr.col(i);
Expand All @@ -205,7 +207,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
break;
}
default:
PADDLE_THROW("Unknown storage order: %s", tensor_format_str);
PADDLE_THROW("Unknown storage order: %s", data_layout_str);
}

EigenVectorArrayMap<T> running_mean_arr(
Expand Down Expand Up @@ -247,8 +249,8 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
bias_arr - mean_arr * inv_std * scale_arr;

switch (tensor_format) {
case TensorFormat::NCHW: {
switch (data_layout) {
case DataLayout::kNCHW: {
EigenArrayMap<T> y_arr(y->mutable_data<T>(ctx.GetPlace()), sample_size,
N * C);
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
Expand All @@ -257,7 +259,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
}
break;
}
case TensorFormat::NHWC: {
case DataLayout::kNHWC: {
EigenArrayMap<T>(y->mutable_data<T>(ctx.GetPlace()), C,
N * sample_size) =
(ConstEigenArrayMap<T>(x->data<T>(), C, N * sample_size).colwise() *
Expand All @@ -267,7 +269,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
break;
}
default:
PADDLE_THROW("Unknown storage order: %d", tensor_format);
PADDLE_THROW("Unknown storage order: %d", data_layout);
}
}
};
Expand All @@ -290,11 +292,11 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Bias")), "");

const auto x_dims = ctx->GetInputDim("X");
const TensorFormat tensor_format =
StringToTensorFormat(ctx->Attrs().Get<std::string>("tensor_format"));
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);

ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
ctx->SetOutputDim(framework::GradVarName("Scale"), {C});
Expand Down Expand Up @@ -333,9 +335,9 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
const auto *saved_mean = ctx.Input<Tensor>("SavedMean");
// SavedVariance have been reverted in forward operator
const auto *saved_inv_variance = ctx.Input<Tensor>("SavedVariance");
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);

// Get the size for each dimension.
// NCHW [batch_size, in_channels, in_height, in_width]
Expand All @@ -344,8 +346,8 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
"The Input dim size should be between 2 and 5");
const int N = x_dims[0];
const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;

ConstEigenVectorArrayMap<T> scale_arr(scale->data<T>(), C);
Expand Down Expand Up @@ -376,8 +378,8 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>

const auto scale_inv_var_nhw = scale_arr * inv_var_arr / (N * sample_size);

switch (tensor_format) {
case TensorFormat::NCHW: {
switch (data_layout) {
case DataLayout::kNCHW: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
ConstEigenArrayMap<T> d_y_arr(d_y->data<T>(), sample_size, N * C);
EigenArrayMap<T> d_x_arr(d_x->mutable_data<T>(ctx.GetPlace()),
Expand All @@ -400,7 +402,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
}
break;
}
case TensorFormat::NHWC: {
case DataLayout::kNHWC: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), C, N * sample_size);
ConstEigenArrayMap<T> d_y_arr(d_y->data<T>(), C, N * sample_size);
EigenArrayMap<T> d_x_arr(d_x->mutable_data<T>(ctx.GetPlace()), C,
Expand All @@ -425,7 +427,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
break;
}
default:
PADDLE_THROW("Unknown storage order: %s", tensor_format_str);
PADDLE_THROW("Unknown storage order: %s", data_layout_str);
}
}
};
Expand Down
35 changes: 18 additions & 17 deletions paddle/operators/batch_norm_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/batch_norm_op.h"
#include "paddle/framework/data_layout.h"

#include <cfloat>
#include "paddle/operators/math/math_function.h"
Expand All @@ -22,26 +23,26 @@ namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using DataLayout = framework::DataLayout;
template <typename T>
using CudnnDataType = platform::CudnnDataType<T>;

void ExtractNCWHD(const framework::DDim &dims,
const TensorFormat &tensor_format, int *N, int *C, int *H,
int *W, int *D) {
void ExtractNCWHD(const framework::DDim &dims, const DataLayout &data_layout,
int *N, int *C, int *H, int *W, int *D) {
*N = dims[0];
if (dims.size() == 2) {
*C = dims[1];
*H = 1;
*W = 1;
*D = 1;
} else {
*C = tensor_format == TensorFormat::NCHW ? dims[1] : dims[dims.size() - 1];
*H = tensor_format == TensorFormat::NCHW ? dims[2] : dims[1];
*C = data_layout == DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
*H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
*W = dims.size() > 3
? (tensor_format == TensorFormat::NCHW ? dims[3] : dims[2])
? (data_layout == DataLayout::kNCHW ? dims[3] : dims[2])
: 1;
*D = dims.size() > 4
? (tensor_format == TensorFormat::NCHW ? dims[4] : dims[3])
? (data_layout == DataLayout::kNCHW ? dims[4] : dims[3])
: 1;
}
}
Expand All @@ -56,9 +57,9 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
double epsilon = static_cast<double>(ctx.Attr<float>("epsilon"));
const float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test");
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);

// Get the size for each dimension.
// NCHW [batch_size, in_channels, in_height, in_width]
Expand All @@ -67,7 +68,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 2 and 5");
int N, C, H, W, D;
ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D);
ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);

// ------------------- cudnn descriptors ---------------------
cudnnTensorDescriptor_t data_desc_;
Expand All @@ -93,7 +94,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
VLOG(1) << "Setting descriptors.";
std::vector<int> dims;
std::vector<int> strides;
if (tensor_format == TensorFormat::NCHW) {
if (data_layout == DataLayout::kNCHW) {
dims = {N, C, H, W, D};
strides = {C * H * W * D, H * W * D, W * D, D, 1};
} else {
Expand Down Expand Up @@ -180,9 +181,9 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use GPUPlace.");
double epsilon = static_cast<double>(ctx.Attr<float>("epsilon"));
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
const auto *x = ctx.Input<Tensor>("X");
const auto *d_y = ctx.Input<Tensor>(framework::GradVarName("Y"));
const auto *scale = ctx.Input<Tensor>("Scale");
Expand All @@ -192,7 +193,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 2 and 5");
int N, C, H, W, D;
ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D);
ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);

PADDLE_ENFORCE_EQ(scale->dims().size(), 1UL);
PADDLE_ENFORCE_EQ(scale->dims()[0], C);
Expand All @@ -219,7 +220,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>

std::vector<int> dims;
std::vector<int> strides;
if (tensor_format == TensorFormat::NCHW) {
if (data_layout == DataLayout::kNCHW) {
dims = {N, C, H, W, D};
strides = {C * H * W * D, H * W * D, W * D, D, 1};
} else {
Expand Down
15 changes: 0 additions & 15 deletions paddle/operators/batch_norm_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {

enum TensorFormat {
NHWC = 0,
NCHW = 1,
};

inline TensorFormat StringToTensorFormat(const std::string& str) {
if (str == "NHWC" || str == "nhwc") {
return TensorFormat::NHWC;
} else if (str == "NCHW" || str == "nchw") {
return TensorFormat::NCHW;
} else {
PADDLE_THROW("Unknown storage order string: %s", str);
}
}

template <typename DeviceContext, typename T>
class BatchNormKernel : public framework::OpKernel<T> {
public:
Expand Down
8 changes: 4 additions & 4 deletions python/paddle/v2/fluid/tests/test_batch_norm_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def test_python(self):
print 'python: NHWC, NCHW, backward checking passed'

def test_forward_backward(self):
def test_with_place(place, tensor_format, shape):
def test_with_place(place, data_layout, shape):
# attr
epsilon = 0.00001
momentum = 0.9
Expand Down Expand Up @@ -292,7 +292,7 @@ def test_with_place(place, tensor_format, shape):
SavedVariance="saved_variance",
# attrs
is_test=False,
tensor_format=tensor_format,
data_layout=data_layout,
momentum=momentum,
epsilon=epsilon)

Expand All @@ -311,7 +311,7 @@ def test_with_place(place, tensor_format, shape):
atol = 1e-4
self.__assert_close(variance_out_tensor, variance_out,
"variance_out", atol)
print "op test forward passed: ", str(place), tensor_format
print "op test forward passed: ", str(place), data_layout

# run backward
batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set())
Expand All @@ -336,7 +336,7 @@ def test_with_place(place, tensor_format, shape):
self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
print "op test backward passed: ", str(place), tensor_format
print "op test backward passed: ", str(place), data_layout

places = [core.CPUPlace()]
if core.is_compile_gpu() and core.op_support_gpu("batch_norm"):
Expand Down