
Commit 8556834

Author: Abhinav Arora (committed)

Improve the initializer interface for fc, sequence_conv and conv2d layers

1 parent bce1c03, commit 8556834

File tree: 3 files changed (+121, -36 lines)

python/paddle/v2/fluid/framework.py (+32, -21)
@@ -15,6 +15,37 @@ def unique_name(prefix):
     return "_".join([prefix, str(uid)])
 
 
+def convert_np_dtype_to_dtype_(np_dtype):
+    dtype = np.dtype(np_dtype)
+    if dtype == np.float32:
+        return core.DataType.FP32
+    elif dtype == np.float64:
+        return core.DataType.FP64
+    elif dtype == np.float16:
+        return core.DataType.FP16
+    elif dtype == np.int32:
+        return core.DataType.INT32
+    elif dtype == np.int16:
+        return core.DataType.INT16
+    elif dtype == np.int64:
+        return core.DataType.INT64
+    elif dtype == np.bool:
+        return core.DataType.BOOL
+    else:
+        raise ValueError("Not supported numpy dtype " + str(dtype))
+
+
+def dtype_is_floating(dtype):
+    if not isinstance(dtype, core.DataType):
+        dtype = convert_np_dtype_to_dtype_(dtype)
+
+    if (dtype == core.DataType.FP16 or dtype == core.DataType.FP32 or
+            dtype == core.DataType.FP64):
+        return True
+    else:
+        return False
+
+
 def _debug_string_(proto, throw_on_error=True):
     error_fields = list()
     if not proto.IsInitialized(error_fields) and throw_on_error:
@@ -66,7 +97,7 @@ def __init__(self,
                         "matched.".format(self.name, old_shape, shape))
         if dtype is not None:
             if not isinstance(dtype, core.DataType):
-                dtype = Variable._convert_np_dtype_to_dtype_(dtype)
+                dtype = convert_np_dtype_to_dtype_(dtype)
             if is_new_var:
                 self.desc.set_data_type(dtype)
             else:
@@ -148,26 +179,6 @@ def _unique_var_name_():
         uid = core.unique_integer(prefix)  # unique during whole process.
         return "_".join([prefix, str(uid)])
 
-    @staticmethod
-    def _convert_np_dtype_to_dtype_(np_dtype):
-        dtype = np.dtype(np_dtype)
-        if dtype == np.float32:
-            return core.DataType.FP32
-        elif dtype == np.float64:
-            return core.DataType.FP64
-        elif dtype == np.float16:
-            return core.DataType.FP16
-        elif dtype == np.int32:
-            return core.DataType.INT32
-        elif dtype == np.int16:
-            return core.DataType.INT16
-        elif dtype == np.int64:
-            return core.DataType.INT64
-        elif dtype == np.bool:
-            return core.DataType.BOOL
-        else:
-            raise ValueError("Not supported numpy dtype " + str(dtype))
-
 
 def get_all_op_protos():
     """

python/paddle/v2/fluid/layer_helper.py (+26, -7)
@@ -2,7 +2,7 @@
 import itertools
 
 from paddle.v2.fluid.framework import Variable, g_main_program, \
-    g_startup_program, unique_name, Program
+    g_startup_program, unique_name, Program, dtype_is_floating
 from paddle.v2.fluid.initializer import ConstantInitializer, \
     UniformInitializer, XavierInitializer
 
@@ -61,7 +61,7 @@ def input(self, input_param_name='input'):
 
     @property
     def param_attr(self):
-        default = {'name': None, 'initializer': XavierInitializer()}
+        default = {'name': None}
         actual = self.kwargs.get('param_attr', None)
         if actual is None:
             actual = default
@@ -72,7 +72,7 @@ def param_attr(self):
 
     @property
     def bias_attr(self):
-        default = {'name': None, 'initializer': ConstantInitializer()}
+        default = {'name': None}
         bias_attr = self.kwargs.get('bias_attr', None)
         if bias_attr is None:
             bias_attr = default
@@ -119,6 +119,8 @@ def create_parameter(self, attr, shape, dtype, suffix='w',
         attr_copy = copy.deepcopy(attr)
         if initializer is not None:
             attr_copy['initializer'] = initializer
+        else:
+            attr_copy['initializer'] = _get_default_initializer(dtype)
         if attr_copy['name'] is None:
             attr_copy['name'] = unique_name(".".join([self.name, suffix]))
         self.startup_program.global_block().create_parameter(
@@ -149,13 +151,19 @@ def set_variable_initializer(self, var, initializer):
             persistable=True,
             initializer=initializer)
 
-    def append_bias_op(self, input_var, dim_start=1, dim_end=None):
+    def append_bias_op(self,
+                       input_var,
+                       bias_initializer,
+                       dim_start=1,
+                       dim_end=None):
         """
         Append bias operator and return its output. If the user does not set
         bias_attr, append_bias_op will return input_var
 
-        :param input_var: the input variable. The len(input_var.shape) is larger
-        or equal than 2.
+        :param input_var: the input variable. len(input_var.shape) must be
+        greater than or equal to 2.
+        :param bias_initializer: an instance of a subclass of Initializer used
+        to initialize the bias
         :param dim_start:
         :param dim_end: the shape of the bias will be
         input_var.shape[dim_start:dim_end]. The bias is broadcasted to other
@@ -167,7 +175,11 @@ def append_bias_op(self, input_var, dim_start=1, dim_end=None):
             return input_var
 
         b = self.create_parameter(
-            attr=bias_attr, shape=size, dtype=input_var.data_type, suffix='b')
+            attr=bias_attr,
+            shape=size,
+            dtype=input_var.data_type,
+            suffix='b',
+            initializer=bias_initializer)
         tmp = self.create_tmp_variable(dtype=input_var.data_type)
         self.append_op(
             type='elementwise_add',
@@ -191,3 +203,10 @@ def append_activation(self, input_var):
             outputs={"Y": [tmp]},
             attrs=act)
         return tmp
+
+def _get_default_initializer(dtype):
+    if dtype is None or dtype_is_floating(dtype) == True:
+        return XavierInitializer()
+    else:
+        # For integer and boolean types, initialize with all zeros
+        return ConstantInitializer()
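
Taken together, a rough sketch of how a layer written on top of LayerHelper picks up these defaults; the layer name, shapes, and op wiring below are illustrative and not part of this commit:

from paddle.v2.fluid.initializer import ConstantInitializer
from paddle.v2.fluid.layer_helper import LayerHelper


def my_layer(input, size, param_attr=None, bias_attr=None, act=None,
             main_program=None, startup_program=None):
    helper = LayerHelper('my_layer', **locals())
    dtype = helper.input_dtype()

    # No explicit initializer is passed, so create_parameter falls back to
    # _get_default_initializer(dtype): Xavier for floating-point dtypes,
    # constant zeros for integer/boolean dtypes.
    w = helper.create_parameter(
        attr=helper.param_attr, shape=[input.shape[1], size], dtype=dtype)

    pre_bias = helper.create_tmp_variable(dtype)
    helper.append_op(
        type='mul', inputs={'X': [input], 'Y': [w]}, outputs={'Out': [pre_bias]})

    # append_bias_op now takes the bias initializer as an explicit argument.
    pre_act = helper.append_bias_op(pre_bias, ConstantInitializer())
    return helper.append_activation(pre_act)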

python/paddle/v2/fluid/layers.py (+63, -8)
@@ -3,7 +3,7 @@
 from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \
     Operator
 from paddle.v2.fluid.initializer import ConstantInitializer, \
-    NormalInitializer
+    NormalInitializer, XavierInitializer
 from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
 import re
 import cStringIO
@@ -18,7 +18,9 @@
 def fc(input,
        size,
        param_attr=None,
+       param_initializer=None,
       bias_attr=None,
+       bias_initializer=None,
        name=None,
        act=None,
        num_flatten_dims=1,
@@ -31,7 +33,11 @@ def fc(input,
     input: The input tensor to the function
     size: The size of the layer
     param_attr: The parameters/weights to the FC Layer
+    param_initializer: Initializer used for the weight/parameter.
+                       If None, XavierInitializer() is used
     bias_attr: The bias parameter for the FC layer
+    bias_initializer: Initializer used for the bias.
+                      If None, then ConstantInitializer() is used
     name: Name/alias of the function
     act: Activation to be applied to the output of FC layer
     num_flatten_dims: Number of columns in input
@@ -50,18 +56,34 @@ def fc(input,
     to the LayerHelper constructor.
 
     """
+
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
+    def _get_default_bias_initializer():
+        return ConstantInitializer()
+
     helper = LayerHelper('fc', **locals())
 
     dtype = helper.input_dtype()
 
+    if param_initializer is None:
+        param_initializer = _get_default_param_initializer()
+
+    if bias_initializer is None:
+        bias_initializer = _get_default_bias_initializer()
+
     mul_results = []
     for input_var, param_attr in helper.iter_inputs_and_params():
         input_shape = input_var.shape
         param_shape = [
             reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
         ] + [size]
         w = helper.create_parameter(
-            attr=param_attr, shape=param_shape, dtype=dtype)
+            attr=param_attr,
+            initializer=param_initializer,
+            shape=param_shape,
+            dtype=dtype)
         tmp = helper.create_tmp_variable(dtype)
         helper.append_op(
             type="mul",
@@ -82,7 +104,7 @@ def fc(input,
     helper.append_op(
         type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
     # add bias
-    pre_activation = helper.append_bias_op(pre_bias)
+    pre_activation = helper.append_bias_op(pre_bias, bias_initializer)
     # add activation
     return helper.append_activation(pre_activation)
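
With this change a caller can override the fc weight and bias initializers per layer, and the layer falls back to Xavier (weights) and constant (bias) when the new arguments are omitted. A usage sketch; the data layer, sizes, and activations are illustrative:

import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.initializer import ConstantInitializer, NormalInitializer

image = layers.data(name='image', shape=[784], data_type='float32')

# Defaults: XavierInitializer for the weight, ConstantInitializer for the bias.
hidden = layers.fc(input=image, size=128, act='relu')

# Explicitly override both initializers.
prediction = layers.fc(input=hidden,
                       size=10,
                       act='softmax',
                       param_initializer=NormalInitializer(0.0, 0.02, 0),
                       bias_initializer=ConstantInitializer(0.0))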

@@ -599,24 +621,41 @@ def sequence_conv(input,
                   act=None,
                   padding=None,
                   bias_attr=None,
+                  bias_initializer=None,
                   param_attr=None,
+                  param_initializer=None,
                   main_program=None,
                   startup_program=None):
     """
     This function creates the op for sequence_conv, using the inputs and
     other convolutional configurations for the filters and stride as given
     in the input parameters to the function.
     """
+
+    def _get_default_bias_initializer():
+        return ConstantInitializer()
+
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
     # FIXME(dzh) : want to unify the argument of python layer
     # function. So we ignore some unecessary attributes.
     # such as, padding_trainable, context_start.
 
     helper = LayerHelper('sequence_conv', **locals())
     dtype = helper.input_dtype()
 
+    if param_initializer is None:
+        param_initializer = _get_default_param_initializer()
+    if bias_initializer is None:
+        bias_initializer = _get_default_bias_initializer()
+
     filter_shape = [filter_size * input.shape[1], num_filters]
     filter = helper.create_parameter(
-        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=filter_shape,
+        dtype=dtype,
+        initializer=param_initializer)
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
@@ -631,7 +670,7 @@ def sequence_conv(input,
             'contextStart': -int(filter_size / 2),
             'contextLength': filter_size
         })
-    pre_act = helper.append_bias_op(pre_bias)
+    pre_act = helper.append_bias_op(pre_bias, bias_initializer)
     return helper.append_activation(pre_act)
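
sequence_conv gains the same pair of arguments. A call-site sketch; the upstream sequence tensor and the filter settings are illustrative, and num_filters/filter_size are assumed to keep their existing meaning:

import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.initializer import XavierInitializer

# 'embedded' is assumed to be a sequence (LoD) tensor produced by earlier layers.
conv = layers.sequence_conv(input=embedded,
                            num_filters=64,
                            filter_size=3,
                            act='tanh',
                            param_initializer=XavierInitializer())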


@@ -644,7 +683,9 @@ def conv2d(input,
            stride=[1, 1],
            padding=None,
            bias_attr=None,
+           bias_initializer=None,
            param_attr=None,
+           param_initializer=None,
            main_program=None,
            startup_program=None):
     """
@@ -654,6 +695,14 @@
     This funciton can also append an activation on top of the
     conv-2d output, if mentioned in the input parameters.
     """
+
+    def _get_default_bias_initializer():
+        return ConstantInitializer()
+
+    def _get_default_param_initializer(filter_size, num_channels):
+        std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
+        return NormalInitializer(0.0, std, 0)
+
     helper = LayerHelper('conv2d', **locals())
     dtype = helper.input_dtype()
 
@@ -675,12 +724,17 @@ def conv2d(input,
     input_shape = input.shape
     filter_shape = [num_filters, num_filter_channels] + filter_size
 
-    std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
+    if param_initializer is None:
+        param_initializer = _get_default_param_initializer(filter_size,
+                                                            num_channels)
+    if bias_initializer is None:
+        bias_initializer = _get_default_bias_initializer()
+
     filter = helper.create_parameter(
         attr=helper.param_attr,
         shape=filter_shape,
         dtype=dtype,
-        initializer=NormalInitializer(0.0, std, 0))
+        initializer=param_initializer)
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
@@ -694,7 +748,8 @@ def conv2d(input,
             'paddings': padding,
             'groups': groups})
 
-    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
+    pre_act = helper.append_bias_op(
+        pre_bias, bias_initializer, dim_start=1, dim_end=2)
 
     return helper.append_activation(pre_act)
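
conv2d follows the same pattern; when param_initializer is omitted the filter is drawn from a zero-mean normal distribution with std = (2.0 / (filter_size[0]**2 * num_channels))**0.5, exactly as the default helper above computes it. A sketch; the input tensor and shapes are illustrative:

import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.initializer import NormalInitializer

# 'images' is assumed to be an NCHW float32 tensor from a data layer.
conv_default = layers.conv2d(input=images,
                             num_filters=32,
                             filter_size=[3, 3],
                             act='relu')

# Override the default filter initializer with an explicit one.
conv_custom = layers.conv2d(input=images,
                            num_filters=32,
                            filter_size=[3, 3],
                            act='relu',
                            param_initializer=NormalInitializer(0.0, 0.01, 0))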
