3
3
from paddle .v2 .fluid .framework import OpProtoHolder , Variable , Program , \
4
4
Operator
5
5
from paddle .v2 .fluid .initializer import ConstantInitializer , \
6
- NormalInitializer
6
+ NormalInitializer , XavierInitializer
7
7
from paddle .v2 .fluid .layer_helper import LayerHelper , unique_name
8
8
import re
9
9
import cStringIO
18
18
def fc (input ,
19
19
size ,
20
20
param_attr = None ,
21
+ param_initializer = None ,
21
22
bias_attr = None ,
23
+ bias_initializer = None ,
22
24
name = None ,
23
25
act = None ,
24
26
num_flatten_dims = 1 ,
@@ -31,7 +33,11 @@ def fc(input,
31
33
input: The input tensor to the function
32
34
size: The size of the layer
33
35
param_attr: The parameters/weights to the FC Layer
36
+ param_initializer: Initializer used for the weight/parameter.
37
+ If None, XavierInitializer() is used
34
38
bias_attr: The bias parameter for the FC layer
39
+ bias_initializer: Initializer used for the bias.
40
+ If None, then ConstantInitializer() is used
35
41
name: Name/alias of the function
36
42
act: Activation to be applied to the output of FC layer
37
43
num_flatten_dims: Number of columns in input
@@ -50,18 +56,34 @@ def fc(input,
50
56
to the LayerHelper constructor.
51
57
52
58
"""
59
+
60
+ def _get_default_param_initializer ():
61
+ return XavierInitializer ()
62
+
63
+ def _get_default_bias_initializer ():
64
+ return ConstantInitializer ()
65
+
53
66
helper = LayerHelper ('fc' , ** locals ())
54
67
55
68
dtype = helper .input_dtype ()
56
69
70
+ if param_initializer is None :
71
+ param_initializer = _get_default_param_initializer ()
72
+
73
+ if bias_initializer is None :
74
+ bias_initializer = _get_default_bias_initializer ()
75
+
57
76
mul_results = []
58
77
for input_var , param_attr in helper .iter_inputs_and_params ():
59
78
input_shape = input_var .shape
60
79
param_shape = [
61
80
reduce (lambda a , b : a * b , input_shape [num_flatten_dims :], 1 )
62
81
] + [size ]
63
82
w = helper .create_parameter (
64
- attr = param_attr , shape = param_shape , dtype = dtype )
83
+ attr = param_attr ,
84
+ initializer = param_initializer ,
85
+ shape = param_shape ,
86
+ dtype = dtype )
65
87
tmp = helper .create_tmp_variable (dtype )
66
88
helper .append_op (
67
89
type = "mul" ,
@@ -82,7 +104,7 @@ def fc(input,
82
104
helper .append_op (
83
105
type = "sum" , inputs = {"X" : mul_results }, outputs = {"Out" : pre_bias })
84
106
# add bias
85
- pre_activation = helper .append_bias_op (pre_bias )
107
+ pre_activation = helper .append_bias_op (pre_bias , bias_initializer )
86
108
# add activation
87
109
return helper .append_activation (pre_activation )
88
110
@@ -599,24 +621,41 @@ def sequence_conv(input,
599
621
act = None ,
600
622
padding = None ,
601
623
bias_attr = None ,
624
+ bias_initializer = None ,
602
625
param_attr = None ,
626
+ param_initializer = None ,
603
627
main_program = None ,
604
628
startup_program = None ):
605
629
"""
606
630
This function creates the op for sequence_conv, using the inputs and
607
631
other convolutional configurations for the filters and stride as given
608
632
in the input parameters to the function.
609
633
"""
634
+
635
+ def _get_default_bias_initializer ():
636
+ return ConstantInitializer ()
637
+
638
+ def _get_default_param_initializer ():
639
+ return XavierInitializer ()
640
+
610
641
# FIXME(dzh) : want to unify the argument of python layer
611
642
# function. So we ignore some unnecessary attributes.
612
643
# such as, padding_trainable, context_start.
613
644
614
645
helper = LayerHelper ('sequence_conv' , ** locals ())
615
646
dtype = helper .input_dtype ()
616
647
648
+ if param_initializer is None :
649
+ param_initializer = _get_default_param_initializer ()
650
+ if bias_initializer is None :
651
+ bias_initializer = _get_default_bias_initializer ()
652
+
617
653
filter_shape = [filter_size * input .shape [1 ], num_filters ]
618
654
filter = helper .create_parameter (
619
- attr = helper .param_attr , shape = filter_shape , dtype = dtype )
655
+ attr = helper .param_attr ,
656
+ shape = filter_shape ,
657
+ dtype = dtype ,
658
+ initializer = param_initializer )
620
659
pre_bias = helper .create_tmp_variable (dtype )
621
660
622
661
helper .append_op (
@@ -631,7 +670,7 @@ def sequence_conv(input,
631
670
'contextStart' : - int (filter_size / 2 ),
632
671
'contextLength' : filter_size
633
672
})
634
- pre_act = helper .append_bias_op (pre_bias )
673
+ pre_act = helper .append_bias_op (pre_bias , bias_initializer )
635
674
return helper .append_activation (pre_act )
636
675
637
676
@@ -644,7 +683,9 @@ def conv2d(input,
644
683
stride = [1 , 1 ],
645
684
padding = None ,
646
685
bias_attr = None ,
686
+ bias_initializer = None ,
647
687
param_attr = None ,
688
+ param_initializer = None ,
648
689
main_program = None ,
649
690
startup_program = None ):
650
691
"""
@@ -654,6 +695,14 @@ def conv2d(input,
654
695
This function can also append an activation on top of the
655
696
conv-2d output, if mentioned in the input parameters.
656
697
"""
698
+
699
+ def _get_default_bias_initializer ():
700
+ return ConstantInitializer ()
701
+
702
+ def _get_default_param_initializer (filter_size , num_channels ):
703
+ std = (2.0 / (filter_size [0 ]** 2 * num_channels ))** 0.5
704
+ return NormalInitializer (0.0 , std , 0 )
705
+
657
706
helper = LayerHelper ('conv2d' , ** locals ())
658
707
dtype = helper .input_dtype ()
659
708
@@ -675,12 +724,17 @@ def conv2d(input,
675
724
input_shape = input .shape
676
725
filter_shape = [num_filters , num_filter_channels ] + filter_size
677
726
678
- std = (2.0 / (filter_size [0 ]** 2 * num_channels ))** 0.5
727
+ if param_initializer is None :
728
+ param_initializer = _get_default_param_initializer (filter_size ,
729
+ num_channels )
730
+ if bias_initializer is None :
731
+ bias_initializer = _get_default_bias_initializer ()
732
+
679
733
filter = helper .create_parameter (
680
734
attr = helper .param_attr ,
681
735
shape = filter_shape ,
682
736
dtype = dtype ,
683
- initializer = NormalInitializer ( 0.0 , std , 0 ) )
737
+ initializer = param_initializer )
684
738
pre_bias = helper .create_tmp_variable (dtype )
685
739
686
740
helper .append_op (
@@ -694,7 +748,8 @@ def conv2d(input,
694
748
'paddings' : padding ,
695
749
'groups' : groups })
696
750
697
- pre_act = helper .append_bias_op (pre_bias , dim_start = 1 , dim_end = 2 )
751
+ pre_act = helper .append_bias_op (
752
+ pre_bias , bias_initializer , dim_start = 1 , dim_end = 2 )
698
753
699
754
return helper .append_activation (pre_act )
700
755
0 commit comments