Skip to content

Commit 1fc4352

Browse files
luotao1reyoung
authored andcommitted
refine sparse momentum api and unittest (#126)
* refine sparse momentum api and unittest * fix unittests bug
1 parent 6decbdf commit 1fc4352

File tree

8 files changed

+82
-55
lines changed

8 files changed

+82
-55
lines changed

doc/algorithm/rnn/rnn.rst

+1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge
142142
The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`:
143143

144144
.. code-block:: python
145+
145146
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
146147
StaticInput(input=encoded_proj,is_seq=True)]
147148
trg_embedding = embedding_layer(

doc/ui/api/trainer_config_helpers/optimizers.rst

+6
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ BaseSGDOptimizer
44
:members: BaseSGDOptimizer
55
:noindex:
66

7+
MomentumOptimizer
8+
=================
9+
.. automodule:: paddle.trainer_config_helpers.optimizers
10+
:members: MomentumOptimizer
11+
:noindex:
12+
713
AdamOptimizer
814
=============
915
.. automodule:: paddle.trainer_config_helpers.optimizers

paddle/trainer/tests/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ add_test(NAME test_CompareTwoOpts
4747
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
4848
${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts
4949
--config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf
50-
--num_passes=1 --need_high_accuracy=1
50+
--num_passes=1 --need_high_accuracy=0
5151
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/)
5252

5353
################# test_CompareSparse ##################

paddle/trainer/tests/mnist.list

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
trainer/tests/mnist_bin_part

paddle/trainer/tests/mnist_bin_part

3.68 MB
Binary file not shown.

paddle/trainer/tests/sample_trainer_config_opt_a.conf

+22-25
Original file line numberDiff line numberDiff line change
@@ -12,32 +12,29 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
15+
from paddle.trainer_config_helpers import *
1616

1717
################################### Data Configuration ###################################
18-
TrainData(ProtoData(files = "train.list"))
18+
TrainData(ProtoData(files = "trainer/tests/mnist.list"))
1919
################################### Algorithm Configuration ###################################
20-
Settings(
21-
learning_rate_decay_a = 0.0,
22-
learning_rate_decay_b = 0.0,
23-
learning_rate = 1e-03,
24-
batch_size = 1000,
25-
algorithm = 'sgd',
26-
num_batches_per_send_parameter = 1,
27-
num_batches_per_get_parameter = 1,
28-
learning_method='sparse_momentum',
29-
)
30-
default_momentum(0.5)
20+
settings(batch_size = 1000,
21+
learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
3122
################################### Network Configuration ###################################
32-
Layer(type = "data", name = "input", size = 784)
33-
Layer(inputs = [Input("input", parameter_name = "_layer1.w")], name = "layer1", bias = Bias(parameter_name = "_layer1.bias"), active_type = "sigmoid", type = "fc", size = 800)
34-
Layer(inputs = [Input("layer1", parameter_name = "_layer2.w")], name = "layer2", bias = Bias(parameter_name = "_layer2.bias"), active_type = "sigmoid", type = "fc", size = 800)
35-
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "margin", type = "fc", size = 10)
36-
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), type = "fc", size = 10)
37-
Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w")], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "softmax", type = "fc", size = 10)
38-
Layer(type = "data", name = "label", size = 1)
39-
Layer(inputs = [Input("output"), Input("label")], type = "multi-class-cross-entropy", name = "cost")
40-
#Layer(inputs = [Input("output"), Input("label")], type = "huber", name = "cost")
41-
Evaluator(inputs=["output", "label"], type = "classification_error", name = "classification_error")
42-
Inputs("input", "label")
43-
Outputs("cost")
23+
data = data_layer(name ="input", size=784)
24+
25+
fc1 = fc_layer(input=data, size=800,
26+
bias_attr=True,
27+
act=SigmoidActivation())
28+
29+
fc2 = fc_layer(input=fc1, size=800,
30+
bias_attr=True,
31+
act=SigmoidActivation())
32+
33+
output = fc_layer(input=[fc1, fc2], size=10,
34+
bias_attr=True,
35+
act=SoftmaxActivation())
36+
37+
lbl = data_layer(name ="label", size=1)
38+
39+
cost = classification_cost(input=output, label=lbl)
40+
outputs(cost)

paddle/trainer/tests/sample_trainer_config_opt_b.conf

+22-25
Original file line numberDiff line numberDiff line change
@@ -12,32 +12,29 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
15+
from paddle.trainer_config_helpers import *
1616

1717
################################### Data Configuration ###################################
18-
TrainData(ProtoData(files = "train.list"))
18+
TrainData(ProtoData(files = "trainer/tests/mnist.list"))
1919
################################### Algorithm Configuration ###################################
20-
Settings(
21-
learning_rate_decay_a = 0.0,
22-
learning_rate_decay_b = 0.0,
23-
learning_rate = 1e-03,
24-
batch_size = 1000,
25-
algorithm = 'sgd',
26-
num_batches_per_send_parameter = 1,
27-
num_batches_per_get_parameter = 1,
28-
learning_method='momentum',
29-
)
30-
default_momentum(0.5)
20+
settings(batch_size = 1000,
21+
learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
3122
################################### Network Configuration ###################################
32-
Layer(type = "data", name = "input", size = 784)
33-
Layer(inputs = [Input("input", parameter_name = "_layer1.w")], name = "layer1", bias = Bias(parameter_name = "_layer1.bias"), active_type = "sigmoid", type = "fc", size = 800)
34-
Layer(inputs = [Input("layer1", parameter_name = "_layer2.w")], name = "layer2", bias = Bias(parameter_name = "_layer2.bias"), active_type = "sigmoid", type = "fc", size = 800)
35-
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "margin", type = "fc", size = 10)
36-
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), type = "fc", size = 10)
37-
Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w")], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "softmax", type = "fc", size = 10)
38-
Layer(type = "data", name = "label", size = 1)
39-
Layer(inputs = [Input("output"), Input("label")], type = "multi-class-cross-entropy", name = "cost")
40-
#Layer(inputs = [Input("output"), Input("label")], type = "huber", name = "cost")
41-
Evaluator(inputs=["output", "label"], type = "classification_error", name = "classification_error")
42-
Inputs("input", "label")
43-
Outputs("cost")
23+
data = data_layer(name ="input", size=784)
24+
25+
fc1 = fc_layer(input=data, size=800,
26+
bias_attr=True,
27+
act=SigmoidActivation())
28+
29+
fc2 = fc_layer(input=fc1, size=800,
30+
bias_attr=True,
31+
act=SigmoidActivation())
32+
33+
output = fc_layer(input=[fc1, fc2], size=10,
34+
bias_attr=True,
35+
act=SoftmaxActivation())
36+
37+
lbl = data_layer(name ="label", size=1)
38+
39+
cost = classification_cost(input=output, label=lbl)
40+
outputs(cost)

python/paddle/trainer_config_helpers/optimizers.py

+29-4
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,41 @@ def to_setting_kwargs(self):
7171

7272

7373
class MomentumOptimizer(BaseSGDOptimizer):
74+
"""
75+
MomentumOptimizer.
76+
77+
When sparse=True, the update scheme:
78+
79+
.. math::
80+
81+
\\alpha_t &= \\alpha_{t-1} / k \\\\
82+
\\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\
83+
u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\
84+
v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\
85+
\\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t
86+
87+
where :math:`k` is momentum, :math:`\\lambda` is decay rate,
88+
:math:`\\gamma_t` is learning rate at the t'th step.
89+
90+
:param sparse: with sparse support or not.
91+
:type sparse: bool
92+
"""
7493
def extra_settings(self):
7594
default_momentum(self.momentum)
7695

7796
def to_setting_kwargs(self):
78-
return {
79-
'learning_method': 'momentum'
80-
}
97+
if self.sparse:
98+
return {
99+
'learning_method': 'sparse_momentum'
100+
}
101+
else:
102+
return {
103+
'learning_method': 'momentum'
104+
}
81105

82-
def __init__(self, momentum=None):
106+
def __init__(self, momentum=None, sparse=False):
83107
self.momentum = momentum
108+
self.sparse = sparse
84109

85110

86111
class AdamOptimizer(BaseSGDOptimizer):

0 commit comments

Comments
 (0)