
Commit 5957778

Merge pull request #13960 from seiriosPlus/fix_1_release_1.0.0
Bug fix in release 1.0.0
2 parents: 3cb8da9 + ca631ff

6 files changed, +122 −24 lines changed

paddle/fluid/operators/fill_constant_op.cc

+8 −1

@@ -70,6 +70,12 @@ class FillConstantOp : public framework::OperatorBase {
   }
 };
 
+class FillConstantOpVarTypeInference : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc &op_desc,
+                  framework::BlockDesc *block) const override {}
+};
+
 class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
@@ -102,4 +108,5 @@ Fill up a variable with specified constant value.
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(fill_constant, ops::FillConstantOp,
                   ops::FillConstantInferShape, ops::FillConstantOpMaker,
-                  paddle::framework::EmptyGradOpMaker);
+                  paddle::framework::EmptyGradOpMaker,
+                  ops::FillConstantOpVarTypeInference);
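Note: FillConstantOpVarTypeInference is deliberately a no-op override. Registering it appears intended to stop fill_constant from re-inferring the type of an already-declared output variable (for example one set up as SELECTED_ROWS by the distribute transpiler); that reading is inferred from the diff, not stated in the commit. From the Python side the op behaves as before; a minimal sketch, assuming the paddle.fluid 1.0 API:

import paddle.fluid as fluid

# Build and run a tiny program that fills a tensor with a constant.
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.fill_constant(shape=[2, 3], dtype='float32', value=0.01)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)
out, = exe.run(main, fetch_list=[x])
print(out)  # 2x3 array filled with 0.01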

python/paddle/fluid/framework.py

+6 −2

@@ -1522,13 +1522,17 @@ def _lr_schedule_guard(self):
         >>> with program.lr_schedule_guard():
         >>>     lr = lr * decay
         """
+
+        tmp_role = self._current_role
+        tmp_var = self._op_role_var
+
         OpRole = core.op_proto_and_checker_maker.OpRole
         self._current_role = OpRole.LRSched
         # TODO(typhoonzero): how to set target learning rate var
         self._op_role_var = []
         yield
-        self._op_role_var = []
-        self._current_role = OpRole.Forward
+        self._op_role_var = tmp_var
+        self._current_role = tmp_role
 
     def __str__(self):
         """

python/paddle/fluid/optimizer.py

+3 −2

@@ -15,7 +15,7 @@
 from __future__ import print_function
 import re
 from collections import defaultdict
-from paddle.fluid.framework import Program, Variable, name_scope
+from paddle.fluid.framework import Program, Variable, name_scope, default_main_program
 from . import framework
 from . import layers
 from .backward import append_backward
@@ -111,7 +111,8 @@ def _create_param_lr(self, param_and_grad):
         if param_lr == 1.0:
             return self._global_learning_rate()
         else:
-            return self._global_learning_rate() * param_lr
+            with default_main_program()._lr_schedule_guard():
+                return self._global_learning_rate() * param_lr
 
     def _create_accumulators(self, block, parameters):
         """Create all accumulators needed by the parameters

python/paddle/fluid/tests/unittests/dist_simnet_bow.py

+17 −5

@@ -81,7 +81,10 @@ def get_optimizer():
     return optimizer
 
 
-def train_network(batch_size, is_distributed=False, is_sparse=False):
+def train_network(batch_size,
+                  is_distributed=False,
+                  is_sparse=False,
+                  is_self_contained_lr=False):
     # query
     q = fluid.layers.data(
         name="query_ids", shape=[1], dtype="int64", lod_level=1)
@@ -93,7 +96,9 @@ def train_network(batch_size, is_distributed=False, is_sparse=False):
         param_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01),
             name="__emb__",
-            learning_rate=emb_lr),
+            learning_rate=emb_lr) if is_self_contained_lr else fluid.ParamAttr(
+                initializer=fluid.initializer.Constant(value=0.01),
+                name="__emb__"),
         is_sparse=is_sparse)
     ## vsum
     q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
@@ -119,7 +124,9 @@ def train_network(batch_size, is_distributed=False, is_sparse=False):
         param_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01),
             name="__emb__",
-            learning_rate=emb_lr),
+            learning_rate=emb_lr) if is_self_contained_lr else fluid.ParamAttr(
+                initializer=fluid.initializer.Constant(value=0.01),
+                name="__emb__"),
         is_sparse=is_sparse)
     ## vsum
     pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
@@ -144,7 +151,9 @@ def train_network(batch_size, is_distributed=False, is_sparse=False):
         param_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.01),
             name="__emb__",
-            learning_rate=emb_lr),
+            learning_rate=emb_lr) if is_self_contained_lr else fluid.ParamAttr(
+                initializer=fluid.initializer.Constant(value=0.01),
+                name="__emb__"),
         is_sparse=is_sparse)
     ## vsum
     nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
@@ -220,7 +229,10 @@ class TestDistSimnetBow2x2(TestDistRunnerBase):
     def get_model(self, batch_size=2):
         # Train program
         avg_cost, acc, predict = \
-            train_network(batch_size, bool(int(os.environ["IS_DISTRIBUTED"])), bool(int(os.environ["IS_SPARSE"])))
+            train_network(batch_size,
+                          bool(int(os.environ["IS_DISTRIBUTED"])),
+                          bool(int(os.environ["IS_SPARSE"])),
+                          bool(int(os.environ["IS_SELF_CONTAINED_LR"])))
 
         inference_program = fluid.default_main_program().clone()
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py

+74 −4

@@ -25,7 +25,11 @@ def _setup_config(self):
         self._enforce_place = "CPU"
 
     def test_simnet_bow(self):
-        need_envs = {"IS_DISTRIBUTED": '0', "IS_SPARSE": '0'}
+        need_envs = {
+            "IS_DISTRIBUTED": '0',
+            "IS_SPARSE": '0',
+            'IS_SELF_CONTAINED_LR': '1'
+        }
         self.check_with_place(
             "dist_simnet_bow.py",
             delta=1e-5,
@@ -39,7 +43,11 @@ def _setup_config(self):
         self._enforce_place = "CPU"
 
     def test_simnet_bow(self):
-        need_envs = {"IS_DISTRIBUTED": '0', "IS_SPARSE": '0'}
+        need_envs = {
+            "IS_DISTRIBUTED": '0',
+            "IS_SPARSE": '0',
+            'IS_SELF_CONTAINED_LR': '1'
+        }
         self.check_with_place(
             "dist_simnet_bow.py",
             delta=100,
@@ -53,7 +61,11 @@ def _setup_config(self):
         self._enforce_place = "CPU"
 
     def test_simnet_bow(self):
-        need_envs = {"IS_DISTRIBUTED": '0', "IS_SPARSE": '1'}
+        need_envs = {
+            "IS_DISTRIBUTED": '0',
+            "IS_SPARSE": '1',
+            'IS_SELF_CONTAINED_LR': '1'
+        }
         self.check_with_place(
             "dist_simnet_bow.py",
             delta=1e-5,
@@ -67,13 +79,71 @@ def _setup_config(self):
         self._enforce_place = "CPU"
 
     def test_simnet_bow(self):
-        need_envs = {"IS_DISTRIBUTED": '0', "IS_SPARSE": '1'}
+        need_envs = {
+            "IS_DISTRIBUTED": '0',
+            "IS_SPARSE": '1',
+            'IS_SELF_CONTAINED_LR': '1'
+        }
         self.check_with_place(
             "dist_simnet_bow.py",
             delta=100,
             check_error_log=False,
             need_envs=need_envs)
 
 
+class TestDistSimnetBow2x2LookupTableSync(TestDistBase):
+    def _setup_config(self):
+        self._sync_mode = True
+        self._enforce_place = "CPU"
+
+    def test_simnet_bow(self):
+        need_envs = {
+            "IS_DISTRIBUTED": '1',
+            "IS_SPARSE": '1',
+            'IS_SELF_CONTAINED_LR': '1'
+        }
+        self.check_with_place(
+            "dist_simnet_bow.py",
+            delta=1e-5,
+            check_error_log=False,
+            need_envs=need_envs)
+
+
+class TestDistSimnetBow2x2LookupTableAsync(TestDistBase):
+    def _setup_config(self):
+        self._sync_mode = False
+        self._enforce_place = "CPU"
+
+    def test_simnet_bow(self):
+        need_envs = {
+            "IS_DISTRIBUTED": '1',
+            "IS_SPARSE": '1',
+            'IS_SELF_CONTAINED_LR': '1'
+        }
+        self.check_with_place(
+            "dist_simnet_bow.py",
+            delta=100,
+            check_error_log=False,
+            need_envs=need_envs)
+
+
+class TestDistSimnetBow2x2LookupTableNotContainLRSync(TestDistBase):
+    def _setup_config(self):
+        self._sync_mode = True
+        self._enforce_place = "CPU"
+
+    def test_simnet_bow(self):
+        need_envs = {
+            "IS_DISTRIBUTED": '1',
+            "IS_SPARSE": '1',
+            'IS_SELF_CONTAINED_LR': '0'
+        }
+        self.check_with_place(
+            "dist_simnet_bow.py",
+            delta=1e-5,
+            check_error_log=False,
+            need_envs=need_envs)
+
+
 if __name__ == "__main__":
     unittest.main()
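Note: each case passes its configuration to dist_simnet_bow.py through need_envs. The three new LookupTable cases enable IS_DISTRIBUTED=1 with IS_SPARSE=1 in sync and async mode, and the NotContainLRSync variant sets IS_SELF_CONTAINED_LR=0 to exercise the embedding without its own learning rate. On the trainer side the flags are decoded with bool(int(os.environ[...])); a standalone sketch of that decoding:

import os

# The tests export '0'/'1' strings; the trainer turns them into booleans.
os.environ.setdefault("IS_DISTRIBUTED", "1")
os.environ.setdefault("IS_SPARSE", "1")
os.environ.setdefault("IS_SELF_CONTAINED_LR", "0")

is_distributed = bool(int(os.environ["IS_DISTRIBUTED"]))
is_sparse = bool(int(os.environ["IS_SPARSE"]))
is_self_contained_lr = bool(int(os.environ["IS_SELF_CONTAINED_LR"]))
print(is_distributed, is_sparse, is_self_contained_lr)  # True True False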

python/paddle/fluid/transpiler/distribute_transpiler.py

+14 −10

@@ -1118,6 +1118,7 @@ def _replace_lookup_table_op_with_prefetch(self, program,
 
     def _split_table_grad_and_add_send_vars(self, program, pserver_endpoints):
         # 2. add split_ids_op and send_op to send gradient to pservers
+
         # there should only be one table_name
         all_ops = program.global_block().ops
         table_grad_name = grad_var_name(self.table_name)
@@ -1142,7 +1143,7 @@ def _split_table_grad_and_add_send_vars(self, program, pserver_endpoints):
             if self.sync_mode else []
             },
             attrs={
-                "sync_mode": self.sync_mode,
+                "sync_mode": not self.sync_mode,
                 "epmap": pserver_endpoints,
                 RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE,
                 OP_ROLE_VAR_ATTR_NAME: [
@@ -1188,7 +1189,15 @@ def _create_prefetch_block(self, pserver_index, pserver_program,
     def _create_table_optimize_block(self, pserver_index, pserver_program,
                                      pre_block_idx, grad_to_block_id):
         # STEP: create table optimize block
+        table_opt_block = pserver_program._create_block(pre_block_idx)
         # create table param and grad var in pserver program
+        # create table optimize block in pserver program
+        table_opt_op = [
+            op for op in self.optimize_ops
+            if 'Param' in op.input_names and op.input("Param")[0] ==
+            self.table_name
+        ][0]
+
         origin_param_var = self.origin_program.global_block().vars[
             self.table_name]
 
@@ -1204,19 +1213,16 @@ def _create_table_optimize_block(self, pserver_index, pserver_program,
             dtype=origin_param_var.dtype,
             type=core.VarDesc.VarType.SELECTED_ROWS,
             persistable=True)
+
         # parameter must be selected rows
         param_var.desc.set_type(core.VarDesc.VarType.SELECTED_ROWS)
         grad_var = pserver_program.global_block()._clone_variable(
             self.origin_program.global_block().vars[grad_var_name(
                 self.table_name)])
 
-        # create table optimize block in pserver program
-        table_opt_op = [
-            op for op in self.optimize_ops
-            if 'Param' in op.input_names and op.input("Param")[0] ==
-            self.table_name
-        ][0]
-        table_opt_block = pserver_program._create_block(pre_block_idx)
+        lr_var = pserver_program.global_block()._clone_variable(
+            self.origin_program.global_block().vars[table_opt_op.input(
+                "LearningRate")[0]])
 
         if self.sync_mode:
             # create grad vars in pserver program
@@ -1248,8 +1254,6 @@ def _create_table_optimize_block(self, pserver_index, pserver_program,
             grad_var = pserver_program.global_block()._rename_var(
                 origin_grad_name, splited_grad_name)
 
-        lr_var = pserver_program.global_block().vars[table_opt_op.input(
-            "LearningRate")[0]]
         inputs = {
             "Param": [param_var],
             "Grad": [grad_var],
