Skip to content

Commit 73ade55

Browse files
authored
Merge pull request #300 from helinwang/demo
Add demo for fault tolerant label semantic role and machine translation.
2 parents 56810cf + 1d5420f commit 73ade55

File tree

2 files changed

+468
-0
lines changed

2 files changed

+468
-0
lines changed

demo/label_semantic_roles/train_ft.py

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
import os
2+
import math
3+
import numpy as np
4+
import paddle.v2 as paddle
5+
import paddle.v2.dataset.conll05 as conll05
6+
import paddle.v2.evaluator as evaluator
7+
from paddle.v2.reader.creator import cloud_reader
8+
9+
# Address of the etcd service handed to cloud_reader in main(); the host part
# comes from the ETCD_IP environment variable.
etcd_ip = os.getenv("ETCD_IP")
if etcd_ip is None:
    # Fail with an explicit message instead of the opaque TypeError that
    # concatenating a str with None would raise on the next line.
    raise RuntimeError(
        "environment variable ETCD_IP must be set to the etcd server address")
etcd_endpoint = "http://" + etcd_ip + ":" + "2379"

# Dictionaries returned by the conll05 dataset module; their lengths size the
# integer-sequence inputs and the output layer below.
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2  # value range of the 'mark_data' input
word_dim = 32  # embedding size for the word/context and predicate inputs
mark_dim = 5  # embedding size for the mark input
hidden_dim = 512  # size of every hidden mixed layer
depth = 8  # total number of stacked LSTM layers built by db_lstm()
default_std = 1 / math.sqrt(hidden_dim) / 3.0  # default initial_std for parameters
mix_hidden_lr = 1e-3  # learning rate for the hidden projections and the CRF weights
24+
25+
26+
def d_type(size):
    """Return paddle's integer-value-sequence data type built from `size`."""
    sequence_type = paddle.data_type.integer_value_sequence(size)
    return sequence_type
28+
29+
30+
def db_lstm():
    """Build the stacked LSTM network and return its final mixed layer.

    Eight integer-sequence inputs (word, predicate, five context words and a
    mark) are embedded and projected into a first hidden layer.  On top of it
    `depth` LSTM layers are stacked; each level after the first receives both
    the previous mixed layer and the previous LSTM output ("direct edges"),
    and odd-numbered levels run in reverse.  The returned layer has
    `label_dict_len` outputs.
    """
    #8 features
    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))

    ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2 = [
        paddle.layer.data(name=ctx_name, type=d_type(word_dict_len))
        for ctx_name in ('ctx_n2_data', 'ctx_n1_data', 'ctx_0_data',
                         'ctx_p1_data', 'ctx_p2_data')
    ]
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))

    # 'emb' is static here; main() fills it from a file before training.
    shared_emb_attr = paddle.attr.Param(
        name='emb', initial_std=0., is_static=True)
    zero_std_attr = paddle.attr.Param(initial_std=0.)
    default_std_attr = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=zero_std_attr)

    # The six word-like inputs all share the single 'emb' parameter.
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=word_like, param_attr=shared_emb_attr)
        for word_like in (word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2)
    ]
    emb_layers += [predicate_embedding, mark_embedding]

    first_projections = [
        paddle.layer.full_matrix_projection(
            input=emb, param_attr=default_std_attr) for emb in emb_layers
    ]
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim, bias_attr=default_std_attr, input=first_projections)

    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=zero_std_attr,
        param_attr=lstm_para_attr)

    def direct_edge_inputs(mixed_layer, lstm_layer):
        # Projections of the previous level's mixed layer and LSTM output.
        return [
            paddle.layer.full_matrix_projection(
                input=mixed_layer, param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=lstm_layer, param_attr=lstm_para_attr),
        ]

    #stack L-LSTM and R-LSTM with direct edges
    prev_mixed, prev_lstm = hidden_0, lstm_0

    for level in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=default_std_attr,
            input=direct_edge_inputs(prev_mixed, prev_lstm))

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=(level % 2 == 1),
            bias_attr=zero_std_attr,
            param_attr=lstm_para_attr)

        prev_mixed, prev_lstm = mix_hidden, lstm

    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=default_std_attr,
        input=direct_edge_inputs(prev_mixed, prev_lstm))

    return feature_out
117+
118+
119+
def load_parameter(file_name, h, w):
    """Read a float32 parameter file into an array of shape (h, w).

    The first 16 bytes of the file are treated as a header and discarded;
    everything after them is read as float32 values.
    """
    header_size = 16
    with open(file_name, 'rb') as param_file:
        param_file.read(header_size)  # skip header.
        flat_values = np.fromfile(param_file, dtype=np.float32)
        return flat_values.reshape(h, w)
123+
124+
125+
def main():
    """Train the db_lstm network with a CRF cost, then decode one test sample.

    The function initialises paddle, builds the network with a CRF cost and a
    CRF decoding layer sharing the 'crfw' parameter, loads the 'emb'
    parameter from the file returned by conll05.get_embedding(), trains for
    one pass from the cloud reader, writes 'params_pass_<pass_id>.tar' at the
    end of each pass, and finally prints the labels predicted for the first
    sample of the conll05 test set.
    """
    paddle.init()

    # define network topology
    feature_out = db_lstm()
    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
    crf_cost = paddle.layer.crf(
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(
            name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))

    # The decoding layer reuses the 'crfw' parameter of the cost layer.
    crf_dec = paddle.layer.crf_decoding(
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))
    evaluator.sum(input=crf_dec)

    # create parameters
    parameters = paddle.parameters.create(crf_cost)
    # NOTE(review): 44068 x 32 presumably equals word_dict_len x word_dim;
    # confirm against the embedding file before changing either constant.
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000))

    trainer = paddle.trainer.SGD(
        cost=crf_cost,
        parameters=parameters,
        update_equation=optimizer,
        extra_layers=crf_dec)

    reader = paddle.batch(
        paddle.reader.shuffle(
            cloud_reader(
                ["/pfs/dlnel/public/dataset/conll05/conl105_train-*"],
                etcd_endpoint),
            buf_size=8192),
        batch_size=10)

    # Position of each input layer's data inside one sample tuple.
    feeding = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }

    def event_handler(event):
        # NOTE(review): the periodic tests below reuse the training reader;
        # confirm that evaluating on the training stream is intended.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))
            if event.batch_id % 1000 == 0:
                result = trainer.test(reader=reader, feeding=feeding)
                print("\nTest with Pass %d, Batch %d, %s" % (
                    event.pass_id, event.batch_id, result.metrics))

        if isinstance(event, paddle.event.EndPass):
            # save parameters; a tar archive is binary data, so open the
            # file in binary mode.
            with open('params_pass_%d.tar' % event.pass_id, 'wb') as tar_file:
                parameters.to_tar(tar_file)

            result = trainer.test(reader=reader, feeding=feeding)
            print("\nTest with Pass %d, %s" % (event.pass_id, result.metrics))

    trainer.train(
        reader=reader,
        event_handler=event_handler,
        num_passes=1,
        feeding=feeding)

    # Take only the first test sample and keep its eight input fields
    # (the ninth field, the target label, is dropped for inference).
    test_creator = paddle.dataset.conll05.test()
    test_data = []
    for item in test_creator():
        test_data.append(item[0:8])
        break

    predict = paddle.layer.crf_decoding(
        size=label_dict_len,
        input=feature_out,
        param_attr=paddle.attr.Param(name='crfw'))
    probs = paddle.infer(
        output_layer=predict,
        parameters=parameters,
        input=test_data,
        field='id')
    # One predicted label id is expected per token of the sample.
    assert len(probs) == len(test_data[0][0])
    labels_reverse = {index: label for label, index in label_dict.items()}
    pre_lab = [labels_reverse[i] for i in probs]
    print(pre_lab)
226+
227+
228+
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)