
07/Label semantic roles #5798


Merged: 15 commits, Nov 22, 2017
4 changes: 2 additions & 2 deletions paddle/operators/linear_chain_crf_op.h
@@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
ll -= std::log(sum);
// Now ll is equal to -log(Z).

const int* lbl = label.data<int>();
const int64_t* lbl = label.data<int64_t>();
PADDLE_ENFORCE_LT(
static_cast<size_t>(*std::max_element(lbl, lbl + seq_length)), tag_num,
"An invalid tag label that execesses the largest tag number.");
@@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
Tensor* emission_grad) const {
const T* w_exps = transition_exps.data<T>();
const T* x_exps = emission_exps.data<T>();
const int* label_value = label.data<int>();
const int64_t* label_value = label.data<int64_t>();
T* beta_value = beta->data<T>();

auto x_dims = emission_exps.dims();
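The kernel now reads labels through data<int64_t>(), so label tensors fed to linear_chain_crf must hold 64-bit integers. A minimal numpy sketch (values are illustrative; it mirrors the unit-test change at the bottom of this diff):

    import numpy as np

    TAG_NUM = 17      # illustrative tag count
    seq_length = 12   # illustrative sequence length

    # labels must now be int64, not int32
    labels = np.random.randint(
        low=0, high=TAG_NUM, size=(seq_length, 1), dtype="int64")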
5 changes: 4 additions & 1 deletion python/paddle/v2/fluid/layer_helper.py
@@ -126,7 +126,10 @@ def create_parameter(self, attr, shape, dtype, suffix='w',
self.startup_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr_copy)
return self.main_program.global_block().create_parameter(
name=attr_copy['name'], dtype=dtype, shape=shape)
name=attr_copy['name'],
dtype=dtype,
shape=shape,
trainable=attr_copy.get('trainable', True))

def create_tmp_variable(self, dtype):
return self.main_program.current_block().create_var(
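With this change the parameter created in the main program carries the trainable flag from param_attr (defaulting to True) instead of dropping it. A hedged usage sketch, modeled on the embedding calls in the new book test below:

    # A parameter declared with trainable=False is still created in both programs,
    # but the optimization pass (see optimizer.py below) will skip it.
    emb = layers.embedding(
        input=word,
        size=[word_dict_len, word_dim],
        param_attr={'name': 'emb', 'trainable': False})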
45 changes: 44 additions & 1 deletion python/paddle/v2/fluid/layers.py
@@ -112,6 +112,7 @@ def _get_default_bias_initializer():
def embedding(input,
size,
is_sparse=False,
param_initializer=None,
param_attr=None,
data_type='float32',
main_program=None,
@@ -136,9 +137,16 @@ def embedding(input,
to the LayerHelper constructor.

"""

def _get_default_param_initializer():
return XavierInitializer()

helper = LayerHelper('embedding', **locals())
w = helper.create_parameter(
attr=helper.param_attr, shape=size, dtype=data_type)
attr=helper.param_attr,
shape=size,
dtype=data_type,
initializer=param_initializer or _get_default_param_initializer())
tmp = helper.create_tmp_variable(data_type)
helper.append_op(
type='lookup_table',
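A hypothetical call sketch for the updated signature; leaving param_initializer unset falls back to the Xavier default defined above (the import path for XavierInitializer is an assumption, not part of this diff):

    from paddle.v2.fluid.initializer import XavierInitializer  # assumed path

    emb = layers.embedding(
        input=word,
        size=[vocab_size, word_dim],
        data_type='float32',
        param_initializer=XavierInitializer())  # omit to use the default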
@@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None):
return out


def linear_chain_crf(input,
label,
param_attr=None,
param_initializer=None,
main_program=None,
startup_program=None):
def _get_default_param_initializer():
return XavierInitializer()

helper = LayerHelper('linear_chain_crf', **locals())
size = input.shape[1]
transition = helper.create_parameter(
attr=helper.param_attr,
shape=[size + 2, size],
dtype=helper.input_dtype(),
initializer=param_initializer or _get_default_param_initializer())
alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(
type='linear_chain_crf',
inputs={"Emission": [input],
"Transition": transition,
"Label": label},
outputs={
"Alpha": [alpha],
"EmissionExps": [emission_exps],
"TransitionExps": transition_exps,
"LogLikelihood": log_likelihood
})

return log_likelihood


def assign(input, output, main_program=None, startup_program=None):
helper = LayerHelper('assign', **locals())
helper.append_op(
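A minimal usage sketch for the new linear_chain_crf layer, following the call made in the book test added below (feature_out, target, and mix_hidden_lr come from that test):

    target = layers.data(name='target', shape=[1], data_type='int64')
    crf_cost = layers.linear_chain_crf(
        input=feature_out,   # emission scores, one column per tag
        label=target,
        param_attr={'name': 'crfw',
                    'learning_rate': mix_hidden_lr})
    avg_cost = layers.mean(x=crf_cost)  # minimized with SGD in the test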
3 changes: 2 additions & 1 deletion python/paddle/v2/fluid/optimizer.py
@@ -170,7 +170,8 @@ def create_optimization_pass(self,

optimize_ops = []
for param_and_grad in parameters_and_grads:
if param_and_grad[1] is not None:
if param_and_grad[0].trainable is True and param_and_grad[
1] is not None:
optimize_op = self._append_optimize_op(loss.block,
param_and_grad)
optimize_ops.append(optimize_op)
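Read as a filter, the new condition appends optimize ops only for parameters that are both trainable and have a gradient; an equivalent sketch (variable names are illustrative):

    trainable_pairs = [
        (param, grad) for param, grad in parameters_and_grads
        if param.trainable and grad is not None
    ]
    # frozen parameters, such as the pretrained 'emb' table in the book test, are skipped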
192 changes: 192 additions & 0 deletions python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
@@ -0,0 +1,192 @@
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor, g_scope
from paddle.v2.fluid.optimizer import SGDOptimizer

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3

IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20

embedding_name = 'emb'


def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)


def db_lstm():
# 8 features
word = layers.data(name='word_data', shape=[1], data_type='int64')
predicate = layers.data(name='verb_data', shape=[1], data_type='int64')
ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64')
ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64')
ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64')
ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64')
ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64')
mark = layers.data(name='mark_data', shape=[1], data_type='int64')

predicate_embedding = layers.embedding(
input=predicate,
size=[pred_len, word_dim],
data_type='float32',
is_sparse=IS_SPARSE,
param_attr={'name': 'vemb'})

mark_embedding = layers.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
data_type='float32',
is_sparse=IS_SPARSE)

word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
layers.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr={'name': embedding_name,
'trainable': False}) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)

hidden_0_layers = [
layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
]

hidden_0 = layers.sums(input=hidden_0_layers)

lstm_0 = layers.dynamic_lstm(
input=hidden_0,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid')

# stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]

for i in range(1, depth):
mix_hidden = layers.sums(input=[
layers.fc(input=input_tmp[0], size=hidden_dim),
layers.fc(input=input_tmp[1], size=hidden_dim)
])

lstm = layers.dynamic_lstm(
input=mix_hidden,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
is_reverse=((i % 2) == 1))

input_tmp = [mix_hidden, lstm]

feature_out = layers.sums(input=[
layers.fc(input=input_tmp[0], size=label_dict_len),
layers.fc(input=input_tmp[1], size=label_dict_len)
])

return feature_out


def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = core.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
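# Example: two sequences of lengths 3 and 2 give seq_lens == [3, 2],
# lod == [0, 3, 5], and an int64 tensor of shape (5, 1).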


def main():
# define network topology
feature_out = db_lstm()
target = layers.data(name='target', shape=[1], data_type='int64')
crf_cost = layers.linear_chain_crf(
input=feature_out,
label=target,
param_attr={"name": 'crfw',
"learning_rate": mix_hidden_lr})
avg_cost = layers.mean(x=crf_cost)
# TODO(qiao)
# 1. add crf_decode_layer and evaluator
# 2. try other optimizers and check why the output becomes NaN
sgd_optimizer = SGDOptimizer(learning_rate=0.0001)
opts = sgd_optimizer.minimize(avg_cost)

train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192),
batch_size=BATCH_SIZE)
place = core.CPUPlace()
exe = Executor(place)

exe.run(framework.default_startup_program())

embedding_param = g_scope.find_var(embedding_name).get_tensor()
embedding_param.set(
load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place)
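# The 'emb' table above is shared by the six context-word embeddings in db_lstm
# and was created with trainable=False, so SGD leaves the pretrained values untouched.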

batch_id = 0
for pass_id in xrange(PASS_NUM):
for data in train_data():
word_data = to_lodtensor(map(lambda x: x[0], data), place)
ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place)
ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place)
ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place)
ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place)
ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place)
verb_data = to_lodtensor(map(lambda x: x[6], data), place)
mark_data = to_lodtensor(map(lambda x: x[7], data), place)
target = to_lodtensor(map(lambda x: x[8], data), place)

outs = exe.run(framework.default_main_program(),
feed={
'word_data': word_data,
'ctx_n2_data': ctx_n2_data,
'ctx_n1_data': ctx_n1_data,
'ctx_0_data': ctx_0_data,
'ctx_p1_data': ctx_p1_data,
'ctx_p2_data': ctx_p2_data,
'verb_data': verb_data,
'mark_data': mark_data,
'target': target
},
fetch_list=[avg_cost])
avg_cost_val = np.array(outs[0])

if batch_id % 10 == 0:
print("avg_cost=" + str(avg_cost_val))

# exit early for CI
exit(0)

batch_id = batch_id + 1


if __name__ == '__main__':
main()
32 changes: 25 additions & 7 deletions python/paddle/v2/fluid/tests/test_layers.py
@@ -1,8 +1,8 @@
import unittest

import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program
import paddle.v2.fluid.core as core
import unittest


class TestBook(unittest.TestCase):
@@ -20,7 +20,8 @@ def test_fit_a_line(self):
avg_cost = layers.mean(x=cost, main_program=program)
self.assertIsNotNone(avg_cost)
program.append_backward(avg_cost)
print str(program)

# print str(program)

def test_recognize_digits_mlp(self):
program = Program()
@@ -49,7 +50,7 @@ def test_recognize_digits_mlp(self):
input=predict, label=label, main_program=program)
avg_cost = layers.mean(x=cost, main_program=program)
self.assertIsNotNone(avg_cost)
print str(program)
# print str(program)

def test_simple_conv2d(self):
program = Program()
@@ -64,7 +65,7 @@ def test_simple_conv2d(self):
filter_size=[4, 4],
main_program=program)

print str(program)
# print str(program)

def test_recognize_digits_conv(self):
program = Program()
@@ -103,7 +104,7 @@ def test_recognize_digits_conv(self):

program.append_backward(avg_cost)

print str(program)
# print str(program)

def test_word_embedding(self):
program = Program()
@@ -164,7 +165,24 @@ def test_word_embedding(self):
avg_cost = layers.mean(x=cost, main_program=program)
self.assertIsNotNone(avg_cost)

print str(program)
# print str(program)

def test_linear_chain_crf(self):
program = Program()

# Change g_program, so that the remaining layers use `g_program`
images = layers.data(
name='pixel',
shape=[784],
data_type='float32',
main_program=program)
label = layers.data(
name='label', shape=[1], data_type='int32', main_program=program)
hidden = layers.fc(input=images, size=128, main_program=program)
crf = layers.linear_chain_crf(
input=hidden, label=label, main_program=program)

# print str(program)


if __name__ == '__main__':
2 changes: 1 addition & 1 deletion python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
@@ -104,7 +104,7 @@ def set_test_data(self):
transition_exps = np.exp(transition)

labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")

self.inputs = {
"Emission": (emission, lod),