Skip to content

Commit 4615c51

Browse files
luotao1 and lcy-seso
authored and committed
beam search api and unit test in hierarchical rnn (#122)
1 parent baaaa0b commit 4615c51

File tree

8 files changed

+184
-74
lines changed

8 files changed

+184
-74
lines changed

demo/seqToseq/seqToseq_net.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,13 @@ def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
171171
beam_gen = beam_search(name=decoder_group_name,
172172
step=gru_decoder_with_attention,
173173
input=group_inputs,
174-
id_input=data_layer(name="sent_id",
175-
size=1),
176-
dict_file=trg_dict_path,
177174
bos_id=0,
178175
eos_id=1,
179176
beam_size=beam_size,
180-
max_length=max_length,
181-
result_file=gen_trans_file)
177+
max_length=max_length)
178+
179+
seqtext_printer_evaluator(input=beam_gen,
180+
id_input=data_layer(name="sent_id", size=1),
181+
dict_file=trg_dict_path,
182+
result_file=gen_trans_file)
182183
outputs(beam_gen)

doc/algorithm/rnn/rnn.rst

+12-8
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,17 @@ After training the model, we can use it to generate sequences. A common practice
202202
* use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the generated token at the last time step for the input at the current time step.
203203
* use :code:`beam_search` function. This function needs to set:
204204

205-
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
206-
- :code:`dict_file`: the dictionary file for converting word id to word.
207205
- :code:`bos_id`: the start token. Every sentence starts with the start token.
208206
- :code:`eos_id`: the end token. Every sentence ends with the end token.
209207
- :code:`beam_size`: the beam size used in beam search.
210208
- :code:`max_length`: the maximum length of the generated sentences.
211-
- :code:`result_file`: the path of the generation result file.
212209

210+
* use :code:`seqtext_printer_evaluator` to print text according to index matrix and dictionary. This function needs to set:
211+
212+
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
213+
- :code:`dict_file`: the dictionary file for converting word id to word.
214+
- :code:`result_file`: the path of the generation result file.
215+
213216
The code is listed below:
214217

215218
.. code-block:: python
@@ -230,14 +233,15 @@ The code is listed below:
230233
beam_gen = beam_search(name=decoder_group_name,
231234
step=gru_decoder_with_attention,
232235
input=group_inputs,
233-
id_input=data_layer(name="sent_id",
234-
size=1),
235-
dict_file=trg_dict_path,
236236
bos_id=0, # Beginning token.
237237
eos_id=1, # End of sentence token.
238238
beam_size=beam_size,
239-
max_length=max_length,
240-
result_file=gen_trans_file)
239+
max_length=max_length)
240+
241+
seqtext_printer_evaluator(input=beam_gen,
242+
id_input=data_layer(name="sent_id", size=1),
243+
dict_file=trg_dict_path,
244+
result_file=gen_trans_file)
241245
outputs(beam_gen)
242246
243247
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
0 1 2 3 4
2+
1 2 3 4
3+
1 2 3 4
4+
1 2 3 4
5+
1 2 3 4
6+
1 2 3 4
7+
1 2 3 4
8+
1 2 3 4
9+
1 2 3 4
10+
1 2 3 4
11+
1 2 3 4
12+
1 2 3 4
13+
1 2 3 4
14+
1 2 3 4
15+
1 2 3 4
16+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#edit-mode: -*- python -*-
2+
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
17+
from paddle.trainer_config_helpers import *
18+
19+
settings(batch_size=15, learning_rate=0)
20+
21+
num_words = 5
22+
beam_flag = get_config_arg('beam_search', bool, False)
23+
24+
sent_id = data_layer(name="sent_id", size=1)
25+
26+
# This layer has no actual use, but only to decide batch_size in generation.
27+
# When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
28+
dummy_data = data_layer(name="dummy_data_input", size=2)
29+
30+
def outer_step(dummy_data):
31+
32+
gen_inputs = [StaticInput(input=dummy_data, size=2, is_seq=True),
33+
GeneratedInput(size=num_words,
34+
embedding_name="wordvec",
35+
embedding_size=num_words)]
36+
37+
def inner_step(dummy_memory, predict_word):
38+
39+
# simplified RNN for testing
40+
with mixed_layer(size=num_words) as layer:
41+
layer += full_matrix_projection(input=predict_word,
42+
param_attr=ParamAttr(name="transtable"))
43+
44+
with mixed_layer(size=num_words, act=ExpActivation()) as out:
45+
out += trans_full_matrix_projection(input=layer,
46+
param_attr=ParamAttr(name="wordvec"))
47+
48+
return out
49+
50+
beam_gen = beam_search(name="rnn_gen",
51+
step=inner_step,
52+
input=gen_inputs,
53+
bos_id=0,
54+
eos_id=num_words-1,
55+
beam_size=2 if beam_flag else 1,
56+
num_results_per_sample=2 if beam_flag else 1,
57+
max_length=10)
58+
return beam_gen
59+
60+
beam_gen_concat = recurrent_group(name="rnn_gen_concat",
61+
step=outer_step,
62+
input=[SubsequenceInput(dummy_data)])
63+
64+
seqtext_printer_evaluator(input=beam_gen_concat,
65+
id_input=sent_id,
66+
dict_file="./trainer/tests/test_gen_dict.txt",
67+
result_file="./trainer/tests/dump_text.test")
68+
#outputs(beam_gen_concat)
69+
# In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
70+
# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs
71+
# as follows. Note that "__beam_search_predict__" is the default output name of beam_search.
72+
Inputs("sent_id","dummy_data_input")
73+
Outputs("__beam_search_predict__")

paddle/trainer/tests/sample_trainer_rnn_gen.conf

+4-3
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,16 @@ def step(dummy_memory, predict_word):
4848
beam_gen = beam_search(name="rnn_gen",
4949
step=step,
5050
input=gen_inputs,
51-
id_input=sent_id,
52-
dict_file="./trainer/tests/test_gen_dict.txt",
53-
result_file="./trainer/tests/dump_text.test",
5451
bos_id=0,
5552
eos_id=num_words-1,
5653
beam_size=2 if beam_flag else 1,
5754
num_results_per_sample=2 if beam_flag else 1,
5855
max_length=10)
5956

57+
seqtext_printer_evaluator(input=beam_gen,
58+
id_input=sent_id,
59+
dict_file="./trainer/tests/test_gen_dict.txt",
60+
result_file="./trainer/tests/dump_text.test")
6061
#outputs(beam_gen)
6162
# In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
6263
# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs

paddle/trainer/tests/test_recurrent_machine_generation.cpp

+44-18
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15-
1615
#include <fstream>
1716

1817
#include <paddle/utils/PythonUtil.h>
@@ -24,6 +23,8 @@ using namespace paddle; // NOLINT
2423
using namespace std; // NOLINT
2524

2625
static const string& CONFIG_FILE = "trainer/tests/sample_trainer_rnn_gen.conf";
26+
static const string& NEST_CONFIG_FILE =
27+
"trainer/tests/sample_trainer_nest_rnn_gen.conf";
2728
static const string& OUTPUT_DIR = "trainer/tests/dump_text.test";
2829
static string modelDir = "trainer/tests/rnn_gen_test_model_dir/t1"; // NOLINT
2930
static string expectFile = // NOLINT
@@ -50,32 +51,52 @@ void checkOutput(const string& expRetFile) {
5051
}
5152
}
5253

53-
void prepareInArgs(vector<Argument>& inArgs,
54-
const size_t batchSize, bool useGpu) {
54+
void prepareInArgs(vector<Argument>& inArgs, const size_t batchSize,
55+
bool useGpu, bool hasSubseq) {
5556
inArgs.clear();
5657
// sentence id
5758
Argument sentId;
5859
sentId.value = nullptr;
59-
IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
60-
for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
60+
if (hasSubseq) {
61+
// as there is only one sequence, there is only one label.
62+
IVector::resizeOrCreate(sentId.ids, 1, useGpu);
63+
sentId.ids->setElement(0, 0);
64+
} else {
65+
// as there is batchSize word, there is batchSize label.
66+
IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
67+
for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
68+
}
6169
inArgs.emplace_back(sentId);
6270

6371
// a dummy layer to decide batch size
6472
Argument dummyInput;
6573
dummyInput.value = Matrix::create(batchSize, 2, false, useGpu);
6674
dummyInput.value->randomizeUniform();
75+
if (hasSubseq) {
76+
// generate one sequence with batchSize subsequence,
77+
// and each subsequence has only one word.
78+
dummyInput.sequenceStartPositions = ICpuGpuVector::create(2, false);
79+
int* buf = dummyInput.sequenceStartPositions->getMutableData(false);
80+
dummyInput.subSequenceStartPositions =
81+
ICpuGpuVector::create(batchSize + 1, false);
82+
int* subBuf = dummyInput.subSequenceStartPositions->getMutableData(false);
83+
buf[0] = 0;
84+
buf[1] = batchSize;
85+
for (size_t i = 0; i < batchSize + 1; i++) subBuf[i] = i;
86+
}
6787
inArgs.emplace_back(dummyInput);
6888
}
6989

70-
void testGeneration(bool useGpu, const string& expRetFile) {
90+
void testGeneration(const string& configFile, bool useGpu, bool hasSubseq,
91+
const string& expRetFile) {
7192
FLAGS_use_gpu = useGpu;
72-
auto config = std::make_shared<TrainerConfigHelper>(CONFIG_FILE);
93+
auto config = std::make_shared<TrainerConfigHelper>(configFile);
7394
unique_ptr<GradientMachine> gradientMachine(GradientMachine::create(*config));
7495
gradientMachine->loadParameters(modelDir);
7596
vector<Argument> inArgs(2);
7697

7798
const size_t batchSize = 15;
78-
prepareInArgs(inArgs, batchSize, useGpu);
99+
prepareInArgs(inArgs, batchSize, useGpu, hasSubseq);
79100
vector<Argument> outArgs;
80101
unique_ptr<Evaluator> testEvaluator(gradientMachine->makeEvaluator());
81102
testEvaluator->start();
@@ -93,16 +114,21 @@ TEST(RecurrentGradientMachine, test_generation) {
93114
#else
94115
const auto useGpuConfs = {true, false};
95116
#endif
96-
FLAGS_config_args = "beam_search=0"; // no beam search
97-
string expectRetFileNoBeam = expectFile + ".nobeam";
98-
for (auto useGpu : useGpuConfs) {
99-
testGeneration(useGpu, expectRetFileNoBeam);
100-
}
101-
FLAGS_config_args = "beam_search=1"; // no beam search
102-
string expectRetFileBeam = expectFile + ".beam";
103-
for (auto useGpu : useGpuConfs) {
104-
testGeneration(useGpu, expectRetFileBeam);
105-
}
117+
auto testGen = [&](const string& configFile, bool hasSubseq,
118+
const string& expRetFile, bool beam_search) {
119+
FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
120+
for (auto useGpu : useGpuConfs) {
121+
testGeneration(configFile, useGpu, hasSubseq, expRetFile);
122+
}
123+
};
124+
testGen(CONFIG_FILE, false, expectFile + ".nobeam", false); // no beam search
125+
testGen(CONFIG_FILE, false, expectFile + ".beam", true); // beam search
126+
// In hierarchical RNN, beam search and one way search are only in inner-RNN,
127+
// outer-RNN will concat the generated inner-results (first for beam search)
128+
// from inner-RNN. Thus, they have the same outer-results.
129+
testGen(NEST_CONFIG_FILE, true, expectFile + ".nest",
130+
false); // no beam search
131+
testGen(NEST_CONFIG_FILE, true, expectFile + ".nest", true); // beam search
106132
}
107133
#endif
108134

python/paddle/trainer_config_helpers/evaluators.py

+25-8
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ def maxframe_printer_evaluator(
559559
def seqtext_printer_evaluator(
560560
input,
561561
result_file,
562+
id_input=None,
562563
dict_file=None,
563564
delimited=None,
564565
name=None,
@@ -567,11 +568,10 @@ def seqtext_printer_evaluator(
567568
Sequence text printer will print text according to index matrix and a
568569
dictionary. There can be multiple input to this layer:
569570
570-
1. If there is only one input, the input must be a matrix containing
571+
1. If there is no id_input, the input must be a matrix containing
571572
the sequence of indices;
572573
573-
2. If there are more than one input, the first input should be ids,
574-
and are interpreted as sample ids.
574+
2. If there is id_input, it should be ids, and interpreted as sample ids.
575575
576576
The output format will be:
577577
@@ -602,26 +602,43 @@ def seqtext_printer_evaluator(
602602
603603
.. code-block:: python
604604
605-
eval = seqtext_printer_evaluator(input,
605+
eval = seqtext_printer_evaluator(input=maxid_layer,
606+
id_input=sample_id,
606607
dict_file=dict_file,
607608
result_file=result_file)
608609
609610
:param input: Input Layer name.
610611
:type input: LayerOutput|list
611-
:param dict_file: The input dictionary which contains a list of tokens.
612-
:type dict_file: basestring
613-
:param result_file: The file is to save the results.
612+
:param result_file: Path of the file to store the generated results.
614613
:type result_file: basestring
614+
:param id_input: Index of the input sequence, and the specified index will
615+
be printed in the generated results. This is an optional
616+
parameter.
617+
:type id_input: LayerOutput
618+
:param dict_file: Path of dictionary. This is an optional parameter.
619+
Every line is a word in the dictionary with
620+
(line number - 1) as the word index.
621+
If this parameter is set to None, or to an empty string,
622+
only the word indices are printed in the generated results.
623+
:type dict_file: basestring
615624
:param delimited: Whether to use space to separate output tokens.
616625
Default is True. No space is added if set to False.
617626
:type delimited: bool
618627
:param name: Evaluator name.
619628
:type name: None|basestring
629+
:return: The seq_text_printer that prints the generated sequence to a file.
630+
:rtype: evaluator
620631
"""
621632
assert isinstance(result_file, basestring)
633+
if id_input is None:
634+
inputs = [input]
635+
else:
636+
inputs = [id_input, input]
637+
input.parents.append(id_input)
638+
622639
evaluator_base(name=name,
623640
type="seq_text_printer",
624-
input=input,
641+
input=inputs,
625642
dict_file=dict_file,
626643
result_file=result_file,
627644
delimited=delimited)

0 commit comments

Comments
 (0)