Skip to content

Commit 4615c51

Browse files
luotao1 and lcy-seso
authored and committed
beam search api and unit test in hierarchical rnn (#122)
1 parent baaaa0b commit 4615c51

File tree

8 files changed

+184
-74
lines changed

8 files changed

+184
-74
lines changed

demo/seqToseq/seqToseq_net.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,13 @@ def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
171171
beam_gen = beam_search(name=decoder_group_name,
172172
step=gru_decoder_with_attention,
173173
input=group_inputs,
174-
id_input=data_layer(name="sent_id",
175-
size=1),
176-
dict_file=trg_dict_path,
177174
bos_id=0,
178175
eos_id=1,
179176
beam_size=beam_size,
180-
max_length=max_length,
181-
result_file=gen_trans_file)
177+
max_length=max_length)
178+
179+
seqtext_printer_evaluator(input=beam_gen,
180+
id_input=data_layer(name="sent_id", size=1),
181+
dict_file=trg_dict_path,
182+
result_file=gen_trans_file)
182183
outputs(beam_gen)

doc/algorithm/rnn/rnn.rst

+12-8
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,17 @@ After training the model, we can use it to generate sequences. A common practice
202202
* use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the generated token at the last time step for the input at the current time step.
203203
* use :code:`beam_search` function. This function needs to set:
204204

205-
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
206-
- :code:`dict_file`: the dictionary file for converting word id to word.
207205
- :code:`bos_id`: the start token. Every sentence starts with the start token.
208206
- :code:`eos_id`: the end token. Every sentence ends with the end token.
209207
- :code:`beam_size`: the beam size used in beam search.
210208
- :code:`max_length`: the maximum length of the generated sentences.
211-
- :code:`result_file`: the path of the generation result file.
212209

210+
* use :code:`seqtext_printer_evaluator` to print text according to index matrix and dictionary. This function needs to set:
211+
212+
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
213+
- :code:`dict_file`: the dictionary file for converting word id to word.
214+
- :code:`result_file`: the path of the generation result file.
215+
213216
The code is listed below:
214217

215218
.. code-block:: python
@@ -230,14 +233,15 @@ The code is listed below:
230233
beam_gen = beam_search(name=decoder_group_name,
231234
step=gru_decoder_with_attention,
232235
input=group_inputs,
233-
id_input=data_layer(name="sent_id",
234-
size=1),
235-
dict_file=trg_dict_path,
236236
bos_id=0, # Beginning token.
237237
eos_id=1, # End of sentence token.
238238
beam_size=beam_size,
239-
max_length=max_length,
240-
result_file=gen_trans_file)
239+
max_length=max_length)
240+
241+
seqtext_printer_evaluator(input=beam_gen,
242+
id_input=data_layer(name="sent_id", size=1),
243+
dict_file=trg_dict_path,
244+
result_file=gen_trans_file)
241245
outputs(beam_gen)
242246
243247
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
0 1 2 3 4
2+
1 2 3 4
3+
1 2 3 4
4+
1 2 3 4
5+
1 2 3 4
6+
1 2 3 4
7+
1 2 3 4
8+
1 2 3 4
9+
1 2 3 4
10+
1 2 3 4
11+
1 2 3 4
12+
1 2 3 4
13+
1 2 3 4
14+
1 2 3 4
15+
1 2 3 4
16+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#edit-mode: -*- python -*-
2+
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
17+
from paddle.trainer_config_helpers import *
18+
19+
settings(batch_size=15, learning_rate=0)
20+
21+
num_words = 5
22+
beam_flag = get_config_arg('beam_search', bool, False)
23+
24+
sent_id = data_layer(name="sent_id", size=1)
25+
26+
# This layer has no actual use, but only to decide batch_size in generation.
27+
# When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
28+
dummy_data = data_layer(name="dummy_data_input", size=2)
29+
30+
def outer_step(dummy_data):
31+
32+
gen_inputs = [StaticInput(input=dummy_data, size=2, is_seq=True),
33+
GeneratedInput(size=num_words,
34+
embedding_name="wordvec",
35+
embedding_size=num_words)]
36+
37+
def inner_step(dummy_memory, predict_word):
38+
39+
# simplified RNN for testing
40+
with mixed_layer(size=num_words) as layer:
41+
layer += full_matrix_projection(input=predict_word,
42+
param_attr=ParamAttr(name="transtable"))
43+
44+
with mixed_layer(size=num_words, act=ExpActivation()) as out:
45+
out += trans_full_matrix_projection(input=layer,
46+
param_attr=ParamAttr(name="wordvec"))
47+
48+
return out
49+
50+
beam_gen = beam_search(name="rnn_gen",
51+
step=inner_step,
52+
input=gen_inputs,
53+
bos_id=0,
54+
eos_id=num_words-1,
55+
beam_size=2 if beam_flag else 1,
56+
num_results_per_sample=2 if beam_flag else 1,
57+
max_length=10)
58+
return beam_gen
59+
60+
beam_gen_concat = recurrent_group(name="rnn_gen_concat",
61+
step=outer_step,
62+
input=[SubsequenceInput(dummy_data)])
63+
64+
seqtext_printer_evaluator(input=beam_gen_concat,
65+
id_input=sent_id,
66+
dict_file="./trainer/tests/test_gen_dict.txt",
67+
result_file="./trainer/tests/dump_text.test")
68+
#outputs(beam_gen_concat)
69+
# In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
70+
# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs
71+
# as follows. Note that "__beam_search_predict__" is the default output name of beam_search.
72+
Inputs("sent_id","dummy_data_input")
73+
Outputs("__beam_search_predict__")

paddle/trainer/tests/sample_trainer_rnn_gen.conf

+4-3
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,16 @@ def step(dummy_memory, predict_word):
4848
beam_gen = beam_search(name="rnn_gen",
4949
step=step,
5050
input=gen_inputs,
51-
id_input=sent_id,
52-
dict_file="./trainer/tests/test_gen_dict.txt",
53-
result_file="./trainer/tests/dump_text.test",
5451
bos_id=0,
5552
eos_id=num_words-1,
5653
beam_size=2 if beam_flag else 1,
5754
num_results_per_sample=2 if beam_flag else 1,
5855
max_length=10)
5956

57+
seqtext_printer_evaluator(input=beam_gen,
58+
id_input=sent_id,
59+
dict_file="./trainer/tests/test_gen_dict.txt",
60+
result_file="./trainer/tests/dump_text.test")
6061
#outputs(beam_gen)
6162
# In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
6263
# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs

paddle/trainer/tests/test_recurrent_machine_generation.cpp

+44-18
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15-
1615
#include <fstream>
1716

1817
#include <paddle/utils/PythonUtil.h>
@@ -24,6 +23,8 @@ using namespace paddle; // NOLINT
2423
using namespace std; // NOLINT
2524

2625
static const string& CONFIG_FILE = "trainer/tests/sample_trainer_rnn_gen.conf";
26+
static const string& NEST_CONFIG_FILE =
27+
"trainer/tests/sample_trainer_nest_rnn_gen.conf";
2728
static const string& OUTPUT_DIR = "trainer/tests/dump_text.test";
2829
static string modelDir = "trainer/tests/rnn_gen_test_model_dir/t1"; // NOLINT
2930
static string expectFile = // NOLINT
@@ -50,32 +51,52 @@ void checkOutput(const string& expRetFile) {
5051
}
5152
}
5253

53-
void prepareInArgs(vector<Argument>& inArgs,
54-
const size_t batchSize, bool useGpu) {
54+
void prepareInArgs(vector<Argument>& inArgs, const size_t batchSize,
55+
bool useGpu, bool hasSubseq) {
5556
inArgs.clear();
5657
// sentence id
5758
Argument sentId;
5859
sentId.value = nullptr;
59-
IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
60-
for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
60+
if (hasSubseq) {
61+
// as there is only one sequence, there is only one label.
62+
IVector::resizeOrCreate(sentId.ids, 1, useGpu);
63+
sentId.ids->setElement(0, 0);
64+
} else {
65+
// as there is batchSize word, there is batchSize label.
66+
IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
67+
for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
68+
}
6169
inArgs.emplace_back(sentId);
6270

6371
// a dummy layer to decide batch size
6472
Argument dummyInput;
6573
dummyInput.value = Matrix::create(batchSize, 2, false, useGpu);
6674
dummyInput.value->randomizeUniform();
75+
if (hasSubseq) {
76+
// generate one sequence with batchSize subsequence,
77+
// and each subsequence has only one word.
78+
dummyInput.sequenceStartPositions = ICpuGpuVector::create(2, false);
79+
int* buf = dummyInput.sequenceStartPositions->getMutableData(false);
80+
dummyInput.subSequenceStartPositions =
81+
ICpuGpuVector::create(batchSize + 1, false);
82+
int* subBuf = dummyInput.subSequenceStartPositions->getMutableData(false);
83+
buf[0] = 0;
84+
buf[1] = batchSize;
85+
for (size_t i = 0; i < batchSize + 1; i++) subBuf[i] = i;
86+
}
6787
inArgs.emplace_back(dummyInput);
6888
}
6989

70-
void testGeneration(bool useGpu, const string& expRetFile) {
90+
void testGeneration(const string& configFile, bool useGpu, bool hasSubseq,
91+
const string& expRetFile) {
7192
FLAGS_use_gpu = useGpu;
72-
auto config = std::make_shared<TrainerConfigHelper>(CONFIG_FILE);
93+
auto config = std::make_shared<TrainerConfigHelper>(configFile);
7394
unique_ptr<GradientMachine> gradientMachine(GradientMachine::create(*config));
7495
gradientMachine->loadParameters(modelDir);
7596
vector<Argument> inArgs(2);
7697

7798
const size_t batchSize = 15;
78-
prepareInArgs(inArgs, batchSize, useGpu);
99+
prepareInArgs(inArgs, batchSize, useGpu, hasSubseq);
79100
vector<Argument> outArgs;
80101
unique_ptr<Evaluator> testEvaluator(gradientMachine->makeEvaluator());
81102
testEvaluator->start();
@@ -93,16 +114,21 @@ TEST(RecurrentGradientMachine, test_generation) {
93114
#else
94115
const auto useGpuConfs = {true, false};
95116
#endif
96-
FLAGS_config_args = "beam_search=0"; // no beam search
97-
string expectRetFileNoBeam = expectFile + ".nobeam";
98-
for (auto useGpu : useGpuConfs) {
99-
testGeneration(useGpu, expectRetFileNoBeam);
100-
}
101-
FLAGS_config_args = "beam_search=1"; // no beam search
102-
string expectRetFileBeam = expectFile + ".beam";
103-
for (auto useGpu : useGpuConfs) {
104-
testGeneration(useGpu, expectRetFileBeam);
105-
}
117+
auto testGen = [&](const string& configFile, bool hasSubseq,
118+
const string& expRetFile, bool beam_search) {
119+
FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
120+
for (auto useGpu : useGpuConfs) {
121+
testGeneration(configFile, useGpu, hasSubseq, expRetFile);
122+
}
123+
};
124+
testGen(CONFIG_FILE, false, expectFile + ".nobeam", false); // no beam search
125+
testGen(CONFIG_FILE, false, expectFile + ".beam", true); // beam search
126+
// In hierarchical RNN, beam search and one way search are only in inner-RNN,
127+
// outer-RNN will concat the generated inner-results (first for beam search)
128+
// from inner-RNN. Thus, they have the same outer-results.
129+
testGen(NEST_CONFIG_FILE, true, expectFile + ".nest",
130+
false); // no beam search
131+
testGen(NEST_CONFIG_FILE, true, expectFile + ".nest", true); // beam search
106132
}
107133
#endif
108134

python/paddle/trainer_config_helpers/evaluators.py

+25-8
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ def maxframe_printer_evaluator(
559559
def seqtext_printer_evaluator(
560560
input,
561561
result_file,
562+
id_input=None,
562563
dict_file=None,
563564
delimited=None,
564565
name=None,
@@ -567,11 +568,10 @@ def seqtext_printer_evaluator(
567568
Sequence text printer will print text according to index matrix and a
568569
dictionary. There can be multiple input to this layer:
569570
570-
1. If there is only one input, the input must be a matrix containing
571+
1. If there is no id_input, the input must be a matrix containing
571572
the sequence of indices;
572573
573-
2. If there are more than one input, the first input should be ids,
574-
and are interpreted as sample ids.
574+
2. If there is id_input, it should be ids, and interpreted as sample ids.
575575
576576
The output format will be:
577577
@@ -602,26 +602,43 @@ def seqtext_printer_evaluator(
602602
603603
.. code-block:: python
604604
605-
eval = seqtext_printer_evaluator(input,
605+
eval = seqtext_printer_evaluator(input=maxid_layer,
606+
id_input=sample_id,
606607
dict_file=dict_file,
607608
result_file=result_file)
608609
609610
:param input: Input Layer name.
610611
:type input: LayerOutput|list
611-
:param dict_file: The input dictionary which contains a list of tokens.
612-
:type dict_file: basestring
613-
:param result_file: The file is to save the results.
612+
:param result_file: Path of the file to store the generated results.
614613
:type result_file: basestring
614+
:param id_input: Index of the input sequence, and the specified index will
615+
be printed in the generated results. This is an optional
616+
parameter.
617+
:type id_input: LayerOutput
618+
:param dict_file: Path of dictionary. This is an optional parameter.
619+
Every line is a word in the dictionary with
620+
(line number - 1) as the word index.
621+
If this parameter is set to None, or to an empty string,
622+
only the word indices are printed in the generated results.
623+
:type dict_file: basestring
615624
:param delimited: Whether to use space to separate output tokens.
616625
Default is True. No space is added if set to False.
617626
:type delimited: bool
618627
:param name: Evaluator name.
619628
:type name: None|basestring
629+
:return: The seq_text_printer that prints the generated sequence to a file.
630+
:rtype: evaluator
620631
"""
621632
assert isinstance(result_file, basestring)
633+
if id_input is None:
634+
inputs = [input]
635+
else:
636+
inputs = [id_input, input]
637+
input.parents.append(id_input)
638+
622639
evaluator_base(name=name,
623640
type="seq_text_printer",
624-
input=input,
641+
input=inputs,
625642
dict_file=dict_file,
626643
result_file=result_file,
627644
delimited=delimited)

0 commit comments

Comments
 (0)