[cherry pick] Support seq len equal to 0 in sequence ops #16964

Merged: 4 commits, Apr 18, 2019

4 changes: 2 additions & 2 deletions paddle/fluid/framework/lod_tensor.cc
@@ -158,7 +158,7 @@ bool CheckLoD(const LoD &in, int tensor_height) {
if (level.size() < 2) return false;
// check: the first offset(the begin offset) of each level should be 0.
if (level.front() != 0) return false;
-  // check: all the offsets in a level should be ascending(allow same items)
+  // check: all the offsets in a level should be non-descending
if (!std::is_sorted(level.begin(), level.end())) {
return false;
}
@@ -182,7 +182,7 @@ bool CheckAbsLoD(const LoD &in, int tensor_height) {
if (in.empty()) return true;
for (const auto &level : in) {
// check: all the offsets in a level should be ascending(no same items
-    // allows).
+    // allowed).
if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
if (a < b) return true;
return false;
4 changes: 2 additions & 2 deletions paddle/fluid/framework/lod_tensor.h
@@ -79,7 +79,7 @@ bool operator==(const LoD& a, const LoD& b);
*
* It will check two things:
*
- * 1. all the offsets in a level should be ascending(no same items allows).
+ * 1. all the offsets in a level should be non-descending.
* 2. there should be more than 2 offsets existing in each level.
* 3. the higher level's last offset should equals the lower level's size-1.
* 4. the first offset(the begin offset) of each level should be 0.
@@ -95,7 +95,7 @@ bool CheckLoD(const LoD& in, int tensor_height = -1);
* - Empty lod is treated as valid.
*
* It will check two things:
- * 1. all the offsets in a level should be ascending(no same items allows)
+ * 1. all the offsets in a level should be ascending(no same items allowed).
* 2. there should be more than 2 offsets existing in each level.
* 3. the first offset of each level should be 0, and the last should be the
* same(the height of underlying tensor) or `tensor_height` if
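
To make the relaxed contract concrete, here is a minimal Python sketch of the two rules (hypothetical helper names, not PaddlePaddle API): CheckLoD now accepts equal adjacent offsets, which is how a zero-length sequence is encoded, while CheckAbsLoD still demands strictly ascending offsets.

```python
# Minimal sketch of the two offset checks described above.
# Helper names are hypothetical; the real checks live in lod_tensor.cc.
def check_lod_level(offsets):
    # Non-descending: equal adjacent offsets (a zero-length sequence) are OK.
    return (len(offsets) >= 2 and offsets[0] == 0 and
            all(a <= b for a, b in zip(offsets, offsets[1:])))

def check_abs_lod_level(offsets):
    # Strictly ascending: zero-length sequences are rejected.
    return all(a < b for a, b in zip(offsets, offsets[1:]))

level = [0, 2, 2, 5]              # the second sequence has length 0
assert check_lod_level(level)     # accepted after this change
assert not check_abs_lod_level(level)
```
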
1 change: 1 addition & 0 deletions paddle/fluid/operators/crf_decoding_op.h
@@ -46,6 +46,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
math::SetConstant<DeviceContext, int64_t>()(
ctx.template device_context<DeviceContext>(), decoded_path, 0);
for (size_t i = 0; i < seq_num; ++i) {
+      if (lod[level][i] == lod[level][i + 1]) continue;
int start_pos = static_cast<int>(lod[level][i]);
int end_pos = static_cast<int>(lod[level][i + 1]);
Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
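
The guard added above is the recurring pattern of this PR: before slicing a tensor by sequence, skip any sequence whose begin and end offsets coincide. A rough Python equivalent of the loop, with assumed data:

```python
# Rough Python equivalent of the recurring skip-empty-sequence guard.
offsets = [0, 2, 2, 5, 5]            # offset-style LoD level; two empty sequences
for i in range(len(offsets) - 1):
    if offsets[i] == offsets[i + 1]:
        continue                     # zero-length sequence: nothing to slice
    start, end = offsets[i], offsets[i + 1]
    # ... operate on rows [start, end) of the underlying tensor ...
    print("sequence %d spans rows [%d, %d)" % (i, start, end))
```
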
8 changes: 8 additions & 0 deletions paddle/fluid/operators/math/context_project.h
@@ -104,6 +104,8 @@ class ContextProjectFunctor {
sequence_width = in.dims()[1];

for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+      if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
input_row_begin = (context_start > 0)
? static_cast<int>(lod_level_0[i]) + context_start
: static_cast<int>(lod_level_0[i]);
@@ -134,6 +136,8 @@ if (padding_trainable) {
if (padding_trainable) {
PADDLE_ENFORCE_NOT_NULL(padding_data);
for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+        if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
Tensor out_t = col->Slice(static_cast<int>(lod_level_0[i]),
static_cast<int>(lod_level_0[i + 1]));

@@ -216,6 +220,8 @@ class ContextProjectGradFunctor {

if (input_grad) {
for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+        if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
input_row_begin = (context_start > 0)
? static_cast<int>(lod_level_0[i]) + context_start
: static_cast<int>(lod_level_0[i]);
@@ -248,6 +254,8 @@ if (pad_grad) {
if (pad_grad) {
if (padding_trainable) {
for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+          if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
Tensor out_t = col->Slice(static_cast<int>(lod_level_0[i]),
static_cast<int>(lod_level_0[i + 1]));

6 changes: 3 additions & 3 deletions paddle/fluid/operators/math/detail/lstm_gpu_kernel.h
@@ -197,9 +197,9 @@ void gpu_lstm_forward(const platform::DeviceContext& context, Op op,
threads = dim3(frame_per_block, 1);
grid = dim3(frame_blocks, 1);
} else {
-    /* frame_per_block = 32 batch_per_block = 32 */
-    threads = dim3(32, 32);
-    grid = dim3((frame_size + 32 - 1) / 32, (batch_size + 32 - 1) / 32);
+    /* frame_per_block = 32 batch_per_block = 16 */
+    threads = dim3(32, 16);
+    grid = dim3((frame_size + 32 - 1) / 32, (batch_size + 16 - 1) / 16);
}

auto stream =
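
The fallback launch configuration here drops from 32 x 32 = 1024 to 32 x 16 = 512 threads per block, with the grid's ceiling division adjusted to match. A quick sketch of the arithmetic (example sizes are arbitrary):

```python
# Sketch of the launch-dimension arithmetic; values mirror the diff above.
def ceil_div(a, b):
    return (a + b - 1) // b

frame_size, batch_size = 100, 37       # arbitrary example sizes
threads = (32, 16)                     # 512 threads per block (was 32 x 32 = 1024)
grid = (ceil_div(frame_size, 32), ceil_div(batch_size, 16))
assert grid == (4, 3)
```
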
6 changes: 5 additions & 1 deletion paddle/fluid/operators/sequence_ops/sequence_concat_op.h
@@ -34,7 +34,9 @@ inline framework::LoD ConcatLoD(const Container &xs,
for (size_t j = 0; j < xs.size(); ++j) {
auto &x_lod = xs[j].get().lod()[0];
const framework::Tensor &tensor = xs[j].get();
-        xs_in_order->emplace_back(tensor.Slice(x_lod[i - 1], x_lod[i]));
+        if (x_lod[i - 1] < x_lod[i]) {
+          xs_in_order->emplace_back(tensor.Slice(x_lod[i - 1], x_lod[i]));
+        }
sum += x_lod[i];
}
result[i] = sum;
@@ -97,6 +99,8 @@ class SeqConcatGradKernel : public framework::OpKernel<T> {
const framework::LoDTensor *x = xs[j];
framework::LoDTensor *dx = dxs[j];
auto &x_lod = x->lod()[0];
+        if (x_lod[i - 1] == x_lod[i]) continue;
+
sliced_x.emplace_back(x->Slice(x_lod[i - 1], x_lod[i]));
if (dx != nullptr) {
sliced_dx.emplace_back(dx->Slice(x_lod[i - 1], x_lod[i]));
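
As a sketch of what ConcatLoD computes (a hypothetical Python mock-up, not the real API): the output offset at position i is the sum of every input's i-th offset, while zero-length slices are skipped when gathering the tensors to concatenate.

```python
# Hypothetical mock-up of ConcatLoD over offset-style LoD levels.
def concat_lod(x_lods):
    n = len(x_lods[0])
    result = [0] * n
    slices = []                        # (input index, begin, end) of each kept slice
    for i in range(1, n):
        s = 0
        for j, lod in enumerate(x_lods):
            if lod[i - 1] < lod[i]:    # skip zero-length slices
                slices.append((j, lod[i - 1], lod[i]))
            s += lod[i]                # offsets still accumulate
        result[i] = s
    return result, slices

# The first input's 2nd sequence and the second input's 1st sequence are empty.
lods = [[0, 2, 2], [0, 0, 3]]
print(concat_lod(lods))                # ([0, 2, 5], [(0, 0, 2), (1, 0, 3)])
```
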
2 changes: 2 additions & 0 deletions paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h
@@ -47,8 +47,10 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
out->set_lod(in->lod());
auto out_data = out->mutable_data<T>(context.GetPlace());
for (size_t i = 0; i < lod0.size() - 1; ++i) {
+      if (lod0[i] == lod0[i + 1]) continue;
int start = lod0[i];
int end = lod0[i + 1];
+
int copy_size = win_size < end - start + 1 ? win_size : end - start + 1;
int mid = end + 1 - copy_size;
int pad_num = win_size - copy_size;
1 change: 1 addition & 0 deletions paddle/fluid/operators/sequence_ops/sequence_expand_op.h
@@ -160,6 +160,7 @@ struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> {
int x_start = x_lod[i - 1];
int x_end = x_lod[i];
int x_seq_len = x_end - x_start;
+        if (x_seq_len == 0) continue;
auto dx_sub = dx->Slice(x_start, x_end);
dx_sub.Resize(flatten_to_1d(dx_sub.dims()));
int dout_end = dout_offset + repeat_num * x_seq_len;
8 changes: 5 additions & 3 deletions paddle/fluid/operators/sequence_ops/sequence_slice_op.h
@@ -76,9 +76,9 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {

for (size_t i = 0; i < n; ++i) {
PADDLE_ENFORCE_LE(0, offset_data[i],
"The offset[%d] must greater than zero.", i);
PADDLE_ENFORCE_LT(0, length_data[i],
"The length[%d] must greater than zero.", i);
"The offset[%d] must be nonnegative.", i);
PADDLE_ENFORCE_LE(0, length_data[i],
"The length[%d] must be nonnegative.", i);
PADDLE_ENFORCE_LE(lod[0][i] + offset_data[i] + length_data[i],
lod[0][i + 1], "The target tensor's length overflow.");
}
@@ -95,6 +95,7 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {

size_t out_offset = 0;
for (size_t i = 0; i < n; ++i) {
+      if (length_data[i] == 0) continue;
Tensor in_t = in->Slice(
static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));
@@ -144,6 +145,7 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
static_cast<T>(0));

for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
+      if (length_data[i] == 0) continue;
Tensor out_grad_t =
out_grad->Slice(static_cast<int>(out_lod[0][i]),
static_cast<int>(out_lod[0][i + 1]));
21 changes: 17 additions & 4 deletions python/paddle/fluid/tests/unittests/test_crf_decoding_op.py
@@ -128,12 +128,15 @@ class TestCRFDecodingOp2(OpTest):
ground truth being given.
"""

+    def init_lod(self):
+        self.lod = [[1, 2, 3, 4]]
+
def setUp(self):
self.op_type = "crf_decoding"
TAG_NUM = 5

-        lod = [[1, 2, 3, 4]]
-        total_len = sum(lod[-1])
+        self.init_lod()
+        total_len = sum(self.lod[-1])
transition = np.repeat(
np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
@@ -152,9 +155,9 @@ def setUp(self):
expected_output = (labels == predicted_labels).astype("int64")

self.inputs = {
"Emission": (emission, lod),
"Emission": (emission, self.lod),
"Transition": transition,
"Label": (labels, lod)
"Label": (labels, self.lod)
}

self.outputs = {"ViterbiPath": expected_output}
@@ -163,5 +166,15 @@ def test_check_output(self):
self.check_output()


+class TestCRFDecodingOp3(TestCRFDecodingOp2):
+    def init_lod(self):
+        self.lod = [[1, 0, 0, 4]]
+
+
+class TestCRFDecodingOp4(TestCRFDecodingOp2):
+    def init_lod(self):
+        self.lod = [[0, 2, 3, 0]]
+
+
if __name__ == "__main__":
unittest.main()
54 changes: 35 additions & 19 deletions python/paddle/fluid/tests/unittests/test_edit_distance_op.py
@@ -58,67 +58,83 @@ def setUp(self):
x2 = np.array([[12, 4, 7, 8]]).astype("int64")
x1 = np.transpose(x1)
x2 = np.transpose(x2)
-        x1_lod = [1, 4]
-        x2_lod = [3, 1]
+        self.x1_lod = [1, 4]
+        self.x2_lod = [3, 1]

-        num_strs = len(x1_lod)
+        num_strs = len(self.x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(2).astype("int64")

x1_offset = 0
x2_offset = 0
for i in range(0, num_strs):
distance[i] = Levenshtein(
-                hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
-                ref=x2[x2_offset:(x2_offset + x2_lod[i])])
-            x1_offset += x1_lod[i]
-            x2_offset += x2_lod[i]
+                hyp=x1[x1_offset:(x1_offset + self.x1_lod[i])],
+                ref=x2[x2_offset:(x2_offset + self.x2_lod[i])])
+            x1_offset += self.x1_lod[i]
+            x2_offset += self.x2_lod[i]
if normalized is True:
-                len_ref = x2_lod[i]
+                len_ref = self.x2_lod[i]
distance[i] = distance[i] / len_ref

self.attrs = {'normalized': normalized}
-        self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
+        self.inputs = {'Hyps': (x1, [self.x1_lod]), 'Refs': (x2, [self.x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}

def test_check_output(self):
self.check_output()


-class TestEditDistanceOpNormalized(OpTest):
+class TestEditDistanceOpNormalizedCase0(OpTest):
+    def reset_config(self):
+        pass
+
def setUp(self):
self.op_type = "edit_distance"
normalized = True
x1 = np.array([[10, 3, 6, 5, 8, 2]]).astype("int64")
x2 = np.array([[10, 4, 6, 7, 8]]).astype("int64")
x1 = np.transpose(x1)
x2 = np.transpose(x2)
-        x1_lod = [1, 2, 3]
-        x2_lod = [2, 1, 2]
+        self.x1_lod = [3, 0, 3]
+        self.x2_lod = [2, 1, 2]
+        self.reset_config()

-        num_strs = len(x1_lod)
+        num_strs = len(self.x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(3).astype("int64")

x1_offset = 0
x2_offset = 0
for i in range(0, num_strs):
distance[i] = Levenshtein(
-                hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
-                ref=x2[x2_offset:(x2_offset + x2_lod[i])])
-            x1_offset += x1_lod[i]
-            x2_offset += x2_lod[i]
+                hyp=x1[x1_offset:(x1_offset + self.x1_lod[i])],
+                ref=x2[x2_offset:(x2_offset + self.x2_lod[i])])
+            x1_offset += self.x1_lod[i]
+            x2_offset += self.x2_lod[i]
if normalized is True:
-                len_ref = x2_lod[i]
+                len_ref = self.x2_lod[i]
distance[i] = distance[i] / len_ref

self.attrs = {'normalized': normalized}
-        self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
+        self.inputs = {'Hyps': (x1, [self.x1_lod]), 'Refs': (x2, [self.x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}

def test_check_output(self):
self.check_output()


+class TestEditDistanceOpNormalizedCase1(TestEditDistanceOpNormalizedCase0):
+    def reset_config(self):
+        self.x1_lod = [0, 6, 0]
+        self.x2_lod = [2, 1, 2]
+
+
+class TestEditDistanceOpNormalizedCase2(TestEditDistanceOpNormalizedCase0):
+    def reset_config(self):
+        self.x1_lod = [0, 0, 6]
+        self.x2_lod = [2, 2, 1]
+
+
if __name__ == '__main__':
unittest.main()
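
The new cases exercise zero-length hypotheses. An empty hypothesis costs one insertion per reference token, so with normalized=True the distance is exactly 1.0. A quick check with a generic Levenshtein implementation (not the test's own helper):

```python
# Generic Levenshtein distance, used only to sanity-check the empty-hyp case.
def levenshtein(hyp, ref):
    m, n = len(hyp), len(ref)
    dist = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        dist[i][0] = i
    for j in range(n + 1):
        dist[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if hyp[i - 1] == ref[j - 1] else 1
            dist[i][j] = min(dist[i - 1][j] + 1,         # deletion
                             dist[i][j - 1] + 1,         # insertion
                             dist[i - 1][j - 1] + cost)  # substitution
    return dist[m][n]

assert levenshtein([], [10, 4]) == 2        # one insertion per reference token
assert levenshtein([], [10, 4]) / 2 == 1.0  # normalized distance for an empty hyp
```
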
28 changes: 25 additions & 3 deletions python/paddle/fluid/tests/unittests/test_gru_op.py
@@ -82,9 +82,9 @@ def _step(x, h_p, w, b, act_state, act_gate):
hidden = np.zeros((T, D), dtype=dtype)

idx_in_seq_list, sorted_seqs = _seq_to_batch(lod, is_reverse)
-    h_p = h0[sorted_seqs]
-
+    h_p = h0[[seq for seq in sorted_seqs if lod[0][seq] > 0]]
    max_seq_len = len(idx_in_seq_list)
+    assert len(idx_in_seq_list[0]) == N
end_idx = 0
for batch_idx in range(max_seq_len):
x = input[idx_in_seq_list[batch_idx]]
@@ -119,7 +119,6 @@ def setUp(self):

T = sum(self.lod[0])
N = len(self.lod[0])
-
input = np.random.rand(T, 3 * self.D).astype(self.dtype)
weight = np.random.rand(self.D, 3 * self.D).astype(self.dtype)
bias = np.random.rand(
@@ -173,13 +172,36 @@ def set_confs(self):
self.dtype = 'float32'


+class TestGRUOp2Len0(TestGRUOp):
+    def set_confs(self):
+        self.D = 19
+        self.lod = [[2, 0, 4]]
+        self.dtype = 'float32'
+
+
class TestGRUOp2OriginMode(TestGRUOp):
def set_confs(self):
self.D = 19
self.dtype = 'float32'
self.origin_mode = True


+class TestGRUOp2OriginModeLen0(TestGRUOp):
+    def set_confs(self):
+        self.D = 19
+        self.lod = [[0, 3, 4]]
+        self.dtype = 'float32'
+        self.origin_mode = True
+
+
+class TestGRUOp2OriginModeLastLen0(TestGRUOp):
+    def set_confs(self):
+        self.D = 19
+        self.lod = [[0, 3, 0]]
+        self.dtype = 'float32'
+        self.origin_mode = True
+
+
class TestGRUOpNoInitial(TestGRUOp):
def set_confs(self):
self.with_h0 = False
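
The h0 filtering above mirrors the batching: a zero-length sequence never contributes a row to any batch step, so its initial-hidden row must be dropped from h_p as well. A small sketch with assumed shapes:

```python
import numpy as np

# Sketch of the initial-hidden filtering; lod is length-based as in the tests.
lod = [[2, 0, 4]]                      # sequence 1 is empty
sorted_seqs = [2, 0, 1]                # sequence indices by descending length
h0 = np.arange(9, dtype=np.float64).reshape(3, 3)
h_p = h0[[seq for seq in sorted_seqs if lod[0][seq] > 0]]
print(h_p.shape)                       # (2, 3): the empty sequence's row is dropped
```
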
python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py
@@ -89,7 +89,8 @@ def crf_forward_compute(self):
for i in range(self.seq_num):
start = self.seq_start_positions[i]
end = self.seq_start_positions[i + 1]
-
+            if start >= end:
+                continue
self.log_likelihood[i] = self._forward_a_sequence(
self.x[start:end, :], self.x_row_max[start:end, :],
self.x_exps[start:end, :], self.labels[start:end, :],
@@ -110,7 +111,7 @@ def set_test_data(self):
lod = [[]]
seq_start_pos = [0]
for i in range(SEQ_NUM):
-            lod[-1].append(random.randint(1, MAX_SEQ_LEN))
+            lod[-1].append(random.randint(0, MAX_SEQ_LEN))
seq_start_pos.append(seq_start_pos[-1] + lod[-1][-1])
emission = np.random.uniform(
-1, 1, [seq_start_pos[-1], TAG_NUM]).astype("float64")