Skip to content
This repository was archived by the owner on Jan 24, 2024. It is now read-only.

Commit 7ad4046

Browse files
authored
Merge pull request #267 from cyj1986/map
update seq sort
2 parents 59ed7a9 + 6a82e61 commit 7ad4046

File tree

2 files changed

+6
-17
lines changed

2 files changed

+6
-17
lines changed

saber/funcs/impl/cuda/base/cuda_c/cuda_utils.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,9 @@ class SeqSortedseqTranseUtil {
112112
emit_length = max_len;
113113

114114
if (max_len == 1) {
115-
_emit_offset_vec.push_back(0);
116-
_emit_offset_vec.push_back(emit_length * batch_size);
115+
_emit_offset_vec.resize(2);
116+
_emit_offset_vec[0] = 0;
117+
_emit_offset_vec[1] = emit_length * batch_size;
117118
return false;
118119
}
119120

saber/funcs/impl/cuda/vender_lstm.cpp

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ set_lstm_params_region(LstmParam<OpTensor>& param, int word_size) {
4949

5050
for (int i = 0; i < _cudnn_lstm_weights_layernum; i++) {
5151
ParamsRegion& region = _inner_weight_region[i];
52-
//get_sub_tensor(cudnnW[i], (Op_dtype*) region._offset, region._size/hidden_size/4, hidden_size, 4*hidden_size, cuda_stream);
53-
get_sub_tensor<Op_dtype>(cudnnW[i], (Op_dtype*) region._offset, region._size/hidden_size/4, hidden_size, 4*hidden_size);
52+
get_sub_tensor<Op_dtype>(cudnnW[i], (Op_dtype*) region._offset, region._size/(sizeof(Op_dtype) * hidden_size), hidden_size, 4*hidden_size, cuda_stream);
5453
}
5554

5655
for (int i = 0; i < _cudnn_lstm_weights_layernum; i++) {
@@ -63,18 +62,7 @@ set_lstm_params_region(LstmParam<OpTensor>& param, int word_size) {
6362
CUDA_CHECK(cudaMemsetAsync((void*)(region_b._offset), 0, region_b._size, cuda_stream));
6463
}
6564
}
66-
cudaDeviceSynchronize();
6765
}
68-
int region_id = 0;
69-
for (auto region : _inner_weight_region) {
70-
char buf[100];
71-
sprintf(buf, "./lstm_%d.txt", region_id);
72-
record_dev_tensorfile<NV>((Op_dtype*)region._offset, region._size/4, buf);
73-
region_id++;
74-
}
75-
cudaDeviceSynchronize();
76-
record_dev_tensorfile<NV>(param.weight()->data(), param.weight()->valid_size(), "lstm_param_weight.txt");
77-
cudaDeviceSynchronize();
7866
}
7967

8068
template <>
@@ -183,7 +171,7 @@ create(const std::vector<DataTensor*>& inputs,
183171
_y_desc.reset(new cudnn::TensorDescriptors<DataDtype>(
184172
offset_after_sort,
185173
{batch_size, _hidden_size * lstm_param.num_direction, 1},
186-
{_hidden_size * lstm_param.num_direction, 1, 1}));
174+
{_hidden_size * lstm_param.num_direction, 1, 1}));
187175

188176
Shape in_dim = inputs[0]->valid_shape();
189177
Shape in_stride = inputs[0]->get_stride();
@@ -227,7 +215,7 @@ dispatch(const std::vector<DataTensor*>& inputs,
227215
if (inputs.size() == 2) {
228216
in_hidden_data = inputs[1]->data();
229217
}
230-
bool isHW2Seq = inputs[0]->get_seq_offset().size() > 2;
218+
bool isHW2Seq = inputs[0]->get_seq_offset().size() > 2 || param.is_reverse;
231219

232220
if (isHW2Seq) {
233221
_temp_tensor_in.reshape(inputs[0]->valid_shape());

0 commit comments

Comments (0)