
Commit 2e96673

remove unused code and fix bugs

1 parent: dda804d

File tree: 4 files changed, +7 -47 lines

saber/funcs/impl/cuda/base/cuda_c/saber_attension_lstm.cu (+1, -28)

@@ -211,28 +211,15 @@ template<>
 /*for other fc*/
 for (int word_id = 0; word_id < max_len; word_id++) {
 _attn_outs[0]->reshape(first_fc_out_0_shape);
-//if (word_id > 1) {
-// break;
-//}
 
 if (word_id > 0) {
 Shape h_shape = {seq_num, N_0, 1, 1};
 _first_fc_out_1.reshape(h_shape);
 
-//auto kernel_1 = saber_find_fast_sass_gemm(false, !fc_vec[0].is_transpose_weights, seq_num, N_0, hidden_size);
 auto kernel_1 = saber_find_fast_sass_gemm(false, false, seq_num, N_0, hidden_size);
 kernel_1(seq_num, N_0, hidden_size, 1.0f,
 _cell_out.data(), 0.f,
-fc_vec[0].weights->data() + K_0 * N_0, _first_fc_out_1.mutable_data(), stream);
-//cudaDeviceSynchronize();
-//print_tensor_device(_lstm_out);
-//print_tensor_device(*(fc_vec[0]->weights));
-//cudaDeviceSynchronize();
-//gemm(_handle, false, false, seq_num, N_0, hidden_size,
-// 1.0, _lstm_out.data() + (word_id - 1) * seq_num * hidden_size,
-// fc_vec[0]->weights->data() + K_0 * N_0,
-// 0.f, _first_fc_out_1.mutable_data());
-//cudaDeviceSynchronize();
+fc_vec[0].weights->data() + K_0 * N_0, _first_fc_out_1.mutable_data(), stream);
 
 sequence_bias_relu<<<CUDA_GET_BLOCKS(_attn_outs[0]->valid_size()), CUDA_NUM_THREADS, 0, stream>>>(_first_fc_out_0.data(), _first_fc_out_1.data(), fc_vec[0].bias->data(),
 _dev_seq_id_map.data(), M_0, N_0, _attn_outs[0]->mutable_data());

@@ -252,7 +239,6 @@ template<>
 auto fc_in_data = _attn_outs[i - 1]->data();
 auto fc_out_data = _attn_outs[i]->mutable_data();
 
-//auto kernel = saber_find_fast_sass_gemm(false, !fc_vec[i].is_transpose_weights, M, N, K);
 auto kernel = saber_find_fast_sass_gemm(false, false, M, N, K);
 kernel(M, N, K, 1.0f, fc_in_data, 0.0f, fc_vec[i].weights->data(), fc_out_data, stream);
 bias_relu<<<CUDA_GET_BLOCKS(_attn_outs[i]->valid_size()), CUDA_NUM_THREADS, 0, stream>>>(fc_out_data, fc_vec[i].bias->data(), _attn_outs[i]->valid_size(), N, fc_out_data);

@@ -268,14 +254,6 @@ template<>
 sequence_softmax<<<CUDA_GET_BLOCKS(seq_num), CUDA_NUM_THREADS, 0, stream>>>(_attn_outs[fc_num - 1]->data(), _dev_offset.data(), seq_num, _softmax_out.mutable_data());
 
 sequence_pool<<<CUDA_GET_BLOCKS(seq_num * dim), CUDA_NUM_THREADS, 0, stream>>>(input->data(), _softmax_out.data(), _dev_offset.data(), seq_num, inputs[0]->num(), dim, _pool_out.mutable_data());
-/*data after pool need be sorted or append*/
-//cudaDeviceSynchronize();
-//record_dev_tensorfile<NV>(_pool_out.mutable_data(), _pool_out.valid_size(), "./sequence_pool_out_cu.txt");
-//record_dev_tensorfile<NV>(_softmax_out.mutable_data(), _softmax_out.valid_size(), "./softmax_out_cu.txt");
-//record_dev_tensorfile<NV>(_attn_outs[0]->mutable_data(), _attn_outs[0]->valid_size(), "./attn_fc_0_cu.txt");
-//record_dev_tensorfile<NV>(_attn_outs[1]->mutable_data(), _attn_outs[1]->valid_size(), "./attn_fc_1_cu.txt");
-//record_dev_tensorfile<NV>(_first_fc_out_1.mutable_data(), _first_fc_out_1.valid_size(), "./first_fc_1_cu.txt");
-//record_dev_tensorfile<NV>(attn_param.fc_vec[0].weights->data() + 30, /*attn_param.fc_vec[0]->weights->valid_size()*/ 15, "./fc_0_weight.txt");
 
 
 auto x_data = _pool_out.data();

@@ -298,11 +276,6 @@ template<>
 _dev_offset.data(), seq_num, word_num, hidden_size, outputs[0]->mutable_data());
 
 outputs[0]->set_seq_offset(inputs[0]->get_seq_offset());
-CUDA_CHECK(cudaDeviceSynchronize());
-CUDA_CHECK(cudaPeekAtLastError());
-//cudaDeviceSynchronize();
-//record_dev_tensorfile<NV>(outputs[0]->data(), outputs[0]->valid_size(), "./final_out.txt");
-//record_dev_tensorfile<NV>(_lstm_out.mutable_data(), _lstm_out.valid_size(), "./lstm_out.txt");
 return SaberSuccess;
 }
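The hunks above drop the commented-out debug code and the unconditional CUDA_CHECK(cudaDeviceSynchronize()) / cudaPeekAtLastError() calls from the dispatch path. If such checks are still wanted while debugging, one common alternative (not part of this commit) is to compile them in only for debug builds. Below is a minimal, self-contained host C++ sketch; SABER_SYNC_DEBUG and SYNC_AND_CHECK are hypothetical names, and CUDA_CHECK is redefined here only so the snippet stands alone (the real macro in saber may differ).

#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Redefined here only to keep the sketch self-contained.
#define CUDA_CHECK(call)                                                \
    do {                                                                \
        cudaError_t err__ = (call);                                     \
        if (err__ != cudaSuccess) {                                     \
            fprintf(stderr, "CUDA error %s at %s:%d\n",                 \
                    cudaGetErrorString(err__), __FILE__, __LINE__);     \
            exit(EXIT_FAILURE);                                         \
        }                                                               \
    } while (0)

// Debug-only synchronization: only a build with -DSABER_SYNC_DEBUG pays for
// the full device sync; release builds keep the dispatch asynchronous, which
// is the effect this commit gets by deleting the calls outright.
#ifdef SABER_SYNC_DEBUG
#define SYNC_AND_CHECK()                        \
    do {                                        \
        CUDA_CHECK(cudaPeekAtLastError());      \
        CUDA_CHECK(cudaDeviceSynchronize());    \
    } while (0)
#else
#define SYNC_AND_CHECK() ((void)0)
#endif

int main() {
    float* d_buf = nullptr;
    CUDA_CHECK(cudaMalloc(&d_buf, 256 * sizeof(float)));
    CUDA_CHECK(cudaMemsetAsync(d_buf, 0, 256 * sizeof(float)));
    SYNC_AND_CHECK();  // no-op unless SABER_SYNC_DEBUG is defined
    CUDA_CHECK(cudaFree(d_buf));
    return 0;
}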

saber/funcs/impl/x86/saber_attension_lstm.cpp (+1, -15)

@@ -205,9 +205,6 @@ SaberStatus SaberAttensionLstm<X86, AK_FLOAT, AK_FLOAT, AK_FLOAT, NCHW, NCHW, NC
 1.f, inputs[0]->data(), _attn_fc_weights[0]->data(),
 0.f, _first_fc_out_0.mutable_data());
 for (int word_id = 0; word_id < max_len; word_id++) {
-if (word_id > 1) {
-break;
-}
 _attn_outs[0]->reshape(first_fc_out_0_shape);
 if (word_id > 0) {
 Shape first_fc_out_1_shape = {seq_num, _attn_fc_size[0], 1, 1};

@@ -240,32 +237,21 @@ SaberStatus SaberAttensionLstm<X86, AK_FLOAT, AK_FLOAT, AK_FLOAT, NCHW, NCHW, NC
 int fc_num = attn_param.fc_vec.size();
 sequence_softmax(_attn_outs[fc_num - 1]->mutable_data(), seq_offset, _softmax_out.mutable_data());
 sequence_pool(inputs[0]->data(), _softmax_out.data(), seq_offset, inputs[0]->valid_size() / word_num, _pool_out.mutable_data());
-record_dev_tensorfile(&_pool_out, "./pool_out_x86.txt");
-record_dev_tensorfile(&_softmax_out, "./softmax_out_x86.txt");
-record_dev_tensorfile(_attn_outs[0], "./attn_fc_0_x86.txt");
-record_dev_tensorfile(_attn_outs[1], "./attn_fc_1_x86.txt");
-record_dev_tensorfile(&_first_fc_out_0, "./first_fc_out_0.txt");
-record_dev_tensorfile(&_first_fc_out_1, "./first_fc_out_1.txt");
 _hidden_out.reshape(Shape(seq_num, 4*_hidden_size, 1,1));
-LOG(INFO)<<"hidden_size" << _hidden_size;
+//LOG(INFO)<<"hidden_size" << _hidden_size;
 gemm(false, false, seq_num, 4 * _hidden_size, _word_size,
 1.f, _pool_out.data(), _weights_i2h, 0.f, _hidden_out.mutable_data());
 if (word_id > 0) {
 gemm(false, false, seq_num, 4 * _hidden_size, _hidden_size,
 1.f, _lstm_out.data() + (word_id - 1) * seq_num * _hidden_size, _weights_h2h, 1.f, _hidden_out.mutable_data());
 }
-record_dev_tensorfile(&_hidden_out, "./hidden_out_before_act.txt");
 lstm_bias_and_act(_hidden_out.data(), _weights_bias,
 _lstm_out.mutable_data() + word_id * seq_num * _hidden_size,
 _cell_out.mutable_data(), seq_num, _hidden_size, false);
-record_dev_tensorfile(&_hidden_out, "./hidden_out_after_act.txt");
-record_dev_tensorfile(&_cell_out, "./hidden_out_after_act.txt");
 }
 
 lstm_result_to_sequence(_lstm_out.data(), _hidden_size, seq_offset, outputs[0]->mutable_data());
 outputs[0]->set_seq_offset(seq_offset);
-record_dev_tensorfile(outputs[0], "./final_out_x86.txt");
-record_dev_tensorfile(&_lstm_out, "./lstm_out_x86.txt");
 
 return SaberSuccess;
 }
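The key fix in the x86 path is removing the leftover `if (word_id > 1) break;`, which had truncated the time loop to the first two steps; the loop now runs over all max_len steps, and each iteration does the attention FC stack, sequence softmax/pool, an input-projection GEMM into the 4*hidden gate buffer, a recurrent GEMM that accumulates the previous output for word_id > 0, and finally bias plus activations in lstm_bias_and_act. Below is a simplified, self-contained C++ sketch of that per-step recurrence; naive_gemm, lstm_step, and the [i, f, c, o] gate layout are illustrative assumptions, not the saber implementation.

#include <cmath>
#include <cstdio>
#include <vector>

// C[m x n] = alpha * A[m x k] * B[k x n] + beta * C  (row-major, no transpose)
static void naive_gemm(int m, int n, int k, float alpha, const float* A,
                       const float* B, float beta, float* C) {
    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < n; ++j) {
            float acc = 0.f;
            for (int p = 0; p < k; ++p) acc += A[i * k + p] * B[p * n + j];
            C[i * n + j] = alpha * acc + beta * C[i * n + j];
        }
    }
}

static float sigmoid(float x) { return 1.f / (1.f + std::exp(-x)); }

// One LSTM step for seq_num rows; the gate buffer is laid out as [i, f, c, o].
static void lstm_step(const std::vector<float>& x,      // seq_num x word_size
                      const float* h_prev,              // seq_num x hidden, nullptr at t == 0
                      const std::vector<float>& w_i2h,  // word_size x 4*hidden
                      const std::vector<float>& w_h2h,  // hidden x 4*hidden
                      const std::vector<float>& bias,   // 4*hidden
                      std::vector<float>& cell,         // seq_num x hidden, updated in place
                      float* h_out,                     // seq_num x hidden
                      int seq_num, int word_size, int hidden) {
    std::vector<float> gates(seq_num * 4 * hidden, 0.f);
    // input projection, like: gemm(..., _pool_out.data(), _weights_i2h, 0.f, _hidden_out...)
    naive_gemm(seq_num, 4 * hidden, word_size, 1.f, x.data(), w_i2h.data(), 0.f, gates.data());
    if (h_prev != nullptr) {
        // recurrent term, mirroring the word_id > 0 branch that accumulates with beta = 1.f
        naive_gemm(seq_num, 4 * hidden, hidden, 1.f, h_prev, w_h2h.data(), 1.f, gates.data());
    }
    // bias + activations, standing in for lstm_bias_and_act
    for (int s = 0; s < seq_num; ++s) {
        const float* g = &gates[s * 4 * hidden];
        for (int j = 0; j < hidden; ++j) {
            float i_g = sigmoid(g[0 * hidden + j] + bias[0 * hidden + j]);
            float f_g = sigmoid(g[1 * hidden + j] + bias[1 * hidden + j]);
            float c_g = std::tanh(g[2 * hidden + j] + bias[2 * hidden + j]);
            float o_g = sigmoid(g[3 * hidden + j] + bias[3 * hidden + j]);
            cell[s * hidden + j] = f_g * cell[s * hidden + j] + i_g * c_g;
            h_out[s * hidden + j] = o_g * std::tanh(cell[s * hidden + j]);
        }
    }
}

int main() {
    const int seq_num = 2, word_size = 3, hidden = 4, max_len = 5;
    std::vector<float> w_i2h(word_size * 4 * hidden, 0.01f);
    std::vector<float> w_h2h(hidden * 4 * hidden, 0.01f);
    std::vector<float> bias(4 * hidden, 0.f);
    std::vector<float> cell(seq_num * hidden, 0.f);
    std::vector<float> lstm_out(max_len * seq_num * hidden, 0.f);
    std::vector<float> x(seq_num * word_size, 0.5f);  // stands in for the pooled attention output
    for (int word_id = 0; word_id < max_len; ++word_id) {  // full loop, no early break
        const float* h_prev = word_id > 0 ? &lstm_out[(word_id - 1) * seq_num * hidden] : nullptr;
        lstm_step(x, h_prev, w_i2h, w_h2h, bias, cell,
                  &lstm_out[word_id * seq_num * hidden], seq_num, word_size, hidden);
    }
    printf("h[0][0] after %d steps: %f\n", max_len, lstm_out[(max_len - 1) * seq_num * hidden]);
    return 0;
}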

test/saber/cuda/test_saber_func_attension_lstm.cpp (+3, -3)

@@ -19,7 +19,7 @@ using namespace anakin::saber;
 
 void test_saber_attension_lstm(int sequence_size = 2, int batch_size = 1, int word_size = 30,
 int hidden_size = 15) {
-
+#if defined(USE_X86_PLACE) && defined(USE_CUDA)
 Context<NV> ctx_dev(0, 0, 0);
 Context<X86> ctx_x86(0, 0, 0);
 typedef Tensor<NV, AK_FLOAT, NCHW> TensorDf4;

@@ -28,7 +28,7 @@ void test_saber_attension_lstm(int sequence_size = 2, int batch_size = 1, int wo
 
 
 
-std::vector<int> offsets = {0, 3, 7};
+std::vector<int> offsets = {0, 3};
 bool is_reverse = false;
 batch_size = offsets.size() - 1;
 Shape input_shape(offsets[offsets.size() - 1], word_size, 1, 1);

@@ -206,7 +206,7 @@ void test_saber_attension_lstm(int sequence_size = 2, int batch_size = 1, int wo
 #endif
 
 return;
-
+#endif
 }
 
 TEST(TestSaberFuncNV, test_func_saber_lstm) {

test/saber/cuda/test_saber_func_lstm.cpp (+2, -1)

@@ -19,6 +19,7 @@ cublasHandle_t cublas_handle;
 
 void test_saber_lstm(int sequence_size = 2, int batch_size = 1, int word_size = 4,
 int hidden_size = 4) {
+#if defined(USE_CUDA) && defined(USE_X86_PLACE)
 
 Context<NV> ctx_dev(0, 0, 0);
 Context<X86> ctx_x86(0, 0, 0);

@@ -156,8 +157,8 @@ void test_saber_lstm(int sequence_size = 2, int batch_size = 1, int word_size =
 << t2.get_average_ms();
 #endif
 
+#endif
 return;
-
 }
 
 TEST(TestSaberFuncNV, test_func_saber_lstm) {