Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions kernels/elementwise/elementwise.cu
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ __global__ void elementwise_add_f16x8_pack_kernel(half *a, half *b, half *c,
// reinterpret as float4 and store 128 bits in 1 memory issue.
if ((idx + 7) < N) {
LDST128BITS(c[idx]) = LDST128BITS(pack_c[0]);
} else {
for (int i=0; idx+i<N; i++) {
c[idx+i] = __hadd(a[idx+i], b[idx+i]);
}
}
}

Expand Down