-
Notifications
You must be signed in to change notification settings - Fork 482
oneAPI backend update: kernel and layer optimizations #1246
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 14 commits
70323c9
4162599
34f0d82
951a1f6
8445de7
0d21e99
257385a
0b8ef13
cf98216
70054aa
c307715
454d556
7e028e6
97c187d
00f82a3
496846d
84ad787
120c2e4
e2cec76
d869a5c
7e2e747
0b3dbeb
36881e0
60c0f42
44ee08f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,117 @@ | |
// currently this is fixed | ||
using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(sycl::ext::intel::experimental::ready_latency<0>)); | ||
|
||
// Pipe properties for host pipes. Host pipes connect to the data source DMA and sink DMA. | ||
// They are connected to the first and the last layer to stream data into and out from the kernel. | ||
using HostPipePropertiesT = decltype(sycl::ext::oneapi::experimental::properties( | ||
sycl::ext::intel::experimental::ready_latency<0>, sycl::ext::intel::experimental::bits_per_symbol<8>, | ||
sycl::ext::intel::experimental::uses_valid<true>, sycl::ext::intel::experimental::first_symbol_in_high_order_bits<true>, | ||
sycl::ext::intel::experimental::protocol_avalon_streaming_uses_ready)); | ||
|
||
namespace nnet { | ||
|
||
#if !defined(IS_BSP) | ||
// Definition for buffer locations for Avalon MM host. | ||
inline constexpr unsigned kInputBufferLocation = 0; | ||
inline constexpr unsigned kOutputBufferLocation = 1; | ||
#endif | ||
|
||
// Implementation of a direct memory access kernel. Move data from source, convert, | ||
// and send to the sink. Adaptive to SYCL HLS and hardware acceleration flow. | ||
template <class src_T, class dest_pipe> struct DMA_convert_data { | ||
#if !defined(IS_BSP) | ||
// When targeting a device family, we instantiate an Avalon Memory Mapped Host for | ||
|
||
// data transaction between host and the DMA kernel during emulation and simulation. | ||
sycl::ext::oneapi::experimental::annotated_arg< | ||
src_T *, | ||
decltype(sycl::ext::oneapi::experimental::properties{ | ||
sycl::ext::intel::experimental::latency<0>, sycl::ext::intel::experimental::dwidth<16>, | ||
sycl::ext::intel::experimental::buffer_location<kInputBufferLocation>, | ||
sycl::ext::intel::experimental::read_write_mode_read, sycl::ext::intel::experimental::wait_request_requested})> | ||
#else | ||
// When targeting oneAPI BSP, we can use USM pointer to access host memory. | ||
src_T *const | ||
#endif | ||
src; | ||
size_t num_iteration; | ||
|
||
[[intel::kernel_args_restrict]] void operator()() const { | ||
|
||
#if defined(IS_BSP) | ||
// Access data using host pointer. | ||
sycl::ext::intel::host_ptr<src_T> src_ptr(src); | ||
#else | ||
// Host allocation is not supported when targeting an FPGA family or part number. | ||
src_T *src_ptr(src); | ||
#endif | ||
// First, extract the PipeDataT from the pipe | ||
using PipeDataType = typename nnet::ExtractPipeType<dest_pipe>::value_type; | ||
// Then, extract the DataT from StreamingBeat | ||
using DstDataType = typename nnet::ExtractDataType<PipeDataType>::value_type; | ||
constexpr auto dstTypeSize = std::tuple_size<DstDataType>{}; | ||
|
||
[[intel::fpga_register]] typename nnet::ExtractPipeType<dest_pipe>::value_type packet; | ||
|
||
// Keep sending data to the input layer and keep the kernels running. | ||
for (size_t i = 0; i < num_iteration; i++) { | ||
#pragma unroll | ||
for (size_t j = 0; j < dstTypeSize; j++) { | ||
packet.data[j] = src_ptr[i * dstTypeSize + j]; | ||
} | ||
packet.sop = (i == 0); | ||
// Assert end-of-packet signal after the last iteration. | ||
// All down-stream kernels will stop seeing eop. | ||
packet.eop = (i == (num_iteration - 1)); | ||
dest_pipe::write(packet); | ||
} | ||
} | ||
}; | ||
|
||
// Symmetrical to the DMA_convert_data above, this DMA drains the output pipe and | ||
// writes result to memory. | ||
template <class src_pipe, class dst_T> struct DMA_convert_data_back { | ||
#if !defined(IS_BSP) | ||
// Without BSP, instantiate an Avalon Memory Mapped Host to write to host. | ||
sycl::ext::oneapi::experimental::annotated_arg< | ||
dst_T *, | ||
decltype(sycl::ext::oneapi::experimental::properties{ | ||
sycl::ext::intel::experimental::latency<0>, sycl::ext::intel::experimental::dwidth<16>, | ||
sycl::ext::intel::experimental::buffer_location<kOutputBufferLocation>, | ||
sycl::ext::intel::experimental::read_write_mode_write, sycl::ext::intel::experimental::wait_request_requested})> | ||
#else | ||
// USM pointer, otherwise. | ||
dst_T *const | ||
#endif | ||
dst; | ||
size_t num_iteration; | ||
|
||
[[intel::kernel_args_restrict]] void operator()() const { | ||
#if defined(IS_BSP) | ||
sycl::ext::intel::host_ptr<dst_T> dst_ptr(dst); | ||
#else | ||
dst_T *dst_ptr(dst); | ||
#endif | ||
// First, extract the PipeDataT from the pipe | ||
using PipeDataType = typename nnet::ExtractPipeType<src_pipe>::value_type; | ||
// Then, extract the DataT from StreamingBeat | ||
using SrcDataType = typename nnet::ExtractDataType<PipeDataType>::value_type; | ||
constexpr auto srcTypeSize = std::tuple_size<SrcDataType>{}; | ||
|
||
[[intel::fpga_register]] typename nnet::ExtractPipeType<src_pipe>::value_type packet; | ||
|
||
// Drain the output pipe and write result to memory. | ||
for (size_t i = 0; i < num_iteration; i++) { | ||
packet = src_pipe::read(); | ||
#pragma unroll | ||
for (size_t j = 0; j < srcTypeSize; j++) { | ||
dst_ptr[i * srcTypeSize + j] = static_cast<dst_T>(packet.data[j].to_double()); | ||
} | ||
} | ||
} | ||
}; | ||
|
||
} // namespace nnet | ||
|
||
// Need to declare the input and output pipes | ||
|
||
// hls-fpga-machine-learning insert inputs | ||
|
Uh oh!
There was an error while loading. Please reload this page.