From c09b89512c68aa69de78349aa32bd6bc23c31c92 Mon Sep 17 00:00:00 2001 From: ueqri Date: Fri, 18 Mar 2022 23:00:45 +0800 Subject: [PATCH 1/2] enable sw_emu on Vitis HLS v2021.2 --- spmv/k2k_relay.cpp | 24 ++++++++++++------------ spmv/libfpga/common.h | 16 +++++++++++++++- spmv/libfpga/shuffle.h | 6 +++--- spmv/libfpga/spmv_cluster.h | 8 +++++--- spmv/libfpga/stream_utils.h | 20 +++++++++++++------- spmv/spmv_result_drain.cpp | 8 ++++---- spmv/spmv_sk0.cpp | 4 ++-- spmv/spmv_sk1.cpp | 4 ++-- spmv/spmv_sk2.cpp | 4 ++-- spmv/spmv_vector_loader.cpp | 13 ++++++++----- sw/Makefile | 9 ++++++++- sw/host.cpp | 6 +++--- 12 files changed, 77 insertions(+), 45 deletions(-) diff --git a/spmv/k2k_relay.cpp b/spmv/k2k_relay.cpp index 1cf7344..9fe654e 100644 --- a/spmv/k2k_relay.cpp +++ b/spmv/k2k_relay.cpp @@ -5,28 +5,28 @@ extern "C" { void k2k_relay( - hls::stream &in, // in - hls::stream &out // out + hls::stream &in, // in + hls::stream &out // out ) { #pragma HLS interface ap_ctrl_none port=return #pragma HLS interface axis register both port=in #pragma HLS interface axis register both port=out -#ifndef __SYNTHESIS__ - bool exit = false; - while (!exit) { - VEC_AXIS_T pkt = in.read(); - out.write(pkt); - exit = (pkt.user == EOS); - } -#else +// #ifndef __SYNTHESIS__ +// bool exit = false; +// while (!exit) { +// VEC_AXIS_IF_T pkt = in.read(); +// out.write(pkt); +// exit = (pkt.user == EOS); +// } +// #else while (1) { #pragma HLS pipeline II=1 - VEC_AXIS_T pkt = in.read(); + VEC_AXIS_IF_T pkt = in.read(); out.write(pkt); } -#endif +// #endif } // kernel } // extern "C" diff --git a/spmv/libfpga/common.h b/spmv/libfpga/common.h index c962132..18e708f 100644 --- a/spmv/libfpga/common.h +++ b/spmv/libfpga/common.h @@ -96,6 +96,7 @@ struct VEC_PLD_T{ #define VEC_PLD_EOS ((VEC_PLD_T){0,0,EOS}) #ifndef __SYNTHESIS__ +namespace { std::string inst2str(INST_T inst) { switch (inst) { case SOD: return std::string("SOD"); @@ -130,6 +131,7 @@ std::ostream& operator<<(std::ostream& os, const VEC_PLD_T &p) { << "inst: " << inst2str(p.inst) << '}'; return os; } +} #endif //------------------------------------------------------------------------- @@ -140,12 +142,14 @@ std::ostream& operator<<(std::ostream& os, const VEC_PLD_T &p) { typedef struct { ap_uint<32 * (PACK_SIZE + 1)> data; ap_uint<2> user; // same as INST_T -} VEC_AXIS_T; +} VEC_AXIS_T; // only used for stream FIFOs +typedef ap_axiu<32 * (PACK_SIZE + 1), 2, 0, 0> VEC_AXIS_IF_T; // AXI4-Stream interface of split kernels #define VEC_AXIS_PKT_IDX(p) (p.data(31,0)) #define VEC_AXIS_VAL(p, i) (p.data(63 + 32 * i,32 + 32 * i)) #ifndef __SYNTHESIS__ +namespace { std::ostream& operator<<(std::ostream& os, const VEC_AXIS_T &p) { os << '{' << "pktidx: " << VEC_AXIS_PKT_IDX(p) << '|'; for (unsigned i = 0; i < PACK_SIZE; i++) { @@ -154,6 +158,16 @@ std::ostream& operator<<(std::ostream& os, const VEC_AXIS_T &p) { os << "user: " << inst2str(p.user) << '}'; return os; } + +std::ostream& operator<<(std::ostream& os, const VEC_AXIS_IF_T &p) { + os << '{' << "pktidx: " << VEC_AXIS_PKT_IDX(p) << '|'; + for (unsigned i = 0; i < PACK_SIZE; i++) { + os << "val: " << float(VEC_AXIS_VAL(p, i)) / (1 << FBITS) << '|'; + } + os << "user: " << inst2str(p.user) << '}'; + return os; +} +} #endif //------------------------------------------------------------------------- diff --git a/spmv/libfpga/shuffle.h b/spmv/libfpga/shuffle.h index d06b7bd..4b302b1 100644 --- a/spmv/libfpga/shuffle.h +++ b/spmv/libfpga/shuffle.h @@ -13,7 +13,7 @@ // bool line_tracing_shuffle_core = false; // bool csim_abort_shuffle_core = false; // unsigned max_iter_limit_shuffle_core = 100; -unsigned long long iter_cnt = 0; +// unsigned long long iter_cnt = 0; #endif const unsigned ARBITER_LATENCY = 7; @@ -34,7 +34,7 @@ void arbiter_1p( #pragma HLS pipeline II=1 enable_flush #pragma HLS latency min=ARBITER_LATENCY max=ARBITER_LATENCY - #pragma HLS array_partition variable=in_addr complete + #pragma HLS array_partition variable=in_resend complete #pragma HLS array_partition variable=xbar_sel complete // prioritized valid and addr @@ -112,7 +112,7 @@ void arbiter_1p( #pragma HLS pipeline II=1 enable_flush #pragma HLS latency min=ARBITER_LATENCY max=ARBITER_LATENCY - #pragma HLS array_partition variable=in_addr complete + #pragma HLS array_partition variable=in_resend complete #pragma HLS array_partition variable=xbar_sel complete // prioritized valid and addr diff --git a/spmv/libfpga/spmv_cluster.h b/spmv/libfpga/spmv_cluster.h index 508cd30..63f1b93 100644 --- a/spmv/libfpga/spmv_cluster.h +++ b/spmv/libfpga/spmv_cluster.h @@ -22,7 +22,7 @@ template T array_max(T array[len]) { #pragma HLS inline - #pragma HLS expression_balance + // #pragma HLS expression_balance T result = 0; for (unsigned i = 0; i < len; i++) { #pragma HLS unroll @@ -31,6 +31,7 @@ T array_max(T array[len]) { return result; } +namespace { void CPSR_matrix_loader( const SPMV_MAT_PKT_T *matrix_hbm, // in unsigned row_partition_idx, // in @@ -191,6 +192,7 @@ void spmv_result_packer ( #endif } } +} // one computational cluster template @@ -213,14 +215,14 @@ void spmv_cluster( #pragma HLS stream variable=ML2SF depth=FIFO_DEPTH #pragma HLS stream variable=SF2VAU depth=FIFO_DEPTH #pragma HLS stream variable=VAU2SF depth=FIFO_DEPTH - #pragma HLS stream variable=FS2PE depth=FIFO_DEPTH + #pragma HLS stream variable=SF2PE depth=FIFO_DEPTH #pragma HLS stream variable=PE2PK depth=FIFO_DEPTH #pragma HLS stream variable=UPK2VAU depth=FIFO_DEPTH #pragma HLS bind_storage variable=ML2SF type=FIFO impl=SRL #pragma HLS bind_storage variable=SF2VAU type=FIFO impl=SRL #pragma HLS bind_storage variable=VAU2SF type=FIFO impl=SRL - #pragma HLS bind_storage variable=FS2PE type=FIFO impl=SRL + #pragma HLS bind_storage variable=SF2PE type=FIFO impl=SRL #pragma HLS bind_storage variable=PE2PK type=FIFO impl=SRL #pragma HLS bind_storage variable=UPK2VAU type=FIFO impl=SRL diff --git a/spmv/libfpga/stream_utils.h b/spmv/libfpga/stream_utils.h index 0862223..3a7a9ff 100644 --- a/spmv/libfpga/stream_utils.h +++ b/spmv/libfpga/stream_utils.h @@ -7,21 +7,24 @@ // duplicate 1 AXIS stream to N template void axis_duplicate( - hls::stream &in, + hls::stream &in, hls::stream out[N] ) { bool exit = false; while (!exit) { #pragma HLS pipeline II=1 - VEC_AXIS_T pkt = in.read(); - VEC_AXIS_T pkt_reg = reg(reg(pkt)); + VEC_AXIS_IF_T pkt_if = in.read(); + VEC_AXIS_IF_T pkt_if_reg = reg(reg(pkt_if)); + VEC_AXIS_T pkt_reg; + pkt_reg.data = pkt_if_reg.data; + pkt_reg.user = pkt_if_reg.user; VEC_AXIS_T pkt_replicas[N]; for (unsigned k = 0; k < N; k++) { #pragma HLS unroll pkt_replicas[k] = reg(pkt_reg); out[k].write(pkt_replicas[k]); } - exit = (pkt.user == EOS); + exit = (pkt_if.user == EOS); } } @@ -35,7 +38,7 @@ void axis_duplicate( template void axis_merge( hls::stream in[N], - hls::stream &out + hls::stream &out ) { unsigned i = 0; unsigned c = 0; @@ -47,7 +50,10 @@ void axis_merge( VEC_AXIS_T pkt = in[i].read(); VEC_AXIS_PKT_IDX(pkt) = c; if (pkt.user != EOS) { - out.write(pkt); + VEC_AXIS_IF_T pkt_if; + pkt_if.data = pkt.data; + pkt_if.user = pkt.user; + out.write(pkt_if); #ifdef AXIS_MERGE_LINE_TRACING std::cout << "axis merge write output from input " << i << std::endl << " " << pkt << std::endl; @@ -64,7 +70,7 @@ void axis_merge( } - VEC_AXIS_T eos; + VEC_AXIS_IF_T eos; for (unsigned k = 0; k < PACK_SIZE; k++) { #pragma HLS unroll VEC_AXIS_VAL(eos, k) = 0; diff --git a/spmv/spmv_result_drain.cpp b/spmv/spmv_result_drain.cpp index 2bafae1..d5a1e17 100644 --- a/spmv/spmv_result_drain.cpp +++ b/spmv/spmv_result_drain.cpp @@ -12,9 +12,9 @@ void spmv_result_drain( PACKED_VAL_T *packed_dense_result, // out const unsigned row_part_id, // in // const unsigned rows_per_c_in_partition, // in - hls::stream &from_SLR0, // out - hls::stream &from_SLR1, // out - hls::stream &from_SLR2 // out + hls::stream &from_SLR0, // out + hls::stream &from_SLR1, // out + hls::stream &from_SLR2 // out ) { #pragma HLS interface m_axi port=packed_dense_result offset=slave bundle=spmv_vin #pragma HLS interface s_axilite port=packed_dense_result bundle=control @@ -37,7 +37,7 @@ void spmv_result_drain( result_drain_main_loop: while (!exit) { #pragma HLS pipeline II=1 - VEC_AXIS_T pkt; + VEC_AXIS_IF_T pkt; bool do_write = false; switch (current_input) { case 0: diff --git a/spmv/spmv_sk0.cpp b/spmv/spmv_sk0.cpp index f1ae058..f17930f 100644 --- a/spmv/spmv_sk0.cpp +++ b/spmv/spmv_sk0.cpp @@ -15,8 +15,8 @@ void spmv_sk0( const SPMV_MAT_PKT_T *matrix_hbm_1, // in const SPMV_MAT_PKT_T *matrix_hbm_2, // in const SPMV_MAT_PKT_T *matrix_hbm_3, // in - hls::stream &vec_in, // in - hls::stream &res_out, // out + hls::stream &vec_in, // in + hls::stream &res_out, // out const unsigned row_partition_idx, // in const unsigned rows_per_c_in_partition, // in const unsigned num_col_partitions, // in diff --git a/spmv/spmv_sk1.cpp b/spmv/spmv_sk1.cpp index cc1f46b..dcafb6b 100644 --- a/spmv/spmv_sk1.cpp +++ b/spmv/spmv_sk1.cpp @@ -14,8 +14,8 @@ void spmv_sk1( const SPMV_MAT_PKT_T *matrix_hbm_7, // in const SPMV_MAT_PKT_T *matrix_hbm_8, // in const SPMV_MAT_PKT_T *matrix_hbm_9, // in - hls::stream &vec_in, // in - hls::stream &res_out, // out + hls::stream &vec_in, // in + hls::stream &res_out, // out const unsigned row_partition_idx, // in const unsigned rows_per_c_in_partition, // in const unsigned num_col_partitions, // in diff --git a/spmv/spmv_sk2.cpp b/spmv/spmv_sk2.cpp index 33c459e..622bfa8 100644 --- a/spmv/spmv_sk2.cpp +++ b/spmv/spmv_sk2.cpp @@ -13,8 +13,8 @@ void spmv_sk2( const SPMV_MAT_PKT_T *matrix_hbm_13, // in const SPMV_MAT_PKT_T *matrix_hbm_14, // in const SPMV_MAT_PKT_T *matrix_hbm_15, // in - hls::stream &vec_in, // in - hls::stream &res_out, // out + hls::stream &vec_in, // in + hls::stream &res_out, // out const unsigned row_partition_idx, // in const unsigned rows_per_c_in_partition, // in const unsigned num_col_partitions, // in diff --git a/spmv/spmv_vector_loader.cpp b/spmv/spmv_vector_loader.cpp index 13807d5..bf23165 100644 --- a/spmv/spmv_vector_loader.cpp +++ b/spmv/spmv_vector_loader.cpp @@ -80,14 +80,17 @@ void load_duplicate( void write_k2ks( hls::stream &in, // in - hls::stream &out // out + hls::stream &out // out ) { bool exit = false; loop_fifo2axis: while (!exit) { #pragma HLS pipeline II=1 VEC_AXIS_T pkt = in.read(); - out.write(pkt); + VEC_AXIS_IF_T pkt_if; + pkt_if.data = pkt.data; + pkt_if.user = pkt.user; + out.write(pkt_if); exit = (pkt.user == EOS); } } @@ -96,9 +99,9 @@ extern "C" { void spmv_vector_loader( const PACKED_VAL_T *packed_dense_vector, // in const unsigned num_cols, // in - hls::stream &to_SLR0, // out - hls::stream &to_SLR1, // out - hls::stream &to_SLR2 // out + hls::stream &to_SLR0, // out + hls::stream &to_SLR1, // out + hls::stream &to_SLR2 // out ) { #pragma HLS interface m_axi port=packed_dense_vector offset=slave bundle=spmv_vin #pragma HLS interface s_axilite port=packed_dense_vector bundle=control diff --git a/sw/Makefile b/sw/Makefile index 449ee99..452110e 100644 --- a/sw/Makefile +++ b/sw/Makefile @@ -15,7 +15,7 @@ HOST_ARCH = x86 CXX := g++ -CXXFLAGS += -Wall -std=c++11 +CXXFLAGS += -Wall -Wunknown-pragmas -std=c++14 CXXFLAGS += -I$(CNPY_INCLUDE) LDFLAGS += -L$(CNPY_LIB) -lcnpy @@ -40,6 +40,9 @@ CXXFLAGS += -Wno-int-in-bool-context $(HW_DIR)/build_dir.hw_emu/spmv.xclbin: cd $(HW_DIR); make -j5 build TARGET=hw_emu IMPL=$(IMPL) +$(HW_DIR)/build_dir.sw_emu/spmv.xclbin: + cd $(HW_DIR); make -j5 build TARGET=sw_emu IMPL=$(IMPL) + $(HW_DIR)/build_dir.hw/spmv.xclbin: cd $(HW_DIR); make -j5 build TARGET=hw IMPL=$(IMPL) @@ -59,6 +62,10 @@ hw_emu: host $(HW_DIR)/build_dir.hw_emu/spmv.xclbin cp $(HW_DIR)/emconfig.json . ./host hw_emu $(HW_DIR)/build_dir.hw_emu/spmv.xclbin +sw_emu: host $(HW_DIR)/build_dir.sw_emu/spmv.xclbin + cp $(HW_DIR)/emconfig.json . + ./host sw_emu $(HW_DIR)/build_dir.sw_emu/spmv.xclbin + hw: host $(HW_DIR)/build_dir.hw/spmv.xclbin ./host hw $(HW_DIR)/build_dir.hw/spmv.xclbin diff --git a/sw/host.cpp b/sw/host.cpp index 2964339..8dfa7bf 100644 --- a/sw/host.cpp +++ b/sw/host.cpp @@ -537,13 +537,13 @@ int main (int argc, char** argv) { // parse command-line arguments if (argc != 3) { std::cout << "Usage: " << argv[0] - << " " << std::endl; + << " " << std::endl; return 0; } std::string target = argv[1]; std::string xclbin = argv[2]; - if (target != "hw_emu" && target != "hw") { - std::cout << "This host program only support hw_emu and hw!" << std::endl; + if (target != "sw_emu" && target != "hw_emu" && target != "hw") { + std::cout << "This host program only support sw_emu, hw_emu and hw!" << std::endl; return 1; } From f8762b5d8553576cbf6cc3b148aa204345a72313 Mon Sep 17 00:00:00 2001 From: ueqri Date: Sat, 19 Mar 2022 23:23:15 +0800 Subject: [PATCH 2/2] remove exit condition in k2k_relay --- spmv/k2k_relay.cpp | 9 --------- sw/Makefile | 4 +++- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/spmv/k2k_relay.cpp b/spmv/k2k_relay.cpp index 9fe654e..ad4ed20 100644 --- a/spmv/k2k_relay.cpp +++ b/spmv/k2k_relay.cpp @@ -13,20 +13,11 @@ void k2k_relay( #pragma HLS interface axis register both port=in #pragma HLS interface axis register both port=out -// #ifndef __SYNTHESIS__ -// bool exit = false; -// while (!exit) { -// VEC_AXIS_IF_T pkt = in.read(); -// out.write(pkt); -// exit = (pkt.user == EOS); -// } -// #else while (1) { #pragma HLS pipeline II=1 VEC_AXIS_IF_T pkt = in.read(); out.write(pkt); } -// #endif } // kernel } // extern "C" diff --git a/sw/Makefile b/sw/Makefile index 452110e..b7de995 100644 --- a/sw/Makefile +++ b/sw/Makefile @@ -15,7 +15,7 @@ HOST_ARCH = x86 CXX := g++ -CXXFLAGS += -Wall -Wunknown-pragmas -std=c++14 +CXXFLAGS += -Wall -std=c++14 CXXFLAGS += -I$(CNPY_INCLUDE) LDFLAGS += -L$(CNPY_LIB) -lcnpy @@ -36,6 +36,8 @@ LDFLAGS += -lrt -lstdc++ CXXFLAGS += -Wno-maybe-uninitialized CXXFLAGS += -Wno-uninitialized CXXFLAGS += -Wno-int-in-bool-context +CXXFLAGS += -Wno-unknown-pragmas +CXXFLAGS += -Wno-unused-function $(HW_DIR)/build_dir.hw_emu/spmv.xclbin: cd $(HW_DIR); make -j5 build TARGET=hw_emu IMPL=$(IMPL)