From 7741c0db48790a05c02b868613392d85dbb611d4 Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Wed, 9 Apr 2025 09:52:01 +0100 Subject: [PATCH 1/6] [AIE2P] revert separate subregister index for accumulator --- llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 32 +++---- .../Target/AIE/aie2p/AIE2PInstrPatterns.td | 87 +++++++++---------- .../AIE/aie2p/AIE2PInstructionSelector.cpp | 30 +++---- .../Target/AIE/aie2p/AIE2PRegisterInfo.cpp | 8 +- .../lib/Target/AIE/aie2p/AIE2PRegisterInfo.td | 86 ++++++++---------- 5 files changed, 114 insertions(+), 129 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index 49d892737bf3..53c457e9ee5c 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -618,22 +618,22 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (AIE2P::VEC1024RegClass.contains(SrcReg) && AIE2P::ACC1024RegClass.contains(DstReg)) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_512_acc_lo)) + TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_512_acc_hi)) + TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if (AIE2P::ACC1024RegClass.contains(SrcReg) && AIE2P::VEC1024RegClass.contains(DstReg)) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_lo), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_hi), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if 
((AIE2P::ACC2048RegClass.contains(SrcReg)) && (AIE2P::ACC2048RegClass.contains(DstReg))) { @@ -715,20 +715,20 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB, (AIE2P::FIFO1024RegClass.contains(DstReg))) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_lo_fifo)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_lo), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_hi_fifo)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_hi), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if ((AIE2P::FIFO1024RegClass.contains(SrcReg)) && (AIE2P::ACC1024RegClass.contains(DstReg))) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_512_acc_lo)) + TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_lo_fifo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_512_acc_hi)) + TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_hi_fifo), getKillRegState(KillSrc)); } else if ((AIE2P::eLRegClass.contains(SrcReg)) && @@ -1025,8 +1025,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { return {{AIE2P::ST_dms_sts_spill, AIE2P::sub_l_even}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_l_odd}}; case AIE2P::VST_CM_SPILL: - return {{AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_acc_lo}, - {AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_acc_hi}}; + return {{AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_lo}, + {AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_hi}}; case AIE2P::VST_FIFO_SPILL: return {{AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_lo_fifo}, {AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_hi_fifo}}; @@ -1036,8 +1036,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2P::ST_dms_sts_spill, 
AIE2P::sub_ptr}}; case AIE2P::VST_DM_SPILL: - return {{AIE2P::VST_CM_SPILL, AIE2P::sub_1024_acc_lo}, - {AIE2P::VST_CM_SPILL, AIE2P::sub_1024_acc_hi}}; + return {{AIE2P::VST_CM_SPILL, AIE2P::sub_1024_lo}, + {AIE2P::VST_CM_SPILL, AIE2P::sub_1024_hi}}; case AIE2P::VST_Y_SPILL: return {{AIE2P::VST_dmx_sts_x_spill, AIE2P::sub_512_lo}, {AIE2P::VST_dmx_sts_x_spill, AIE2P::sub_512_hi}}; @@ -1062,8 +1062,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { return {{AIE2P::LDA_dms_lda_spill, AIE2P::sub_l_even}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_l_odd}}; case AIE2P::VLDA_CM_SPILL: - return {{AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_acc_lo}, - {AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_acc_hi}}; + return {{AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_lo}, + {AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_hi}}; case AIE2P::VLDA_FIFO_SPILL: return {{AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_lo_fifo}, {AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_hi_fifo}}; @@ -1074,8 +1074,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2P::LDA_dms_lda_spill, AIE2P::sub_ptr}, }; case AIE2P::VLDA_DM_SPILL: - return {{AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_acc_lo}, - {AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_acc_hi}}; + return {{AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_lo}, + {AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_hi}}; case AIE2P::VLDA_Y_SPILL: return {{AIE2P::VLDA_dmx_lda_x_spill, AIE2P::sub_512_lo}, {AIE2P::VLDA_dmx_lda_x_spill, AIE2P::sub_512_hi}}; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td index 77c3d23f6f5b..b5ef4369482c 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td @@ -243,19 +243,19 @@ def : Pat<(v64bf16 (fmul v64bf16:$vec1, v64bf16:$vec2)), (VCONV_bf16_fp32_mv_x_srs_bf (EXTRACT_SUBREG (VMUL_f_vmul_bf_vmul_bf_core_Y_Y VEC1024:$vec1, VEC1024:$vec2, (i32 mulbf16_vecconf.ConfBits)), - 
sub_1024_acc_lo)), + sub_1024_lo)), sub_512_lo, (VCONV_bf16_fp32_mv_x_srs_bf (EXTRACT_SUBREG (VMUL_f_vmul_bf_vmul_bf_core_Y_Y VEC1024:$vec1, VEC1024:$vec2, (i32 mulbf16_vecconf.ConfBits)), - sub_1024_acc_hi)), + sub_1024_hi)), sub_512_hi))>; def : Pat<(v32bf16 (fmul v32bf16:$vec1, v32bf16:$vec2)), (VCONV_bf16_fp32_mv_x_srs_bf (EXTRACT_SUBREG (VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$vec1, VEC512:$vec2, (i32 mulbf16_vecconf.ConfBits)), - sub_1024_acc_lo))>; + sub_1024_lo))>; // VMUL/VMAC Intrinsics @@ -303,40 +303,40 @@ def : Pat<(int_aie2p_I512_I1024_ACC2048_mul_conf VEC512:$s1, VEC1024:$s2, eR:$ac def : Pat<(int_aie2p_I512_I512_ACC1024_addmac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, ACC1024:$acc2, eR:$acc), (EXTRACT_SUBREG (VADDMAC_vmac_cm2_add_reg_vmul_cm_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), - (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo), VEC512:$s1, VEC512:$s2, eR:$acc), - sub_1024_acc_lo)>; + sub_1024_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_addmac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, ACC1024:$acc2, eR:$acc), (EXTRACT_SUBREG (VADDMAC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), - (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo), VEC512:$s1, VEC512:$s2, eR:$acc), - sub_1024_acc_lo)>; + sub_1024_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_mac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, eR:$acc), (EXTRACT_SUBREG (VMAC_f_vmac_bf_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), VEC512:$s1, VEC512:$s2, eR:$acc), - sub_1024_acc_lo)>; + sub_1024_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_mul_conf 
VEC512:$s1, VEC512:$s2, eR:$acc), (EXTRACT_SUBREG (VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), - sub_1024_acc_lo)>; + sub_1024_lo)>; def : Pat<(int_aie2p_I512_I512_ACC1024_mac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, eR:$acc), (EXTRACT_SUBREG (VMAC_vmul_cm_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), VEC512:$s1, VEC512:$s2, eR:$acc), - sub_1024_acc_lo)>; + sub_1024_lo)>; def : Pat<(int_aie2p_I512_I512_ACC1024_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc), - (EXTRACT_SUBREG (VMUL_vmul_cm_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_1024_acc_lo)>; + (EXTRACT_SUBREG (VMUL_vmul_cm_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_1024_lo)>; def : Pat<(int_aie2p_I512_I512_ACC2048_addmac_conf VEC512:$s1, VEC512:$s2, ACC2048:$acc1, ACC2048:$acc2, eR:$acc), @@ -361,18 +361,18 @@ def : Pat<(int_aie2p_I512_I512_ACC2048_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc) def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_addmac_conf VEC512:$s1, VEC512:$s2, ACC512:$acc1, ACC512:$acc2, eR:$acc), (EXTRACT_SUBREG (VADDMAC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo), - (REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_acc_lo), - VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_acc_lo)>; + (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo), + (REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_lo), + VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_mac_conf VEC512:$s1, VEC512:$s2, ACC512:$acc1, eR:$acc), (EXTRACT_SUBREG (VMAC_f_vmac_bf_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo), - VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_acc_lo)>; + (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo), + VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc), - (EXTRACT_SUBREG 
(VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_acc_lo)>; + (EXTRACT_SUBREG (VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_lo)>; // BFP16 VMUL/VMAC Intrinsics @@ -591,11 +591,11 @@ def : Pat<(int_aie2p_I512_I512_ACC1024_addmsc_conf ACC1024:$acc2, eR:$acc), (EXTRACT_SUBREG (VADDMSC_vmac_cm2_add_reg_vmul_cm_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), - (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo), VEC512:$s1, VEC512:$s2, - eR:$acc), sub_1024_acc_lo)>; + eR:$acc), sub_1024_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_addmsc_conf VEC512:$s1, VEC512:$s2, @@ -603,21 +603,21 @@ def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_addmsc_conf ACC1024:$acc2, eR:$acc), (EXTRACT_SUBREG (VADDMSC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), - (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo), VEC512:$s1, VEC512:$s2, - eR:$acc), sub_1024_acc_lo)>; + eR:$acc), sub_1024_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_msc_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, eR:$acc), (EXTRACT_SUBREG (VMSC_f_vmac_bf_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), VEC512:$s1, VEC512:$s2, - eR:$acc), sub_1024_acc_lo)>; + eR:$acc), sub_1024_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_negmul_conf VEC512:$s1, VEC512:$s2, @@ -625,17 +625,17 @@ def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_negmul_conf (EXTRACT_SUBREG (VNEGMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, - eR:$acc), sub_1024_acc_lo)>; + eR:$acc), sub_1024_lo)>; def : Pat<(int_aie2p_I512_I512_ACC1024_msc_conf VEC512:$s1, 
VEC512:$s2, ACC1024:$acc1, eR:$acc), (EXTRACT_SUBREG (VMSC_vmul_cm_core_X_X - (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo), + (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo), VEC512:$s1, VEC512:$s2, - eR:$acc), sub_1024_acc_lo)>; + eR:$acc), sub_1024_lo)>; def : Pat<(int_aie2p_I512_I512_ACC1024_negmul_conf VEC512:$s1, VEC512:$s2, @@ -643,7 +643,7 @@ def : Pat<(int_aie2p_I512_I512_ACC1024_negmul_conf (EXTRACT_SUBREG (VNEGMUL_vmul_cm_core_X_X VEC512:$s1, VEC512:$s2, - eR:$acc), sub_1024_acc_lo)>; + eR:$acc), sub_1024_lo)>; def : Pat<(int_aie2p_I512_I512_ACC2048_addmsc_conf VEC512:$s1, VEC512:$s2, @@ -711,21 +711,21 @@ def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_addmsc_conf ACC512:$acc2, eR:$acc), (EXTRACT_SUBREG (VADDMSC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo), - (REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_acc_lo), + (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo), + (REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_lo), VEC512:$s1, VEC512:$s2, - eR:$acc), sub_512_acc_lo)>; + eR:$acc), sub_512_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_msc_conf VEC512:$s1, VEC512:$s2, ACC512:$acc1, eR:$acc), (EXTRACT_SUBREG (VMSC_f_vmac_bf_vmul_bf_core_X_X - (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo), + (REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo), VEC512:$s1, VEC512:$s2, - eR:$acc), sub_512_acc_lo)>; + eR:$acc), sub_512_lo)>; def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_negmul_conf VEC512:$s1, VEC512:$s2, @@ -733,7 +733,7 @@ def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_negmul_conf (EXTRACT_SUBREG (VNEGMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, - eR:$acc), sub_512_acc_lo)>; + eR:$acc), sub_512_lo)>; // SRS Intrinsic // Note : Non-constant sign is handled in .cpp @@ -867,7 +867,6 @@ def : Pat<(int_aie2p_vshift_I512_I512 VEC512:$s1, VEC512:$s2, mSs:$pre, eR:$shif (VSHIFT_ALIGN VEC512:$s1, mSs:$pre, VEC512:$s2, eR:$shift)>; def : 
Pat<(int_aie2p_vshift_bf512_bf512 VEC512:$s1, VEC512:$s2, mSs:$pre, eR:$shift), (VSHIFT_ALIGN VEC512:$s1, mSs:$pre, VEC512:$s2, eR:$shift)>; - // Combine broadcast + shift into VPUSH_hi_64 def : Pat<(int_aie2p_vshift_I512_I512 (v16i32 VEC512:$s0), (v16i32 (bcst_vector_node(v2i32 eL : $s1))), 0x0, 0x8), (VPUSH_hi_64 VEC512:$s0, eL:$s1)>; @@ -998,16 +997,16 @@ def : Pat<(v128i8 (concat_vectors (v64i8 VEC512:$src0), (v64i8 VEC512:$src1))), // concat_vector - accumulator bank def : Pat<(v16i64 (concat_vectors (v8i64 ACC512:$src0), (v8i64 ACC512:$src1))), - (v16i64 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_acc_lo, ACC512:$src1, sub_512_acc_hi))>; + (v16i64 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_lo, ACC512:$src1, sub_512_hi))>; def : Pat<(v32i32 (concat_vectors (v16i32 ACC512:$src0), (v16i32 ACC512:$src1))), - (v32i32 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_acc_lo, ACC512:$src1, sub_512_acc_hi))>; + (v32i32 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_lo, ACC512:$src1, sub_512_hi))>; def : Pat<(v32i64 (concat_vectors (v16i64 ACC1024:$src0), (v16i64 ACC1024:$src1))), - (v32i64 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_acc_lo, ACC1024:$src1, sub_1024_acc_hi))>; + (v32i64 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_lo, ACC1024:$src1, sub_1024_hi))>; def : Pat<(v64i32 (concat_vectors (v32i32 ACC1024:$src0), (v32i32 ACC1024:$src1))), - (v64i32 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_acc_lo, ACC1024:$src1, sub_1024_acc_hi))>; + (v64i32 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_lo, ACC1024:$src1, sub_1024_hi))>; // concat_vector - gpr bank foreach Ty = [v4i16, v8i8] in { diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp index d2827dce5b54..b18aef81781d 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp @@ -896,12 +896,8 @@ static SelSrcAndIdx getExtractOrInsertVectorEltInputs( /* create 
bitvector for vector select */ // %6 = ADD %3, -1 // %7 = VSEL_16 , , %6 - const unsigned Src0RB = RBI.getRegBank(SrcReg0, MRI, TRI)->getID(); - const bool IsVecRB = Src0RB == AIE2P::VRegBankID; - const unsigned Low512Idx = - IsVecRB ? AIE2P::sub_512_lo : AIE2P::sub_512_acc_lo; - const unsigned High512Idx = - IsVecRB ? AIE2P::sub_512_hi : AIE2P::sub_512_acc_hi; + const unsigned Low512Idx = AIE2P::sub_512_lo; + const unsigned High512Idx = AIE2P::sub_512_hi; if (!IdxVal) { auto SubRegCopyLo = MIB.buildInstr(TargetOpcode::COPY, {SrcRegLo}, {}) .addReg(SrcReg0, 0, Low512Idx); @@ -984,9 +980,9 @@ static SelSrcAndIdx getExtractOrInsertVectorEltInputs( Register SrcReg512Reg4 = MRI.createVirtualRegister(RC512); MIB.buildInstr(TargetOpcode::COPY, {SrcReg1024Lo}, {}) - .addReg(SrcReg0, 0, AIE2P::sub_1024_acc_lo); + .addReg(SrcReg0, 0, AIE2P::sub_1024_lo); MIB.buildInstr(TargetOpcode::COPY, {SrcReg1024Hi}, {}) - .addReg(SrcReg0, 0, AIE2P::sub_1024_acc_hi); + .addReg(SrcReg0, 0, AIE2P::sub_1024_hi); auto SubReg512MICopy1 = MIB.buildInstr(TargetOpcode::COPY, {SrcReg512Reg1}, {}) @@ -1086,7 +1082,7 @@ static SelSrcAndIdx getExtractOrInsertVectorEltInputs( if (LaneIdx < QuarterNumLanes) { auto SubReg1024MI = MIB.buildInstr(TargetOpcode::COPY, {SrcReg1024Lo}, {}) - .addReg(SrcReg0, 0, AIE2P::sub_1024_acc_lo); + .addReg(SrcReg0, 0, AIE2P::sub_1024_lo); auto SubReg1024MIReg = SubReg1024MI.getReg(0); unsigned AdjustedIdx = LaneIdx; SelSrcIdx.IdxReg = MIB.buildInstr(AIE2P::MOV_RLC_imm11_pseudo, @@ -1103,7 +1099,7 @@ static SelSrcAndIdx getExtractOrInsertVectorEltInputs( } else if (LaneIdx < (QuarterNumLanes * 2)) { auto SubReg1024MI = MIB.buildInstr(TargetOpcode::COPY, {SrcReg1024Lo}, {}) - .addReg(SrcReg0, 0, AIE2P::sub_1024_acc_lo); + .addReg(SrcReg0, 0, AIE2P::sub_1024_lo); auto SubReg1024MIReg = SubReg1024MI.getReg(0); unsigned AdjustedIdx = LaneIdx - QuarterNumLanes; @@ -1120,7 +1116,7 @@ static SelSrcAndIdx getExtractOrInsertVectorEltInputs( } else if (LaneIdx < (QuarterNumLanes 
* 3)) { auto SubReg1024MI = MIB.buildInstr(TargetOpcode::COPY, {SrcReg1024Hi}, {}) - .addReg(SrcReg0, 0, AIE2P::sub_1024_acc_hi); + .addReg(SrcReg0, 0, AIE2P::sub_1024_hi); unsigned AdjustedIdx = LaneIdx - HalfNumLanes; auto SubReg1024MIReg = SubReg1024MI.getReg(0); SelSrcIdx.IdxReg = MIB.buildInstr(AIE2P::MOV_RLC_imm11_pseudo, @@ -1137,7 +1133,7 @@ static SelSrcAndIdx getExtractOrInsertVectorEltInputs( } else { auto SubReg1024MI = MIB.buildInstr(TargetOpcode::COPY, {SrcReg1024Hi}, {}) - .addReg(SrcReg0, 0, AIE2P::sub_1024_acc_hi); + .addReg(SrcReg0, 0, AIE2P::sub_1024_hi); unsigned AdjustedIdx = LaneIdx - (QuarterNumLanes * 3); auto SubReg1024MIReg = SubReg1024MI.getReg(0); SelSrcIdx.IdxReg = MIB.buildInstr(AIE2P::MOV_RLC_imm11_pseudo, @@ -1498,7 +1494,7 @@ bool AIE2PInstructionSelector::selectCascadeStreamInsn(MachineInstr &I, MIB.buildInstr(OpCode, {DstReg}, {}).addReg(CopyPosReg.getReg(0)); auto DestMI = MIB.buildInstr(TargetOpcode::COPY, {CascadeReg}, {}) .addReg(CascadeMV->getOperand(0).getReg(), 0, - AIE2P::sub_1024_acc_lo); + AIE2P::sub_1024_lo); constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *DestMI, AIE2P::ACC1024RegClass, DestMI->getOperand(0)); } @@ -1521,7 +1517,7 @@ bool AIE2PInstructionSelector::selectCascadeStreamInsn(MachineInstr &I, RBI.constrainGenericRegister(R31, AIE2P::mR31_scdRegClass, MRI); auto DestMI = MIB.buildInstr(TargetOpcode::COPY, {CascadeReg}, {}) .addReg(CascadeMV->getOperand(0).getReg(), 0, - AIE2P::sub_1024_acc_lo); + AIE2P::sub_1024_lo); constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *DestMI, AIE2P::ACC1024RegClass, DestMI->getOperand(0)); } @@ -2452,9 +2448,9 @@ bool AIE2PInstructionSelector::selectWideG_AIE_LOAD_STORE( SmallVector SubRegIdxes; if (RBID == AIE2P::AccRegBankID) { - SubRegIdxes = {AIE2P::sub_512_acc_lo, AIE2P::sub_512_acc_hi, - AIE2P::sub_1024_acc_hi_then_sub_512_acc_lo, - AIE2P::sub_1024_acc_hi_then_sub_512_acc_hi}; + SubRegIdxes = {AIE2P::sub_512_lo, AIE2P::sub_512_hi, + 
AIE2P::sub_1024_hi_then_sub_512_lo, + AIE2P::sub_1024_hi_then_sub_512_hi}; RC512 = &AIE2P::ACC512RegClass; RC1024 = &AIE2P::ACC1024RegClass; } else if (RBID == AIE2P::VRegBankID) { diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp index c0f856aef2f8..3cb013124e73 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -540,14 +540,14 @@ void AIE2PRegisterInfo::getTargetSubRegs(std::vector &Subregs, Subregs.push_back(AIE2P::sub_256_hi); break; case 512: - Subregs.push_back(IsVecRB ? AIE2P::sub_512_lo : AIE2P::sub_512_acc_lo); - Subregs.push_back(IsVecRB ? AIE2P::sub_512_hi : AIE2P::sub_512_acc_hi); + Subregs.push_back(AIE2P::sub_512_lo); + Subregs.push_back(AIE2P::sub_512_hi); break; case 1024: assert(!IsVecRB && "expected accumulator register bank for 256 dest type!"); - Subregs.push_back(AIE2P::sub_1024_acc_lo); - Subregs.push_back(AIE2P::sub_1024_acc_hi); + Subregs.push_back(AIE2P::sub_1024_lo); + Subregs.push_back(AIE2P::sub_1024_hi); break; default: llvm_unreachable("Unsupported subreg type!"); diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td index adc322b352f1..1123ba323a8f 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td @@ -33,8 +33,8 @@ def sub_512_hi : SubRegIndex<512, 512>; def sub_512_acc_lo : SubRegIndex<512, 0>; def sub_512_acc_hi : SubRegIndex<512, 512>; -def sub_1024_acc_lo : SubRegIndex<1024, 0>; -def sub_1024_acc_hi : SubRegIndex<1024, 1024>; +def sub_1024_lo : SubRegIndex<1024, 0>; +def sub_1024_hi : SubRegIndex<1024, 1024>; def sub_512_hi_256_lo : ComposedSubRegIndex; def sub_512_hi_256_hi : ComposedSubRegIndex; @@ -207,16 +207,6 @@ class AIE2PRegisterClass regTypes, dag reg class AIE2PVector1024RegisterClass : AIE2PRegisterClass<1024, 512, [v128i8, v64i16, v64bf16, v32i32, v16i64, v32f32], reglist>; - 
class AIE2PAcc512RegisterClass : - AIE2PRegisterClass<512, 512, [v16i32, v8i64, v16f32], reglist> { - let hasCompleteDecoder = 0; - } - - class AIE2PAcc1024RegisterClass : - AIE2PRegisterClass<1024, 512, [v32i32, v16i64, v32f32], reglist> { - let hasCompleteDecoder = 0; - } - class AIE2PAcc2048RegisterClass : AIE2PRegisterClass<2048, 512, [v64i32, v32i64, v64f32], reglist> { let hasCompleteDecoder = 0; @@ -426,52 +416,52 @@ class AIE2PDim3DRegisterClass def bmhh #i : AIE2P3BitReg; } - def eBMLL : AIE2PAcc512RegisterClass<(add bmll0, bmll1, bmll2, bmll3, bmll4)>; - def eBMLH : AIE2PAcc512RegisterClass<(add bmlh0, bmlh1, bmlh2, bmlh3, bmlh4)>; - def eBMHL : AIE2PAcc512RegisterClass<(add bmhl0, bmhl1, bmhl2, bmhl3, bmhl4)>; - def eBMHH : AIE2PAcc512RegisterClass<(add bmhh0, bmhh1, bmhh2, bmhh3, bmhh4)>; - def mBMLLm : AIE2PAcc512RegisterClass<(add eBMLL)>; - def mBMLHm : AIE2PAcc512RegisterClass<(add eBMLH)>; - def mBMHLm : AIE2PAcc512RegisterClass<(add eBMHL)>; - def mBMHHm : AIE2PAcc512RegisterClass<(add eBMHH)>; - def mBMm : AIE2PAcc512RegisterClass<(add mBMLLm, mBMLHm, mBMHLm, mBMHHm)>; - def mBMs : AIE2PAcc512RegisterClass<(add mBMm)>; - def mMcdBMSrc : AIE2PAcc512RegisterClass<(add mBMm)>; - def eBMSLL : AIE2PAcc512RegisterClass<(add bmll0, bmll1, bmll2, bmll3)>; - def eBMSLH : AIE2PAcc512RegisterClass<(add bmlh0, bmlh1, bmlh2, bmlh3)>; - def eBMSHL : AIE2PAcc512RegisterClass<(add bmhl0, bmhl1, bmhl2, bmhl3)>; - def eBMSHH : AIE2PAcc512RegisterClass<(add bmhh0, bmhh1, bmhh2, bmhh3)>; - def mBMSm : AIE2PAcc512RegisterClass<(add eBMSLL, eBMSLH, eBMSHL, eBMSHH)>; - - - let SubRegIndices = [sub_512_acc_lo, sub_512_acc_hi], CoveredBySubRegs = 1 in { + def eBMLL : AIE2PVector512RegisterClass<(add bmll0, bmll1, bmll2, bmll3, bmll4)>; + def eBMLH : AIE2PVector512RegisterClass<(add bmlh0, bmlh1, bmlh2, bmlh3, bmlh4)>; + def eBMHL : AIE2PVector512RegisterClass<(add bmhl0, bmhl1, bmhl2, bmhl3, bmhl4)>; + def eBMHH : AIE2PVector512RegisterClass<(add bmhh0, bmhh1, bmhh2, 
bmhh3, bmhh4)>; + def mBMLLm : AIE2PVector512RegisterClass<(add eBMLL)>; + def mBMLHm : AIE2PVector512RegisterClass<(add eBMLH)>; + def mBMHLm : AIE2PVector512RegisterClass<(add eBMHL)>; + def mBMHHm : AIE2PVector512RegisterClass<(add eBMHH)>; + def mBMm : AIE2PVector512RegisterClass<(add mBMLLm, mBMLHm, mBMHLm, mBMHHm)>; + def mBMs : AIE2PVector512RegisterClass<(add mBMm)>; + def mMcdBMSrc : AIE2PVector512RegisterClass<(add mBMm)>; + def eBMSLL : AIE2PVector512RegisterClass<(add bmll0, bmll1, bmll2, bmll3)>; + def eBMSLH : AIE2PVector512RegisterClass<(add bmlh0, bmlh1, bmlh2, bmlh3)>; + def eBMSHL : AIE2PVector512RegisterClass<(add bmhl0, bmhl1, bmhl2, bmhl3)>; + def eBMSHH : AIE2PVector512RegisterClass<(add bmhh0, bmhh1, bmhh2, bmhh3)>; + def mBMSm : AIE2PVector512RegisterClass<(add eBMSLL, eBMSLH, eBMSHL, eBMSHH)>; + + + let SubRegIndices = [sub_512_lo, sub_512_hi], CoveredBySubRegs = 1 in { foreach i = 0 - 4 in { def cml #i : AIE2P3BitReg("bmll" #i), !cast("bmlh" #i)]>; } foreach i = 0 - 4 in { def cmh #i : AIE2P3BitReg("bmhl" #i), !cast("bmhh" #i)]>; } - } // let SubRegIndices = [sub_512_acc_lo, sub_512_acc_hi], CoveredBySubRegs = 1 - - def eCML : AIE2PAcc1024RegisterClass<(add cml0, cml1, cml2, cml3, cml4)>; - def eCMH : AIE2PAcc1024RegisterClass<(add cmh0, cmh1, cmh2, cmh3, cmh4)>; - def mCMLm : AIE2PAcc1024RegisterClass<(add eCML)>; - def mCMHm : AIE2PAcc1024RegisterClass<(add eCMH)>; - def mCMLs : AIE2PAcc1024RegisterClass<(add eCML)>; - def mCMHs : AIE2PAcc1024RegisterClass<(add eCMH)>; - def mCMm : AIE2PAcc1024RegisterClass<(add mCMLm, mCMHm)>; - def mCMs : AIE2PAcc1024RegisterClass<(add mCMm)>; + } // let SubRegIndices = [sub_512_lo, sub_512_hi], CoveredBySubRegs = 1 + + def eCML : AIE2PVector1024RegisterClass<(add cml0, cml1, cml2, cml3, cml4)>; + def eCMH : AIE2PVector1024RegisterClass<(add cmh0, cmh1, cmh2, cmh3, cmh4)>; + def mCMLm : AIE2PVector1024RegisterClass<(add eCML)>; + def mCMHm : AIE2PVector1024RegisterClass<(add eCMH)>; + def mCMLs : 
AIE2PVector1024RegisterClass<(add eCML)>; + def mCMHs : AIE2PVector1024RegisterClass<(add eCMH)>; + def mCMm : AIE2PVector1024RegisterClass<(add mCMLm, mCMHm)>; + def mCMs : AIE2PVector1024RegisterClass<(add mCMm)>; class AIE2PVRegDM Enc, string n, list subregs = []> : Register { let HWEncoding{2-0} = Enc; let SubRegs = subregs; } - let SubRegIndices = [sub_1024_acc_lo, sub_1024_acc_hi], CoveredBySubRegs = 1 in { + let SubRegIndices = [sub_1024_lo, sub_1024_hi], CoveredBySubRegs = 1 in { foreach i = 0 - 4 in { def dm #i : AIE2PVRegDM("cml" #i), !cast("cmh" #i)]>; } - } // let SubRegIndices = [sub_1024_acc_lo, sub_1024_acc_hi], CoveredBySubRegs = 1 + } // let SubRegIndices = [sub_1024_lo, sub_1024_hi], CoveredBySubRegs = 1 def eDM : AIE2PAcc2048RegisterClass<(add dm0, dm1, dm2, dm3, dm4)>; def mDMa : AIE2PAcc2048RegisterClass<(add eDM)>; @@ -915,8 +905,8 @@ def VEC512 : AIE2PVector512RegisterClass<(add mXm)>; def VEC1024 : AIE2PVector1024RegisterClass<(add eY)>; def VEC576 : AIE2PVector576RegisterClass<(add mEXa)>; -def ACC512 : AIE2PAcc512RegisterClass<(add mBMm)>; -def ACC1024 : AIE2PAcc1024RegisterClass<(add mCMm)>; +def ACC512 : AIE2PVector512RegisterClass<(add mBMm)>; +def ACC1024 : AIE2PVector1024RegisterClass<(add mCMm)>; def ACC2048 : AIE2PAcc2048RegisterClass<(add eDM)>; def mAluCg : AIE2PScalarRegisterClass<(add eR, lc)>; @@ -932,13 +922,13 @@ def mAguSrc : AIE2PScalarRegisterClass<(add mSclMS)>; def mAguDst : AIE2P20BitRegisterClass<(add eP, mDm)>; def mShflXDst : AIE2PVector512RegisterClass<(add mXm)>; -def mShflBMDst : AIE2PAcc512RegisterClass<(add mBMSm)>; +def mShflBMDst : AIE2PVector512RegisterClass<(add mBMSm)>; def mFp2B0 : AIE2PAcc2048RegisterClass<(add eDM)>; def mFp2B1 : AIE2PAcc2048RegisterClass<(add eDM)>; def mBp2Bp : AIE2PVector576RegisterClass<(add mEXa)>; -def mWbfsrs : AIE2PAcc512RegisterClass<(add mBMs)>; -def mXbfsrs : AIE2PAcc1024RegisterClass<(add mCMs)>; +def mWbfsrs : AIE2PVector512RegisterClass<(add mBMs)>; +def mXbfsrs : 
AIE2PVector1024RegisterClass<(add mCMs)>; def mFl2FxSrc_W : AIE2PVector256RegisterClass<(add mWm)>; // VMOV register classes From 5b35a87829d8fd096047e23ce8abbe25047eec1d Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Wed, 9 Apr 2025 13:43:47 +0100 Subject: [PATCH 2/6] [AIE2P] Create dummy 256-bit accumulator and compose it with 512-bit to mimic vector register hierarchy Subregister indices corresponding to vector and accumulator registers do not cover the correspondingly sized registers, which leads to undefined uses if we use the same subregister indices for both. Having separate subregister indices solves this problem. However, with this approach, we cannot allocate a vector register for an accumulator (or vice versa). The idea is to mimic the register composition hierarchy of vectors for accumulators (only for smaller types, because 2048 does not matter) by creating dummy 256-bit accumulators and reverting the separate subregister indices. Now, vector and accumulator registers have the same lane masks. --- .../lib/Target/AIE/aie2p/AIE2PRegisterInfo.td | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td index 1123ba323a8f..8db847668b72 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td @@ -398,23 +398,43 @@ class AIE2PDim3DRegisterClass srFifo_of, srFifo_uf, srSparse_of, srFPFlags, srF2IFlags, srF2FFlags, srF2BFlags, srFPNlf, srFPCnvFx2Fl, srFPCnvFl2Fx)>; + + let isArtificial = 1 in { + foreach i = 0 - 4 in { + def dummy_acc256ll_l #i : AIE2P3BitReg; + def dummy_acc256ll_h #i : AIE2P3BitReg; + } + foreach i = 0 - 4 in { + def dummy_acc256lh_l #i : AIE2P3BitReg; + def dummy_acc256lh_h #i : AIE2P3BitReg; + } + foreach i = 0 - 4 in { + def dummy_acc256hl_l #i : AIE2P3BitReg; + def dummy_acc256hl_h #i : AIE2P3BitReg; + } + foreach i = 0 - 4 in { + def dummy_acc256hh_l #i : AIE2P3BitReg; + def
dummy_acc256hh_h #i : AIE2P3BitReg; + } + } - + let SubRegIndices = [sub_256_lo, sub_256_hi], CoveredBySubRegs = 1 in { foreach i = 0 - 4 in { - def bmll #i : AIE2P3BitReg; + def bmll #i : AIE2P3BitReg("dummy_acc256ll_l" #i), !cast("dummy_acc256ll_h" #i)]>; } foreach i = 0 - 4 in { - def bmlh #i : AIE2P3BitReg; + def bmlh #i : AIE2P3BitReg("dummy_acc256lh_l" #i), !cast("dummy_acc256lh_h" #i)]>; } foreach i = 0 - 4 in { - def bmhl #i : AIE2P3BitReg; + def bmhl #i : AIE2P3BitReg("dummy_acc256hl_l" #i), !cast("dummy_acc256hl_h" #i)]>; } foreach i = 0 - 4 in { - def bmhh #i : AIE2P3BitReg; + def bmhh #i : AIE2P3BitReg("dummy_acc256hh_l" #i), !cast("dummy_acc256hh_h" #i)]>; } + } // let SubRegIndices = [sub_256_lo, sub_256_hi], CoveredBySubRegs = 1 def eBMLL : AIE2PVector512RegisterClass<(add bmll0, bmll1, bmll2, bmll3, bmll4)>; def eBMLH : AIE2PVector512RegisterClass<(add bmlh0, bmlh1, bmlh2, bmlh3, bmlh4)>; From f3470bf1748d6e26d33a1a1f51a5c3642724f457 Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Thu, 27 Mar 2025 14:49:46 +0000 Subject: [PATCH 3/6] [AIE2P] Spill to register instead of stack VEC/ACC/FIFO registers can be copied to each other, which can be used as an alternative to a costly stack spill.
--- llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 31 ++++++++- .../Target/AIE/aie2p/AIE2PRegisterInfo.cpp | 67 +++++++++++++++++++ llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h | 3 + .../lib/Target/AIE/aie2p/AIE2PRegisterInfo.td | 8 ++- 4 files changed, 104 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index 53c457e9ee5c..0479d49587da 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -554,7 +554,6 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MCRegister SrcReg, bool KillSrc) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - if (AIE2P::mMvSclSrcRegClass.contains(SrcReg) && AIE2P::mMvSclDstRegClass.contains(DstReg)) { // Build MultiSlotPseudo in preference @@ -753,8 +752,16 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB, getKillRegState(KillSrc)); } else if ((AIE2P::ePSRFLdFRegClass.contains(SrcReg)) && (AIE2P::ePSRFLdFRegClass.contains(DstReg))) { - copyThroughSubRegs(MBB, MBBI, DL, DstReg, SrcReg, KillSrc); + // copyThroughSubRegs(MBB, MBBI, DL, DstReg, SrcReg, KillSrc); + copyPhysReg(MBB, MBBI, DL, TRI.getSubReg(DstReg, AIE2P::sub_ptr), + TRI.getSubReg(SrcReg, AIE2P::sub_ptr), KillSrc); + copyPhysReg(MBB, MBBI, DL, TRI.getSubReg(DstReg, AIE2P::sub_fifo), + TRI.getSubReg(SrcReg, AIE2P::sub_fifo), KillSrc); + copyPhysReg(MBB, MBBI, DL, TRI.getSubReg(DstReg, AIE2P::sub_avail), + TRI.getSubReg(SrcReg, AIE2P::sub_avail), KillSrc); } else { + MBBI->dump(); + LLVM_DEBUG(MBBI->dump()); llvm_unreachable("unhandled case in copyPhysReg"); } } @@ -906,6 +913,14 @@ void AIE2PInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = AIE2P::VST_E_SPILL; } else if (regClassMatches(AIE2P::VEC576RegClass, RC, SrcReg)) { Opcode = AIE2P::VST_EX_SPILL; + } else if (&AIE2P::spill_acc1024_to_compositeRegClass == RC) { + Opcode 
= AIE2P::VST_CM_SPILL; + } else if (&AIE2P::spill_acc512_to_compositeRegClass == RC) { + Opcode = AIE2P::VST_dmx_sts_bm_spill; + } else if (&AIE2P::spill_vec1024_to_compositeRegClass == RC) { + Opcode = AIE2P::VST_Y_SPILL; + } else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) { + Opcode = AIE2P::VST_dmx_sts_x_spill; } else if (regClassMatches(AIE2P::eSRegClass, RC, SrcReg) || regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, SrcReg)) { // Can't spill these directly. Need to bounce through a GPR. @@ -962,6 +977,7 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, } else if (regClassMatches(AIE2P::VEC256RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_dmw_lda_w_spill; } else if (regClassMatches(AIE2P::mBMsRegClass, RC, DstReg)) { + I->dump(); Opcode = AIE2P::VLDA_dmx_lda_bm_spill; } else if (regClassMatches(AIE2P::mFifoHLRegRegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_dmx_lda_fifohl_spill; @@ -970,6 +986,7 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, } else if (regClassMatches(AIE2P::ACC2048RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_DM_SPILL; } else if (regClassMatches(AIE2P::ACC1024RegClass, RC, DstReg)) { + I->dump(); Opcode = AIE2P::VLDA_CM_SPILL; } else if (regClassMatches(AIE2P::FIFO1024RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_FIFO_SPILL; @@ -987,6 +1004,15 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = AIE2P::VLDA_E_SPILL; } else if (regClassMatches(AIE2P::VEC576RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_EX_SPILL; + } else if (&AIE2P::spill_acc1024_to_compositeRegClass == RC) { + Opcode = AIE2P::VLDA_CM_SPILL; + } else if (&AIE2P::spill_acc512_to_compositeRegClass == RC) { + // I->dump(); + Opcode = AIE2P::VLDA_dmx_lda_bm_spill; + } else if (&AIE2P::spill_vec1024_to_compositeRegClass == RC) { + Opcode = AIE2P::VLDA_Y_SPILL; + } else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) { + Opcode = AIE2P::VLDA_dmx_lda_x_spill; } else if (regClassMatches(AIE2P::eSRegClass, 
RC, DstReg) || regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, DstReg)) { // Can't spill these directly. Need to bounce through a GPR. @@ -999,6 +1025,7 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addReg(Reg, getKillRegState(true)); return; } else { + I->dump(); llvm_unreachable( "Can't load this register from stack slot: is it virtual?"); } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp index 3cb013124e73..7e579348af70 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -43,6 +43,12 @@ cl::opt EnableCoalescingForWideCopy( extern llvm::cl::opt ReservedGPRs; +static llvm::cl::opt + SpillAccToVecOrAcc("aie2p-spill-accumulator-to-vec-or-acc", cl::Hidden, + cl::init(true), + cl::desc("Allow spilling accumulator registers to " + "vector or accumulator registers")); + AIE2PRegisterInfo::AIE2PRegisterInfo(unsigned HwMode) : AIE2PGenRegisterInfo(AIE2P::sp, /*DwarfFlavour*/ 0, /*EHFlavor*/ 0, /*PC*/ 0, HwMode) {} @@ -482,9 +488,34 @@ AIE2PRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, if (AIE2P::eSRegClass.hasSubClassEq(RC)) return &AIE2P::spill_eS_to_eRRegClass; + if (SpillAccToVecOrAcc && RC == &AIE2P::ACC1024RegClass) + return &AIE2P::spill_acc1024_to_compositeRegClass; + if (SpillAccToVecOrAcc && RC == &AIE2P::ACC512RegClass) + return &AIE2P::spill_acc512_to_compositeRegClass; + if (SpillAccToVecOrAcc && RC == &AIE2P::VEC1024RegClass) + return &AIE2P::spill_vec1024_to_compositeRegClass; + if (SpillAccToVecOrAcc && RC == &AIE2P::VEC512RegClass) + return &AIE2P::spill_vec512_to_compositeRegClass; return RC; } +const TargetRegisterClass * +AIE2PRegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const { + if ((RC == &AIE2P::spill_vec512_to_compositeRegClass || + RC == &AIE2P::spill_acc512_to_compositeRegClass) && + (Idx == AIE2P::sub_256_lo || Idx == 
AIE2P::sub_256_hi)) { + return &AIE2P::VEC512RegClass; + } + if ((RC == &AIE2P::spill_vec1024_to_compositeRegClass || + RC == &AIE2P::spill_acc1024_to_compositeRegClass) && + (Idx == AIE2P::sub_512_hi_256_lo || Idx == AIE2P::sub_512_hi_256_hi)) { + return &AIE2P::VEC1024RegClass; + } + // Forward to TableGen's default version. + return AIE2PGenRegisterInfo::getSubClassWithSubReg(RC, Idx); +} + const std::set &AIE2PRegisterInfo::getSubRegSplit(int RegClassId) const { static const std::set NoSplit = {AIE2P::NoSubRegister}; static const std::set Mod2DSplit = {AIE2P::sub_mod, AIE2P::sub_dim_size, @@ -618,6 +649,42 @@ bool AIE2PRegisterInfo::shouldCoalesce( const unsigned SrcSize = getRegSizeInBits(*SrcRC); const unsigned DstSize = getRegSizeInBits(*DstRC); + + // if (SrcSize == 256 && (AIE2P::ACC2048RegClass.hasSubClassEq(DstRC) || + // AIE2P::ACC1024RegClass.hasSubClassEq(DstRC) || + // AIE2P::ACC512RegClass.hasSubClassEq(DstRC) || + // &AIE2P::spill_vec512_to_compositeRegClass == DstRC)) + // { + // return false; + // } + // if (DstSize == 256 && (AIE2P::ACC2048RegClass.hasSubClassEq(SrcRC) || + // AIE2P::ACC1024RegClass.hasSubClassEq(SrcRC) || + // AIE2P::ACC512RegClass.hasSubClassEq(SrcRC) || + // &AIE2P::spill_vec512_to_compositeRegClass == SrcRC)) + // { + // return false; + // } + // if ((&AIE2P::spill_vec512_to_compositeRegClass == DstRC && + // (AIE2P::ACC2048RegClass.hasSubClassEq(SrcRC) || + // AIE2P::ACC1024RegClass.hasSubClassEq(SrcRC) || + // AIE2P::ACC512RegClass.hasSubClassEq(SrcRC)))) { + // return false; + // } + // if ((&AIE2P::spill_vec512_to_compositeRegClass == SrcRC && + // (AIE2P::ACC2048RegClass.hasSubClassEq(DstRC) || + // AIE2P::ACC1024RegClass.hasSubClassEq(DstRC) || + // AIE2P::ACC512RegClass.hasSubClassEq(DstRC)))) { + // return false; + // } + // if (((&AIE2P::spill_vec512_to_compositeRegClass == SrcRC || + // &AIE2P::spill_vec1024_to_compositeRegClass == SrcRC || + // &AIE2P::spill_vec512_to_compositeRegClass == DstRC || + // 
&AIE2P::spill_vec1024_to_compositeRegClass == DstRC) && + // (AIE2P::ACC2048RegClass.hasSubClassEq(NewRC) || + // AIE2P::ACC1024RegClass.hasSubClassEq(NewRC) || + // AIE2P::ACC512RegClass.hasSubClassEq(NewRC)))) { + // return false; + // } MachineFunction *MF = MI->getMF(); const AIEBaseInstrInfo *TII = static_cast(MF->getSubtarget().getInstrInfo()); diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h index c7db1ac989a8..7375fbb11a25 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h @@ -77,6 +77,9 @@ struct AIE2PRegisterInfo : public AIE2PGenRegisterInfo { getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override; const TargetRegisterClass * + getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const override; + const TargetRegisterClass * getGPRRegClass(const MachineFunction &MF) const override; unsigned getVectorRegBankID() const override; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td index 8db847668b72..9b50ebe729f3 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td @@ -30,9 +30,6 @@ def sub_256_hi : SubRegIndex<256, 256>; // sub_512_lo_256_hi def sub_512_lo : SubRegIndex<512, 0>; def sub_512_hi : SubRegIndex<512, 512>; -def sub_512_acc_lo : SubRegIndex<512, 0>; -def sub_512_acc_hi : SubRegIndex<512, 512>; - def sub_1024_lo : SubRegIndex<1024, 0>; def sub_1024_hi : SubRegIndex<1024, 1024>; @@ -993,4 +990,9 @@ def spill_eDN_to_eR : AIE2PScalarRegisterClass<(add eDN, eR)>; def spill_eDJ_to_eR : AIE2PScalarRegisterClass<(add eDJ, eR, eDN)>; def spill_eDC_to_eR : AIE2PScalarRegisterClass<(add eDC, eR)>; +def spill_vec512_to_composite : AIE2PVector512RegisterClass<(add mXm, mBMm)>; +def spill_vec1024_to_composite : AIE2PVector1024RegisterClass<(add eY, mCMm)>; +def 
spill_acc512_to_composite : AIE2PVector512RegisterClass<(add mBMm, mXm)>; +def spill_acc1024_to_composite : AIE2PVector1024RegisterClass<(add mCMm, eY)>; + } // End AIE2P Namespace From d3d4d34d53925011025d1f0ec7d333e7a9ca1507 Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Thu, 10 Apr 2025 13:03:29 +0100 Subject: [PATCH 4/6] [AIE2P] Extend vector composite register class with fifo registers --- llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 32 +++++++------- .../AIE/aie2p/AIE2PInstructionSelector.cpp | 2 +- .../lib/Target/AIE/aie2p/AIE2PRegisterInfo.td | 43 ++++++++++++++----- 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index 0479d49587da..d54388d6dd7e 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -683,52 +683,52 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if ((AIE2P::FIFO1024RegClass.contains(SrcReg)) && (AIE2P::FIFO1024RegClass.contains(DstReg))) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_lo_fifo)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_lo_fifo), + TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_hi_fifo)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_hi_fifo), + TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if ((AIE2P::VEC1024RegClass.contains(SrcReg)) && (AIE2P::FIFO1024RegClass.contains(DstReg))) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_lo_fifo)) + TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - 
TRI.getSubReg(DstReg, AIE2P::sub_hi_fifo)) + TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if ((AIE2P::FIFO1024RegClass.contains(SrcReg)) && (AIE2P::VEC1024RegClass.contains(DstReg))) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_lo_fifo), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_hi_fifo), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if ((AIE2P::ACC1024RegClass.contains(SrcReg)) && (AIE2P::FIFO1024RegClass.contains(DstReg))) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_lo_fifo)) + TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), - TRI.getSubReg(DstReg, AIE2P::sub_hi_fifo)) + TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if ((AIE2P::FIFO1024RegClass.contains(SrcReg)) && (AIE2P::ACC1024RegClass.contains(DstReg))) { BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_512_lo)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_lo_fifo), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo), getKillRegState(KillSrc)); BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x), TRI.getSubReg(DstReg, AIE2P::sub_512_hi)) - .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_hi_fifo), + .addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi), getKillRegState(KillSrc)); } else if ((AIE2P::eLRegClass.contains(SrcReg)) && (AIE2P::EXPVEC64RegClass.contains(DstReg))) { @@ -1055,8 +1055,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) 
const { return {{AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_lo}, {AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_hi}}; case AIE2P::VST_FIFO_SPILL: - return {{AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_lo_fifo}, - {AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_hi_fifo}}; + return {{AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_512_lo}, + {AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_512_hi}}; case AIE2P::VST_PLFR_SPILL: return {{AIE2P::VST_FIFO_SPILL, AIE2P::sub_fifo}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_avail}, @@ -1092,8 +1092,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { return {{AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_lo}, {AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_hi}}; case AIE2P::VLDA_FIFO_SPILL: - return {{AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_lo_fifo}, - {AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_hi_fifo}}; + return {{AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_512_lo}, + {AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_512_hi}}; case AIE2P::VLDA_PLFR_SPILL: return { {AIE2P::VLDA_FIFO_SPILL, AIE2P::sub_fifo}, diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp index b18aef81781d..3bca8e917c4a 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp @@ -2459,7 +2459,7 @@ bool AIE2PInstructionSelector::selectWideG_AIE_LOAD_STORE( RC1024 = &AIE2P::VEC1024RegClass; } else if (RBID == AIE2P::FifoRegBankID) { RC512 = &AIE2P::FIFO512RegClass; - SubRegIdxes = {AIE2P::sub_lo_fifo, AIE2P::sub_hi_fifo}; + SubRegIdxes = {AIE2P::sub_512_lo, AIE2P::sub_512_hi}; RC1024 = &AIE2P::FIFO1024RegClass; } else { llvm_unreachable("Unknown Register Bank ID!"); diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td index 9b50ebe729f3..9507b106dffa 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td +++ 
b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td @@ -262,21 +262,42 @@ class AIE2PDim3DRegisterClass // Core ID def CORE_ID : AIE2PSPLReg<10, "core_id">; def mCoreID : AIE2P20BitRegisterClass<(add CORE_ID)>; + + let isArtificial = 1 in { + def dummy_sfh_l : AIE2P3BitReg<0, "dummy_sfh_l">; + def dummy_sfh_h : AIE2P3BitReg<0, "dummy_sfh_h">; + def dummy_sfl_l : AIE2P3BitReg<0, "dummy_sfl_l">; + def dummy_sfl_h : AIE2P3BitReg<0, "dummy_sfl_h">; + + def dummy_lfh0_l : AIE2P3BitReg<0, "dummy_lfh0_l">; + def dummy_lfh0_h : AIE2P3BitReg<0, "dummy_lfh0_h">; + def dummy_lfl0_l : AIE2P3BitReg<0, "dummy_lfl0_l">; + def dummy_lfl0_h : AIE2P3BitReg<0, "dummy_lfl0_h">; + + def dummy_lfh1_l : AIE2P3BitReg<0, "dummy_lfh1_l">; + def dummy_lfh1_h : AIE2P3BitReg<0, "dummy_lfh1_h">; + def dummy_lfl1_l : AIE2P3BitReg<0, "dummy_lfl1_l">; + def dummy_lfl1_h : AIE2P3BitReg<0, "dummy_lfl1_h">; + } + + let SubRegIndices = [sub_256_lo, sub_256_hi], CoveredBySubRegs = 1 in { // Store FIFO register - def sfh : AIE2P3BitReg<0b110,"sfh">; - def sfl : AIE2P3BitReg<0b011,"sfl">; + def sfh : AIE2P3BitReg<0b110,"sfh", [dummy_sfh_l, dummy_sfh_h]>; + def sfl : AIE2P3BitReg<0b011,"sfl", [dummy_sfl_l, dummy_sfl_h]>; // Load FIFO register 0 - def lfh0 : AIE2P1BitReg<0b0,"lfh0">; - def lfl0 : AIE2P1BitReg<0b0,"lfl0">; + def lfh0 : AIE2P1BitReg<0b0,"lfh0", [dummy_lfh0_l, dummy_lfh0_h]>; + def lfl0 : AIE2P1BitReg<0b0,"lfl0", [dummy_lfl0_l, dummy_lfl0_h]>; // Load FIFO register 1 - def lfh1 : AIE2P1BitReg<0b1,"lfh1">; - def lfl1 : AIE2P1BitReg<0b1,"lfl1">; + def lfh1 : AIE2P1BitReg<0b1,"lfh1", [dummy_lfh1_l, dummy_lfh1_h]>; + def lfl1 : AIE2P1BitReg<0b1,"lfl1", [dummy_lfl1_l, dummy_lfl1_h]>; + } + // Load FIFO extra register def lfe : AIE2P3BitReg<0b010,"lfe">; def sub_lo_fifo: SubRegIndex<512, 0>; def sub_hi_fifo : SubRegIndex<512, 512>; - let SubRegIndices = [sub_lo_fifo, sub_hi_fifo], CoveredBySubRegs = 1 in { + let SubRegIndices = [sub_512_lo, sub_512_hi], CoveredBySubRegs = 1 in { def lf0 : 
AIE2P1BitReg<0b0, "lf0", [lfl0, lfh0]>; def lf1 : AIE2P1BitReg<0b1, "lf1", [lfl1, lfh1]>; def sf : AIE2P1BitReg<0b1, "sf", [sfl, sfh]>; @@ -990,9 +1011,9 @@ def spill_eDN_to_eR : AIE2PScalarRegisterClass<(add eDN, eR)>; def spill_eDJ_to_eR : AIE2PScalarRegisterClass<(add eDJ, eR, eDN)>; def spill_eDC_to_eR : AIE2PScalarRegisterClass<(add eDC, eR)>; -def spill_vec512_to_composite : AIE2PVector512RegisterClass<(add mXm, mBMm)>; -def spill_vec1024_to_composite : AIE2PVector1024RegisterClass<(add eY, mCMm)>; -def spill_acc512_to_composite : AIE2PVector512RegisterClass<(add mBMm, mXm)>; -def spill_acc1024_to_composite : AIE2PVector1024RegisterClass<(add mCMm, eY)>; +def spill_vec512_to_composite : AIE2PVector512RegisterClass<(add mXm, mBMm, sfh, sfl, lfh0, lfh1, lfl0, lfl1)>; +def spill_vec1024_to_composite : AIE2PVector1024RegisterClass<(add eY, mCMm, lf0, lf1, sf)>; +def spill_acc512_to_composite : AIE2PVector512RegisterClass<(add mBMm, mXm, sfh, sfl, lfh0, lfh1, lfl0, lfl1)>; +def spill_acc1024_to_composite : AIE2PVector1024RegisterClass<(add mCMm, eY, lf0, lf1, sf)>; } // End AIE2P Namespace From 2bea207b96f67f5c91cd68b8f4853867b4964855 Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Fri, 25 Apr 2025 13:09:59 +0100 Subject: [PATCH 5/6] [AIE2P] Spill composed vector registers to stack Upon spilling the composed register, we might need to select the spill/reload instruction based on the register allocated by the register allocator itself. 
--- llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 71 +++++++++++++------ llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td | 4 ++ .../Target/AIE/aie2p/AIE2PRegisterInfo.cpp | 14 ++-- 3 files changed, 63 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index d54388d6dd7e..78af26522717 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -812,6 +812,8 @@ Register AIE2PInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case AIE2P::LDA_DS_SPILL: case AIE2P::VLDA_EX_SPILL: case AIE2P::VLDA_E_SPILL: + case AIE2P::VLDA_512_COMPOSED_REG_SPILL: + case AIE2P::VLDA_1024_COMPOSED_REG_SPILL: break; } @@ -845,6 +847,8 @@ Register AIE2PInstrInfo::isStoreToStackSlot(const MachineInstr &MI, case AIE2P::VST_Y_SPILL: case AIE2P::VST_E_SPILL: case AIE2P::VST_EX_SPILL: + case AIE2P::VST_512_COMPOSED_REG_SPILL: + case AIE2P::VST_1024_COMPOSED_REG_SPILL: break; } @@ -883,6 +887,10 @@ void AIE2PInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, << "\n"); if (regClassMatches(AIE2P::mSclStRegClass, RC, SrcReg)) { Opcode = AIE2P::ST_R_SPILL; + } else if (&AIE2P::spill_vec1024_to_compositeRegClass == RC) { + Opcode = AIE2P::VST_1024_COMPOSED_REG_SPILL; + } else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) { + Opcode = AIE2P::VST_512_COMPOSED_REG_SPILL; } else if (regClassMatches(AIE2P::mQQssRegClass, RC, SrcReg)) { Opcode = AIE2P::ST_dmv_sts_q_spill; } else if (regClassMatches(AIE2P::mBMsRegClass, RC, SrcReg)) { @@ -913,14 +921,6 @@ void AIE2PInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = AIE2P::VST_E_SPILL; } else if (regClassMatches(AIE2P::VEC576RegClass, RC, SrcReg)) { Opcode = AIE2P::VST_EX_SPILL; - } else if (&AIE2P::spill_acc1024_to_compositeRegClass == RC) { - Opcode = AIE2P::VST_CM_SPILL; - } else if (&AIE2P::spill_acc512_to_compositeRegClass == RC) { - Opcode = AIE2P::VST_dmx_sts_bm_spill; - } else if 
(&AIE2P::spill_vec1024_to_compositeRegClass == RC) { - Opcode = AIE2P::VST_Y_SPILL; - } else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) { - Opcode = AIE2P::VST_dmx_sts_x_spill; } else if (regClassMatches(AIE2P::eSRegClass, RC, SrcReg) || regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, SrcReg)) { // Can't spill these directly. Need to bounce through a GPR. @@ -972,12 +972,17 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, RC = constrainRegClass(MBB.getParent()->getRegInfo(), RC, DstReg); if (regClassMatches(AIE2P::mLdaSclRegClass, RC, DstReg)) { Opcode = AIE2P::LDA_R_SPILL; + } else if (&AIE2P::spill_vec1024_to_compositeRegClass == RC) { + Opcode = AIE2P::VLDA_1024_COMPOSED_REG_SPILL; + } else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) { + // I->dump(); + Opcode = AIE2P::VLDA_512_COMPOSED_REG_SPILL; } else if (regClassMatches(AIE2P::mQQssRegClass, RC, DstReg)) { Opcode = AIE2P::LDA_dmv_lda_q_spill; } else if (regClassMatches(AIE2P::VEC256RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_dmw_lda_w_spill; } else if (regClassMatches(AIE2P::mBMsRegClass, RC, DstReg)) { - I->dump(); + // I->dump(); Opcode = AIE2P::VLDA_dmx_lda_bm_spill; } else if (regClassMatches(AIE2P::mFifoHLRegRegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_dmx_lda_fifohl_spill; @@ -986,7 +991,7 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, } else if (regClassMatches(AIE2P::ACC2048RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_DM_SPILL; } else if (regClassMatches(AIE2P::ACC1024RegClass, RC, DstReg)) { - I->dump(); + // I->dump(); Opcode = AIE2P::VLDA_CM_SPILL; } else if (regClassMatches(AIE2P::FIFO1024RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_FIFO_SPILL; @@ -1004,15 +1009,6 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = AIE2P::VLDA_E_SPILL; } else if (regClassMatches(AIE2P::VEC576RegClass, RC, DstReg)) { Opcode = AIE2P::VLDA_EX_SPILL; - } else if (&AIE2P::spill_acc1024_to_compositeRegClass == RC) { - Opcode 
= AIE2P::VLDA_CM_SPILL; - } else if (&AIE2P::spill_acc512_to_compositeRegClass == RC) { - // I->dump(); - Opcode = AIE2P::VLDA_dmx_lda_bm_spill; - } else if (&AIE2P::spill_vec1024_to_compositeRegClass == RC) { - Opcode = AIE2P::VLDA_Y_SPILL; - } else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) { - Opcode = AIE2P::VLDA_dmx_lda_x_spill; } else if (regClassMatches(AIE2P::eSRegClass, RC, DstReg) || regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, DstReg)) { // Can't spill these directly. Need to bounce through a GPR. @@ -1082,7 +1078,9 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_size}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_stride}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_count}}; - + case AIE2P::VST_1024_COMPOSED_REG_SPILL: + return {{AIE2P::VST_512_COMPOSED_REG_SPILL, AIE2P::sub_512_lo}, + {AIE2P::VST_512_COMPOSED_REG_SPILL, AIE2P::sub_512_hi}}; case AIE2P::LDA_R_SPILL: return {{AIE2P::LDA_dms_lda_spill, AIE2P::NoSubRegister, 4}}; case AIE2P::VLDA_L_SPILL: @@ -1132,6 +1130,11 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { case AIE2P::VST_EX_SPILL: return {{AIE2P::VST_dmx_sts_x_spill, AIE2P::sub_bfp16_x}, {AIE2P::VST_E_SPILL, AIE2P::sub_bfp16_e}}; + case AIE2P::VLDA_1024_COMPOSED_REG_SPILL: + return {{AIE2P::VLDA_512_COMPOSED_REG_SPILL, AIE2P::sub_512_lo}, + {AIE2P::VLDA_512_COMPOSED_REG_SPILL, AIE2P::sub_512_hi}}; + case AIE2P::VLDA_512_COMPOSED_REG_SPILL: + return {}; } llvm_unreachable("Un-implemented"); } @@ -1222,6 +1225,34 @@ bool AIE2PInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); return true; } + case AIE2P::VLDA_512_COMPOSED_REG_SPILL: { + unsigned int Opcode; + if (AIE2P::VEC512RegClass.contains(MI.getOperand(0).getReg())) { + Opcode = AIE2P::VLDA_dmx_lda_x_spill; + } else if (AIE2P::FIFO512RegClass.contains(MI.getOperand(0).getReg())) { + Opcode = 
AIE2P::VLDA_dmx_lda_fifohl_spill; + } else if (AIE2P::ACC512RegClass.contains(MI.getOperand(0).getReg())) { + Opcode = AIE2P::VLDA_dmx_lda_bm_spill; + } else { + llvm_unreachable("Not a valid register for VLDA_512_COMPOSED_REG_SPILL"); + } + MI.setDesc(get(Opcode)); + return false; + } + case AIE2P::VST_512_COMPOSED_REG_SPILL: { + unsigned int Opcode; + if (AIE2P::VEC512RegClass.contains(MI.getOperand(0).getReg())) { + Opcode = AIE2P::VST_dmx_sts_x_spill; + } else if (AIE2P::FIFO512RegClass.contains(MI.getOperand(0).getReg())) { + Opcode = AIE2P::VST_dmx_sts_fifohl_spill; + } else if (AIE2P::ACC512RegClass.contains(MI.getOperand(0).getReg())) { + Opcode = AIE2P::VST_dmx_sts_bm_spill; + } else { + llvm_unreachable("Not a valid register for VST_512_COMPOSED_REG_SPILL"); + } + MI.setDesc(get(Opcode)); + return false; + } } return false; } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td index 14822e9880b6..9e3da99e546f 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td @@ -321,6 +321,8 @@ def VST_FIFO_SPILL : Pseudo<(outs ), (ins FIFO1024:$src, c16n_step64:$imm), "vst def VST_PLFR_SPILL : Pseudo<(outs ), (ins ePSRFLdF:$src, c16n_step64:$imm), "vst_plfr_spill", "${src}, [sp, $imm]">; def VST_EX_SPILL : Pseudo<(outs ), (ins VEC576:$src, c16n_step64:$imm), "vst_ex_spill", "${src}, [sp, $imm]">; def VST_E_SPILL : Pseudo<(outs ), (ins EXPVEC64:$src, c12n_step4:$imm), "vst_e_spill", "$src, [sp, $imm]">; +def VST_512_COMPOSED_REG_SPILL : Pseudo<(outs ), (ins spill_vec512_to_composite:$src, c16n_step64:$imm), "vst_512_composed_reg_spill", "${src}, [sp, $imm]">; +def VST_1024_COMPOSED_REG_SPILL : Pseudo<(outs ), (ins spill_vec1024_to_composite:$src, c16n_step64:$imm), "vst_1024_composed_reg_spill", "${src}, [sp, $imm]">; } let mayLoad = true, mayStore = false in { @@ -335,6 +337,8 @@ def LDA_D_SPILL : Pseudo<(outs eD:$dst), (ins c12n_step4:$imm), "lda_d_spill", " def
LDA_DS_SPILL : Pseudo<(outs eDS:$dst), (ins c12n_step4:$imm), "lda_ds_spill", "${dst}, [sp, $imm]">; def VLDA_EX_SPILL : Pseudo<(outs VEC576:$dst), (ins c16n_step64:$imm), "vlda_ex_spill", "${dst}, [sp, $imm]">; def VLDA_E_SPILL : Pseudo<(outs EXPVEC64:$dst), (ins c12n_step4:$imm), "vlda_e_spill", "${dst}, [sp, $imm]">; +def VLDA_512_COMPOSED_REG_SPILL : Pseudo<(outs spill_vec512_to_composite:$dst), (ins c16n_step64:$imm), "vlda_512_composed_reg_spill", "${dst}, [sp, $imm]">; +def VLDA_1024_COMPOSED_REG_SPILL : Pseudo<(outs spill_vec1024_to_composite:$dst), (ins c16n_step64:$imm), "vlda_1024_composed_reg_spill", "${dst}, [sp, $imm]">; } } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp index 7e579348af70..9bb2bcaf31a6 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -194,6 +194,8 @@ bool AIE2PRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case AIE2P::VST_dmx_sts_bm_spill: case AIE2P::VST_dmx_sts_fifohl_spill: case AIE2P::VST_dmx_sts_x_spill: + case AIE2P::VLDA_512_COMPOSED_REG_SPILL: + case AIE2P::VST_512_COMPOSED_REG_SPILL: MI.getOperand(FIOperandNum).ChangeToImmediate(Offset); return false; case AIE2P::LDA_R_SPILL: @@ -231,10 +233,12 @@ bool AIE2PRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case AIE2P::VST_CM_SPILL: case AIE2P::VST_FIFO_SPILL: case AIE2P::VST_Y_SPILL: + case AIE2P::VST_1024_COMPOSED_REG_SPILL: case AIE2P::VLDA_DM_SPILL: case AIE2P::VLDA_CM_SPILL: case AIE2P::VLDA_FIFO_SPILL: case AIE2P::VLDA_Y_SPILL: + case AIE2P::VLDA_1024_COMPOSED_REG_SPILL: MI.getOperand(FIOperandNum).ChangeToImmediate(Offset); TII->expandSpillPseudo(MI, TRI, /*SubRegOffsetAlign=*/Align(4)); return true; @@ -488,13 +492,11 @@ AIE2PRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, if (AIE2P::eSRegClass.hasSubClassEq(RC)) return &AIE2P::spill_eS_to_eRRegClass; - if (SpillAccToVecOrAcc && RC
== &AIE2P::ACC1024RegClass) - return &AIE2P::spill_acc1024_to_compositeRegClass; - if (SpillAccToVecOrAcc && RC == &AIE2P::ACC512RegClass) - return &AIE2P::spill_acc512_to_compositeRegClass; - if (SpillAccToVecOrAcc && RC == &AIE2P::VEC1024RegClass) + if (SpillAccToVecOrAcc && + (RC == &AIE2P::ACC1024RegClass || RC == &AIE2P::VEC1024RegClass)) return &AIE2P::spill_vec1024_to_compositeRegClass; - if (SpillAccToVecOrAcc && RC == &AIE2P::VEC512RegClass) + if (SpillAccToVecOrAcc && + (RC == &AIE2P::ACC512RegClass || RC == &AIE2P::VEC512RegClass)) return &AIE2P::spill_vec512_to_compositeRegClass; return RC; } From d95347759b50fbc06a66f26b68ffa02c62247a8a Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Mon, 28 Apr 2025 13:39:25 +0100 Subject: [PATCH 6/6] [AIE2P][Fix-up] Remove fifo store register from composed regclass ItineraryRegPairs information is missing for the FIFO store registers (sf, sfl, sfh) when used with VMOV_alu_mv_mv_x, so code that uses them may be scheduled incorrectly. Remove the FIFO store registers from the composed register classes for the time being.
--- llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td index 9507b106dffa..6e9aaf239636 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td @@ -1011,9 +1011,9 @@ def spill_eDN_to_eR : AIE2PScalarRegisterClass<(add eDN, eR)>; def spill_eDJ_to_eR : AIE2PScalarRegisterClass<(add eDJ, eR, eDN)>; def spill_eDC_to_eR : AIE2PScalarRegisterClass<(add eDC, eR)>; -def spill_vec512_to_composite : AIE2PVector512RegisterClass<(add mXm, mBMm, sfh, sfl, lfh0, lfh1, lfl0, lfl1)>; -def spill_vec1024_to_composite : AIE2PVector1024RegisterClass<(add eY, mCMm, lf0, lf1, sf)>; -def spill_acc512_to_composite : AIE2PVector512RegisterClass<(add mBMm, mXm, sfh, sfl, lfh0, lfh1, lfl0, lfl1)>; -def spill_acc1024_to_composite : AIE2PVector1024RegisterClass<(add mCMm, eY, lf0, lf1, sf)>; +def spill_vec512_to_composite : AIE2PVector512RegisterClass<(add mXm, mBMm, lfh0, lfh1, lfl0, lfl1)>; +def spill_vec1024_to_composite : AIE2PVector1024RegisterClass<(add eY, mCMm, lf0, lf1)>; +def spill_acc512_to_composite : AIE2PVector512RegisterClass<(add mBMm, mXm, lfh0, lfh1, lfl0, lfl1)>; +def spill_acc1024_to_composite : AIE2PVector1024RegisterClass<(add mCMm, eY, lf0, lf1)>; } // End AIE2P Namespace