diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td index 77c3d23f6f5b..dff5b5b45246 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td @@ -1256,27 +1256,50 @@ defm : Extract_512 : +// vectors of the VSHUFFLE instruction. Specifically, the VSHUFFLE instruction +// with mode 0 performs a transpose on a tensor of type 64x2xi8. Other modes +// used here perform transposes on different types: + +// Mode Input type +// =====+============+ +// 0 | 64x2xi8 | +// 2 | 32x2xi16 | +// 4 | 16x2xi32 | +// 28 | 8x4xi16 | +// 36 | 16x4xi8 | +// =====+============+ + +class Trunc1024To512Pat : Pat<(DstTy (trunc SrcTy:$s1)), (ShuffleInstOpc (EXTRACT_SUBREG VEC1024:$s1, sub_512_lo ), (EXTRACT_SUBREG VEC1024:$s1, sub_512_hi ), (MOV_RLC_imm11_pseudo (i32 Mode)))>; -def : Trunc1024Pat; -def : Trunc1024Pat; -def : Trunc1024Pat; -def : Trunc1024Pat; +def : Trunc1024To512Pat; +def : Trunc1024To512Pat; +def : Trunc1024To512Pat; +def : Trunc1024To512Pat; -class Trunc512Pat : +class Trunc512To256Pat : Pat<(DstTy (trunc SrcTy:$s1)), (EXTRACT_SUBREG (VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1, (MOV_RLC_imm11_pseudo (i32 Mode))), sub_256_lo)>; -def : Trunc512Pat; -def : Trunc512Pat; -def : Trunc512Pat; +def : Trunc512To256Pat; +def : Trunc512To256Pat; +def : Trunc512To256Pat; + +class Trunc512To128Pat : + Pat<(DstTy (trunc SrcTy:$s1)), + (VMOV_alu_mv_mv_w_to_q + (EXTRACT_SUBREG + (VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1, + (MOV_RLC_imm11_pseudo (i32 Mode))), + sub_256_lo) + )>; +def : Trunc512To128Pat; +def : Trunc512To128Pat; class EventPat : Pat<(int_aie2p_event Imm), (Inst)>; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp index 6c91c50aa23d..1fb25e3126ef 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp @@ -280,20 +280,60 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) .clampScalar(1, S32, S32); getActionDefinitionsBuilder(G_TRUNC) + // Mark as legal all G_TRUNC with tablegen selection pattern: .legalIf([=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; const LLT &DstTy = Query.Types[0]; - return SrcTy.isVector() && DstTy.isVector() && - (SrcTy.getSizeInBits() == 512 || - SrcTy.getSizeInBits() == 1024) && - DstTy.getElementType().getSizeInBits() * 2 == - SrcTy.getElementType().getSizeInBits(); + + if (!SrcTy.isVector()) + return false; + assert(DstTy.isVector() && "Src is vector so Dst must be vector"); + + const uint16_t SrcElemBits = SrcTy.getElementType().getSizeInBits(); + const uint16_t DstElemBits = DstTy.getElementType().getSizeInBits(); + const uint16_t VectorSize = SrcTy.getNumElements(); + assert(VectorSize == DstTy.getNumElements() && + "Src and Dst vectors must have same number of elements"); + + // The case where the source vector's element type is i64: + // v16i64 -> v16i32, + // v8i64 -> v8i32, + // v8i64 -> v8i16, + if (SrcElemBits == 64) { + return (VectorSize == 16 && DstElemBits == 32) || + (VectorSize == 8 && DstElemBits == 32) || + (VectorSize == 8 && DstElemBits == 16); + } + + // The case where the source vector's element type is i32: + // v32i32 -> v32i16, + // v16i32 -> v16i16, + // v16i32 -> v16i8. + if (SrcElemBits == 32) { + return (VectorSize == 32 && DstElemBits == 16) || + (VectorSize == 16 && DstElemBits == 16) || + (VectorSize == 16 && DstElemBits == 8); + } + + // The case where the source vector's element type is i16: + // v64i16 -> v64i8, + // v32i16 -> v32i8. + if (SrcElemBits == 16) { + return (VectorSize == 64 && DstElemBits == 8) || + (VectorSize == 32 && DstElemBits == 8); + } + + return false; }) + + // Mark as legal all scalar G_TRUNC: .legalIf([=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; const LLT &DstTy = Query.Types[0]; return SrcTy.isScalar() && DstTy.isScalar(); }) + + // G_TRUNC 256-bit -> 128-bit is legalized by padding to 2x bitwidth: .customIf([=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; const LLT &DstTy = Query.Types[0]; @@ -301,10 +341,18 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) DstTy.getElementType().getSizeInBits() * 2 == SrcTy.getElementType().getSizeInBits(); }) + + // G_TRUNC on 2048-bit vector is legalized to 2 smaller G_TRUNCs. + // Similarly for G_TRUNC 1024-bit -> 256-bit: .fewerElementsIf( [=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; - return SrcTy.isVector() && SrcTy.getSizeInBits() == 2048; + const LLT &DstTy = Query.Types[0]; + if (!SrcTy.isVector() || !DstTy.isVector()) + return false; + const TypeSize SrcBits = SrcTy.getSizeInBits(); + const TypeSize DstBits = DstTy.getSizeInBits(); + return (SrcBits == 2048 || (SrcBits == 1024 && DstBits == 256)); }, [=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir index 67a709e48189..f0b8d0c4e463 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir @@ -184,4 +184,37 @@ body: | %0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s16>) PseudoRET implicit $lr, implicit %0 ... - +--- +name: v16s8_trunc_v16s32_vec512 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + ; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512 + ; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 36 + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo + ; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]] + %1:vregbank(<16 x s32>) = G_IMPLICIT_DEF + %0:vregbank(<16 x s8>) = G_TRUNC %1(<16 x s32>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v8s16_trunc_v8s64_vec512 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + ; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512 + ; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28 + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo + ; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]] + %1:vregbank(<8 x s64>) = G_IMPLICIT_DEF + %0:vregbank(<8 x s16>) = G_TRUNC %1(<8 x s64>) + PseudoRET implicit $lr, implicit %0 +... diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir index aaa9ff5a7fd0..03c6fce19025 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir @@ -133,3 +133,57 @@ body: | %0:_(<16 x s8>) = G_TRUNC %1(<16 x s16>) PseudoRET implicit $lr, implicit %0 ... +--- +name: v16s8_trunc_v16s32_vec512 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[DEF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<16 x s8>) + %1:_(<16 x s32>) = G_IMPLICIT_DEF + %0:_(<16 x s8>) = G_TRUNC %1(<16 x s32>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v8s16_trunc_v8s64_vec512 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512 + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF]](<8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<8 x s16>) + %1:_(<8 x s64>) = G_IMPLICIT_DEF + %0:_(<8 x s16>) = G_TRUNC %1(<8 x s64>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v32s8_trunc_v32s32_vec1024 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024 + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV1]](<16 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<16 x s8>), [[TRUNC1]](<16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<32 x s8>) + %1:_(<32 x s32>) = G_IMPLICIT_DEF + %0:_(<32 x s8>) = G_TRUNC %1(<32 x s32>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v16s16_trunc_v16s64_vec1024 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s64>), [[UV1:%[0-9]+]]:_(<8 x s64>) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV]](<8 x s64>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV1]](<8 x s64>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s16>), [[TRUNC1]](<8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s16>) + %1:_(<16 x s64>) = G_IMPLICIT_DEF + %0:_(<16 x s16>) = G_TRUNC %1(<16 x s64>) + PseudoRET implicit $lr, implicit %0 +...