Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 33 additions & 10 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -1256,27 +1256,50 @@ defm : Extract_512<i64, v8i64, (i32 c6u:$idx), VEXTRACT_64_vec_extract_imm_vaddS
// The G_TRUNC operation can be efficiently implemented using a VSHUFFLE
// instruction. Modes 0, 2, and 4 correspond to the deinterleaved operation of
// 1, 2, and 4 bytes respectively on the concatenated src0 and src1 input
// vectors of the VSHUFFLE instruction.
class Trunc1024Pat<ValueType DstTy, ValueType SrcTy, Instruction ShuffleInstOpc, int Mode> :
// vectors of the VSHUFFLE instruction. Specifically, the VSHUFFLE instruction
// with mode 0 performs a transpose on a tensor of type 64x2xi8. Other modes
// used here perform transposes on different types:

// Mode | Input type |
// =====+============+
//    0 | 64x2xi8    |
//    2 | 32x2xi16   |
//    4 | 16x2xi32   |
//   28 | 8x4xi16    |
//   36 | 16x4xi8    |
// =====+============+

// Selection pattern for a vector G_TRUNC whose source lives in a VEC1024
// register. The source is split into its sub_512_lo and sub_512_hi halves,
// which become the two vector operands of ShuffleInstOpc; the shuffle mode is
// materialized with MOV_RLC_imm11_pseudo.
//   DstTy / SrcTy   - result and source value types of the matched trunc.
//   ShuffleInstOpc  - the VSHUFFLE instruction variant to emit.
//   Mode            - immediate shuffle mode passed to the instruction.
class Trunc1024To512Pat<ValueType DstTy, ValueType SrcTy, Instruction ShuffleInstOpc, int Mode> :
Pat<(DstTy (trunc SrcTy:$s1)),
(ShuffleInstOpc
(EXTRACT_SUBREG VEC1024:$s1, sub_512_lo ),
(EXTRACT_SUBREG VEC1024:$s1, sub_512_hi ),
(MOV_RLC_imm11_pseudo (i32 Mode)))>;

def : Trunc1024Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_x, 4>;
def : Trunc1024Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_bm, 4>;
def : Trunc1024Pat<v32i16, v32i32, VSHUFFLE_vec_shuffle_x, 2>;
def : Trunc1024Pat<v64i8, v64i16, VSHUFFLE_vec_shuffle_x, 0>;
def : Trunc1024To512Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_x, 4>;
def : Trunc1024To512Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_bm, 4>;
def : Trunc1024To512Pat<v32i16, v32i32, VSHUFFLE_vec_shuffle_x, 2>;
def : Trunc1024To512Pat<v64i8, v64i16, VSHUFFLE_vec_shuffle_x, 0>;

class Trunc512Pat<ValueType DstTy, ValueType SrcTy, int Mode> :
// Selection pattern for a vector G_TRUNC whose source lives in a VEC512
// register. The source is passed as both vector operands of
// VSHUFFLE_vec_shuffle_x (mode materialized with MOV_RLC_imm11_pseudo), and
// only the sub_256_lo half of the shuffle result is kept as the truncated
// value.
class Trunc512To256Pat<ValueType DstTy, ValueType SrcTy, int Mode> :
Pat<(DstTy (trunc SrcTy:$s1)),
(EXTRACT_SUBREG
(VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1, (MOV_RLC_imm11_pseudo (i32 Mode))),
sub_256_lo)>;
def : Trunc512Pat<v8i32, v8i64, 4>;
def : Trunc512Pat<v16i16, v16i32, 2>;
def : Trunc512Pat<v32i8, v32i16, 0>;
def : Trunc512To256Pat<v8i32, v8i64, 4>;
def : Trunc512To256Pat<v16i16, v16i32, 2>;
def : Trunc512To256Pat<v32i8, v32i16, 0>;

// Selection pattern for a vector G_TRUNC from a VEC512 source where the
// element width shrinks by a factor of four (see the instantiations below).
// The source is passed as both vector operands of VSHUFFLE_vec_shuffle_x, the
// sub_256_lo half of the shuffle result is extracted, and that 256-bit value
// is then moved with VMOV_alu_mv_mv_w_to_q to produce the final result.
// NOTE(review): presumably the w_to_q move keeps the low 128 bits of its
// 256-bit input - confirm against the instruction definition.
class Trunc512To128Pat<ValueType DstTy, ValueType SrcTy, int Mode> :
Pat<(DstTy (trunc SrcTy:$s1)),
(VMOV_alu_mv_mv_w_to_q
(EXTRACT_SUBREG
(VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1,
(MOV_RLC_imm11_pseudo (i32 Mode))),
sub_256_lo)
)>;
// v16i32 -> v16i8 uses shuffle mode 36 (16x4xi8 input layout).
def : Trunc512To128Pat<v16i8, v16i32, 36>;
// v8i64 -> v8i16 uses shuffle mode 28 (8x4xi16 input layout).
def : Trunc512To128Pat<v8i16, v8i64, 28>;

class EventPat<AIE2PInst Inst, dag Imm> :
Pat<(int_aie2p_event Imm), (Inst)>;
Expand Down
60 changes: 54 additions & 6 deletions llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,31 +280,79 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
.clampScalar(1, S32, S32);

getActionDefinitionsBuilder(G_TRUNC)
// Mark as legal all G_TRUNC with tablegen selection pattern:
.legalIf([=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
const LLT &DstTy = Query.Types[0];
return SrcTy.isVector() && DstTy.isVector() &&
(SrcTy.getSizeInBits() == 512 ||
SrcTy.getSizeInBits() == 1024) &&
DstTy.getElementType().getSizeInBits() * 2 ==
SrcTy.getElementType().getSizeInBits();

if (!SrcTy.isVector())
return false;
assert(DstTy.isVector() && "Src is vector so Dst must be vector");

const uint16_t SrcElemBits = SrcTy.getElementType().getSizeInBits();
const uint16_t DstElemBits = DstTy.getElementType().getSizeInBits();
const uint16_t VectorSize = SrcTy.getNumElements();
assert(VectorSize == DstTy.getNumElements() &&
"Src and Dst vectors must have same number of elements");

// The case where the source vector's element type is i64:
// v16i64 -> v16i32,
// v8i64 -> v8i32,
// v8i64 -> v8i16,
if (SrcElemBits == 64) {
return (VectorSize == 16 && DstElemBits == 32) ||
(VectorSize == 8 && DstElemBits == 32) ||
(VectorSize == 8 && DstElemBits == 16);
}

// The case where the source vector's element type is i32:
// v32i32 -> v32i16,
// v16i32 -> v16i16,
// v16i32 -> v16i8.
if (SrcElemBits == 32) {
return (VectorSize == 32 && DstElemBits == 16) ||
(VectorSize == 16 && DstElemBits == 16) ||
(VectorSize == 16 && DstElemBits == 8);
}

// The case where the source vector's element type is i16:
// v64i16 -> v64i8,
// v32i16 -> v32i8.
if (SrcElemBits == 16) {
return (VectorSize == 64 && DstElemBits == 8) ||
(VectorSize == 32 && DstElemBits == 8);
}

return false;
})

// Scalar-to-scalar G_TRUNC is always legal. Types[1] is the source type and
// Types[0] the destination type of the query.
.legalIf([=](const LegalityQuery &Query) {
  const bool SrcIsScalar = Query.Types[1].isScalar();
  const bool DstIsScalar = Query.Types[0].isScalar();
  return SrcIsScalar && DstIsScalar;
})

// A G_TRUNC from a 256-bit vector to a vector with half-width elements
// (i.e. 256-bit -> 128-bit) takes the custom path, which legalizes it by
// padding to 2x bitwidth.
.customIf([=](const LegalityQuery &Query) {
  const LLT &From = Query.Types[1];
  const bool Is256BitVector = From.isVector() && From.getSizeInBits() == 256;
  if (!Is256BitVector)
    return false;

  // Only the case where each element is narrowed to exactly half its width.
  const LLT &To = Query.Types[0];
  return To.getElementType().getSizeInBits() * 2 ==
         From.getElementType().getSizeInBits();
})

// G_TRUNC on 2048-bit vector is legalized to 2 smaller G_TRUNCs.
// Similarly for G_TRUNC 1024-bit -> 256-bit:
.fewerElementsIf(
[=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
return SrcTy.isVector() && SrcTy.getSizeInBits() == 2048;
const LLT &DstTy = Query.Types[0];
if (!SrcTy.isVector() || !DstTy.isVector())
return false;
const TypeSize SrcBits = SrcTy.getSizeInBits();
const TypeSize DstBits = DstTy.getSizeInBits();
return (SrcBits == 2048 || (SrcBits == 1024 && DstBits == 256));
},
[=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
Expand Down
35 changes: 34 additions & 1 deletion llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,37 @@ body: |
%0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s16>)
PseudoRET implicit $lr, implicit %0
...

---
# Instruction selection of G_TRUNC <16 x s32> -> <16 x s8>: the CHECK lines
# expect a VSHUFFLE_vec_shuffle_x of the source with itself using mode 36, a
# copy of the sub_256_lo subregister, and a VMOV_alu_mv_mv_w_to_q producing
# the vec128 result.
name: v16s8_trunc_v16s32_vec512
legalized: true
regBankSelected: true
body: |
bb.1.entry:
; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512
; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 36
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]]
; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo
; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]]
%1:vregbank(<16 x s32>) = G_IMPLICIT_DEF
%0:vregbank(<16 x s8>) = G_TRUNC %1(<16 x s32>)
PseudoRET implicit $lr, implicit %0
...
---
# Instruction selection of G_TRUNC <8 x s64> -> <8 x s16>: the CHECK lines
# expect a VSHUFFLE_vec_shuffle_x of the source with itself using mode 28, a
# copy of the sub_256_lo subregister, and a VMOV_alu_mv_mv_w_to_q producing
# the vec128 result.
name: v8s16_trunc_v8s64_vec512
legalized: true
regBankSelected: true
body: |
bb.1.entry:
; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512
; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]]
; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo
; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]]
%1:vregbank(<8 x s64>) = G_IMPLICIT_DEF
%0:vregbank(<8 x s16>) = G_TRUNC %1(<8 x s64>)
PseudoRET implicit $lr, implicit %0
...
54 changes: 54 additions & 0 deletions llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,57 @@ body: |
%0:_(<16 x s8>) = G_TRUNC %1(<16 x s16>)
PseudoRET implicit $lr, implicit %0
...
---
# Legalizer test: G_TRUNC <16 x s32> -> <16 x s8> is expected to be left
# unchanged (a single G_TRUNC remains in the output).
name: v16s8_trunc_v16s32_vec512
body: |
bb.1.entry:
; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[DEF]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<16 x s8>)
%1:_(<16 x s32>) = G_IMPLICIT_DEF
%0:_(<16 x s8>) = G_TRUNC %1(<16 x s32>)
PseudoRET implicit $lr, implicit %0
...
---
# Legalizer test: G_TRUNC <8 x s64> -> <8 x s16> is expected to be left
# unchanged (a single G_TRUNC remains in the output).
name: v8s16_trunc_v8s64_vec512
body: |
bb.1.entry:
; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512
; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF]](<8 x s64>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<8 x s16>)
%1:_(<8 x s64>) = G_IMPLICIT_DEF
%0:_(<8 x s16>) = G_TRUNC %1(<8 x s64>)
PseudoRET implicit $lr, implicit %0
...
---
# Legalizer test: G_TRUNC <32 x s32> -> <32 x s8> (1024-bit -> 256-bit) is
# expected to be split into two <16 x s32> -> <16 x s8> truncs on the
# unmerged halves, with the results joined by G_CONCAT_VECTORS.
name: v32s8_trunc_v32s32_vec1024
body: |
bb.1.entry:
; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV]](<16 x s32>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV1]](<16 x s32>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<16 x s8>), [[TRUNC1]](<16 x s8>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<32 x s8>)
%1:_(<32 x s32>) = G_IMPLICIT_DEF
%0:_(<32 x s8>) = G_TRUNC %1(<32 x s32>)
PseudoRET implicit $lr, implicit %0
...
---
# Legalizer test: G_TRUNC <16 x s64> -> <16 x s16> (1024-bit -> 256-bit) is
# expected to be split into two <8 x s64> -> <8 x s16> truncs on the
# unmerged halves, with the results joined by G_CONCAT_VECTORS.
name: v16s16_trunc_v16s64_vec1024
body: |
bb.1.entry:
; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s64>), [[UV1:%[0-9]+]]:_(<8 x s64>) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV]](<8 x s64>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV1]](<8 x s64>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s16>), [[TRUNC1]](<8 x s16>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s16>)
%1:_(<16 x s64>) = G_IMPLICIT_DEF
%0:_(<16 x s16>) = G_TRUNC %1(<16 x s64>)
PseudoRET implicit $lr, implicit %0
...