Skip to content

Commit 085f685

Browse files
committed
[AIE2p] Use tablegen pattern for trunc where dst is 1/4 bitsize
1 parent 03f2a63 commit 085f685

File tree

3 files changed

+100
-14
lines changed

3 files changed

+100
-14
lines changed

llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
include "AIEBaseInstrPatterns.td"
1717

18+
1819
// Placeholder for a bare frameindex. This pseudo represents the
1920
// pointer register to be allocated, initialized with the address
2021
// represented by the frameindex in its only operand.
@@ -1257,26 +1258,52 @@ defm : Extract_512<i64, v8i64, (i32 c6u:$idx), VEXTRACT_64_vec_extract_imm_vaddS
12571258
// instruction. Modes 0, 2, and 4 correspond to the deinterleaved operation of
12581259
// 1, 2, and 4 bytes respectively on the concatenated src0 and src1 input
12591260
// vectors of the VSHUFFLE instruction.
1260-
class Trunc1024Pat<ValueType DstTy, ValueType SrcTy, Instruction ShuffleInstOpc, int Mode> :
1261+
1262+
// | 1024-bits -> 512-bits | 512-bits -> 256-bits | 1024-bits -> 256-bits |
1263+
// +=======================+=======================+=======================|
1264+
// | v16i64 -> v16i32 | v8i64 -> v8i32 | v16i64 -> v16i16 |
1265+
// | v32i32 -> v32i16 | v16i32 -> v16i16 | v32i32 -> v32i8 |
1266+
// | v64i16 -> v64i8 | v32i16 -> v32i8 | |
1267+
// +=======================+=======================+=======================|
1268+
class Trunc1024To512Pat<ValueType DstTy, ValueType SrcTy, Instruction ShuffleInstOpc, int Mode> :
12611269
Pat<(DstTy (trunc SrcTy:$s1)),
12621270
(ShuffleInstOpc
12631271
(EXTRACT_SUBREG VEC1024:$s1, sub_512_lo ),
12641272
(EXTRACT_SUBREG VEC1024:$s1, sub_512_hi ),
12651273
(MOV_RLC_imm11_pseudo (i32 Mode)))>;
12661274

1267-
def : Trunc1024Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_x, 4>;
1268-
def : Trunc1024Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_bm, 4>;
1269-
def : Trunc1024Pat<v32i16, v32i32, VSHUFFLE_vec_shuffle_x, 2>;
1270-
def : Trunc1024Pat<v64i8, v64i16, VSHUFFLE_vec_shuffle_x, 0>;
1275+
def : Trunc1024To512Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_x, 4>;
1276+
def : Trunc1024To512Pat<v16i32, v16i64, VSHUFFLE_vec_shuffle_bm, 4>;
1277+
def : Trunc1024To512Pat<v32i16, v32i32, VSHUFFLE_vec_shuffle_x, 2>;
1278+
def : Trunc1024To512Pat<v64i8, v64i16, VSHUFFLE_vec_shuffle_x, 0>;
12711279

1272-
class Trunc512Pat<ValueType DstTy, ValueType SrcTy, int Mode> :
1280+
class Trunc512To256Pat<ValueType DstTy, ValueType SrcTy, int Mode> :
12731281
Pat<(DstTy (trunc SrcTy:$s1)),
12741282
(EXTRACT_SUBREG
12751283
(VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1, (MOV_RLC_imm11_pseudo (i32 Mode))),
12761284
sub_256_lo)>;
1277-
def : Trunc512Pat<v8i32, v8i64, 4>;
1278-
def : Trunc512Pat<v16i16, v16i32, 2>;
1279-
def : Trunc512Pat<v32i8, v32i16, 0>;
1285+
def : Trunc512To256Pat<v8i32, v8i64, 4>;
1286+
def : Trunc512To256Pat<v16i16, v16i32, 2>;
1287+
def : Trunc512To256Pat<v32i8, v32i16, 0>;
1288+
1289+
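// This is effectively Trunc1024To512 followed by Trunc512To256. The inner shuffle appears twice because Trunc512To256 uses the same 512-bit value for both shuffle inputs.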
1290+
class Trunc1024To256Pat<ValueType DstTy, ValueType SrcTy, int LargeMode, int SmallMode> :
1291+
Pat<(DstTy (trunc SrcTy:$s1)),
1292+
(EXTRACT_SUBREG
1293+
(VSHUFFLE_vec_shuffle_x
1294+
(VSHUFFLE_vec_shuffle_x
1295+
(EXTRACT_SUBREG VEC1024:$s1, sub_512_lo),
1296+
(EXTRACT_SUBREG VEC1024:$s1, sub_512_hi),
1297+
(MOV_RLC_imm11_pseudo (i32 LargeMode))),
1298+
(VSHUFFLE_vec_shuffle_x
1299+
(EXTRACT_SUBREG VEC1024:$s1, sub_512_lo),
1300+
(EXTRACT_SUBREG VEC1024:$s1, sub_512_hi),
1301+
(MOV_RLC_imm11_pseudo (i32 LargeMode))),
1302+
(MOV_RLC_imm11_pseudo (i32 SmallMode))),
1303+
sub_256_lo)>;
1304+
1305+
def : Trunc1024To256Pat<v32i8, v32i32, 2, 0>;
1306+
def : Trunc1024To256Pat<v16i16, v16i64, 4, 2>;
12801307

12811308
class EventPat<AIE2PInst Inst, dag Imm> :
12821309
Pat<(int_aie2p_event Imm), (Inst)>;

llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -281,13 +281,24 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
281281

282282
getActionDefinitionsBuilder(G_TRUNC)
283283
.legalIf([=](const LegalityQuery &Query) {
284+
// Return true if there is a tablegen pattern to lower truncs on vectors
285+
// of specific element types and lengths to shuffles.
284286
const LLT &SrcTy = Query.Types[1];
285287
const LLT &DstTy = Query.Types[0];
286-
return SrcTy.isVector() && DstTy.isVector() &&
287-
(SrcTy.getSizeInBits() == 512 ||
288-
SrcTy.getSizeInBits() == 1024) &&
289-
DstTy.getElementType().getSizeInBits() * 2 ==
290-
SrcTy.getElementType().getSizeInBits();
288+
289+
if (!SrcTy.isVector() || !DstTy.isVector())
290+
return false;
291+
292+
const auto SrcElmBits = SrcTy.getElementType().getSizeInBits();
293+
if (SrcElmBits != 64 && SrcElmBits != 32 && SrcElmBits != 16)
294+
return false;
295+
// Total vector widths select between the 1/2 and 1/4 size shuffle patterns.
296+
const TypeSize SrcBits = SrcTy.getSizeInBits();
297+
const TypeSize DstBits = DstTy.getSizeInBits();
298+
// 1024 -> 256 bits only has patterns for 64- and 32-bit source elements; a 16-bit source would need a 4-bit destination element.
299+
return ((SrcBits == 1024 && DstBits == 256 && SrcElmBits != 16) ||
300+
(SrcBits == 1024 && DstBits == 512) ||
301+
(SrcBits == 512 && DstBits == 256));
291302
})
292303
.legalIf([=](const LegalityQuery &Query) {
293304
const LLT &SrcTy = Query.Types[1];

llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,52 @@ body: |
184184
%0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s16>)
185185
PseudoRET implicit $lr, implicit %0
186186
...
187+
---
188+
name: v32s8_trunc_v32s32_vec1024
189+
legalized: true
190+
regBankSelected: true
191+
body: |
192+
bb.1.entry:
193+
; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024
194+
; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF
195+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0
196+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2
197+
; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
198+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
199+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]]
200+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2
201+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
202+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
203+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]]
204+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]]
205+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo
206+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]]
207+
%1:vregbank(<32 x s32>) = G_IMPLICIT_DEF
208+
%0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s32>)
209+
PseudoRET implicit $lr, implicit %0
210+
...
187211

212+
---
213+
name: v16s16_trunc_v16s64_vec1024
214+
legalized: true
215+
regBankSelected: true
216+
body: |
217+
bb.1.entry:
218+
; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024
219+
; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF
220+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2
221+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4
222+
; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
223+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
224+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]]
225+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4
226+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
227+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
228+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]]
229+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]]
230+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo
231+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]]
232+
%1:vregbank(<16 x s64>) = G_IMPLICIT_DEF
233+
%0:vregbank(<16 x s16>) = G_TRUNC %1(<16 x s64>)
234+
PseudoRET implicit $lr, implicit %0
235+
...

0 commit comments

Comments
 (0)