Skip to content

Commit a815f2b

Browse files
committed
[AIE2p] Use shuffle mode 36 (trunc 512->128) and legalize trunc 1024->256 by splitting in two
1 parent 085f685 commit a815f2b

File tree

4 files changed

+148
-76
lines changed

4 files changed

+148
-76
lines changed

llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
include "AIEBaseInstrPatterns.td"
1717

18-
1918
// Placeholder for a bare frameindex. This pseudo represents the
2019
// pointer register to be allocated, initialized with the address
2120
// represented by the frameindex in its only operand.
@@ -1257,14 +1256,19 @@ defm : Extract_512<i64, v8i64, (i32 c6u:$idx), VEXTRACT_64_vec_extract_imm_vaddS
12571256
// The G_TRUNC operation can be efficiently implemented using a VSHUFFLE
12581257
// instruction. Modes 0, 2, and 4 correspond to the deinterleaved operation of
12591258
// 1, 2, and 4 bytes respectively on the concatenated src0 and src1 input
1260-
// vectors of the VSHUFFLE instruction.
1261-
1262-
// | 1024-bits -> 512-bits | 512-bits -> 256-bits | 1024-bits -> 256-bits |
1263-
// +=======================+=======================+=======================|
1264-
// | v16i64 -> v16i32 | v8i32 -> v8i16 | v16i64 -> v16i16 |
1265-
// | v32i32 -> v32i16 | v16i16 -> v16i8 | v32i32 -> v32i8 |
1266-
// | v64i16 -> v64i8 | v32i8 -> v32i4 | |
1267-
// +=======================+=======================+=======================|
1259+
// vectors of the VSHUFFLE instruction. Specifically, the VSHUFFLE instruction
1260+
// with mode 0 performs a transpose on a tensor of type 64x2xi8. Other modes
1261+
// used here perform transposes on different types:
1262+
1263+
// Mode Input type
1264+
// =====+============+
1265+
// 0 | 64x2xi8 |
1266+
// 2 | 32x2xi16 |
1267+
// 4 | 16x2xi32 |
1268+
// 28 | 8x4xi16 |
1269+
// 36 | 16x4xi8 |
1270+
// =====+============+
1271+
12681272
class Trunc1024To512Pat<ValueType DstTy, ValueType SrcTy, Instruction ShuffleInstOpc, int Mode> :
12691273
Pat<(DstTy (trunc SrcTy:$s1)),
12701274
(ShuffleInstOpc
@@ -1286,24 +1290,16 @@ def : Trunc512To256Pat<v8i32, v8i64, 4>;
12861290
def : Trunc512To256Pat<v16i16, v16i32, 2>;
12871291
def : Trunc512To256Pat<v32i8, v32i16, 0>;
12881292

1289-
// This is effectively Trunc1024To512 followed by Trunc512To256.
1290-
class Trunc1024To256Pat<ValueType DstTy, ValueType SrcTy, int LargeMode, int SmallMode> :
1293+
class Trunc512To128Pat<ValueType DstTy, ValueType SrcTy, int Mode> :
12911294
Pat<(DstTy (trunc SrcTy:$s1)),
1292-
(EXTRACT_SUBREG
1293-
(VSHUFFLE_vec_shuffle_x
1294-
(VSHUFFLE_vec_shuffle_x
1295-
(EXTRACT_SUBREG VEC1024:$s1, sub_512_lo),
1296-
(EXTRACT_SUBREG VEC1024:$s1, sub_512_hi),
1297-
(MOV_RLC_imm11_pseudo (i32 LargeMode))),
1298-
(VSHUFFLE_vec_shuffle_x
1299-
(EXTRACT_SUBREG VEC1024:$s1, sub_512_lo),
1300-
(EXTRACT_SUBREG VEC1024:$s1, sub_512_hi),
1301-
(MOV_RLC_imm11_pseudo (i32 LargeMode))),
1302-
(MOV_RLC_imm11_pseudo (i32 SmallMode))),
1303-
sub_256_lo)>;
1304-
1305-
def : Trunc1024To256Pat<v32i8, v32i32, 2, 0>;
1306-
def : Trunc1024To256Pat<v16i16, v16i64, 4, 2>;
1295+
(VMOV_alu_mv_mv_w_to_q
1296+
(EXTRACT_SUBREG
1297+
(VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1,
1298+
(MOV_RLC_imm11_pseudo (i32 Mode))),
1299+
sub_256_lo)
1300+
)>;
1301+
def : Trunc512To128Pat<v16i8, v16i32, 36>;
1302+
def : Trunc512To128Pat<v8i16, v8i64, 28>;
13071303

13081304
class EventPat<AIE2PInst Inst, dag Imm> :
13091305
Pat<(int_aie2p_event Imm), (Inst)>;

llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -280,42 +280,79 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
280280
.clampScalar(1, S32, S32);
281281

282282
getActionDefinitionsBuilder(G_TRUNC)
283+
// Mark as legal all vector G_TRUNCs that have a tablegen selection pattern:
283284
.legalIf([=](const LegalityQuery &Query) {
284-
// Return true if there is a tablegen pattern to lower truncs on vectors
285-
// of specific element types and lengths to shuffles.
286285
const LLT &SrcTy = Query.Types[1];
287286
const LLT &DstTy = Query.Types[0];
288287

289-
if (!SrcTy.isVector() || !DstTy.isVector())
288+
if (!SrcTy.isVector())
290289
return false;
291-
292-
const auto SrcElmBits = SrcTy.getElementType().getSizeInBits();
293-
if (SrcElmBits != 64 && SrcElmBits != 32 && SrcElmBits != 16)
294-
return false;
295-
296-
const TypeSize SrcBits = SrcTy.getSizeInBits();
297-
const TypeSize DstBits = DstTy.getSizeInBits();
298-
299-
return ((SrcBits == 1024 && DstBits == 256) ||
300-
(SrcBits == 1024 && DstBits == 512) ||
301-
(SrcBits == 512 && DstBits == 256));
290+
assert(DstTy.isVector() && "Src is vector so Dst must be vector");
291+
292+
const uint16_t SrcElemBits = SrcTy.getElementType().getSizeInBits();
293+
const uint16_t DstElemBits = DstTy.getElementType().getSizeInBits();
294+
const uint16_t VectorSize = SrcTy.getNumElements();
295+
assert(VectorSize == DstTy.getNumElements() &&
296+
"Src and Dst vectors must have same number of elements");
297+
298+
// The case where the source vector's element type is i64:
299+
// v16i64 -> v16i32,
300+
// v8i64 -> v8i32,
301+
// v8i64 -> v8i16.
302+
if (SrcElemBits == 64) {
303+
return (VectorSize == 16 && DstElemBits == 32) ||
304+
(VectorSize == 8 && DstElemBits == 32) ||
305+
(VectorSize == 8 && DstElemBits == 16);
306+
}
307+
308+
// The case where the source vector's element type is i32:
309+
// v32i32 -> v32i16,
310+
// v16i32 -> v16i16,
311+
// v16i32 -> v16i8.
312+
if (SrcElemBits == 32) {
313+
return (VectorSize == 32 && DstElemBits == 16) ||
314+
(VectorSize == 16 && DstElemBits == 16) ||
315+
(VectorSize == 16 && DstElemBits == 8);
316+
}
317+
318+
// The case where the source vector's element type is i16:
319+
// v64i16 -> v64i8,
320+
// v32i16 -> v32i8.
321+
if (SrcElemBits == 16) {
322+
return (VectorSize == 64 && DstElemBits == 8) ||
323+
(VectorSize == 32 && DstElemBits == 8);
324+
}
325+
326+
return false;
302327
})
328+
329+
// Mark as legal all scalar G_TRUNC:
303330
.legalIf([=](const LegalityQuery &Query) {
304331
const LLT &SrcTy = Query.Types[1];
305332
const LLT &DstTy = Query.Types[0];
306333
return SrcTy.isScalar() && DstTy.isScalar();
307334
})
335+
336+
// G_TRUNC 256-bit -> 128-bit is legalized by padding to 2x bitwidth:
308337
.customIf([=](const LegalityQuery &Query) {
309338
const LLT &SrcTy = Query.Types[1];
310339
const LLT &DstTy = Query.Types[0];
311340
return SrcTy.isVector() && SrcTy.getSizeInBits() == 256 &&
312341
DstTy.getElementType().getSizeInBits() * 2 ==
313342
SrcTy.getElementType().getSizeInBits();
314343
})
344+
345+
// G_TRUNC on 2048-bit vector is legalized to 2 smaller G_TRUNCs.
346+
// Similarly for G_TRUNC 1024-bit -> 256-bit:
315347
.fewerElementsIf(
316348
[=](const LegalityQuery &Query) {
317349
const LLT &SrcTy = Query.Types[1];
318-
return SrcTy.isVector() && SrcTy.getSizeInBits() == 2048;
350+
const LLT &DstTy = Query.Types[0];
351+
if (!SrcTy.isVector() || !DstTy.isVector())
352+
return false;
353+
const TypeSize SrcBits = SrcTy.getSizeInBits();
354+
const TypeSize DstBits = DstTy.getSizeInBits();
355+
return (SrcBits == 2048 || (SrcBits == 1024 && DstBits == 256));
319356
},
320357
[=](const LegalityQuery &Query) {
321358
const LLT &SrcTy = Query.Types[1];

llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir

Lines changed: 20 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -185,51 +185,36 @@ body: |
185185
PseudoRET implicit $lr, implicit %0
186186
...
187187
---
188-
name: v32s8_trunc_v32s32_vec1024
188+
name: v16s8_trunc_v16s32_vec512
189189
legalized: true
190190
regBankSelected: true
191191
body: |
192192
bb.1.entry:
193-
; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024
194-
; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF
195-
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0
196-
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2
197-
; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
198-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
199-
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]]
200-
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2
201-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
202-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
203-
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]]
204-
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]]
205-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo
206-
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]]
207-
%1:vregbank(<32 x s32>) = G_IMPLICIT_DEF
208-
%0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s32>)
193+
; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512
194+
; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
195+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 36
196+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]]
197+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo
198+
; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]]
199+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]]
200+
%1:vregbank(<16 x s32>) = G_IMPLICIT_DEF
201+
%0:vregbank(<16 x s8>) = G_TRUNC %1(<16 x s32>)
209202
PseudoRET implicit $lr, implicit %0
210203
...
211-
212204
---
213-
name: v16s16_trunc_v16s64_vec1024
205+
name: v8s16_trunc_v8s64_vec512
214206
legalized: true
215207
regBankSelected: true
216208
body: |
217209
bb.1.entry:
218-
; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024
219-
; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF
220-
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2
221-
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4
222-
; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
223-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
224-
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]]
225-
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4
226-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi
227-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo
228-
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]]
229-
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]]
230-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo
231-
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]]
232-
%1:vregbank(<16 x s64>) = G_IMPLICIT_DEF
233-
%0:vregbank(<16 x s16>) = G_TRUNC %1(<16 x s64>)
210+
; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512
211+
; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
212+
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28
213+
; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]]
214+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo
215+
; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]]
216+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]]
217+
%1:vregbank(<8 x s64>) = G_IMPLICIT_DEF
218+
%0:vregbank(<8 x s16>) = G_TRUNC %1(<8 x s64>)
234219
PseudoRET implicit $lr, implicit %0
235220
...

llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,57 @@ body: |
133133
%0:_(<16 x s8>) = G_TRUNC %1(<16 x s16>)
134134
PseudoRET implicit $lr, implicit %0
135135
...
136+
---
137+
name: v16s8_trunc_v16s32_vec512
138+
body: |
139+
bb.1.entry:
140+
; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512
141+
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
142+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[DEF]](<16 x s32>)
143+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<16 x s8>)
144+
%1:_(<16 x s32>) = G_IMPLICIT_DEF
145+
%0:_(<16 x s8>) = G_TRUNC %1(<16 x s32>)
146+
PseudoRET implicit $lr, implicit %0
147+
...
148+
---
149+
name: v8s16_trunc_v8s64_vec512
150+
body: |
151+
bb.1.entry:
152+
; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512
153+
; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
154+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF]](<8 x s64>)
155+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<8 x s16>)
156+
%1:_(<8 x s64>) = G_IMPLICIT_DEF
157+
%0:_(<8 x s16>) = G_TRUNC %1(<8 x s64>)
158+
PseudoRET implicit $lr, implicit %0
159+
...
160+
---
161+
name: v32s8_trunc_v32s32_vec1024
162+
body: |
163+
bb.1.entry:
164+
; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024
165+
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
166+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
167+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV]](<16 x s32>)
168+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV1]](<16 x s32>)
169+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<16 x s8>), [[TRUNC1]](<16 x s8>)
170+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<32 x s8>)
171+
%1:_(<32 x s32>) = G_IMPLICIT_DEF
172+
%0:_(<32 x s8>) = G_TRUNC %1(<32 x s32>)
173+
PseudoRET implicit $lr, implicit %0
174+
...
175+
---
176+
name: v16s16_trunc_v16s64_vec1024
177+
body: |
178+
bb.1.entry:
179+
; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024
180+
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
181+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s64>), [[UV1:%[0-9]+]]:_(<8 x s64>) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
182+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV]](<8 x s64>)
183+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV1]](<8 x s64>)
184+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s16>), [[TRUNC1]](<8 x s16>)
185+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s16>)
186+
%1:_(<16 x s64>) = G_IMPLICIT_DEF
187+
%0:_(<16 x s16>) = G_TRUNC %1(<16 x s64>)
188+
PseudoRET implicit $lr, implicit %0
189+
...

0 commit comments

Comments
 (0)