Skip to content

Commit adf53bf

Browse files
Add support for bfloat in VInsert PreLegalizerCombiner
1 parent ddb9b1a commit adf53bf

File tree

2 files changed

+231
-2
lines changed

2 files changed

+231
-2
lines changed

llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ unsigned getVInsertScalarSize(unsigned IntrinsicID) {
152152
case Intrinsic::aie2_vinsert8_I512:
153153
return 8;
154154
case Intrinsic::aie2_vinsert16_I512:
155+
case Intrinsic::aie2_vinsert16_bf512:
155156
return 16;
156157
case Intrinsic::aie2_vinsert32_I512:
157158
return 32;
@@ -188,11 +189,11 @@ AIE2PreLegalizerCombinerImpl::getVectorInsertIndices(
188189
if (!Cst ||
189190
!RegMap.try_emplace(Cst->Value.getZExtValue(), SclSrcReg).second)
190191
return {};
191-
CurMI = getDefIgnoringCopies(SrcReg, MRI);
192+
CurMI = getDefIgnoringCopiesAndBitcasts(SrcReg, false, MRI);
192193

193194
// Combining Set and Extract to fetch next VInsert
194195
if (IsSet(CurMI) && tryToCombineSetExtract(*CurMI))
195-
CurMI = getDefIgnoringCopies(SrcReg, MRI);
196+
CurMI = getDefIgnoringCopiesAndBitcasts(SrcReg, false, MRI);
196197
}
197198

198199
// For 128/256-bit vectors, not all lanes are explicitly defined. If the
@@ -392,6 +393,7 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
392393
}
393394
case Intrinsic::aie2_vinsert8_I512:
394395
case Intrinsic::aie2_vinsert16_I512:
396+
case Intrinsic::aie2_vinsert16_bf512:
395397
case Intrinsic::aie2_vinsert32_I512: {
396398
return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
397399
}

llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vinsert-sequence-prelegalizer.mir

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,3 +707,230 @@ body: |
707707
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.I512), %23(<16 x s32>), %8(s32), %2(s32)
708708
PseudoRET implicit $lr, implicit %24
709709
...
710+
711+
---
712+
name: vinsert16-bf256
713+
legalized: false
714+
body: |
715+
bb.1.entry:
716+
; CHECK-LABEL: name: vinsert16-bf256
717+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
718+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
719+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
720+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
721+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
722+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
723+
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
724+
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
725+
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
726+
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
727+
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
728+
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
729+
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
730+
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
731+
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
732+
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
733+
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR_TRUNC [[C15]](s32), [[C13]](s32), [[C11]](s32), [[C9]](s32), [[C7]](s32), [[C5]](s32), [[C3]](s32), [[C1]](s32), [[C]](s32), [[C2]](s32), [[C4]](s32), [[C6]](s32), [[C8]](s32), [[C10]](s32), [[C12]](s32), [[C14]](s32)
734+
; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<32 x s16>) = G_AIE_PAD_VECTOR_UNDEF [[BUILD_VECTOR_TRUNC]](<16 x s16>)
735+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[AIE_PAD_VECTOR_UNDEF]](<32 x s16>)
736+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s32>)
737+
%0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16int16)
738+
%100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
739+
%1:_(s32) = G_CONSTANT i32 0
740+
%2:_(s32) = G_CONSTANT i32 1
741+
%3:_(s32) = G_CONSTANT i32 2
742+
%4:_(s32) = G_CONSTANT i32 3
743+
%5:_(s32) = G_CONSTANT i32 4
744+
%6:_(s32) = G_CONSTANT i32 5
745+
%7:_(s32) = G_CONSTANT i32 6
746+
%8:_(s32) = G_CONSTANT i32 7
747+
%9:_(s32) = G_CONSTANT i32 8
748+
%10:_(s32) = G_CONSTANT i32 9
749+
%11:_(s32) = G_CONSTANT i32 10
750+
%12:_(s32) = G_CONSTANT i32 11
751+
%13:_(s32) = G_CONSTANT i32 12
752+
%14:_(s32) = G_CONSTANT i32 13
753+
%15:_(s32) = G_CONSTANT i32 14
754+
%16:_(s32) = G_CONSTANT i32 15
755+
%101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.I512.I256), %100(<8 x s32>), %1(s32)
756+
%17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
757+
%18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
758+
%19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
759+
%20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
760+
%21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
761+
%22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
762+
%23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
763+
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
764+
%25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
765+
%26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
766+
%27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
767+
%28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %12(s32), %7(s32)
768+
%29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %13(s32), %9(s32)
769+
%30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %14(s32), %11(s32)
770+
%31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %15(s32), %13(s32)
771+
%32:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %31(<16 x s32>), %16(s32), %15(s32)
772+
PseudoRET implicit $lr, implicit %32
773+
...
774+
775+
# Negative test case: the vinsert.16 sequence is not combined into G_BUILD_VECTOR_TRUNC because one lane index has no corresponding vinsert.16
776+
---
777+
name: vinsert16-bf256_idx_miss
778+
legalized: false
779+
body: |
780+
bb.1.entry:
781+
782+
; CHECK-LABEL: name: vinsert16-bf256_idx_miss
783+
; CHECK: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16int16)
784+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[INT]](<16 x s16>)
785+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
786+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
787+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
788+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
789+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
790+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
791+
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
792+
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
793+
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
794+
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
795+
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
796+
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
797+
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
798+
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
799+
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
800+
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
801+
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.I512.I256), [[BITCAST]](<8 x s32>), [[C]](s32)
802+
; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT1]](<16 x s32>), [[C]](s32), [[C15]](s32)
803+
; CHECK-NEXT: [[INT3:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT2]](<16 x s32>), [[C1]](s32), [[C13]](s32)
804+
; CHECK-NEXT: [[INT4:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT3]](<16 x s32>), [[C2]](s32), [[C11]](s32)
805+
; CHECK-NEXT: [[INT5:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT4]](<16 x s32>), [[C3]](s32), [[C9]](s32)
806+
; CHECK-NEXT: [[INT6:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT5]](<16 x s32>), [[C4]](s32), [[C7]](s32)
807+
; CHECK-NEXT: [[INT7:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT6]](<16 x s32>), [[C5]](s32), [[C5]](s32)
808+
; CHECK-NEXT: [[INT8:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT7]](<16 x s32>), [[C6]](s32), [[C3]](s32)
809+
; CHECK-NEXT: [[INT9:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT8]](<16 x s32>), [[C7]](s32), [[C1]](s32)
810+
; CHECK-NEXT: [[INT10:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT9]](<16 x s32>), [[C8]](s32), [[C]](s32)
811+
; CHECK-NEXT: [[INT11:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT10]](<16 x s32>), [[C9]](s32), [[C2]](s32)
812+
; CHECK-NEXT: [[INT12:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT11]](<16 x s32>), [[C10]](s32), [[C4]](s32)
813+
; CHECK-NEXT: [[INT13:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT12]](<16 x s32>), [[C12]](s32), [[C8]](s32)
814+
; CHECK-NEXT: [[INT14:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT13]](<16 x s32>), [[C13]](s32), [[C10]](s32)
815+
; CHECK-NEXT: [[INT15:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT14]](<16 x s32>), [[C14]](s32), [[C12]](s32)
816+
; CHECK-NEXT: [[INT16:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT15]](<16 x s32>), [[C15]](s32), [[C14]](s32)
817+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT16]](<16 x s32>)
818+
%0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16int16)
819+
%100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
820+
%1:_(s32) = G_CONSTANT i32 0
821+
%2:_(s32) = G_CONSTANT i32 1
822+
%3:_(s32) = G_CONSTANT i32 2
823+
%4:_(s32) = G_CONSTANT i32 3
824+
%5:_(s32) = G_CONSTANT i32 4
825+
%6:_(s32) = G_CONSTANT i32 5
826+
%7:_(s32) = G_CONSTANT i32 6
827+
%8:_(s32) = G_CONSTANT i32 7
828+
%9:_(s32) = G_CONSTANT i32 8
829+
%10:_(s32) = G_CONSTANT i32 9
830+
%11:_(s32) = G_CONSTANT i32 10
831+
%12:_(s32) = G_CONSTANT i32 11
832+
%13:_(s32) = G_CONSTANT i32 12
833+
%14:_(s32) = G_CONSTANT i32 13
834+
%15:_(s32) = G_CONSTANT i32 14
835+
%16:_(s32) = G_CONSTANT i32 15
836+
%101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.I512.I256), %100(<8 x s32>), %1(s32)
837+
%17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
838+
%18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
839+
%19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
840+
%20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
841+
%21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
842+
%22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
843+
%23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
844+
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
845+
%25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
846+
%26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
847+
%27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
848+
%28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %13(s32), %9(s32)
849+
%29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %14(s32), %11(s32)
850+
%30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %15(s32), %13(s32)
851+
%31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %16(s32), %15(s32)
852+
PseudoRET implicit $lr, implicit %31
853+
...
854+
855+
# Negative test case: the vinsert.16 sequence is not combined into G_BUILD_VECTOR_TRUNC because multiple vinsert.16 instructions write the same lane index
856+
---
857+
name: vinsert16-bf256_idx_multiple
858+
legalized: false
859+
body: |
860+
bb.1.entry:
861+
; CHECK-LABEL: name: vinsert16-bf256_idx_multiple
862+
; CHECK: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16int16)
863+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[INT]](<16 x s16>)
864+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
865+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
866+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
867+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
868+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
869+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
870+
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
871+
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
872+
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
873+
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
874+
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
875+
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
876+
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
877+
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
878+
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
879+
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
880+
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.I512.I256), [[BITCAST]](<8 x s32>), [[C]](s32)
881+
; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT1]](<16 x s32>), [[C]](s32), [[C15]](s32)
882+
; CHECK-NEXT: [[INT3:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT2]](<16 x s32>), [[C1]](s32), [[C13]](s32)
883+
; CHECK-NEXT: [[INT4:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT3]](<16 x s32>), [[C2]](s32), [[C11]](s32)
884+
; CHECK-NEXT: [[INT5:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT4]](<16 x s32>), [[C3]](s32), [[C9]](s32)
885+
; CHECK-NEXT: [[INT6:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT5]](<16 x s32>), [[C4]](s32), [[C7]](s32)
886+
; CHECK-NEXT: [[INT7:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT6]](<16 x s32>), [[C5]](s32), [[C5]](s32)
887+
; CHECK-NEXT: [[INT8:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT7]](<16 x s32>), [[C6]](s32), [[C3]](s32)
888+
; CHECK-NEXT: [[INT9:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT8]](<16 x s32>), [[C7]](s32), [[C1]](s32)
889+
; CHECK-NEXT: [[INT10:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT9]](<16 x s32>), [[C8]](s32), [[C]](s32)
890+
; CHECK-NEXT: [[INT11:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT10]](<16 x s32>), [[C9]](s32), [[C2]](s32)
891+
; CHECK-NEXT: [[INT12:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT11]](<16 x s32>), [[C10]](s32), [[C4]](s32)
892+
; CHECK-NEXT: [[INT13:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT12]](<16 x s32>), [[C11]](s32), [[C6]](s32)
893+
; CHECK-NEXT: [[INT14:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT13]](<16 x s32>), [[C12]](s32), [[C8]](s32)
894+
; CHECK-NEXT: [[INT15:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT14]](<16 x s32>), [[C13]](s32), [[C10]](s32)
895+
; CHECK-NEXT: [[INT16:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT15]](<16 x s32>), [[C13]](s32), [[C15]](s32)
896+
; CHECK-NEXT: [[INT17:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT16]](<16 x s32>), [[C14]](s32), [[C12]](s32)
897+
; CHECK-NEXT: [[INT18:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT17]](<16 x s32>), [[C15]](s32), [[C14]](s32)
898+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT18]](<16 x s32>)
899+
%0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16int16)
900+
%100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
901+
%1:_(s32) = G_CONSTANT i32 0
902+
%2:_(s32) = G_CONSTANT i32 1
903+
%3:_(s32) = G_CONSTANT i32 2
904+
%4:_(s32) = G_CONSTANT i32 3
905+
%5:_(s32) = G_CONSTANT i32 4
906+
%6:_(s32) = G_CONSTANT i32 5
907+
%7:_(s32) = G_CONSTANT i32 6
908+
%8:_(s32) = G_CONSTANT i32 7
909+
%9:_(s32) = G_CONSTANT i32 8
910+
%10:_(s32) = G_CONSTANT i32 9
911+
%11:_(s32) = G_CONSTANT i32 10
912+
%12:_(s32) = G_CONSTANT i32 11
913+
%13:_(s32) = G_CONSTANT i32 12
914+
%14:_(s32) = G_CONSTANT i32 13
915+
%15:_(s32) = G_CONSTANT i32 14
916+
%16:_(s32) = G_CONSTANT i32 15
917+
%101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.I512.I256), %100(<8 x s32>), %1(s32)
918+
%17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
919+
%18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
920+
%19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
921+
%20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
922+
%21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
923+
%22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
924+
%23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
925+
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
926+
%25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
927+
%26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
928+
%27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
929+
%28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %12(s32), %7(s32)
930+
%29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %13(s32), %9(s32)
931+
%30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %14(s32), %11(s32)
932+
%31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %14(s32), %16(s32)
933+
%32:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %31(<16 x s32>), %15(s32), %13(s32)
934+
%33:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %32(<16 x s32>), %16(s32), %15(s32)
935+
PseudoRET implicit $lr, implicit %33
936+
...

0 commit comments

Comments
 (0)