Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions clang/include/clang/Basic/BuiltinsAIE.def
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,6 @@ BUILTIN(__builtin_aiev2_set_I64_I32,"V2iii","nc")
BUILTIN(__builtin_aiev2_set_I512_I256,"V16iV8ii","nc")
BUILTIN(__builtin_aiev2_set_I1024_I512,"V32iV16ii","nc")
BUILTIN(__builtin_aiev2_set_I1024_I256,"V32iV8ii","nc")
BUILTIN(__builtin_aiev2_set_bf512_bf256, "V32yV16yi","nc")
BUILTIN(__builtin_aiev2_set_bf1024_bf512,"V64yV32yi","nc")
BUILTIN(__builtin_aiev2_set_bf1024_bf256,"V64yV16yi","nc")
BUILTIN(__builtin_aiev2_set_ACC512_ACC256,"V16nV8ni","nc")
Expand All @@ -322,7 +321,6 @@ BUILTIN(__builtin_aiev2_ext_I32_I64,"iV2ii","nc")
BUILTIN(__builtin_aiev2_ext_I256_I512,"V8iV16ii","nc")
BUILTIN(__builtin_aiev2_ext_I512_I1024,"V16iV32ii","nc")
BUILTIN(__builtin_aiev2_ext_I256_I1024,"V8iV32ii","nc")
BUILTIN(__builtin_aiev2_ext_bf256_bf512, "V16yV32yi","nc")
BUILTIN(__builtin_aiev2_ext_bf512_bf1024,"V32yV64yi","nc")
BUILTIN(__builtin_aiev2_ext_bf256_bf1024,"V16yV64yi","nc")
BUILTIN(__builtin_aiev2_ext_ACC256_ACC512,"V8nV16ni","nc")
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Headers/aiev2_upd_ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,9 +511,9 @@ INTRINSIC(v8cint32) concat(v4cint32 a0, v4cint32 a1) {
// Extract 256-bit portion from 512-bit register
INTRINSIC(v16bfloat16) extract_v16bfloat16(v32bfloat16 a, int idx) {
if (idx == 0)
return __builtin_aiev2_ext_bf256_bf512(a, 0);
return __builtin_aiev2_ext_I256_I512(a, 0);
else
return __builtin_aiev2_ext_bf256_bf512(a, 1);
return __builtin_aiev2_ext_I256_I512(a, 1);
}

// Insert 256-bit in 512-bit register
Expand All @@ -527,9 +527,9 @@ INTRINSIC(v32bfloat16) insert(v32bfloat16 a, int idx, v16bfloat16 b) {
// Set 256-bit portion of 512-bit register
INTRINSIC(v32bfloat16) set_v32bfloat16(int idx, v16bfloat16 b) {
if (idx == 0)
return __builtin_aiev2_set_bf512_bf256(b, 0);
return __builtin_aiev2_set_I512_I256(b, 0);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks curious: every value other than 0 is treated as index 1. I was wondering why not simply:

return __builtin_aiev2_set_I512_I256(b, idx == 0 ? 0 : 1);

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think those headers were partly auto-generated, and for some intrinsics we have more than two different values. So I'd assume it's easier to write a generic generator if sticking to if/else.

else
return __builtin_aiev2_set_bf512_bf256(b, 1);
return __builtin_aiev2_set_I512_I256(b, 1);
}

INTRINSIC(v32bfloat16) concat(v16bfloat16 a0, v16bfloat16 a1) {
Expand Down
15 changes: 9 additions & 6 deletions clang/test/CodeGen/aie/aie-ups-intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,12 +564,15 @@ v16accfloat test_ups_to_v16accfloat_v16bfloat16(v16bfloat16 a) {
}
// CHECK-AIE2-LABEL: @_Z35test_ups_to_v32accfloat_v32bfloat16Dv32_u6__bf16(
// CHECK-AIE2-NEXT: entry:
// CHECK-AIE2-NEXT: [[TMP0:%.*]] = tail call <16 x bfloat> @llvm.aie2.ext.bf256.bf512(<32 x bfloat> [[A:%.*]], i32 0)
// CHECK-AIE2-NEXT: [[TMP1:%.*]] = tail call noundef <8 x i64> @llvm.aie2.v16bf16.to.v16accfloat(<16 x bfloat> [[TMP0]])
// CHECK-AIE2-NEXT: [[TMP2:%.*]] = tail call <16 x bfloat> @llvm.aie2.ext.bf256.bf512(<32 x bfloat> [[A]], i32 1)
// CHECK-AIE2-NEXT: [[TMP3:%.*]] = tail call noundef <8 x i64> @llvm.aie2.v16bf16.to.v16accfloat(<16 x bfloat> [[TMP2]])
// CHECK-AIE2-NEXT: [[TMP4:%.*]] = tail call noundef <16 x i64> @llvm.aie2.concat.1024.512.acc(<8 x i64> [[TMP1]], <8 x i64> [[TMP3]])
// CHECK-AIE2-NEXT: ret <16 x i64> [[TMP4]]
// CHECK-AIE2-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat> [[A:%.*]] to <16 x i32>
// CHECK-AIE2-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.aie2.ext.I256.I512(<16 x i32> [[TMP0]], i32 0)
// CHECK-AIE2-NEXT: [[RETVAL_0_I_I:%.*]] = bitcast <8 x i32> [[TMP1]] to <16 x bfloat>
// CHECK-AIE2-NEXT: [[TMP2:%.*]] = tail call noundef <8 x i64> @llvm.aie2.v16bf16.to.v16accfloat(<16 x bfloat> [[RETVAL_0_I_I]])
// CHECK-AIE2-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.aie2.ext.I256.I512(<16 x i32> [[TMP0]], i32 1)
// CHECK-AIE2-NEXT: [[RETVAL_0_I6_I:%.*]] = bitcast <8 x i32> [[TMP3]] to <16 x bfloat>
// CHECK-AIE2-NEXT: [[TMP4:%.*]] = tail call noundef <8 x i64> @llvm.aie2.v16bf16.to.v16accfloat(<16 x bfloat> [[RETVAL_0_I6_I]])
// CHECK-AIE2-NEXT: [[TMP5:%.*]] = tail call noundef <16 x i64> @llvm.aie2.concat.1024.512.acc(<8 x i64> [[TMP2]], <8 x i64> [[TMP4]])
// CHECK-AIE2-NEXT: ret <16 x i64> [[TMP5]]
//
// CHECK-AIE2P-LABEL: @_Z35test_ups_to_v32accfloat_v32bfloat16Dv32_u6__bf16(
// CHECK-AIE2P-NEXT: entry:
Expand Down
16 changes: 10 additions & 6 deletions clang/test/CodeGen/aie/aie2/aie2-upd-ext-intrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1651,8 +1651,10 @@ v16float test_extract_v16float(v32float a, int idx) {

// CHECK-LABEL: @_Z24test_extract_v16bfloat16Dv32_u6__bf16i(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2.ext.bf256.bf512(<32 x bfloat> [[A:%.*]], i32 0)
// CHECK-NEXT: ret <16 x bfloat> [[TMP0]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat> [[A:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.aie2.ext.I256.I512(<16 x i32> [[TMP0]], i32 0)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to <16 x bfloat>
// CHECK-NEXT: ret <16 x bfloat> [[TMP2]]
//
v16bfloat16 test_extract_v16bfloat16(v32bfloat16 a, int idx) {
return extract_v16bfloat16(a, 0);
Expand All @@ -1669,8 +1671,10 @@ v32bfloat16 test_insert(v32bfloat16 a, int idx, v16bfloat16 b) {

// CHECK-LABEL: @_Z20test_set_v32bfloat16iDv16_u6__bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x bfloat> @llvm.aie2.set.bf512.bf256(<16 x bfloat> [[B:%.*]], i32 1)
// CHECK-NEXT: ret <32 x bfloat> [[TMP0]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x bfloat> [[B:%.*]] to <8 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.aie2.set.I512.I256(<8 x i32> [[TMP0]], i32 1)
// CHECK-NEXT: [[RETVAL_0_I:%.*]] = bitcast <16 x i32> [[TMP1]] to <32 x bfloat>
// CHECK-NEXT: ret <32 x bfloat> [[RETVAL_0_I]]
//
v32bfloat16 test_set_v32bfloat16(int idx, v16bfloat16 b) {
return set_v32bfloat16(1, b);
Expand Down Expand Up @@ -1822,8 +1826,8 @@ v8bfloat16 test_extract_v8bfloat16_512(v32bfloat16 a, int idx) {

// CHECK-LABEL: @_Z27test_extract_v8bfloat16_256Dv16_u6__bf16i(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x bfloat> @llvm.aie2.set.bf512.bf256(<16 x bfloat> [[A:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <32 x bfloat> [[TMP0]] to <16 x i32>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x bfloat> [[A:%.*]] to <8 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.aie2.set.I512.I256(<8 x i32> [[TMP0]], i32 0)
// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.v32bfloat16()
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x bfloat> [[TMP2]] to <16 x i32>
// CHECK-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX:%.*]], 4
Expand Down
6 changes: 0 additions & 6 deletions llvm/include/llvm/IR/IntrinsicsAIE2.td
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,6 @@ class AIEV2SET_I1024_I512
: Intrinsic<[llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2SET_I1024_I256
: Intrinsic<[llvm_v32i32_ty], [llvm_v8i32_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2SET_bf512_bf256
: Intrinsic<[llvm_v32bf16_ty], [llvm_v16bf16_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2SET_bf1024_bf512
: Intrinsic<[llvm_v64bf16_ty], [llvm_v32bf16_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2SET_bf1024_bf256
Expand All @@ -284,8 +282,6 @@ class AIEV2EXT_I512_I1024
: Intrinsic<[llvm_v16i32_ty], [llvm_v32i32_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2EXT_I256_I1024
: Intrinsic<[llvm_v8i32_ty] , [llvm_v32i32_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2EXT_bf256_bf512
: Intrinsic<[llvm_v16bf16_ty] , [llvm_v32bf16_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2EXT_bf512_bf1024
: Intrinsic<[llvm_v32bf16_ty], [llvm_v64bf16_ty,llvm_i32_ty], [IntrNoMem]>;
class AIEV2EXT_bf256_bf1024
Expand Down Expand Up @@ -759,7 +755,6 @@ def int_aie2_set_I64_I32 : ClangBuiltin<"__builtin_aiev2_set_I64_I32">, AI
def int_aie2_set_I512_I256 : ClangBuiltin<"__builtin_aiev2_set_I512_I256">, AIEV2SET_I512_I256;
def int_aie2_set_I1024_I512 : ClangBuiltin<"__builtin_aiev2_set_I1024_I512">, AIEV2SET_I1024_I512;
def int_aie2_set_I1024_I256 : ClangBuiltin<"__builtin_aiev2_set_I1024_I256">, AIEV2SET_I1024_I256;
def int_aie2_set_bf512_bf256 : ClangBuiltin<"__builtin_aiev2_set_bf512_bf256">, AIEV2SET_bf512_bf256;
def int_aie2_set_bf1024_bf512 : ClangBuiltin<"__builtin_aiev2_set_bf1024_bf512">, AIEV2SET_bf1024_bf512;
def int_aie2_set_bf1024_bf256 : ClangBuiltin<"__builtin_aiev2_set_bf1024_bf256">, AIEV2SET_bf1024_bf256;
def int_aie2_set_512_256_acc :
Expand All @@ -773,7 +768,6 @@ def int_aie2_ext_I32_I64 : ClangBuiltin<"__builtin_aiev2_ext_I32_I64">, AI
def int_aie2_ext_I256_I512 : ClangBuiltin<"__builtin_aiev2_ext_I256_I512">, AIEV2EXT_I256_I512;
def int_aie2_ext_I512_I1024 : ClangBuiltin<"__builtin_aiev2_ext_I512_I1024">, AIEV2EXT_I512_I1024;
def int_aie2_ext_I256_I1024 : ClangBuiltin<"__builtin_aiev2_ext_I256_I1024">, AIEV2EXT_I256_I1024;
def int_aie2_ext_bf256_bf512 : ClangBuiltin<"__builtin_aiev2_ext_bf256_bf512">, AIEV2EXT_bf256_bf512;
def int_aie2_ext_bf512_bf1024 : ClangBuiltin<"__builtin_aiev2_ext_bf512_bf1024">, AIEV2EXT_bf512_bf1024;
def int_aie2_ext_bf256_bf1024 : ClangBuiltin<"__builtin_aiev2_ext_bf256_bf1024">, AIEV2EXT_bf256_bf1024;

Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1545,7 +1545,6 @@ AIE2InstrInfo::getVExtractOpInfo(const MachineInstr &MI) const {
case Intrinsic::aie2_ext_I512_I1024:
case Intrinsic::aie2_ext_I256_I1024:

case Intrinsic::aie2_ext_bf256_bf512:
case Intrinsic::aie2_ext_bf512_bf1024:
case Intrinsic::aie2_ext_bf256_bf1024:

Expand Down
14 changes: 4 additions & 10 deletions llvm/lib/Target/AIE/AIE2InstrPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -850,12 +850,10 @@ def : Pat<(int_aie2_upd_1024_256_acc ACC1024:$src1, ACC256:$src2, 0x3),
sub_512_hi)>;

// Set
foreach intr = [int_aie2_set_I512_I256, int_aie2_set_bf512_bf256] in {
def : Pat<(intr VEC256:$src, 0x0),
(REG_SEQUENCE VEC512, VEC256:$src, sub_256_lo)>;
def : Pat<(intr VEC256:$src, 0x1),
(REG_SEQUENCE VEC512, VEC256:$src, sub_256_hi)>;
}
def : Pat<(int_aie2_set_I512_I256 VEC256:$src, 0x0),
(REG_SEQUENCE VEC512, VEC256:$src, sub_256_lo)>;
def : Pat<(int_aie2_set_I512_I256 VEC256:$src, 0x1),
(REG_SEQUENCE VEC512, VEC256:$src, sub_256_hi)>;
foreach intr = [int_aie2_set_I1024_I512, int_aie2_set_bf1024_bf512] in {
def : Pat<(intr VEC512:$src, 0x0),
(REG_SEQUENCE VEC1024, VEC512:$src, sub_512_lo)>;
Expand Down Expand Up @@ -958,10 +956,6 @@ def : Pat<(int_aie2_ext_I256_I1024 VEC1024:$src, 0x2),
(v8i32 (EXTRACT_SUBREG VEC1024:$src, sub_512_hi_256_lo))>;
def : Pat<(int_aie2_ext_I256_I1024 VEC1024:$src, 0x3),
(v8i32 (EXTRACT_SUBREG VEC1024:$src, sub_512_hi_256_hi))>;
def : Pat<(int_aie2_ext_bf256_bf512 VEC512:$src, 0x0),
(v16bf16 (EXTRACT_SUBREG VEC512:$src, sub_256_lo))>;
def : Pat<(int_aie2_ext_bf256_bf512 VEC512:$src, 0x1),
(v16bf16 (EXTRACT_SUBREG VEC512:$src, sub_256_hi))>;
def : Pat<(int_aie2_ext_bf512_bf1024 VEC1024:$src, 0x0),
(v32bf16 (EXTRACT_SUBREG VEC1024:$src, sub_512_lo))>;
def : Pat<(int_aie2_ext_bf512_bf1024 VEC1024:$src, 0x1),
Expand Down
14 changes: 9 additions & 5 deletions llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,8 @@ bool AIE2InstructionSelector::canCombineUNPACKLoad(MachineInstr &MemOp,
bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK(
MachineInstr &UNPACKI, MachineRegisterInfo &MRI) {
Register LoadResult = (std::next(UNPACKI.uses().begin()))->getReg();
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);
MachineInstr *LoadOp =
getDefIgnoringCopiesAndBitcasts(LoadResult, false, MRI);
// Should we build the instruction at load's position?
bool ShouldAdvanceOp = false;

Expand Down Expand Up @@ -2041,7 +2042,8 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UPS(MachineInstr &UPSI,

// First use is the G_INTRINSIC_W_SIDE_EFFECTS ID
Register LoadResult = (std::next(UPSI.uses().begin()))->getReg();
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);
MachineInstr *LoadOp =
getDefIgnoringCopiesAndBitcasts(LoadResult, false, MRI);

assert(LoadOp && "Expected SSA.");

Expand Down Expand Up @@ -3138,7 +3140,8 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_PACK(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register PackResult = (StoreI.uses().begin())->getReg();
MachineInstr *PackOp = getDefIgnoringCopiesAndBitcasts(PackResult, MRI);
MachineInstr *PackOp =
getDefIgnoringCopiesAndBitcasts(PackResult, false, MRI);

assert(PackOp && "Expected SSA.");

Expand Down Expand Up @@ -3291,7 +3294,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_SRS(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register SrsResult = (StoreI.uses().begin())->getReg();
MachineInstr *SrsOp = getDefIgnoringCopiesAndBitcasts(SrsResult, MRI);
MachineInstr *SrsOp = getDefIgnoringCopiesAndBitcasts(SrsResult, false, MRI);

assert(SrsOp && "Expected SSA.");

Expand Down Expand Up @@ -3395,7 +3398,8 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_CONV(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register ConvResult = (StoreI.uses().begin())->getReg();
MachineInstr *ConvOp = getDefIgnoringCopiesAndBitcasts(ConvResult, MRI);
MachineInstr *ConvOp =
getDefIgnoringCopiesAndBitcasts(ConvResult, false, MRI);

assert(ConvOp && "Expected SSA.");

Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ unsigned getVInsertScalarSize(unsigned IntrinsicID) {
case Intrinsic::aie2_vinsert8_I512:
return 8;
case Intrinsic::aie2_vinsert16_I512:
case Intrinsic::aie2_vinsert16_bf512:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should also get rid of that intrinsic at some point

return 16;
case Intrinsic::aie2_vinsert32_I512:
return 32;
Expand Down Expand Up @@ -188,11 +189,11 @@ AIE2PreLegalizerCombinerImpl::getVectorInsertIndices(
if (!Cst ||
!RegMap.try_emplace(Cst->Value.getZExtValue(), SclSrcReg).second)
return {};
CurMI = getDefIgnoringCopies(SrcReg, MRI);
CurMI = getDefIgnoringCopiesAndBitcasts(SrcReg, false, MRI);

// Combining Set and Extract to fetch next VInsert
if (IsSet(CurMI) && tryToCombineSetExtract(*CurMI))
CurMI = getDefIgnoringCopies(SrcReg, MRI);
CurMI = getDefIgnoringCopiesAndBitcasts(SrcReg, false, MRI);
}

// For 128/256-bit vectors, not all lanes are explicitly defined. If the
Expand Down Expand Up @@ -392,6 +393,7 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
}
case Intrinsic::aie2_vinsert8_I512:
case Intrinsic::aie2_vinsert16_I512:
case Intrinsic::aie2_vinsert16_bf512:
case Intrinsic::aie2_vinsert32_I512: {
return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AIE/AIEBaseInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,8 @@ bool AIEBaseInstructionSelector::canCombineCONVLoad(MachineInstr &MemOp,
bool AIEBaseInstructionSelector::selectG_AIE_LOAD_CONV(
MachineInstr &CONVI, MachineRegisterInfo &MRI) {
Register LoadResult = (std::next(CONVI.uses().begin()))->getReg();
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);
MachineInstr *LoadOp =
getDefIgnoringCopiesAndBitcasts(LoadResult, false, MRI);
assert(LoadOp && "Expected SSA.");

// Do not try to combine if one of the load's defs is used by another
Expand Down
54 changes: 41 additions & 13 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ bool isNonCoalesceableUseOf(const MachineInstr &MemI,
MRI.hasOneNonDBGUse(InBetweenMI.getOperand(0).getReg())) {
const MachineInstr *CopyOrignMI =
MRI.getVRegDef(InBetweenMI.getOperand(1).getReg());
const MachineInstr *CopyDestMI =
&*MRI.use_instr_nodbg_begin(InBetweenMI.getOperand(0).getReg());
const MachineInstr *CopyDestMI = getUserIgnoringCopiesAndBitcasts(
InBetweenMI.getOperand(0).getReg(), MRI);
if (CopyOrignMI == &MemI && CopyDestMI == &Dest)
return false;
}
Expand Down Expand Up @@ -222,28 +222,53 @@ bool llvm::canAdvanceOp(MachineInstr &MemI, MachineInstr &Dest,

/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
/// The \p AllowMultiUse flag permits folding through a copy or bitcast even
/// if the register it defines has multiple uses.
MachineInstr *
llvm::getDefIgnoringCopiesAndBitcasts(Register Reg,
llvm::getDefIgnoringCopiesAndBitcasts(Register Reg, bool AllowMultiUse,
const MachineRegisterInfo &MRI) {

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You seem to never use AllowMultiUse=true. Do you have future plans outside of this PR?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh okay, there is one use in matchExtractConcat. I would be curious to see whether allowing multiple uses in other combiners would help as well.

MachineInstr *DefInstr = MRI.getVRegDef(Reg);
// Checks if MI is a copy or bitcast and valid if multiple uses are allowed,
// otherwise requires a single use.
auto IsValidCopyOrBitcast = [&](const MachineInstr *MI) {
return (MI->isCopy() || (MI->getOpcode() == TargetOpcode::G_BITCAST)) &&
(AllowMultiUse ||
MRI.hasOneNonDBGUse(DefInstr->getOperand(0).getReg()));
};

auto UseVirtReg = [&](const MachineInstr *MI) {
return MI->getOperand(1).getReg().isVirtual();
};

// Stop if we reach a use of a physical register.
while (DefInstr && IsValidCopyOrBitcast(DefInstr) && UseVirtReg(DefInstr))
DefInstr = MRI.getVRegDef(DefInstr->getOperand(1).getReg());

return DefInstr;
}

/// Find the use instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *
llvm::getUserIgnoringCopiesAndBitcasts(Register Reg,
const MachineRegisterInfo &MRI) {
MachineInstr *User = &*MRI.use_instr_nodbg_begin(Reg);

auto IsSingleUseCopyOrBitcast = [&](const MachineInstr *MI) {
return (MI->isCopy() ||
(DefInstr->getOpcode() == TargetOpcode::G_BITCAST)) &&
return (MI->isCopy() || (MI->getOpcode() == TargetOpcode::G_BITCAST)) &&
MRI.hasOneNonDBGUse(MI->getOperand(0).getReg());
};

auto UseVirtReg = [&](const MachineInstr *MI) {
return MI->getOperand(1).getReg().isVirtual();
};

// No other use for this copy/bitcast.
// Stop if we reach a use of a physical register.
while (DefInstr && IsSingleUseCopyOrBitcast(DefInstr) && UseVirtReg(DefInstr))
DefInstr = MRI.getVRegDef(DefInstr->getOperand(1).getReg());
while (User && IsSingleUseCopyOrBitcast(User) && UseVirtReg(User))
User = &*MRI.use_instr_nodbg_begin(User->getOperand(0).getReg());

return DefInstr;
return User;
}

MachineInstr *findLastRegUseInBB(Register Reg, MachineInstr &IgnoreUser,
Expand Down Expand Up @@ -1395,7 +1420,8 @@ bool llvm::matchExtractConcat(MachineInstr &MI, MachineRegisterInfo &MRI,
const unsigned ExtractSize =
MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

MachineInstr &SrcMI = *MRI.getVRegDef(MI.getOperand(ExtractOp->Src).getReg());
MachineInstr &SrcMI = *getDefIgnoringCopiesAndBitcasts(
MI.getOperand(ExtractOp->Src).getReg(), true, MRI);

Register SrcReg;
unsigned ConcatSize = 0;
Expand All @@ -1415,8 +1441,9 @@ void llvm::applyExtractConcat(MachineInstr &MI, MachineRegisterInfo &MRI,
B.setInstrAndDebugLoc(MI);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MatchInfo;

B.buildCopy(DstReg, SrcReg);
// Build a copy if types match, otherwise build a bitcast.
MRI.getType(DstReg) == MRI.getType(SrcReg) ? B.buildCopy(DstReg, SrcReg)
: B.buildBitcast(DstReg, SrcReg);
MI.eraseFromParent();
}

Expand Down Expand Up @@ -1582,7 +1609,8 @@ bool llvm::matchLoadStoreSplit(GLoadStore &MI, MachineRegisterInfo &MRI,
return false;
}
} else {
MachineInstr &ConvInstr = *getDefIgnoringCopiesAndBitcasts(ValReg, MRI);
MachineInstr &ConvInstr =
*getDefIgnoringCopiesAndBitcasts(ValReg, false, MRI);
if (TII.canCombineWithLoadStore(ConvInstr))
return false;
}
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,14 @@ bool canAdvanceOp(MachineInstr &MemI, MachineInstr &Dest,
const MachineRegisterInfo &MRI);
/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg,
/// The \p AllowMultiUse flag permits folding through a copy or bitcast even
/// if the register it defines has multiple uses.
MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg, bool AllowMultiUse,
const MachineRegisterInfo &MRI);
/// Find the use instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *getUserIgnoringCopiesAndBitcasts(Register Reg,
const MachineRegisterInfo &MRI);

class InstrNode {
MachineInstr *BaseNode;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/AIEPostSelectOptimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ bool getGlobalValue(const MachineInstr *MI,
SmallPtrSet<const Value *, 4> &GVSet,
MachineRegisterInfo &MRI) {

MI = getDefIgnoringCopiesAndBitcasts(MI->getOperand(1).getReg(), MRI);
MI = getDefIgnoringCopiesAndBitcasts(MI->getOperand(1).getReg(), false, MRI);

// We need an instruction that explicitly moves a global
// to a register (move immediate).
Expand Down
Loading