-
Notifications
You must be signed in to change notification settings - Fork 28
Add support for bfloat in VInsert PreLegalizerCombiner #243
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: aie-public
Are you sure you want to change the base?
Changes from all commits
35e1fe0
921c2f6
ddb9b1a
adf53bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -152,6 +152,7 @@ unsigned getVInsertScalarSize(unsigned IntrinsicID) { | |
case Intrinsic::aie2_vinsert8_I512: | ||
return 8; | ||
case Intrinsic::aie2_vinsert16_I512: | ||
case Intrinsic::aie2_vinsert16_bf512: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should also get rid of that intrinsic at some point. |
||
return 16; | ||
case Intrinsic::aie2_vinsert32_I512: | ||
return 32; | ||
|
@@ -188,11 +189,11 @@ AIE2PreLegalizerCombinerImpl::getVectorInsertIndices( | |
if (!Cst || | ||
!RegMap.try_emplace(Cst->Value.getZExtValue(), SclSrcReg).second) | ||
return {}; | ||
CurMI = getDefIgnoringCopies(SrcReg, MRI); | ||
CurMI = getDefIgnoringCopiesAndBitcasts(SrcReg, false, MRI); | ||
|
||
// Combining Set and Extract to fetch next VInsert | ||
if (IsSet(CurMI) && tryToCombineSetExtract(*CurMI)) | ||
CurMI = getDefIgnoringCopies(SrcReg, MRI); | ||
CurMI = getDefIgnoringCopiesAndBitcasts(SrcReg, false, MRI); | ||
} | ||
|
||
// For 128/256-bit vectors, not all lanes are explicitly defined. If the | ||
|
@@ -392,6 +393,7 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic( | |
} | ||
case Intrinsic::aie2_vinsert8_I512: | ||
case Intrinsic::aie2_vinsert16_I512: | ||
case Intrinsic::aie2_vinsert16_bf512: | ||
case Intrinsic::aie2_vinsert32_I512: { | ||
return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID)); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,8 +170,8 @@ bool isNonCoalesceableUseOf(const MachineInstr &MemI, | |
MRI.hasOneNonDBGUse(InBetweenMI.getOperand(0).getReg())) { | ||
const MachineInstr *CopyOrignMI = | ||
MRI.getVRegDef(InBetweenMI.getOperand(1).getReg()); | ||
const MachineInstr *CopyDestMI = | ||
&*MRI.use_instr_nodbg_begin(InBetweenMI.getOperand(0).getReg()); | ||
const MachineInstr *CopyDestMI = getUserIgnoringCopiesAndBitcasts( | ||
InBetweenMI.getOperand(0).getReg(), MRI); | ||
if (CopyOrignMI == &MemI && CopyDestMI == &Dest) | ||
return false; | ||
} | ||
|
@@ -222,28 +222,53 @@ bool llvm::canAdvanceOp(MachineInstr &MemI, MachineInstr &Dest, | |
|
||
/// Find the def instruction for \p Reg, folding away any trivial copies and | ||
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register. | ||
/// The \p AllowMultiUse flag permits folding even if the def instruction for \p | ||
/// Reg, has multiple uses. | ||
MachineInstr * | ||
llvm::getDefIgnoringCopiesAndBitcasts(Register Reg, | ||
llvm::getDefIgnoringCopiesAndBitcasts(Register Reg, bool AllowMultiUse, | ||
const MachineRegisterInfo &MRI) { | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You seem to never use [truncated in capture]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh okay, there is one use in [truncated in capture]. |
||
MachineInstr *DefInstr = MRI.getVRegDef(Reg); | ||
// Checks if MI is a copy or bitcast and valid if multiple uses are allowed, | ||
// otherwise requires a single use. | ||
auto IsValidCopyOrBitcast = [&](const MachineInstr *MI) { | ||
return (MI->isCopy() || (MI->getOpcode() == TargetOpcode::G_BITCAST)) && | ||
(AllowMultiUse || | ||
MRI.hasOneNonDBGUse(DefInstr->getOperand(0).getReg())); | ||
}; | ||
|
||
auto UseVirtReg = [&](const MachineInstr *MI) { | ||
return MI->getOperand(1).getReg().isVirtual(); | ||
}; | ||
|
||
// Stop if we reach an use of a physical register. | ||
while (DefInstr && IsValidCopyOrBitcast(DefInstr) && UseVirtReg(DefInstr)) | ||
DefInstr = MRI.getVRegDef(DefInstr->getOperand(1).getReg()); | ||
|
||
return DefInstr; | ||
} | ||
|
||
/// Find the use instruction for \p Reg, folding away any trivial copies and | ||
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register. | ||
MachineInstr * | ||
llvm::getUserIgnoringCopiesAndBitcasts(Register Reg, | ||
const MachineRegisterInfo &MRI) { | ||
MachineInstr *User = &*MRI.use_instr_nodbg_begin(Reg); | ||
|
||
auto IsSingleUseCopyOrBitcast = [&](const MachineInstr *MI) { | ||
return (MI->isCopy() || | ||
(DefInstr->getOpcode() == TargetOpcode::G_BITCAST)) && | ||
return (MI->isCopy() || (MI->getOpcode() == TargetOpcode::G_BITCAST)) && | ||
MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()); | ||
}; | ||
|
||
auto UseVirtReg = [&](const MachineInstr *MI) { | ||
return MI->getOperand(1).getReg().isVirtual(); | ||
}; | ||
|
||
// No other use for this copy/bitcast. | ||
// Stop if we reach an use of a physical register. | ||
while (DefInstr && IsSingleUseCopyOrBitcast(DefInstr) && UseVirtReg(DefInstr)) | ||
DefInstr = MRI.getVRegDef(DefInstr->getOperand(1).getReg()); | ||
while (User && IsSingleUseCopyOrBitcast(User) && UseVirtReg(User)) | ||
User = &*MRI.use_instr_nodbg_begin(User->getOperand(0).getReg()); | ||
|
||
return DefInstr; | ||
return User; | ||
} | ||
|
||
MachineInstr *findLastRegUseInBB(Register Reg, MachineInstr &IgnoreUser, | ||
|
@@ -1395,7 +1420,8 @@ bool llvm::matchExtractConcat(MachineInstr &MI, MachineRegisterInfo &MRI, | |
const unsigned ExtractSize = | ||
MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); | ||
|
||
MachineInstr &SrcMI = *MRI.getVRegDef(MI.getOperand(ExtractOp->Src).getReg()); | ||
MachineInstr &SrcMI = *getDefIgnoringCopiesAndBitcasts( | ||
MI.getOperand(ExtractOp->Src).getReg(), true, MRI); | ||
|
||
Register SrcReg; | ||
unsigned ConcatSize = 0; | ||
|
@@ -1415,8 +1441,9 @@ void llvm::applyExtractConcat(MachineInstr &MI, MachineRegisterInfo &MRI, | |
B.setInstrAndDebugLoc(MI); | ||
Register DstReg = MI.getOperand(0).getReg(); | ||
Register SrcReg = MatchInfo; | ||
|
||
B.buildCopy(DstReg, SrcReg); | ||
// Build a copy if types match, otherwise build a bitcast. | ||
MRI.getType(DstReg) == MRI.getType(SrcReg) ? B.buildCopy(DstReg, SrcReg) | ||
: B.buildBitcast(DstReg, SrcReg); | ||
MI.eraseFromParent(); | ||
} | ||
|
||
|
@@ -1582,7 +1609,8 @@ bool llvm::matchLoadStoreSplit(GLoadStore &MI, MachineRegisterInfo &MRI, | |
return false; | ||
} | ||
} else { | ||
MachineInstr &ConvInstr = *getDefIgnoringCopiesAndBitcasts(ValReg, MRI); | ||
MachineInstr &ConvInstr = | ||
*getDefIgnoringCopiesAndBitcasts(ValReg, false, MRI); | ||
if (TII.canCombineWithLoadStore(ConvInstr)) | ||
return false; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks curious — all values different from 0 can be used as index [1]. I was wondering why not simply:
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
I think those headers were partly auto-generated, and for some intrinsics we have more than two different values. So I'd assume it's easier to write a generic generator if sticking to `if`/`else`.