-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AMDGPU] More accurately account for AVGPR pressure #150711
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Jeffrey Byrnes (jrbyrnes) ChangesAfter #146606 we will only assign %av registers to $agpr after we run out of $vgpr. This teaches our scheduler about this property about the %av registers. Still need to the non-unified case, and add some tests. Patch is 22.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150711.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 334afd3a2a5b4..286c8d9529731 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
const auto SGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
const auto VGPROcc = std::min(
- MaxOccupancy, ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
- DynamicVGPRBlockSize));
+ MaxOccupancy,
+ ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
+ DynamicVGPRBlockSize));
const auto OtherSGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
- const auto OtherVGPROcc =
- std::min(MaxOccupancy,
- ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()),
- DynamicVGPRBlockSize));
+ const auto OtherVGPROcc = std::min(
+ MaxOccupancy, ST.getOccupancyWithNumVGPRs(
+ O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
+ DynamicVGPRBlockSize));
const auto Occ = std::min(SGPROcc, VGPROcc);
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
@@ -135,35 +137,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
- unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
-
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessVGPR =
- std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
+ std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
VGPRForSGPRSpills - MaxVGPRs),
0);
- unsigned OtherExcessVGPR =
- std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
- OtherVGPRForSGPRSpills - MaxVGPRs),
- 0);
+ unsigned OtherExcessVGPR = std::max(
+ static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
+ OtherVGPRForSGPRSpills - MaxVGPRs),
+ 0);
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
// spills
- unsigned ExcessArchVGPR = std::max(
- static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
- 0);
+ unsigned ExcessArchVGPR =
+ std::max(static_cast<int>(getVGPRNum(false, MaxArchVGPRs) +
+ VGPRForSGPRSpills - MaxArchVGPRs),
+ 0);
unsigned OtherExcessArchVGPR =
- std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
- MaxArchVGPRs),
+ std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) +
+ OtherVGPRForSGPRSpills - MaxArchVGPRs),
0);
// AGPR excess pressure conditions
- unsigned ExcessAGPR = std::max(
- static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
- : (getAGPRNum() - MaxVGPRs)),
- 0);
+ unsigned ExcessAGPR =
+ std::max(static_cast<int>(ST.hasGFX90AInsts()
+ ? (getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
+ : (getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
+ 0);
unsigned OtherExcessAGPR = std::max(
- static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
- : (O.getAGPRNum() - MaxVGPRs)),
+ static_cast<int>(ST.hasGFX90AInsts()
+ ? (O.getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
+ : (O.getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
0);
bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -184,14 +187,21 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
return VGPRDiff > 0;
if (SGPRDiff != 0) {
unsigned PureExcessVGPR =
- std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
- 0) +
- std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
+ std::max(
+ static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
+ MaxVGPRs),
+ 0) +
+ std::max(
+ static_cast<int>(getVGPRNum(false, MaxArchVGPRs) - MaxArchVGPRs),
+ 0);
unsigned OtherPureExcessVGPR =
std::max(
- static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
+ static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
+ MaxVGPRs),
0) +
- std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
+ std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) -
+ MaxArchVGPRs),
+ 0);
// If we have a special case where there is a tie in excess VGPR, but one
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
@@ -221,33 +231,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
if (SW != OtherSW)
return SW < OtherSW;
} else {
- auto VW = getVGPRTuplesWeight();
- auto OtherVW = O.getVGPRTuplesWeight();
+ auto VW = getVGPRTuplesWeight(MaxArchVGPRs);
+ auto OtherVW = O.getVGPRTuplesWeight(MaxArchVGPRs);
if (VW != OtherVW)
return VW < OtherVW;
}
}
// Give final precedence to lower general RP.
- return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
- (getVGPRNum(ST.hasGFX90AInsts()) <
- O.getVGPRNum(ST.hasGFX90AInsts()));
+ return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
+ : (getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) <
+ O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs));
}
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize) {
return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
- OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
- << "AGPRs: " << RP.getAGPRNum();
+ OS << "VGPRs: " << RP.getArchVGPRNum(ST->getAddressableNumArchVGPRs())
+ << ' ' << "AGPRs: " << RP.getAGPRNum(ST->getAddressableNumArchVGPRs());
if (ST)
OS << "(O"
- << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()),
- DynamicVGPRBlockSize)
+ << ST->getOccupancyWithNumVGPRs(
+ RP.getVGPRNum(ST->hasGFX90AInsts(),
+ ST->getAddressableNumArchVGPRs()),
+ DynamicVGPRBlockSize)
<< ')';
OS << ", SGPRs: " << RP.getSGPRNum();
if (ST)
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
- OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
+ OS << ", LVGPR WT: "
+ << RP.getVGPRTuplesWeight(ST->getAddressableNumArchVGPRs())
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
if (ST)
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
@@ -398,8 +411,9 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ AddressableNumArchVGPRs = ST.getAddressableNumArchVGPRs();
MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
- MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
+ MaxVGPRs = std::min(AddressableNumArchVGPRs, NumVGPRs);
MaxUnifiedVGPRs =
ST.hasGFX90AInsts()
? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
@@ -414,15 +428,21 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg,
if (SRI->isSGPRClass(RC))
return RP.getSGPRNum() > MaxSGPRs;
- unsigned NumVGPRs =
- SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
+
+ bool ShouldUseAGPR =
+ SRI->isAGPRClass(RC) ||
+ (SRI->isVectorSuperClass(RC) &&
+ RP.getArchVGPRNum(AddressableNumArchVGPRs) >= AddressableNumArchVGPRs);
+ unsigned NumVGPRs = ShouldUseAGPR
+ ? RP.getAGPRNum(AddressableNumArchVGPRs)
+ : RP.getArchVGPRNum(AddressableNumArchVGPRs);
return isVGPRBankSaveBeneficial(NumVGPRs);
}
bool GCNRPTarget::satisfied() const {
if (RP.getSGPRNum() > MaxSGPRs)
return false;
- if (RP.getVGPRNum(false) > MaxVGPRs &&
+ if (RP.getVGPRNum(false, AddressableNumArchVGPRs) > MaxVGPRs &&
(!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
return false;
return satisfiesUnifiedTarget();
@@ -876,10 +896,12 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
OS << "---\nname: " << MF.getName() << "\nbody: |\n";
- auto printRP = [](const GCNRegPressure &RP) {
- return Printable([&RP](raw_ostream &OS) {
+ auto printRP = [&MF](const GCNRegPressure &RP) {
+ return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
- << format(" %-5d", RP.getVGPRNum(false));
+ << format(" %-5d",
+ RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
+ .getAddressableNumArchVGPRs()));
});
};
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index ea33a229110c1..a8c1c3bfd8703 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -43,13 +43,13 @@ struct GCNRegPressure {
/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
- /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
- /// dependent upon \p UnifiedVGPRFile
- unsigned getVGPRNum(bool UnifiedVGPRFile) const {
+ unsigned getVGPRNum(bool UnifiedVGPRFile,
+ unsigned AddressableArchVGPR) const {
if (UnifiedVGPRFile) {
- return Value[AGPR]
- ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR])
- : Value[VGPR] + Value[AVGPR];
+ return Value[AGPR] || Value[AVGPR]
+ ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR],
+ AddressableArchVGPR)
+ : Value[VGPR];
}
// AVGPR assignment priority is based on the width of the register. Account
// AVGPR pressure as VGPR.
@@ -61,33 +61,60 @@ struct GCNRegPressure {
/// VGPR file.
inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
unsigned NumAGPRs,
- unsigned NumAVGPRs) {
-
- // Assume AVGPRs will be assigned as VGPRs.
- return alignTo(NumArchVGPRs + NumAVGPRs,
+ unsigned NumAVGPRs,
+ unsigned AddressableArchVGPR) {
+
+ // Until we hit the VGPRThreshold, we will assign AV as VGPR. After that
+ // point, we will assign as AGPR.
+ unsigned AVGPRsAsVGPRs =
+ NumArchVGPRs < AddressableArchVGPR
+ ? std::min((AddressableArchVGPR - NumArchVGPRs), NumAVGPRs)
+ : 0;
+ unsigned AVGPRsAsAGPRs =
+ NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0;
+ return alignTo(NumArchVGPRs + AVGPRsAsVGPRs,
AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
- NumAGPRs;
+ NumAGPRs + AVGPRsAsAGPRs;
}
/// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be
/// allocated as VGPR
- unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; }
+ unsigned getArchVGPRNum(unsigned AddressableArchVGPR) const {
+ return std::min(Value[VGPR] + Value[AVGPR], AddressableArchVGPR);
+ }
/// \returns the AccVGPR32 pressure
- unsigned getAGPRNum() const { return Value[AGPR]; }
+ unsigned getAGPRNum(unsigned AddressableArchVGPR) const {
+ unsigned VGPRsForAGPRs =
+ Value[VGPR] + Value[AVGPR] > AddressableArchVGPR
+ ? (Value[VGPR] + Value[AVGPR] - AddressableArchVGPR)
+ : 0;
+ return Value[AGPR] + VGPRsForAGPRs;
+ }
/// \returns the AVGPR32 pressure
unsigned getAVGPRNum() const { return Value[AVGPR]; }
- unsigned getVGPRTuplesWeight() const {
- return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR],
- Value[TOTAL_KINDS + AGPR]);
+ unsigned getVGPRTuplesWeight(unsigned AddressableArchVGPR) const {
+ unsigned AVGPRsAsVGPRs =
+ Value[TOTAL_KINDS + VGPR] < AddressableArchVGPR
+ ? std::min(AddressableArchVGPR - Value[TOTAL_KINDS + VGPR],
+ Value[TOTAL_KINDS + AVGPR])
+ : 0;
+ unsigned AVGPRsAsAGPRs = Value[TOTAL_KINDS + AVGPR] > AVGPRsAsVGPRs
+ ? Value[TOTAL_KINDS + AVGPR] - AVGPRsAsVGPRs
+ : 0;
+
+ return std::max(Value[TOTAL_KINDS + VGPR] + AVGPRsAsVGPRs,
+ Value[TOTAL_KINDS + AGPR] + AVGPRsAsAGPRs);
}
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
unsigned getOccupancy(const GCNSubtarget &ST,
unsigned DynamicVGPRBlockSize) const {
- return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
- ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
- DynamicVGPRBlockSize));
+ return std::min(
+ ST.getOccupancyWithNumSGPRs(getSGPRNum()),
+ ST.getOccupancyWithNumVGPRs(
+ getVGPRNum(ST.hasGFX90AInsts(), ST.getAddressableNumArchVGPRs()),
+ DynamicVGPRBlockSize));
}
void inc(unsigned Reg,
@@ -151,7 +178,7 @@ struct GCNRegPressure {
friend GCNRegPressure max(const GCNRegPressure &P1,
const GCNRegPressure &P2);
- friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST,
+ friend Printable print(const GCNRegPressure &RP,
unsigned DynamicVGPRBlockSize);
};
@@ -220,16 +247,19 @@ class GCNRPTarget {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
- << " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
- << " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
- << " AGPRs";
+ << " SGPRs, " << Target.RP.getArchVGPRNum(Target.AddressableNumArchVGPRs)
+ << '/' << Target.MaxVGPRs << " ArchVGPRs, "
+ << Target.RP.getAGPRNum(Target.AddressableNumArchVGPRs) << '/'
+ << Target.MaxVGPRs << " AGPRs";
if (Target.MaxUnifiedVGPRs) {
- OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
- << " VGPRs (unified)";
+ OS << ", " << Target.RP.getVGPRNum(true, Target.AddressableNumArchVGPRs)
+ << '/' << Target.MaxUnifiedVGPRs << " VGPRs (unified)";
} else if (Target.CombineVGPRSavings) {
- OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
- << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
+ OS << ", "
+ << Target.RP.getArchVGPRNum(Target.AddressableNumArchVGPRs) +
+ Target.RP.getAGPRNum(Target.AddressableNumArchVGPRs)
+ << '/' << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
}
return OS;
}
@@ -238,7 +268,6 @@ class GCNRPTarget {
private:
/// Current register pressure.
GCNRegPressure RP;
-
/// Target number of SGPRs.
unsigned MaxSGPRs;
/// Target number of ArchVGPRs and AGPRs.
@@ -246,6 +275,8 @@ class GCNRPTarget {
/// Target number of overall VGPRs for subtargets with unified RFs. Always 0
/// for subtargets with non-unified RFs.
unsigned MaxUnifiedVGPRs;
+ /// The maximum number of arch vgprs allowed by the subtarget.
+ unsigned AddressableNumArchVGPRs;
/// Whether we consider that the register allocator will be able to swap
/// between ArchVGPRs and AGPRs by copying them to a super register class.
/// Concretely, this allows savings in one of the VGPR banks to help toward
@@ -254,12 +285,15 @@ class GCNRPTarget {
inline bool satisifiesVGPRBanksTarget() const {
assert(CombineVGPRSavings && "only makes sense with combined savings");
- return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
+ return RP.getArchVGPRNum(AddressableNumArchVGPRs) +
+ RP.getAGPRNum(AddressableNumArchVGPRs) <=
+ 2 * MaxVGPRs;
}
/// Always satisified when the subtarget doesn't have a unified RF.
inline bool satisfiesUnifiedTarget() const {
- return !MaxUnifiedVGPRs || RP.getVGPRNum(true) <= MaxUnifiedVGPRs;
+ return !MaxUnifiedVGPRs ||
+ RP.getVGPRNum(true, AddressableNumArchVGPRs) <= MaxUnifiedVGPRs;
}
inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ce1ce687d0038..772c979809b75 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -190,10 +190,13 @@ static void getRegisterPressures(
TempUpwardTracker.recede(*MI);
NewPressure = TempUpwardTracker.getPressure();
}
+ unsigned AddressableArchVGPR =
+ DAG->MF.getSubtarget<GCNSubtarget>().getAddressableNumArchVGPRs();
Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
- NewPressure.getArchVGPRNum();
- Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
+ NewPressure.getArchVGPRNum(AddressableArchVGPR);
+ Pressure[AMDGPU::RegisterPressureSets::AGPR_32] =
+ NewPressure.getAGPRNum(AddressableArchVGPR);
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
@@ -339,7 +342,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
? static_cast<GCNRPTracker *>(&UpwardTracker)
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
- VGPRPressure = T->getPressure().getArchVGPRNum();
+ VGPRPressure = T->getPressure().getArchVGPRNum(
+ DAG->MF.getSubtarget<GCNSubtarget>().getAddressableNumArchVGPRs());
}
}
ReadyQueue &Q = Zone.Available;
@@ -1279,9 +1283,10 @@ void GCNSchedStage::checkScheduling() {
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
-
+ unsigned AddressableArchVGPR = ST.getAddressableNumArchVGPRs();
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
- PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
+ PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), AddressableArchVGPR) <=
+ S.VGPRCriticalLimit) {
DAG.Pressure[RegionIdx] = PressureAfter;
DAG.RegionsWithMinOcc[RegionIdx] =
PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) ==
@@ -1331,9 +1336,10 @@ void GCNSchedStage::checkScheduling() {
unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
- if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
- PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
- PressureAfter.getAGPRNum() > MaxArchV...
[truncated]
|
Added support for gfx908 -- based on AllocationOrder in RA, it gfx908 will continue to allocate AVReg -> VGPR until it hits the addressable limit unless waves-per-eu is set. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a specific rationale to count extra AVGPRs beyond twice the addressable limit as AGPRs instead of VGPRs? Would doing the opposite make any difference?
No rationale. I don't see a difference between doing the opposite. Basically I chose this implementation because it was less detailed if we just always add to AGPR. |
In the latest I replace |
Change-Id: I6f129c2723b52a391a96178e390f60535164ac9b
Change-Id: Ic16c8a4ffdf58027de164c598cfac70fc453bb00
Change-Id: I0b74f6ee1d93bd5e6fc3e285c0c6e91a8090d28e
Change-Id: Ia3b8507f95763079ee3c2224655990a299c8854d
Change-Id: I14486056bef5e9a97842be68a7f5abe82ecc37fe
Change-Id: I36e92840e35774cb419389ee6dadc26dd376ebaa
Change-Id: I17c9239229b94c42c35b5683d77f8dfe3f70bafc
force-push rebase for #150889 |
Change-Id: I992cdc7ab89d244eaed82d4e671238878376c8d2
Change-Id: I15cd9b4e9e38d7000a403bed56918819ae858658
Change-Id: I172e8c013f41daea997266dea9c20335c75c9b83
/// Target number of SGPRs. | ||
unsigned MaxSGPRs; | ||
/// Target number of ArchVGPRs and AGPRs. | ||
unsigned MaxVGPRs; | ||
/// Target number of overall VGPRs for subtargets with unified RFs. Always 0 | ||
/// for subtargets with non-unified RFs. | ||
unsigned MaxUnifiedVGPRs; | ||
/// The maximum number of arch vgprs allowed by the subtarget. | ||
unsigned AddressableNumArchVGPRs; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs to be initialized in the constructors. Let me know if I can help with this, since I designed this class.
@@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg, | |||
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O, | |||
unsigned MaxOccupancy) const { | |||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | |||
unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to make this a class member to avoid passing it every time we call getVGPRNum
/getAGPRNum
? It seems to always have the value ST.getMaxNumVectorRegs(MF.getFunction()).first
which should not change throughout the MF
's lifetime.
# CHECK: avgpr_rp_occ1:%bb.0 | ||
# CHECK: Pressure before scheduling: | ||
# CHECK-NEXT: Region live-ins: | ||
# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't maximum occupancy 10 on gfx950? Why does this report 8 for VGPRs/AGPRs (here and in other tests)?
# CHECK: Pressure before scheduling: | ||
# CHECK-NEXT: Region live-ins: | ||
# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8 | ||
# CHECK-NEXT: Region register pressure: VGPRs: 128 AGPRs: 64(O2), SGPRs: 0(O10), LVGPR WT: 128, LSGPR WT: 0 -> Occ: 2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This applies to other tests below, but I expected the limit for VGPRs to be 256 here (generally,
This is expected to change after #151063 right?
After #146606 we will only assign %av registers to $agpr after we run out of $vgpr. This teaches our scheduler about this property about the %av registers.
Still need to the non-unified case, and add some tests.