-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AMDGPU] Check noalias.addrspace in mayAccessScratchThroughFlat #151319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Pierre van Houtryve (Pierre-vh) ChangesPR #149247 made the MD accessible by the backend so we can now leverage it Full diff: https://github.com/llvm/llvm-project/pull/151319.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c2da937552240..509bb2cb1428c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4249,6 +4249,24 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
}
+static bool hasNoAliasAddrSpaceScratch(const MachineMemOperand *MemOp) {
+ const MDNode *MD = MemOp->getAAInfo().NoAliasAddrSpace;
+ if (!MD)
+ return false;
+
+ // This MD is structured in ranges [A, B)
+ // Check if PRIVATE is included in any of them.
+ for (unsigned I = 0, E = MD->getNumOperands() / 2; I != E; ++I) {
+ auto *Low = mdconst::extract<ConstantInt>(MD->getOperand(2 * I + 0));
+ auto *High = mdconst::extract<ConstantInt>(MD->getOperand(2 * I + 1));
+ if (Low->getValue().ule(AMDGPUAS::PRIVATE_ADDRESS) &&
+ High->getValue().ugt(AMDGPUAS::PRIVATE_ADDRESS))
+ return true;
+ }
+
+ return false;
+}
+
bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
if (!isFLAT(MI) || isFLATGlobal(MI))
return false;
@@ -4271,7 +4289,9 @@ bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
// See if any memory operand specifies an address space that involves scratch.
return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
unsigned AS = Memop->getAddrSpace();
- return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
+ return !hasNoAliasAddrSpaceScratch(Memop);
+ return AS == AMDGPUAS::PRIVATE_ADDRESS;
});
}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll b/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
index d1e82a06077f5..99025f0a983c0 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
@@ -39,20 +39,19 @@ define void @test_flat_store_no_scratch_alloc(ptr %ptr, i32 %val) #0 {
ret void
}
-; TODO: handle
define void @test_flat_store_noalias_addrspace(ptr %ptr, i32 %val) {
; GCN-LABEL: test_flat_store_noalias_addrspace:
; GCN: ; %bb.0:
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
+; GCN-NEXT: flat_store_b32 v[0:1], v2
; GCN-NEXT: s_wait_dscnt 0x0
; GCN-NEXT: s_set_pc_i64 s[30:31]
store i32 %val, ptr %ptr, !noalias.addrspace !{i32 5, i32 6}
ret void
}
-; TODO: would be nice to handle too
+; TODO: would be nice to handle
define void @test_flat_store_select(ptr addrspace(1) %a, ptr addrspace(3) %b, i1 %cond, i32 %val) {
; GCN-SDAG-LABEL: test_flat_store_select:
; GCN-SDAG: ; %bb.0:
|
for (unsigned I = 0, E = MD->getNumOperands() / 2; I != E; ++I) { | ||
auto *Low = mdconst::extract<ConstantInt>(MD->getOperand(2 * I + 0)); | ||
auto *High = mdconst::extract<ConstantInt>(MD->getOperand(2 * I + 1)); | ||
assert(Low->getValue().ult(High->getValue()) && "invalid range metadata!"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should already be checked by the IR verifier
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can't check it's actually dealing with a range metadata, and not something else. The asserts are to avoid a silent failure in case this is passed some other type of MD accidentally
if (!MD) | ||
return false; | ||
|
||
assert((MD->getNumOperands() % 2 == 0) && "invalid number of operands!"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should already be checked by the IR verifier
PR #149247 made the MD accessible by the backend so we can now leverage it in the memory model. The first use case here is detecting if a flat op can access scratch memory. Benefits both the MemoryLegalizer and InsertWaitCnt.
239b9c0
to
3fb4dc2
Compare
const MDNode *MD = I->getMetadata(LLVMContext::MD_noalias_addrspace); | ||
return !(MD && | ||
AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
const MDNode *MD = I->getMetadata(LLVMContext::MD_noalias_addrspace); | |
return !(MD && | |
AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS)); | |
const MDNode *MD = I->getMetadata(LLVMContext::MD_noalias_addrspace); | |
return !MD || !AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS)); |
return !(MD && AMDGPU::hasValueInRangeLikeMetadata( | ||
*MD, AMDGPUAS::PRIVATE_ADDRESS)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return !(MD && AMDGPU::hasValueInRangeLikeMetadata( | |
*MD, AMDGPUAS::PRIVATE_ADDRESS)); | |
return !MD || !AMDGPU::hasValueInRangeLikeMetadata( | |
*MD, AMDGPUAS::PRIVATE_ADDRESS)); |
PR #149247 made the MD accessible by the backend so we can now leverage it
in the memory model. The first use case here is detecting if a flat op
can access scratch memory.
Benefits both the MemoryLegalizer and InsertWaitCnt.