Skip to content

Commit 4b74f7b

Browse files
committed
[AMDGPU][GISel] Use buildObjectPtrOffset instead of buildPtrAdd
This concerns offset computations for kernargs and RegBankLegalizeHelper::splitLoad, which should all be within the bounds of a memory object. See #150392 for the motivation for introducing the buildObjectPtrOffset function. For SWDEV-516125.
1 parent 1528ddb commit 4b74f7b

10 files changed

+184 additions, -179 deletions (lines changed)

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2295,8 +2295,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
22952295
LLT::scalar(32), commonAlignment(Align(64), Offset));
22962296

22972297
// Pointer address
2298-
B.buildPtrAdd(LoadAddr, KernargPtrReg,
2299-
B.buildConstant(LLT::scalar(64), Offset).getReg(0));
2298+
B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
2299+
B.buildConstant(LLT::scalar(64), Offset).getReg(0));
23002300
// Load address
23012301
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
23022302
}
@@ -2317,8 +2317,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
23172317
MachineMemOperand::MOInvariant,
23182318
LLT::scalar(32), commonAlignment(Align(64), StructOffset));
23192319

2320-
B.buildPtrAdd(LoadAddr, QueuePtr,
2321-
B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
2320+
B.buildObjectPtrOffset(
2321+
LoadAddr, QueuePtr,
2322+
B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
23222323
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
23232324
}
23242325

@@ -4500,8 +4501,7 @@ Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
45004501
llvm_unreachable("failed to find kernarg segment ptr");
45014502

45024503
auto COffset = B.buildConstant(LLT::scalar(64), Offset);
4503-
// TODO: Should get nuw
4504-
return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0);
4504+
return B.buildObjectPtrOffset(PtrTy, KernArgReg, COffset).getReg(0);
45054505
}
45064506

45074507
/// Legalize a value that's loaded from kernel arguments. This is only used by
@@ -5676,8 +5676,8 @@ bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
56765676
AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
56775677
return false;
56785678

5679-
// FIXME: This should be nuw
5680-
B.buildPtrAdd(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
5679+
B.buildObjectPtrOffset(DstReg, KernargPtrReg,
5680+
B.buildConstant(IdxTy, Offset).getReg(0));
56815681
return true;
56825682
}
56835683

@@ -7019,8 +7019,8 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
70197019
// Pointer address
70207020
Register LoadAddr = MRI.createGenericVirtualRegister(
70217021
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
7022-
B.buildPtrAdd(LoadAddr, KernargPtrReg,
7023-
B.buildConstant(LLT::scalar(64), Offset).getReg(0));
7022+
B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
7023+
B.buildConstant(LLT::scalar(64), Offset).getReg(0));
70247024
// Load address
70257025
Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0);
70267026
B.buildCopy(SGPR01, Temp);

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -294,7 +294,8 @@ void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
294294
BasePlusOffset = Base;
295295
} else {
296296
auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
297-
BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
297+
BasePlusOffset =
298+
B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0);
298299
}
299300
auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
300301
auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
2424
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
2525
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
2626
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
27-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
27+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64)
2828
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
2929
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
3030
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
@@ -65,7 +65,7 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
6565
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
6666
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
6767
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
68-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
68+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64)
6969
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
7070
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
7171
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
@@ -105,7 +105,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
105105
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
106106
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
107107
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
108-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
108+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY4]], [[C]](s64)
109109
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
110110
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
111111
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
3131
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
3232
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
3333
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
34-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
34+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
3535
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
3636
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
3737
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
8484
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
8585
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
8686
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
87-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
87+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
8888
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
8989
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
9090
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -230,7 +230,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
230230
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
231231
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
232232
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
233-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
233+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
234234
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
235235
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
236236
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -319,7 +319,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
319319
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
320320
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
321321
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
322-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
322+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
323323
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
324324
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
325325
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -668,7 +668,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
668668
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
669669
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
670670
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
671-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
671+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
672672
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
673673
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
674674
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -710,7 +710,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
710710
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
711711
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
712712
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
713-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
713+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
714714
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
715715
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
716716
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -756,7 +756,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
756756
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
757757
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
758758
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
759-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
759+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
760760
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
761761
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
762762
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -802,7 +802,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
802802
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
803803
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
804804
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
805-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
805+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
806806
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
807807
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
808808
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -852,7 +852,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
852852
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
853853
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
854854
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
855-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
855+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
856856
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
857857
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
858858
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -898,7 +898,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
898898
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
899899
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
900900
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
901-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
901+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
902902
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
903903
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
904904
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -949,7 +949,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
949949
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
950950
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
951951
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
952-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
952+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
953953
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
954954
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
955955
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -996,7 +996,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
996996
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
997997
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
998998
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
999-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
999+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
10001000
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
10011001
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
10021002
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1047,7 +1047,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
10471047
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
10481048
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
10491049
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1050-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
1050+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
10511051
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
10521052
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
10531053
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1098,7 +1098,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
10981098
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
10991099
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
11001100
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1101-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
1101+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
11021102
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
11031103
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
11041104
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1153,7 +1153,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
11531153
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
11541154
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
11551155
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1156-
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
1156+
; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
11571157
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
11581158
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
11591159
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1200,7 +1200,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
12001200
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
12011201
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
12021202
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1203-
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
1203+
; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
12041204
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
12051205
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
12061206
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]

0 commit comments

Comments
 (0)