Skip to content

Commit 80f3b37

Browse files
authored
[AMDGPU][GlobalISel] Combine for breaking s64 and/or into two s32 insts (#151731)
When either operand is a constant whose high or low 32-bit half is all ones, splitting the 64-bit and/or into two 32-bit operations opens up opportunities for other combines: one of the two new instructions will either be removed or become a simple copy.
1 parent 2cfba96 commit 80f3b37

12 files changed

+429
-79
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,25 @@ def zext_of_shift_amount_combines : GICombineGroup<[
151151
canonicalize_zext_lshr, canonicalize_zext_ashr, canonicalize_zext_shl
152152
]>;
153153

154+
// (and/or i64:x, i64:y) -> i64:(merge (and/or lo_32(x), lo_32(y)), (and/or hi_32(x), hi_32(y)))
155+
// when either x or y is a constant accepted by
// Helper.matchConstantIs32BitMask, i.e. a mask whose low or high 32 bits are
// all ones. `opcode` is the 64-bit binary operation being split; the same
// opcode is reused for both 32-bit halves.
156+
class combine_binop_s64_with_s32_mask<Instruction opcode> : GICombineRule<
157+
(defs root:$dst),
158+
// Match: a 64-bit `opcode` where at least one operand passes the mask check.
(match (opcode $dst, i64:$x, i64:$y):$dst,
159+
[{ return Helper.matchConstantIs32BitMask(${x}.getReg()) ||
160+
Helper.matchConstantIs32BitMask(${y}.getReg()); }]),
161+
// Apply: unmerge both operands into 32-bit lo/hi values, run `opcode` on the
// lo pair and on the hi pair, then merge the two 32-bit results into $dst.
(apply (G_UNMERGE_VALUES i32:$x_lo, i32:$x_hi, $x),
162+
(G_UNMERGE_VALUES i32:$y_lo, i32:$y_hi, $y),
163+
(opcode i32:$lo, $x_lo, $y_lo),
164+
(opcode i32:$hi, $x_hi, $y_hi),
165+
(G_MERGE_VALUES $dst, $lo, $hi))>;
166+
167+
// Instantiate the s64 -> 2 x s32 splitting rule for G_OR and for G_AND.
def combine_or_s64_with_s32_mask : combine_binop_s64_with_s32_mask<G_OR>;
168+
def combine_and_s64_with_s32_mask : combine_binop_s64_with_s32_mask<G_AND>;
169+
// Group containing both instantiations so that combiners can list them as a
// single entry.
def binop_s64_with_s32_mask_combines : GICombineGroup<[
170+
combine_or_s64_with_s32_mask, combine_and_s64_with_s32_mask
171+
]>;
172+
154173
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
155174
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
156175
// saves one instruction compared to the promotion.
@@ -180,15 +199,17 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;
180199
def AMDGPUPreLegalizerCombiner: GICombiner<
181200
"AMDGPUPreLegalizerCombinerImpl",
182201
[all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16,
183-
foldable_fneg, combine_shuffle_vector_to_build_vector]> {
202+
foldable_fneg, combine_shuffle_vector_to_build_vector,
203+
binop_s64_with_s32_mask_combines]> {
184204
let CombineAllMethodName = "tryCombineAllImpl";
185205
}
186206

187207
def AMDGPUPostLegalizerCombiner: GICombiner<
188208
"AMDGPUPostLegalizerCombinerImpl",
189209
[all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
190210
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
191-
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
211+
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64,
212+
binop_s64_with_s32_mask_combines]> {
192213
let CombineAllMethodName = "tryCombineAllImpl";
193214
}
194215

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,3 +516,18 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
516516

517517
return true;
518518
}
519+
520+
/// Match helper: decide whether \p Reg holds a constant that is a 64-bit mask
/// with an all-ones 32-bit half.
///
/// Returns false when no constant value is found for \p Reg or when the value
/// is rejected by isShiftedMask_64. Otherwise returns true only if the mask is
/// at least 32 bits long and either starts at bit 0 or ends at bit 63.
///
/// \param Reg register whose defining constant is inspected.
bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
521+
// Presumably also finds constants reached through copy-like instructions
// (per the helper's name) -- confirm against GlobalISel/Utils.h.
auto Res = getIConstantVRegValWithLookThrough(Reg, MRI);
522+
if (!Res)
523+
return false;
524+
525+
// NOTE(review): getZExtValue presumably expects the value to fit in 64 bits;
// the TableGen rule that calls this helper only matches i64 operands.
const uint64_t Val = Res->Value.getZExtValue();
526+
unsigned MaskIdx = 0;
527+
unsigned MaskLen = 0;
528+
// Presumably true only for one contiguous run of set bits, reporting the
// run's lowest bit index in MaskIdx and its length in MaskLen -- confirm
// against llvm/Support/MathExtras.h.
if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
529+
return false;
530+
531+
// Check if low 32 bits or high 32 bits are all ones.
// MaskIdx == 0: the run starts at bit 0, so with MaskLen >= 32 it covers
// the whole low half. MaskIdx == 64 - MaskLen: the run ends at bit 63, so
// with MaskLen >= 32 it covers the whole high half.
532+
return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
533+
}

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ class AMDGPUCombinerHelper : public CombinerHelper {
4343
bool matchCombineFmulWithSelectToFldexp(
4444
MachineInstr &MI, MachineInstr &Sel,
4545
std::function<void(MachineIRBuilder &)> &MatchInfo) const;
46+
47+
/// Return true if \p Reg is defined by a constant that is a contiguous mask
/// of at least 32 set bits anchored at bit 0 or at bit 63, i.e. whose low or
/// high 32 bits are all ones.
bool matchConstantIs32BitMask(Register Reg) const;
4648
};
4749

4850
} // namespace llvm
Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck %s
3+
4+
---
5+
name: test_and_mask_hi_rhs
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0_sgpr1, $sgpr2
10+
; CHECK-LABEL: name: test_and_mask_hi_rhs
11+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
14+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
15+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
16+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV1]](s32)
17+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
18+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
19+
%0:_(s64) = COPY $sgpr0_sgpr1
20+
%1:_(s64) = G_CONSTANT i64 -4294967296
21+
%2:_(s64) = G_AND %0, %1
22+
$sgpr0_sgpr1 = COPY %2(s64)
23+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
24+
...
25+
---
26+
name: test_and_mask_hi_lhs
27+
tracksRegLiveness: true
28+
body: |
29+
bb.0:
30+
liveins: $sgpr0_sgpr1, $sgpr2
31+
; CHECK-LABEL: name: test_and_mask_hi_lhs
32+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
35+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
36+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
37+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV1]](s32)
38+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
39+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
40+
%0:_(s64) = COPY $sgpr0_sgpr1
41+
%1:_(s64) = G_CONSTANT i64 -4294967296
42+
%2:_(s64) = G_AND %1, %0
43+
$sgpr0_sgpr1 = COPY %2(s64)
44+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
45+
...
46+
---
47+
name: test_and_mask_hi_48bit_mask_rhs
48+
tracksRegLiveness: true
49+
body: |
50+
bb.0:
51+
liveins: $sgpr0_sgpr1, $sgpr2
52+
; CHECK-LABEL: name: test_and_mask_hi_48bit_mask_rhs
53+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
54+
; CHECK-NEXT: {{ $}}
55+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
56+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
57+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
58+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
59+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[UV1]](s32)
60+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
61+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
62+
%0:_(s64) = COPY $sgpr0_sgpr1
63+
%1:_(s64) = G_CONSTANT i64 -65536
64+
%2:_(s64) = G_AND %0, %1
65+
$sgpr0_sgpr1 = COPY %2(s64)
66+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
67+
...
68+
---
69+
name: test_and_mask_hi_16bit_mask_rhs
70+
tracksRegLiveness: true
71+
body: |
72+
bb.0:
73+
liveins: $sgpr0_sgpr1, $sgpr2
74+
; CHECK-LABEL: name: test_and_mask_hi_16bit_mask_rhs
75+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
78+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -281474976710656
79+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
80+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[AND]](s64)
81+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
82+
%0:_(s64) = COPY $sgpr0_sgpr1
83+
%1:_(s64) = G_CONSTANT i64 -281474976710656
84+
%2:_(s64) = G_AND %0, %1
85+
$sgpr0_sgpr1 = COPY %2(s64)
86+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
87+
...
88+
---
89+
name: test_and_mask_lo_rhs
90+
tracksRegLiveness: true
91+
body: |
92+
bb.0:
93+
liveins: $sgpr0_sgpr1, $sgpr2
94+
; CHECK-LABEL: name: test_and_mask_lo_rhs
95+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
96+
; CHECK-NEXT: {{ $}}
97+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
98+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
99+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
100+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[ZEXT]](s64)
101+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
102+
%0:_(s64) = COPY $sgpr0_sgpr1
103+
%1:_(s64) = G_CONSTANT i64 4294967295
104+
%2:_(s64) = G_AND %0, %1
105+
$sgpr0_sgpr1 = COPY %2(s64)
106+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
107+
...
108+
---
109+
name: test_and_mask_lo_lhs
110+
tracksRegLiveness: true
111+
body: |
112+
bb.0:
113+
liveins: $sgpr0_sgpr1, $sgpr2
114+
; CHECK-LABEL: name: test_and_mask_lo_lhs
115+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
116+
; CHECK-NEXT: {{ $}}
117+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
118+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
119+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
120+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[ZEXT]](s64)
121+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
122+
%0:_(s64) = COPY $sgpr0_sgpr1
123+
%1:_(s64) = G_CONSTANT i64 4294967295
124+
%2:_(s64) = G_AND %1, %0
125+
$sgpr0_sgpr1 = COPY %2(s64)
126+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
127+
...
128+
---
129+
name: test_and_mask_lo_36bit_mask_rhs
130+
tracksRegLiveness: true
131+
body: |
132+
bb.0:
133+
liveins: $sgpr0_sgpr1, $sgpr2
134+
; CHECK-LABEL: name: test_and_mask_lo_36bit_mask_rhs
135+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
136+
; CHECK-NEXT: {{ $}}
137+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
138+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
139+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
140+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
141+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[AND]](s32)
142+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
143+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
144+
%0:_(s64) = COPY $sgpr0_sgpr1
145+
%1:_(s64) = G_CONSTANT i64 68719476735
146+
%2:_(s64) = G_AND %0, %1
147+
$sgpr0_sgpr1 = COPY %2(s64)
148+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
149+
...
150+
---
151+
name: test_and_mask_hi_with_merge_unmerge
152+
tracksRegLiveness: true
153+
body: |
154+
bb.0:
155+
liveins: $sgpr0, $sgpr1, $sgpr2
156+
; CHECK-LABEL: name: test_and_mask_hi_with_merge_unmerge
157+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
158+
; CHECK-NEXT: {{ $}}
159+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr1
160+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
161+
; CHECK-NEXT: $sgpr0 = COPY [[C]](s32)
162+
; CHECK-NEXT: $sgpr1 = COPY [[COPY]](s32)
163+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
164+
%0:_(s32) = COPY $sgpr0
165+
%1:_(s32) = COPY $sgpr1
166+
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
167+
%3:_(s64) = G_CONSTANT i64 -4294967296
168+
%4:_(s64) = G_AND %2, %3
169+
%5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4(s64)
170+
$sgpr0 = COPY %5(s32)
171+
$sgpr1 = COPY %6(s32)
172+
SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
173+
...
174+
---
175+
name: negative_and_test_incorrect_types
176+
tracksRegLiveness: true
177+
body: |
178+
bb.0:
179+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
180+
181+
; CHECK-LABEL: name: negative_and_test_incorrect_types
182+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
183+
; CHECK-NEXT: {{ $}}
184+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
185+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -4294967296
186+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s128) = G_AND [[COPY]], [[C]]
187+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[AND]](s128)
188+
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
189+
%1:_(s64) = COPY $vgpr4_vgpr5
190+
%2:_(s128) = G_CONSTANT i128 -4294967296
191+
%3:_(s128) = G_AND %0, %2
192+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
193+
...
194+
---
195+
name: test_or_mask_hi_rhs
196+
tracksRegLiveness: true
197+
body: |
198+
bb.0:
199+
liveins: $sgpr0_sgpr1, $sgpr2
200+
; CHECK-LABEL: name: test_or_mask_hi_rhs
201+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
202+
; CHECK-NEXT: {{ $}}
203+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
204+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
205+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
206+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s32), [[C]](s32)
207+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
208+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
209+
%0:_(s64) = COPY $sgpr0_sgpr1
210+
%1:_(s64) = G_CONSTANT i64 -4294967296
211+
%2:_(s64) = G_OR %0, %1
212+
$sgpr0_sgpr1 = COPY %2(s64)
213+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
214+
...
215+
---
216+
name: test_or_mask_hi_lhs
217+
tracksRegLiveness: true
218+
body: |
219+
bb.0:
220+
liveins: $sgpr0_sgpr1, $sgpr2
221+
; CHECK-LABEL: name: test_or_mask_hi_lhs
222+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
223+
; CHECK-NEXT: {{ $}}
224+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
225+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
226+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
227+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s32), [[C]](s32)
228+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
229+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
230+
%0:_(s64) = COPY $sgpr0_sgpr1
231+
%1:_(s64) = G_CONSTANT i64 -4294967296
232+
%2:_(s64) = G_OR %1, %0
233+
$sgpr0_sgpr1 = COPY %2(s64)
234+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
235+
...
236+
---
237+
name: test_or_mask_lo_rhs
238+
tracksRegLiveness: true
239+
body: |
240+
bb.0:
241+
liveins: $sgpr0_sgpr1, $sgpr2
242+
; CHECK-LABEL: name: test_or_mask_lo_rhs
243+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
244+
; CHECK-NEXT: {{ $}}
245+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
246+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
247+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
248+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV1]](s32)
249+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
250+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
251+
%0:_(s64) = COPY $sgpr0_sgpr1
252+
%1:_(s64) = G_CONSTANT i64 4294967295
253+
%2:_(s64) = G_OR %0, %1
254+
$sgpr0_sgpr1 = COPY %2(s64)
255+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
256+
...
257+
---
258+
name: test_or_mask_lo_lhs
259+
tracksRegLiveness: true
260+
body: |
261+
bb.0:
262+
liveins: $sgpr0_sgpr1, $sgpr2
263+
; CHECK-LABEL: name: test_or_mask_lo_lhs
264+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
265+
; CHECK-NEXT: {{ $}}
266+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
267+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
268+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
269+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV1]](s32)
270+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
271+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
272+
%0:_(s64) = COPY $sgpr0_sgpr1
273+
%1:_(s64) = G_CONSTANT i64 4294967295
274+
%2:_(s64) = G_OR %1, %0
275+
$sgpr0_sgpr1 = COPY %2(s64)
276+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
277+
...
278+
---
279+
name: test_or_mask_hi_with_merge_unmerge
280+
tracksRegLiveness: true
281+
body: |
282+
bb.0:
283+
liveins: $sgpr0, $sgpr1, $sgpr2
284+
; CHECK-LABEL: name: test_or_mask_hi_with_merge_unmerge
285+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
286+
; CHECK-NEXT: {{ $}}
287+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
288+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
289+
; CHECK-NEXT: $sgpr0 = COPY [[COPY]](s32)
290+
; CHECK-NEXT: $sgpr1 = COPY [[C]](s32)
291+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
292+
%0:_(s32) = COPY $sgpr0
293+
%1:_(s32) = COPY $sgpr1
294+
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
295+
%3:_(s64) = G_CONSTANT i64 -4294967296
296+
%4:_(s64) = G_OR %2, %3
297+
%5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4(s64)
298+
$sgpr0 = COPY %5(s32)
299+
$sgpr1 = COPY %6(s32)
300+
SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
301+
...
302+
---
303+
name: negative_or_test_incorrect_types
304+
tracksRegLiveness: true
305+
body: |
306+
bb.0:
307+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
308+
309+
; CHECK-LABEL: name: negative_or_test_incorrect_types
310+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
311+
; CHECK-NEXT: {{ $}}
312+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
313+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -4294967296
314+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s128) = G_OR [[COPY]], [[C]]
315+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[OR]](s128)
316+
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
317+
%1:_(s64) = COPY $vgpr4_vgpr5
318+
%2:_(s128) = G_CONSTANT i128 -4294967296
319+
%3:_(s128) = G_OR %0, %2
320+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
321+
...

0 commit comments

Comments
 (0)