Skip to content

Commit 9fc7c6c

Browse files
authored
[AArch64] Allow splitting bitmasks for ANDS. (#149095)
This is already done for AND; we can reuse the existing infrastructure for ANDS so long as the second instruction of the pair is ANDS.
1 parent 579a807 commit 9fc7c6c

File tree

2 files changed

+123
-10
lines changed

2 files changed

+123
-10
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
//
99
// This pass performs the following peephole optimizations at the MIR level.
1010
//
11-
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12-
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
11+
// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
12+
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
1313
//
1414
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
1515
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
126126
bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
127127

128128
template <typename T>
129-
bool visitAND(unsigned Opc, MachineInstr &MI);
129+
bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0);
130130
bool visitORR(MachineInstr &MI);
131131
bool visitCSEL(MachineInstr &MI);
132132
bool visitINSERT(MachineInstr &MI);
@@ -194,12 +194,12 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
194194
}
195195

196196
template <typename T>
197-
bool AArch64MIPeepholeOpt::visitAND(
198-
unsigned Opc, MachineInstr &MI) {
197+
bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI,
198+
unsigned OtherOpc) {
199199
// Try the transformation below.
200200
//
201-
// MOVi32imm + ANDWrr ==> ANDWri + ANDWri
202-
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
201+
// MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
202+
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
203203
//
204204
// The mov pseudo instruction could be expanded to multiple mov instructions
205205
// later. Let's try to split the constant operand of mov instruction into two
@@ -208,10 +208,10 @@ bool AArch64MIPeepholeOpt::visitAND(
208208

209209
return splitTwoPartImm<T>(
210210
MI,
211-
[Opc](T Imm, unsigned RegSize, T &Imm0,
212-
T &Imm1) -> std::optional<OpcodePair> {
211+
[Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
212+
T &Imm1) -> std::optional<OpcodePair> {
213213
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
214-
return std::make_pair(Opc, Opc);
214+
return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
215215
return std::nullopt;
216216
},
217217
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
@@ -864,6 +864,12 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
864864
case AArch64::ANDXrr:
865865
Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
866866
break;
867+
case AArch64::ANDSWrr:
868+
Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);
869+
break;
870+
case AArch64::ANDSXrr:
871+
Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);
872+
break;
867873
case AArch64::ORRWrs:
868874
Changed |= visitORR(MI);
869875
break;

llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,110 @@ entry:
263263
%conv = zext i1 %cmp to i8
264264
ret i8 %conv
265265
}
266+
267+
; Test ANDS.
268+
define i32 @test1_ands(i32 %a) {
269+
; CHECK-LABEL: test1_ands:
270+
; CHECK: // %bb.0: // %entry
271+
; CHECK-NEXT: and w8, w0, #0x3ffc00
272+
; CHECK-NEXT: ands w8, w8, #0xffe007ff
273+
; CHECK-NEXT: csel w0, w0, w8, eq
274+
; CHECK-NEXT: ret
275+
entry:
276+
%ands = and i32 %a, 2098176
277+
%c = icmp eq i32 %ands, 0
278+
%r = select i1 %c, i32 %a, i32 %ands
279+
ret i32 %r
280+
}
281+
282+
; This constant should not be split because it can be handled by one mov.
283+
define i32 @test2_ands(i32 %a) {
284+
; CHECK-LABEL: test2_ands:
285+
; CHECK: // %bb.0: // %entry
286+
; CHECK-NEXT: mov w8, #135 // =0x87
287+
; CHECK-NEXT: ands w8, w0, w8
288+
; CHECK-NEXT: csel w0, w0, w8, eq
289+
; CHECK-NEXT: ret
290+
entry:
291+
%ands = and i32 %a, 135
292+
%c = icmp eq i32 %ands, 0
293+
%r = select i1 %c, i32 %a, i32 %ands
294+
ret i32 %r
295+
}
296+
297+
; This constant should not be split because the split immediate is not a valid
298+
; bitmask immediate.
299+
define i32 @test3_ands(i32 %a) {
300+
; CHECK-LABEL: test3_ands:
301+
; CHECK: // %bb.0: // %entry
302+
; CHECK-NEXT: mov w8, #1024 // =0x400
303+
; CHECK-NEXT: movk w8, #33, lsl #16
304+
; CHECK-NEXT: ands w8, w0, w8
305+
; CHECK-NEXT: csel w0, w0, w8, eq
306+
; CHECK-NEXT: ret
307+
entry:
308+
%ands = and i32 %a, 2163712
309+
%c = icmp eq i32 %ands, 0
310+
%r = select i1 %c, i32 %a, i32 %ands
311+
ret i32 %r
312+
}
313+
314+
define i64 @test4_ands(i64 %a) {
315+
; CHECK-LABEL: test4_ands:
316+
; CHECK: // %bb.0: // %entry
317+
; CHECK-NEXT: and x8, x0, #0x3ffc00
318+
; CHECK-NEXT: ands x8, x8, #0xffffffffffe007ff
319+
; CHECK-NEXT: csel x0, x0, x8, eq
320+
; CHECK-NEXT: ret
321+
entry:
322+
%ands = and i64 %a, 2098176
323+
%c = icmp eq i64 %ands, 0
324+
%r = select i1 %c, i64 %a, i64 %ands
325+
ret i64 %r
326+
}
327+
328+
define i64 @test5_ands(i64 %a) {
329+
; CHECK-LABEL: test5_ands:
330+
; CHECK: // %bb.0: // %entry
331+
; CHECK-NEXT: and x8, x0, #0x3ffffc000
332+
; CHECK-NEXT: ands x8, x8, #0xfffffffe00007fff
333+
; CHECK-NEXT: csel x0, x0, x8, eq
334+
; CHECK-NEXT: ret
335+
entry:
336+
%ands = and i64 %a, 8589950976
337+
%c = icmp eq i64 %ands, 0
338+
%r = select i1 %c, i64 %a, i64 %ands
339+
ret i64 %r
340+
}
341+
342+
; This constant should not be split because it can be handled by one mov.
343+
define i64 @test6_ands(i64 %a) {
344+
; CHECK-LABEL: test6_ands:
345+
; CHECK: // %bb.0: // %entry
346+
; CHECK-NEXT: mov w8, #135 // =0x87
347+
; CHECK-NEXT: ands x8, x0, x8
348+
; CHECK-NEXT: csel x0, x0, x8, eq
349+
; CHECK-NEXT: ret
350+
entry:
351+
%ands = and i64 %a, 135
352+
%c = icmp eq i64 %ands, 0
353+
%r = select i1 %c, i64 %a, i64 %ands
354+
ret i64 %r
355+
}
356+
357+
; This constant should not be split because the split immediate is not a valid
358+
; bitmask immediate.
359+
define i64 @test7_ands(i64 %a) {
360+
; CHECK-LABEL: test7_ands:
361+
; CHECK: // %bb.0: // %entry
362+
; CHECK-NEXT: mov w8, #1024 // =0x400
363+
; CHECK-NEXT: movk w8, #33, lsl #16
364+
; CHECK-NEXT: ands x8, x0, x8
365+
; CHECK-NEXT: csel x0, x0, x8, eq
366+
; CHECK-NEXT: ret
367+
entry:
368+
%ands = and i64 %a, 2163712
369+
%c = icmp eq i64 %ands, 0
370+
%r = select i1 %c, i64 %a, i64 %ands
371+
ret i64 %r
372+
}

0 commit comments

Comments
 (0)