
Commit a5401d0

[AArch64] Enable preferZeroCompareBranch for AArch64 when we don't have fused cmp+br
We also cannot do this when speculative load hardening is enabled.
1 parent: f49c243
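For context, the rewrite this hook gates in CodeGenPrepare::optimizeBranch turns a range check into a compare of an already-computed shift result against zero, which AArch64 can branch on directly with cbz/cbnz instead of a separate cmp plus b.lo. Below is a minimal IR sketch of the idea, with a hypothetical function name and constants chosen for illustration (not one of the commit's tests); it relies on the fact that, for an unsigned %x, "%x ult 8" holds exactly when "lshr %x, 3" is zero:

; Illustrative only. Before the transform, the branch re-tests %x even though
; %sh already encodes the answer; with preferZeroCompareBranch returning true,
; the condition is rewritten to "icmp eq i32 %sh, 0".
define i32 @shift_guard(i32 %x) {
entry:
  %sh = lshr i32 %x, 3
  %c = icmp ult i32 %x, 8
  br i1 %c, label %exit, label %use

use:
  br label %exit

exit:
  %r = phi i32 [ 0, %entry ], [ %sh, %use ]
  ret i32 %r
}

Passing the BranchInst to the hook lets a target inspect the surrounding function before opting in, which the AArch64 override below uses to back off under speculative load hardening.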

8 files changed (+28, -21 lines)


llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 1 addition & 1 deletion
@@ -749,7 +749,7 @@ class LLVM_ABI TargetLoweringBase {
 
   /// Return true if the heuristic to prefer icmp eq zero should be used in code
   /// gen prepare.
-  virtual bool preferZeroCompareBranch() const { return false; }
+  virtual bool preferZeroCompareBranch(BranchInst *) const { return false; }
 
   /// Return true if it is cheaper to split the store of a merged int val
   /// from a pair of smaller values into multiple stores.

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 1 addition & 1 deletion
@@ -8630,7 +8630,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
   // br %c, bla, blb
   // Creating the cmp to zero can be better for the backend, especially if the
   // lshr produces flags that can be used automatically.
-  if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
+  if (!TLI.preferZeroCompareBranch(Branch) || !Branch->isConditional())
     return false;
 
   ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 0 deletions
@@ -28508,6 +28508,16 @@ Register AArch64TargetLowering::getExceptionSelectorRegister(
   return AArch64::X1;
 }
 
+bool AArch64TargetLowering::preferZeroCompareBranch(BranchInst *Branch) const {
+  // If we can use Armv9.6 CB instructions, prefer that over zero compare
+  // branches.
+
+  // If we have speculative load hardening enabled, we cannot use
+  // zero compare branches.
+  return !Subtarget->hasCMPBR() && !Branch->getFunction()->hasFnAttribute(
+                                       Attribute::SpeculativeLoadHardening);
+}
+
 bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
   // Only sink 'and' mask to cmp use block if it is masking a single bit, since
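As a concrete illustration of the second condition, this is the kind of function the override now declines: the speculative_load_hardening function attribute (spelled as below in LLVM IR) corresponds to Attribute::SpeculativeLoadHardening, so preferZeroCompareBranch returns false and CodeGenPrepare leaves the original icmp ult guard in place. The function itself is hypothetical, not taken from the commit:

; Illustrative only: same guard pattern as above, but hardened, so the
; zero-compare rewrite is skipped on AArch64 after this change.
define i32 @hardened_guard(i32 %x) speculative_load_hardening {
entry:
  %sh = lshr i32 %x, 3
  %c = icmp ult i32 %x, 8
  br i1 %c, label %exit, label %use

use:
  br label %exit

exit:
  %r = phi i32 [ 0, %entry ], [ %sh, %use ]
  ret i32 %r
}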

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
@@ -366,6 +366,8 @@ class AArch64TargetLowering : public TargetLowering {
     return true;
   }
 
+  bool preferZeroCompareBranch(BranchInst *) const override;
+
   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 
   bool hasAndNotCompare(SDValue V) const override {

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 1 addition & 1 deletion
@@ -605,7 +605,7 @@ class VectorType;
 
     Sched::Preference getSchedulingPreference(SDNode *N) const override;
 
-    bool preferZeroCompareBranch() const override { return true; }
+    bool preferZeroCompareBranch(BranchInst *) const override { return true; }
 
     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 1 deletion
@@ -225,7 +225,7 @@ class RISCVTargetLowering : public TargetLowering {
 
   unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;
 
-  bool preferZeroCompareBranch() const override { return true; }
+  bool preferZeroCompareBranch(BranchInst *) const override { return true; }
 
   // Note that one specific case requires fence insertion for an
   // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 1 deletion
@@ -471,7 +471,7 @@ class SystemZTargetLowering : public TargetLowering {
   }
   bool isCheapToSpeculateCtlz(Type *) const override { return true; }
   bool isCheapToSpeculateCttz(Type *) const override { return true; }
-  bool preferZeroCompareBranch() const override { return true; }
+  bool preferZeroCompareBranch(BranchInst *) const override { return true; }
   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
     ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
     return Mask && Mask->getValue().isIntN(16);

llvm/test/CodeGen/AArch64/branch-on-zero.ll

Lines changed: 11 additions & 16 deletions
@@ -59,37 +59,33 @@ while.end: ; preds = %while.body, %entry
 define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-SD-LABEL: test_lshr2:
 ; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmp w2, #4
-; CHECK-SD-NEXT: b.lo .LBB1_3
-; CHECK-SD-NEXT: // %bb.1: // %while.body.preheader
 ; CHECK-SD-NEXT: lsr w8, w2, #2
-; CHECK-SD-NEXT: .LBB1_2: // %while.body
+; CHECK-SD-NEXT: cbz w8, .LBB1_2
+; CHECK-SD-NEXT: .LBB1_1: // %while.body
 ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT: ldr w9, [x1], #4
 ; CHECK-SD-NEXT: subs w8, w8, #1
 ; CHECK-SD-NEXT: lsl w9, w9, #1
 ; CHECK-SD-NEXT: str w9, [x0], #4
-; CHECK-SD-NEXT: b.ne .LBB1_2
-; CHECK-SD-NEXT: .LBB1_3: // %while.end
+; CHECK-SD-NEXT: b.ne .LBB1_1
+; CHECK-SD-NEXT: .LBB1_2: // %while.end
 ; CHECK-SD-NEXT: mov w0, wzr
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: test_lshr2:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: cmp w2, #4
-; CHECK-GI-NEXT: b.lo .LBB1_3
-; CHECK-GI-NEXT: // %bb.1: // %while.body.preheader
 ; CHECK-GI-NEXT: lsr w8, w2, #2
-; CHECK-GI-NEXT: .LBB1_2: // %while.body
+; CHECK-GI-NEXT: cbz w8, .LBB1_2
+; CHECK-GI-NEXT: .LBB1_1: // %while.body
 ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-GI-NEXT: ldr w9, [x1], #4
 ; CHECK-GI-NEXT: add x10, x0, #4
 ; CHECK-GI-NEXT: subs w8, w8, #1
 ; CHECK-GI-NEXT: lsl w9, w9, #1
 ; CHECK-GI-NEXT: str w9, [x0]
 ; CHECK-GI-NEXT: mov x0, x10
-; CHECK-GI-NEXT: b.ne .LBB1_2
-; CHECK-GI-NEXT: .LBB1_3: // %while.end
+; CHECK-GI-NEXT: b.ne .LBB1_1
+; CHECK-GI-NEXT: .LBB1_2: // %while.end
 ; CHECK-GI-NEXT: mov w0, wzr
 ; CHECK-GI-NEXT: ret
 entry:

@@ -126,11 +122,10 @@ define i32 @lshr(i32 %u) {
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: cmp w0, #16
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: b.lo .LBB2_2
+; CHECK-NEXT: lsr w0, w0, #4
+; CHECK-NEXT: mov w8, w19
+; CHECK-NEXT: cbz w0, .LBB2_2
 ; CHECK-NEXT: // %bb.1: // %if.then
-; CHECK-NEXT: lsr w0, w19, #4
 ; CHECK-NEXT: bl use
 ; CHECK-NEXT: add w8, w19, w19, lsl #1
 ; CHECK-NEXT: .LBB2_2: // %if.end
