
Commit a5401d0

[AArch64] Enable preferZeroCompareBranch for AArch64 when we don't have fused cmp+br
We also cannot do this when speculative load hardening is enabled.
1 parent: f49c243
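For context, the rewrite this hook gates in CodeGenPrepare::optimizeBranch turns a range check into a compare of an already-computed shift result against zero, which AArch64 can branch on directly with cbz/cbnz instead of a separate cmp plus b.lo. Below is a minimal IR sketch of the idea, with a hypothetical function name and constants chosen for illustration (not one of the commit's tests); it relies on the fact that, for an unsigned %x, "%x ult 8" holds exactly when "lshr %x, 3" is zero:

; Illustrative only. Before the transform, the branch re-tests %x even though
; %sh already encodes the answer; with preferZeroCompareBranch returning true,
; the condition is rewritten to "icmp eq i32 %sh, 0".
define i32 @shift_guard(i32 %x) {
entry:
  %sh = lshr i32 %x, 3
  %c = icmp ult i32 %x, 8
  br i1 %c, label %exit, label %use

use:
  br label %exit

exit:
  %r = phi i32 [ 0, %entry ], [ %sh, %use ]
  ret i32 %r
}

Passing the BranchInst to the hook lets a target inspect the surrounding function before opting in, which the AArch64 override below uses to back off under speculative load hardening.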

8 files changed (+28, -21 lines)


llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 1 addition & 1 deletion
@@ -749,7 +749,7 @@ class LLVM_ABI TargetLoweringBase {
 
   /// Return true if the heuristic to prefer icmp eq zero should be used in code
   /// gen prepare.
-  virtual bool preferZeroCompareBranch() const { return false; }
+  virtual bool preferZeroCompareBranch(BranchInst *) const { return false; }
 
   /// Return true if it is cheaper to split the store of a merged int val
   /// from a pair of smaller values into multiple stores.

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 1 addition & 1 deletion
@@ -8630,7 +8630,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
   // br %c, bla, blb
   // Creating the cmp to zero can be better for the backend, especially if the
   // lshr produces flags that can be used automatically.
-  if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
+  if (!TLI.preferZeroCompareBranch(Branch) || !Branch->isConditional())
     return false;
 
   ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 0 deletions
@@ -28508,6 +28508,16 @@ Register AArch64TargetLowering::getExceptionSelectorRegister(
   return AArch64::X1;
 }
 
+bool AArch64TargetLowering::preferZeroCompareBranch(BranchInst *Branch) const {
+  // If we can use Armv9.6 CB instructions, prefer that over zero compare
+  // branches.
+
+  // If we have speculative load hardening enabled, we cannot use
+  // zero compare branches.
+  return !Subtarget->hasCMPBR() && !Branch->getFunction()->hasFnAttribute(
+                                       Attribute::SpeculativeLoadHardening);
+}
+
 bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
   // Only sink 'and' mask to cmp use block if it is masking a single bit, since
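As a concrete illustration of the second condition, this is the kind of function the override now declines: the speculative_load_hardening function attribute (spelled as below in LLVM IR) corresponds to Attribute::SpeculativeLoadHardening, so preferZeroCompareBranch returns false and CodeGenPrepare leaves the original icmp ult guard in place. The function itself is hypothetical, not taken from the commit:

; Illustrative only: same guard pattern as above, but hardened, so the
; zero-compare rewrite is skipped on AArch64 after this change.
define i32 @hardened_guard(i32 %x) speculative_load_hardening {
entry:
  %sh = lshr i32 %x, 3
  %c = icmp ult i32 %x, 8
  br i1 %c, label %exit, label %use

use:
  br label %exit

exit:
  %r = phi i32 [ 0, %entry ], [ %sh, %use ]
  ret i32 %r
}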

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
@@ -366,6 +366,8 @@ class AArch64TargetLowering : public TargetLowering {
     return true;
   }
 
+  bool preferZeroCompareBranch(BranchInst *) const override;
+
   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 
   bool hasAndNotCompare(SDValue V) const override {

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 1 addition & 1 deletion
@@ -605,7 +605,7 @@ class VectorType;
 
     Sched::Preference getSchedulingPreference(SDNode *N) const override;
 
-    bool preferZeroCompareBranch() const override { return true; }
+    bool preferZeroCompareBranch(BranchInst *) const override { return true; }
 
     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 1 deletion
@@ -225,7 +225,7 @@ class RISCVTargetLowering : public TargetLowering {
 
   unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;
 
-  bool preferZeroCompareBranch() const override { return true; }
+  bool preferZeroCompareBranch(BranchInst *) const override { return true; }
 
   // Note that one specific case requires fence insertion for an
   // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 1 deletion
@@ -471,7 +471,7 @@ class SystemZTargetLowering : public TargetLowering {
   }
   bool isCheapToSpeculateCtlz(Type *) const override { return true; }
   bool isCheapToSpeculateCttz(Type *) const override { return true; }
-  bool preferZeroCompareBranch() const override { return true; }
+  bool preferZeroCompareBranch(BranchInst *) const override { return true; }
   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
     ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
     return Mask && Mask->getValue().isIntN(16);

llvm/test/CodeGen/AArch64/branch-on-zero.ll

Lines changed: 11 additions & 16 deletions
@@ -59,37 +59,33 @@ while.end: ; preds = %while.body, %entry
 define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-SD-LABEL: test_lshr2:
 ; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmp w2, #4
-; CHECK-SD-NEXT: b.lo .LBB1_3
-; CHECK-SD-NEXT: // %bb.1: // %while.body.preheader
 ; CHECK-SD-NEXT: lsr w8, w2, #2
-; CHECK-SD-NEXT: .LBB1_2: // %while.body
+; CHECK-SD-NEXT: cbz w8, .LBB1_2
+; CHECK-SD-NEXT: .LBB1_1: // %while.body
 ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT: ldr w9, [x1], #4
 ; CHECK-SD-NEXT: subs w8, w8, #1
 ; CHECK-SD-NEXT: lsl w9, w9, #1
 ; CHECK-SD-NEXT: str w9, [x0], #4
-; CHECK-SD-NEXT: b.ne .LBB1_2
-; CHECK-SD-NEXT: .LBB1_3: // %while.end
+; CHECK-SD-NEXT: b.ne .LBB1_1
+; CHECK-SD-NEXT: .LBB1_2: // %while.end
 ; CHECK-SD-NEXT: mov w0, wzr
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: test_lshr2:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: cmp w2, #4
-; CHECK-GI-NEXT: b.lo .LBB1_3
-; CHECK-GI-NEXT: // %bb.1: // %while.body.preheader
 ; CHECK-GI-NEXT: lsr w8, w2, #2
-; CHECK-GI-NEXT: .LBB1_2: // %while.body
+; CHECK-GI-NEXT: cbz w8, .LBB1_2
+; CHECK-GI-NEXT: .LBB1_1: // %while.body
 ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-GI-NEXT: ldr w9, [x1], #4
 ; CHECK-GI-NEXT: add x10, x0, #4
 ; CHECK-GI-NEXT: subs w8, w8, #1
 ; CHECK-GI-NEXT: lsl w9, w9, #1
 ; CHECK-GI-NEXT: str w9, [x0]
 ; CHECK-GI-NEXT: mov x0, x10
-; CHECK-GI-NEXT: b.ne .LBB1_2
-; CHECK-GI-NEXT: .LBB1_3: // %while.end
+; CHECK-GI-NEXT: b.ne .LBB1_1
+; CHECK-GI-NEXT: .LBB1_2: // %while.end
 ; CHECK-GI-NEXT: mov w0, wzr
 ; CHECK-GI-NEXT: ret
 entry:

@@ -126,11 +122,10 @@ define i32 @lshr(i32 %u) {
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: cmp w0, #16
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: b.lo .LBB2_2
+; CHECK-NEXT: lsr w0, w0, #4
+; CHECK-NEXT: mov w8, w19
+; CHECK-NEXT: cbz w0, .LBB2_2
 ; CHECK-NEXT: // %bb.1: // %if.then
-; CHECK-NEXT: lsr w0, w19, #4
 ; CHECK-NEXT: bl use
 ; CHECK-NEXT: add w8, w19, w19, lsl #1
 ; CHECK-NEXT: .LBB2_2: // %if.end
