-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[X86] Use the standard cmp+cmov for select (X != 0), -1, Y if we will be setting sbb to 0 anyway #149672
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: AZero13 (AZero13) Changes: If we have to set sbb to 0, then we lose the point of it, and it is not like we do this for any other cmp, just with 0. Full diff: https://github.com/llvm/llvm-project/pull/149672.diff 6 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d91ea1ea1bb1b..f2d50c9db33f8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24917,6 +24917,31 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
(isAllOnesConstant(LHS) || isAllOnesConstant(RHS))) {
SDValue Y = isAllOnesConstant(RHS) ? LHS : RHS;
+
+ // If CMOV is available, use it instead. Only prefer CMOV when SBB
+ // dependency breaking is not available or when CMOV is likely to be more
+ // efficient
+ if (Subtarget.canUseCMOV() &&
+ (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ !Subtarget.hasSBBDepBreaking()) {
+ // Create comparison against zero to set EFLAGS
+ SDValue Zero = DAG.getConstant(0, DL, CmpVT);
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
+
+ // For CMOV: FalseVal is used when condition is false, TrueVal when
+ // condition is true We want: when X==0 return -1, when X!=0 return Y So
+ // condition should be (X == 0), TrueVal = -1, FalseVal = Y The SBB
+ // pattern implements: (CmpVal X86CC 0) ? LHS : RHS We need to implement
+ // exactly the same select operation with CMOV CMOV semantics: CMOV
+ // condition, TrueVal, FalseVal Returns TrueVal if condition is true,
+ // FalseVal if condition is false
+
+ return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
+ DAG.getTargetConstant(X86CC, DL, MVT::i8),
+ Cmp);
+ }
+
+ // Fall back to SBB pattern for older processors or unsupported types
SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
// 'X - 1' sets the carry flag if X == 0.
diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
index e5696ded4fbf1..dc98d338cc382 100644
--- a/llvm/test/CodeGen/X86/bmi-select-distrib.ll
+++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
@@ -128,30 +128,23 @@ define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind {
define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind {
; X86-LABEL: and_select_neg_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: negl %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movw $-1, %ax
+; X86-NEXT: cmovnew %dx, %ax
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_i16:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: negl %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movw $-1, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
@@ -200,22 +193,17 @@ define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_no_neg:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %eax, %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_no_neg:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = sub i32 %a1, 0
@@ -255,26 +243,19 @@ define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
; X86-LABEL: and_select_neg_different_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
; X86-NEXT: negl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %edx, %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_different_op:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
; X64-NEXT: negl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %edx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = sub i32 0, %a2
@@ -427,29 +408,22 @@ define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
; X86-LABEL: and_select_sub_1_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: leal -1(%edx), %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal -1(%ecx), %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movw $-1, %ax
+; X86-NEXT: cmovnew %dx, %ax
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movw $-1, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
@@ -492,27 +466,20 @@ define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_no_sub_1:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -2(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -2(%eax), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_no_sub_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -2(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -2
@@ -551,27 +518,20 @@ define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
; X86-LABEL: and_select_sub_1_different_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: decl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -1(%edx), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_different_op:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rdx), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a2, -1
@@ -809,27 +769,20 @@ define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: xor_select_sub_1_wrong_const:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -1(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: xorl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_wrong_const:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: xorl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index 981c47800c0f3..e609981c2e752 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -6,9 +6,10 @@ define void @test3(i32 %c, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sbbl %ecx, %ecx
-; CHECK-NEXT: kmovd %ecx, %k0
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl $-1, %edx
+; CHECK-NEXT: cmovnel %ecx, %edx
+; CHECK-NEXT: kmovd %edx, %k0
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
; CHECK-NEXT: kmovq %k0, (%eax)
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 34a92cb58692b..f53f362289c45 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -12,36 +12,32 @@ define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 n
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movl %r8d, %ebp
-; CHECK-NEXT: movl %ecx, %r14d
-; CHECK-NEXT: movl %edx, %r15d
-; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: movl %r8d, %ebx
+; CHECK-NEXT: movl %ecx, %ebp
+; CHECK-NEXT: movl %edx, %r14d
+; CHECK-NEXT: movq %rsi, %r15
; CHECK-NEXT: movq %rdi, %r12
; CHECK-NEXT: movq (%rsi), %rdi
; CHECK-NEXT: movq 8(%rsi), %rsi
-; CHECK-NEXT: movq %rbx, %rdx
+; CHECK-NEXT: movq %r15, %rdx
; CHECK-NEXT: callq foo1@PLT
-; CHECK-NEXT: movq 8(%rbx), %rax
+; CHECK-NEXT: testl %ebx, %ebx
+; CHECK-NEXT: movq 8(%r15), %rax
; CHECK-NEXT: movq (%rax), %rax
-; CHECK-NEXT: xorl %r10d, %r10d
-; CHECK-NEXT: movl %ebp, %ecx
-; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: movl $0, %r11d
-; CHECK-NEXT: sbbq %r11, %r11
-; CHECK-NEXT: orq %rax, %r11
-; CHECK-NEXT: cmpl $1, %ebp
-; CHECK-NEXT: sbbq %r10, %r10
-; CHECK-NEXT: orq %rax, %r10
+; CHECK-NEXT: movq $-1, %rcx
+; CHECK-NEXT: movq $-1, %r10
+; CHECK-NEXT: cmoveq %rax, %r10
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movq %r12, %rdi
-; CHECK-NEXT: movl %r15d, %esi
-; CHECK-NEXT: movl %r14d, %edx
+; CHECK-NEXT: movl %r14d, %esi
+; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: xorl %r9d, %r9d
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %r10
-; CHECK-NEXT: pushq %r11
-; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %r15
; CHECK-NEXT: callq foo2@PLT
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..f5ac941fda930 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -627,13 +627,21 @@ define void @test8(i1 %c, ptr %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwi
;; Test integer select between values and constants.
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpq $1, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test9:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmovneq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test9:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmovneq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test9:
; ATHLON: ## %bb.0:
@@ -667,13 +675,21 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;; Same as test9
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9a:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpq $1, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test9a:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmovneq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test9a:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmovneq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test9a:
; ATHLON: ## %bb.0:
@@ -779,13 +795,21 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: negq %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test11:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmoveq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test11:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmoveq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test11:
; ATHLON: ## %bb.0:
@@ -818,13 +842,21 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11a:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: negq %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test11a:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmoveq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test11a:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmoveq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test11a:
; ATHLON: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 8c7078c726328..6728d25abf1b6 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -441,33 +441,31 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %edi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: mull %edi
; X86-NEXT: addl %esi, %eax
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: negl %edi
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: cmovnel %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
|
✅ With the latest revision, this PR passed the C/C++ code formatter.
… be setting sbb to 0 anyway. If we have to set sbb to 0, then we lose the point of it, and it is not like we do this for any other cmp, just with 0.
What is the motivation for this patch? Do you have results showing a performance benefit, and for which targets, please?
This comes up a lot in ucmp, especially on targets where the sbb false dependency causes stalls. This is the best way to do it there.
If we have to set sbb, then we lose the point of it, and it is not like we do this for any other cmp, just with 0.