Skip to content

[X86] Use the standard cmp+cmov for select (X != 0), -1, Y if we will be setting sbb to 0 anyway #149672

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from

Conversation

AZero13
Copy link
Contributor

@AZero13 AZero13 commented Jul 19, 2025

If we have to explicitly zero the register that SBB consumes anyway, we lose the benefit of the SBB idiom — and we do not use this pattern for any other comparison, only for comparisons against 0.

@llvmbot
Copy link
Member

llvmbot commented Jul 19, 2025

@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

If we have to explicitly zero the register that SBB consumes anyway, we lose the benefit of the SBB idiom — and we do not use this pattern for any other comparison, only for comparisons against 0.


Full diff: https://github.com/llvm/llvm-project/pull/149672.diff

6 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+25)
  • (modified) llvm/test/CodeGen/X86/bmi-select-distrib.ll (+57-104)
  • (modified) llvm/test/CodeGen/X86/pr35972.ll (+4-3)
  • (modified) llvm/test/CodeGen/X86/sbb-false-dep.ll (+15-19)
  • (modified) llvm/test/CodeGen/X86/select.ll (+60-28)
  • (modified) llvm/test/CodeGen/X86/umul_fix_sat.ll (+11-13)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d91ea1ea1bb1b..f2d50c9db33f8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24917,6 +24917,31 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
   if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
       (isAllOnesConstant(LHS) || isAllOnesConstant(RHS))) {
     SDValue Y = isAllOnesConstant(RHS) ? LHS : RHS;
+
+    // If CMOV is available, use it instead. Only prefer CMOV when SBB
+    // dependency breaking is not available or when CMOV is likely to be more
+    // efficient
+    if (Subtarget.canUseCMOV() &&
+        (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+        !Subtarget.hasSBBDepBreaking()) {
+            // Create comparison against zero to set EFLAGS  
+      SDValue Zero = DAG.getConstant(0, DL, CmpVT);
+      SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
+
+      // For CMOV: FalseVal is used when condition is false, TrueVal when
+      // condition is true We want: when X==0 return -1, when X!=0 return Y So
+      // condition should be (X == 0), TrueVal = -1, FalseVal = Y The SBB
+      // pattern implements: (CmpVal X86CC 0) ? LHS : RHS We need to implement
+      // exactly the same select operation with CMOV CMOV semantics: CMOV
+      // condition, TrueVal, FalseVal Returns TrueVal if condition is true,
+      // FalseVal if condition is false
+
+      return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
+                                                  DAG.getTargetConstant(X86CC, DL, MVT::i8), 
+                         Cmp);
+    }
+
+    // Fall back to SBB pattern for older processors or unsupported types
     SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
 
     // 'X - 1' sets the carry flag if X == 0.
diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
index e5696ded4fbf1..dc98d338cc382 100644
--- a/llvm/test/CodeGen/X86/bmi-select-distrib.ll
+++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
@@ -128,30 +128,23 @@ define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind {
 define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind {
 ; X86-LABEL: and_select_neg_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    negl %esi
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %eax, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    andl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movw $-1, %ax
+; X86-NEXT:    cmovnew %dx, %ax
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_neg_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    negl %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movw $-1, %ax
+; X64-NEXT:    cmovnew %cx, %ax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
@@ -200,22 +193,17 @@ define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
 define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind {
 ; X86-LABEL: and_select_no_neg:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %edx, %edx
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    andl %edx, %eax
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    cmovnel %eax, %ecx
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_no_neg:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %esi, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = sub i32 %a1, 0
@@ -255,26 +243,19 @@ define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
 define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
 ; X86-LABEL: and_select_neg_different_op:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
 ; X86-NEXT:    negl %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    cmovnel %edx, %ecx
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_neg_different_op:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    negl %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %edx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %edx, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = sub i32 0, %a2
@@ -427,29 +408,22 @@ define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
 define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
 ; X86-LABEL: and_select_sub_1_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    leal -1(%edx), %esi
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %eax, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    andl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal -1(%ecx), %edx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movw $-1, %ax
+; X86-NEXT:    cmovnew %dx, %ax
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_sub_1_i16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -1(%rsi), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movw $-1, %ax
+; X64-NEXT:    cmovnew %cx, %ax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
@@ -492,27 +466,20 @@ define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
 define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
 ; X86-LABEL: and_select_no_sub_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    leal -2(%eax), %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    leal -2(%eax), %ecx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    andl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_no_sub_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -2(%rsi), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = add i32 %a1, -2
@@ -551,27 +518,20 @@ define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
 define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
 ; X86-LABEL: and_select_sub_1_different_op:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    decl %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    leal -1(%edx), %ecx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    andl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_sub_1_different_op:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -1(%rdx), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = add i32 %a2, -1
@@ -809,27 +769,20 @@ define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
 define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
 ; X86-LABEL: xor_select_sub_1_wrong_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    leal -1(%eax), %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    xorl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    leal -1(%eax), %ecx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    xorl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: xor_select_sub_1_wrong_const:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -1(%rsi), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    xorl %esi, %eax
 ; X64-NEXT:    retq
   %sub = add i32 %a1, -1
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index 981c47800c0f3..e609981c2e752 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -6,9 +6,10 @@ define void @test3(i32 %c, ptr %ptr) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    sbbl %ecx, %ecx
-; CHECK-NEXT:    kmovd %ecx, %k0
+; CHECK-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl $-1, %edx
+; CHECK-NEXT:    cmovnel %ecx, %edx
+; CHECK-NEXT:    kmovd %edx, %k0
 ; CHECK-NEXT:    kunpckdq %k0, %k0, %k0
 ; CHECK-NEXT:    kmovq %k0, (%eax)
 ; CHECK-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 34a92cb58692b..f53f362289c45 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -12,36 +12,32 @@ define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 n
 ; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    pushq %r12
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movl %r8d, %ebp
-; CHECK-NEXT:    movl %ecx, %r14d
-; CHECK-NEXT:    movl %edx, %r15d
-; CHECK-NEXT:    movq %rsi, %rbx
+; CHECK-NEXT:    movl %r8d, %ebx
+; CHECK-NEXT:    movl %ecx, %ebp
+; CHECK-NEXT:    movl %edx, %r14d
+; CHECK-NEXT:    movq %rsi, %r15
 ; CHECK-NEXT:    movq %rdi, %r12
 ; CHECK-NEXT:    movq (%rsi), %rdi
 ; CHECK-NEXT:    movq 8(%rsi), %rsi
-; CHECK-NEXT:    movq %rbx, %rdx
+; CHECK-NEXT:    movq %r15, %rdx
 ; CHECK-NEXT:    callq foo1@PLT
-; CHECK-NEXT:    movq 8(%rbx), %rax
+; CHECK-NEXT:    testl %ebx, %ebx
+; CHECK-NEXT:    movq 8(%r15), %rax
 ; CHECK-NEXT:    movq (%rax), %rax
-; CHECK-NEXT:    xorl %r10d, %r10d
-; CHECK-NEXT:    movl %ebp, %ecx
-; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    movl $0, %r11d
-; CHECK-NEXT:    sbbq %r11, %r11
-; CHECK-NEXT:    orq %rax, %r11
-; CHECK-NEXT:    cmpl $1, %ebp
-; CHECK-NEXT:    sbbq %r10, %r10
-; CHECK-NEXT:    orq %rax, %r10
+; CHECK-NEXT:    movq $-1, %rcx
+; CHECK-NEXT:    movq $-1, %r10
+; CHECK-NEXT:    cmoveq %rax, %r10
+; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    subq $8, %rsp
 ; CHECK-NEXT:    movq %r12, %rdi
-; CHECK-NEXT:    movl %r15d, %esi
-; CHECK-NEXT:    movl %r14d, %edx
+; CHECK-NEXT:    movl %r14d, %esi
+; CHECK-NEXT:    movl %ebp, %edx
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    xorl %r8d, %r8d
 ; CHECK-NEXT:    xorl %r9d, %r9d
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    pushq %r10
-; CHECK-NEXT:    pushq %r11
-; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    callq foo2@PLT
 ; CHECK-NEXT:    addq $32, %rsp
 ; CHECK-NEXT:    popq %rbx
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..f5ac941fda930 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -627,13 +627,21 @@ define void @test8(i1 %c, ptr %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwi
 ;; Test integer select between values and constants.
 
 define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $1, %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test9:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmovneq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test9:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmovneq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test9:
 ; ATHLON:       ## %bb.0:
@@ -667,13 +675,21 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 
 ;; Same as test9
 define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9a:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $1, %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test9a:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmovneq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test9a:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmovneq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test9a:
 ; ATHLON:       ## %bb.0:
@@ -779,13 +795,21 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    negq %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test11:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmoveq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test11:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmoveq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test11:
 ; ATHLON:       ## %bb.0:
@@ -818,13 +842,21 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11a:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    negq %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test11a:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmoveq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test11a:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmoveq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test11a:
 ; ATHLON:       ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 8c7078c726328..6728d25abf1b6 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -441,33 +441,31 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %ebx, %eax
 ; X86-NEXT:    mull %ebp
 ; X86-NEXT:    movl %edx, %ecx
 ; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
 ; X86-NEXT:    addl %edx, %esi
 ; X86-NEXT:    adcl $0, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mull %ebp
-; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %edx, %ebx
 ; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    mull %ebx
+; X86-NEXT:    mull %edi
 ; X86-NEXT:    addl %esi, %eax
 ; X86-NEXT:    adcl %ecx, %edx
-; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %ebx
 ; X86-NEXT:    addl %ebp, %edx
-; X86-NEXT:    adcl $0, %edi
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    negl %edi
-; X86-NEXT:    sbbl %ecx, %ecx
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    cmovnel %ecx, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx

Copy link

github-actions bot commented Jul 19, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

… be setting sbb to 0 anyway

If we have to explicitly zero the register that SBB consumes anyway, we lose the benefit of the SBB idiom — and we do not use this pattern for any other comparison, only for comparisons against 0.
@RKSimon RKSimon self-requested a review July 20, 2025 11:23
@AZero13 AZero13 requested a review from topperc July 20, 2025 14:01
@RKSimon
Copy link
Collaborator

RKSimon commented Jul 20, 2025

What is the motivation for this patch? Do you have results showing perf benefit, and for what targets please?

@AZero13
Copy link
Contributor Author

AZero13 commented Jul 20, 2025

What is the motivation for this patch? Do you have results showing perf benefit, and for what targets please?

This comes up a lot in ucmp, especially on targets where sbb false dep causes stalls. This is the best way to do it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants