-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[X86] Use the standard cmp+cmov for select (X != 0), -1, Y if we will be setting sbb to 0 anyway #149672
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: AZero13 (AZero13) Changes: If we have to set sbb to 0, then we lose the point of it, and it is not like we do this for any other cmp, just with 0. Full diff: https://github.com/llvm/llvm-project/pull/149672.diff 6 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d91ea1ea1bb1b..f2d50c9db33f8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24917,6 +24917,31 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
(isAllOnesConstant(LHS) || isAllOnesConstant(RHS))) {
SDValue Y = isAllOnesConstant(RHS) ? LHS : RHS;
+
+ // If CMOV is available, use it instead. Only prefer CMOV when SBB
+ // dependency breaking is not available or when CMOV is likely to be more
+ // efficient
+ if (Subtarget.canUseCMOV() &&
+ (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ !Subtarget.hasSBBDepBreaking()) {
+ // Create comparison against zero to set EFLAGS
+ SDValue Zero = DAG.getConstant(0, DL, CmpVT);
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
+
+ // For CMOV: FalseVal is used when condition is false, TrueVal when
+ // condition is true We want: when X==0 return -1, when X!=0 return Y So
+ // condition should be (X == 0), TrueVal = -1, FalseVal = Y The SBB
+ // pattern implements: (CmpVal X86CC 0) ? LHS : RHS We need to implement
+ // exactly the same select operation with CMOV CMOV semantics: CMOV
+ // condition, TrueVal, FalseVal Returns TrueVal if condition is true,
+ // FalseVal if condition is false
+
+ return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
+ DAG.getTargetConstant(X86CC, DL, MVT::i8),
+ Cmp);
+ }
+
+ // Fall back to SBB pattern for older processors or unsupported types
SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
// 'X - 1' sets the carry flag if X == 0.
diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
index e5696ded4fbf1..dc98d338cc382 100644
--- a/llvm/test/CodeGen/X86/bmi-select-distrib.ll
+++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
@@ -128,30 +128,23 @@ define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind {
define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind {
; X86-LABEL: and_select_neg_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: negl %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movw $-1, %ax
+; X86-NEXT: cmovnew %dx, %ax
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_i16:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: negl %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movw $-1, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
@@ -200,22 +193,17 @@ define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_no_neg:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %eax, %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_no_neg:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = sub i32 %a1, 0
@@ -255,26 +243,19 @@ define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
; X86-LABEL: and_select_neg_different_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
; X86-NEXT: negl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %edx, %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_different_op:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
; X64-NEXT: negl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %edx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = sub i32 0, %a2
@@ -427,29 +408,22 @@ define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
; X86-LABEL: and_select_sub_1_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: leal -1(%edx), %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal -1(%ecx), %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movw $-1, %ax
+; X86-NEXT: cmovnew %dx, %ax
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movw $-1, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
@@ -492,27 +466,20 @@ define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_no_sub_1:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -2(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -2(%eax), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_no_sub_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -2(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -2
@@ -551,27 +518,20 @@ define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
; X86-LABEL: and_select_sub_1_different_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: decl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -1(%edx), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_different_op:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rdx), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a2, -1
@@ -809,27 +769,20 @@ define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: xor_select_sub_1_wrong_const:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -1(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: xorl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_wrong_const:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: xorl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index 981c47800c0f3..e609981c2e752 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -6,9 +6,10 @@ define void @test3(i32 %c, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sbbl %ecx, %ecx
-; CHECK-NEXT: kmovd %ecx, %k0
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl $-1, %edx
+; CHECK-NEXT: cmovnel %ecx, %edx
+; CHECK-NEXT: kmovd %edx, %k0
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
; CHECK-NEXT: kmovq %k0, (%eax)
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 34a92cb58692b..f53f362289c45 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -12,36 +12,32 @@ define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 n
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movl %r8d, %ebp
-; CHECK-NEXT: movl %ecx, %r14d
-; CHECK-NEXT: movl %edx, %r15d
-; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: movl %r8d, %ebx
+; CHECK-NEXT: movl %ecx, %ebp
+; CHECK-NEXT: movl %edx, %r14d
+; CHECK-NEXT: movq %rsi, %r15
; CHECK-NEXT: movq %rdi, %r12
; CHECK-NEXT: movq (%rsi), %rdi
; CHECK-NEXT: movq 8(%rsi), %rsi
-; CHECK-NEXT: movq %rbx, %rdx
+; CHECK-NEXT: movq %r15, %rdx
; CHECK-NEXT: callq foo1@PLT
-; CHECK-NEXT: movq 8(%rbx), %rax
+; CHECK-NEXT: testl %ebx, %ebx
+; CHECK-NEXT: movq 8(%r15), %rax
; CHECK-NEXT: movq (%rax), %rax
-; CHECK-NEXT: xorl %r10d, %r10d
-; CHECK-NEXT: movl %ebp, %ecx
-; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: movl $0, %r11d
-; CHECK-NEXT: sbbq %r11, %r11
-; CHECK-NEXT: orq %rax, %r11
-; CHECK-NEXT: cmpl $1, %ebp
-; CHECK-NEXT: sbbq %r10, %r10
-; CHECK-NEXT: orq %rax, %r10
+; CHECK-NEXT: movq $-1, %rcx
+; CHECK-NEXT: movq $-1, %r10
+; CHECK-NEXT: cmoveq %rax, %r10
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movq %r12, %rdi
-; CHECK-NEXT: movl %r15d, %esi
-; CHECK-NEXT: movl %r14d, %edx
+; CHECK-NEXT: movl %r14d, %esi
+; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: xorl %r9d, %r9d
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %r10
-; CHECK-NEXT: pushq %r11
-; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %r15
; CHECK-NEXT: callq foo2@PLT
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..f5ac941fda930 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -627,13 +627,21 @@ define void @test8(i1 %c, ptr %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwi
;; Test integer select between values and constants.
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpq $1, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test9:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmovneq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test9:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmovneq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test9:
; ATHLON: ## %bb.0:
@@ -667,13 +675,21 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;; Same as test9
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9a:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpq $1, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test9a:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmovneq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test9a:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmovneq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test9a:
; ATHLON: ## %bb.0:
@@ -779,13 +795,21 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: negq %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test11:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmoveq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test11:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmoveq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test11:
; ATHLON: ## %bb.0:
@@ -818,13 +842,21 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11a:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: negq %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test11a:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmoveq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test11a:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmoveq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test11a:
; ATHLON: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 8c7078c726328..6728d25abf1b6 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -441,33 +441,31 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %edi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: mull %edi
; X86-NEXT: addl %esi, %eax
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: negl %edi
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: cmovnel %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
|
✅ With the latest revision, this PR passed the C/C++ code formatter.
… be setting sbb to 0 anyway. If we have to set sbb to 0, then we lose the point of it, and it is not like we do this for any other cmp, just with 0.
What is the motivation for this patch? Do you have results showing a performance benefit, and for which targets, please?
This comes up a lot in ucmp, especially on targets where the sbb false dependency causes stalls. This is the best way to do it there.
If we have to set sbb, then we lose the point of it, and it is not like we do this for any other cmp, just with 0.