Skip to content

Commit bebcd9d

Browse files
committed
[X86] Use the standard cmp+cmov for select (X != 0), -1, Y if we will be setting sbb to 0 anyway
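If we have to zero the SBB register first anyway (to break its false dependency), the SBB trick loses its advantage over a plain cmp+cmov. We also do not use the SBB form for comparisons against any other constant, only against 0.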
1 parent b9adc4a commit bebcd9d

File tree

6 files changed

+171
-167
lines changed

6 files changed

+171
-167
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24917,6 +24917,30 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
2491724917
if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
2491824918
(isAllOnesConstant(LHS) || isAllOnesConstant(RHS))) {
2491924919
SDValue Y = isAllOnesConstant(RHS) ? LHS : RHS;
24920+
24921+
// If CMOV is available, use it instead. Only prefer CMOV when SBB
24922+
// dependency breaking is not available or when CMOV is likely to be more
24923+
// efficient
24924+
if (Subtarget.canUseCMOV() &&
24925+
(VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
24926+
!Subtarget.hasSBBDepBreaking()) {
24927+
// Create comparison against zero to set EFLAGS
24928+
SDValue Zero = DAG.getConstant(0, DL, CmpVT);
24929+
SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
24930+
24931+
// The SBB pattern below implements: (CmpVal X86CC 0) ? LHS : RHS, with
24932+
// one of LHS/RHS being all-ones and the other being Y. For example, for
24933+
// COND_E with LHS == -1: X == 0 returns -1 and X != 0 returns Y. Emit
24934+
// exactly the same select as a CMOV. X86ISD::CMOV takes its operands as
24935+
// (FalseVal, TrueVal, CondCode, EFLAGS) and returns TrueVal if the
24936+
// condition is true and FalseVal if it is false, so RHS is passed first
24937+
// and LHS second.
24938+
24939+
return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
24940+
DAG.getTargetConstant(X86CC, DL, MVT::i8), Cmp);
24941+
}
24942+
24943+
// Fall back to SBB pattern for older processors or unsupported types
2492024944
SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
2492124945

2492224946
// 'X - 1' sets the carry flag if X == 0.

llvm/test/CodeGen/X86/bmi-select-distrib.ll

Lines changed: 57 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -128,30 +128,23 @@ define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind {
128128
define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind {
129129
; X86-LABEL: and_select_neg_i16:
130130
; X86: # %bb.0:
131-
; X86-NEXT: pushl %esi
132-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
133-
; X86-NEXT: andb $1, %cl
134-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
135-
; X86-NEXT: movl %edx, %esi
136-
; X86-NEXT: negl %esi
137-
; X86-NEXT: xorl %eax, %eax
138-
; X86-NEXT: cmpb $1, %cl
139-
; X86-NEXT: sbbl %eax, %eax
140-
; X86-NEXT: orl %esi, %eax
141-
; X86-NEXT: andl %edx, %eax
131+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
132+
; X86-NEXT: movl %ecx, %edx
133+
; X86-NEXT: negl %edx
134+
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
135+
; X86-NEXT: movw $-1, %ax
136+
; X86-NEXT: cmovnew %dx, %ax
137+
; X86-NEXT: andl %ecx, %eax
142138
; X86-NEXT: # kill: def $ax killed $ax killed $eax
143-
; X86-NEXT: popl %esi
144139
; X86-NEXT: retl
145140
;
146141
; X64-LABEL: and_select_neg_i16:
147142
; X64: # %bb.0:
148-
; X64-NEXT: andb $1, %dil
149143
; X64-NEXT: movl %esi, %ecx
150144
; X64-NEXT: negl %ecx
151-
; X64-NEXT: xorl %eax, %eax
152-
; X64-NEXT: cmpb $1, %dil
153-
; X64-NEXT: sbbl %eax, %eax
154-
; X64-NEXT: orl %ecx, %eax
145+
; X64-NEXT: testb $1, %dil
146+
; X64-NEXT: movw $-1, %ax
147+
; X64-NEXT: cmovnew %cx, %ax
155148
; X64-NEXT: andl %esi, %eax
156149
; X64-NEXT: # kill: def $ax killed $ax killed $eax
157150
; X64-NEXT: retq
@@ -200,22 +193,17 @@ define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
200193
define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind {
201194
; X86-LABEL: and_select_no_neg:
202195
; X86: # %bb.0:
203-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
204-
; X86-NEXT: andb $1, %cl
205-
; X86-NEXT: xorl %edx, %edx
206-
; X86-NEXT: cmpb $1, %cl
207-
; X86-NEXT: sbbl %edx, %edx
208-
; X86-NEXT: orl %eax, %edx
209-
; X86-NEXT: andl %edx, %eax
196+
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
197+
; X86-NEXT: movl $-1, %ecx
198+
; X86-NEXT: cmovnel %eax, %ecx
199+
; X86-NEXT: andl %ecx, %eax
210200
; X86-NEXT: retl
211201
;
212202
; X64-LABEL: and_select_no_neg:
213203
; X64: # %bb.0:
214-
; X64-NEXT: andb $1, %dil
215-
; X64-NEXT: xorl %eax, %eax
216-
; X64-NEXT: cmpb $1, %dil
217-
; X64-NEXT: sbbl %eax, %eax
218-
; X64-NEXT: orl %esi, %eax
204+
; X64-NEXT: testb $1, %dil
205+
; X64-NEXT: movl $-1, %eax
206+
; X64-NEXT: cmovnel %esi, %eax
219207
; X64-NEXT: andl %esi, %eax
220208
; X64-NEXT: retq
221209
%sub = sub i32 %a1, 0
@@ -255,26 +243,19 @@ define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
255243
define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
256244
; X86-LABEL: and_select_neg_different_op:
257245
; X86: # %bb.0:
258-
; X86-NEXT: pushl %esi
259-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
260-
; X86-NEXT: andb $1, %cl
261246
; X86-NEXT: negl %edx
262-
; X86-NEXT: xorl %esi, %esi
263-
; X86-NEXT: cmpb $1, %cl
264-
; X86-NEXT: sbbl %esi, %esi
265-
; X86-NEXT: orl %edx, %esi
266-
; X86-NEXT: andl %esi, %eax
267-
; X86-NEXT: popl %esi
247+
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
248+
; X86-NEXT: movl $-1, %ecx
249+
; X86-NEXT: cmovnel %edx, %ecx
250+
; X86-NEXT: andl %ecx, %eax
268251
; X86-NEXT: retl
269252
;
270253
; X64-LABEL: and_select_neg_different_op:
271254
; X64: # %bb.0:
272-
; X64-NEXT: andb $1, %dil
273255
; X64-NEXT: negl %edx
274-
; X64-NEXT: xorl %eax, %eax
275-
; X64-NEXT: cmpb $1, %dil
276-
; X64-NEXT: sbbl %eax, %eax
277-
; X64-NEXT: orl %edx, %eax
256+
; X64-NEXT: testb $1, %dil
257+
; X64-NEXT: movl $-1, %eax
258+
; X64-NEXT: cmovnel %edx, %eax
278259
; X64-NEXT: andl %esi, %eax
279260
; X64-NEXT: retq
280261
%sub = sub i32 0, %a2
@@ -427,29 +408,22 @@ define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
427408
define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
428409
; X86-LABEL: and_select_sub_1_i16:
429410
; X86: # %bb.0:
430-
; X86-NEXT: pushl %esi
431-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
432-
; X86-NEXT: andb $1, %cl
433-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
434-
; X86-NEXT: leal -1(%edx), %esi
435-
; X86-NEXT: xorl %eax, %eax
436-
; X86-NEXT: cmpb $1, %cl
437-
; X86-NEXT: sbbl %eax, %eax
438-
; X86-NEXT: orl %esi, %eax
439-
; X86-NEXT: andl %edx, %eax
411+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
412+
; X86-NEXT: leal -1(%ecx), %edx
413+
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
414+
; X86-NEXT: movw $-1, %ax
415+
; X86-NEXT: cmovnew %dx, %ax
416+
; X86-NEXT: andl %ecx, %eax
440417
; X86-NEXT: # kill: def $ax killed $ax killed $eax
441-
; X86-NEXT: popl %esi
442418
; X86-NEXT: retl
443419
;
444420
; X64-LABEL: and_select_sub_1_i16:
445421
; X64: # %bb.0:
446422
; X64-NEXT: # kill: def $esi killed $esi def $rsi
447-
; X64-NEXT: andb $1, %dil
448423
; X64-NEXT: leal -1(%rsi), %ecx
449-
; X64-NEXT: xorl %eax, %eax
450-
; X64-NEXT: cmpb $1, %dil
451-
; X64-NEXT: sbbl %eax, %eax
452-
; X64-NEXT: orl %ecx, %eax
424+
; X64-NEXT: testb $1, %dil
425+
; X64-NEXT: movw $-1, %ax
426+
; X64-NEXT: cmovnew %cx, %ax
453427
; X64-NEXT: andl %esi, %eax
454428
; X64-NEXT: # kill: def $ax killed $ax killed $eax
455429
; X64-NEXT: retq
@@ -492,27 +466,20 @@ define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
492466
define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
493467
; X86-LABEL: and_select_no_sub_1:
494468
; X86: # %bb.0:
495-
; X86-NEXT: pushl %esi
496-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
497-
; X86-NEXT: andb $1, %cl
498-
; X86-NEXT: leal -2(%eax), %edx
499-
; X86-NEXT: xorl %esi, %esi
500-
; X86-NEXT: cmpb $1, %cl
501-
; X86-NEXT: sbbl %esi, %esi
502-
; X86-NEXT: orl %edx, %esi
503-
; X86-NEXT: andl %esi, %eax
504-
; X86-NEXT: popl %esi
469+
; X86-NEXT: leal -2(%eax), %ecx
470+
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
471+
; X86-NEXT: movl $-1, %edx
472+
; X86-NEXT: cmovnel %ecx, %edx
473+
; X86-NEXT: andl %edx, %eax
505474
; X86-NEXT: retl
506475
;
507476
; X64-LABEL: and_select_no_sub_1:
508477
; X64: # %bb.0:
509478
; X64-NEXT: # kill: def $esi killed $esi def $rsi
510-
; X64-NEXT: andb $1, %dil
511479
; X64-NEXT: leal -2(%rsi), %ecx
512-
; X64-NEXT: xorl %eax, %eax
513-
; X64-NEXT: cmpb $1, %dil
514-
; X64-NEXT: sbbl %eax, %eax
515-
; X64-NEXT: orl %ecx, %eax
480+
; X64-NEXT: testb $1, %dil
481+
; X64-NEXT: movl $-1, %eax
482+
; X64-NEXT: cmovnel %ecx, %eax
516483
; X64-NEXT: andl %esi, %eax
517484
; X64-NEXT: retq
518485
%sub = add i32 %a1, -2
@@ -551,27 +518,20 @@ define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
551518
define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
552519
; X86-LABEL: and_select_sub_1_different_op:
553520
; X86: # %bb.0:
554-
; X86-NEXT: pushl %esi
555-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
556-
; X86-NEXT: andb $1, %cl
557-
; X86-NEXT: decl %edx
558-
; X86-NEXT: xorl %esi, %esi
559-
; X86-NEXT: cmpb $1, %cl
560-
; X86-NEXT: sbbl %esi, %esi
561-
; X86-NEXT: orl %edx, %esi
562-
; X86-NEXT: andl %esi, %eax
563-
; X86-NEXT: popl %esi
521+
; X86-NEXT: leal -1(%edx), %ecx
522+
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
523+
; X86-NEXT: movl $-1, %edx
524+
; X86-NEXT: cmovnel %ecx, %edx
525+
; X86-NEXT: andl %edx, %eax
564526
; X86-NEXT: retl
565527
;
566528
; X64-LABEL: and_select_sub_1_different_op:
567529
; X64: # %bb.0:
568530
; X64-NEXT: # kill: def $edx killed $edx def $rdx
569-
; X64-NEXT: andb $1, %dil
570531
; X64-NEXT: leal -1(%rdx), %ecx
571-
; X64-NEXT: xorl %eax, %eax
572-
; X64-NEXT: cmpb $1, %dil
573-
; X64-NEXT: sbbl %eax, %eax
574-
; X64-NEXT: orl %ecx, %eax
532+
; X64-NEXT: testb $1, %dil
533+
; X64-NEXT: movl $-1, %eax
534+
; X64-NEXT: cmovnel %ecx, %eax
575535
; X64-NEXT: andl %esi, %eax
576536
; X64-NEXT: retq
577537
%sub = add i32 %a2, -1
@@ -809,27 +769,20 @@ define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
809769
define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
810770
; X86-LABEL: xor_select_sub_1_wrong_const:
811771
; X86: # %bb.0:
812-
; X86-NEXT: pushl %esi
813-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
814-
; X86-NEXT: andb $1, %cl
815-
; X86-NEXT: leal -1(%eax), %edx
816-
; X86-NEXT: xorl %esi, %esi
817-
; X86-NEXT: cmpb $1, %cl
818-
; X86-NEXT: sbbl %esi, %esi
819-
; X86-NEXT: orl %edx, %esi
820-
; X86-NEXT: xorl %esi, %eax
821-
; X86-NEXT: popl %esi
772+
; X86-NEXT: leal -1(%eax), %ecx
773+
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
774+
; X86-NEXT: movl $-1, %edx
775+
; X86-NEXT: cmovnel %ecx, %edx
776+
; X86-NEXT: xorl %edx, %eax
822777
; X86-NEXT: retl
823778
;
824779
; X64-LABEL: xor_select_sub_1_wrong_const:
825780
; X64: # %bb.0:
826781
; X64-NEXT: # kill: def $esi killed $esi def $rsi
827-
; X64-NEXT: andb $1, %dil
828782
; X64-NEXT: leal -1(%rsi), %ecx
829-
; X64-NEXT: xorl %eax, %eax
830-
; X64-NEXT: cmpb $1, %dil
831-
; X64-NEXT: sbbl %eax, %eax
832-
; X64-NEXT: orl %ecx, %eax
783+
; X64-NEXT: testb $1, %dil
784+
; X64-NEXT: movl $-1, %eax
785+
; X64-NEXT: cmovnel %ecx, %eax
833786
; X64-NEXT: xorl %esi, %eax
834787
; X64-NEXT: retq
835788
%sub = add i32 %a1, -1

llvm/test/CodeGen/X86/pr35972.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@ define void @test3(i32 %c, ptr %ptr) {
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
88
; CHECK-NEXT: xorl %ecx, %ecx
9-
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
10-
; CHECK-NEXT: sbbl %ecx, %ecx
11-
; CHECK-NEXT: kmovd %ecx, %k0
9+
; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
10+
; CHECK-NEXT: movl $-1, %edx
11+
; CHECK-NEXT: cmovnel %ecx, %edx
12+
; CHECK-NEXT: kmovd %edx, %k0
1213
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
1314
; CHECK-NEXT: kmovq %k0, (%eax)
1415
; CHECK-NEXT: retl

llvm/test/CodeGen/X86/sbb-false-dep.ll

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,36 +12,32 @@ define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 n
1212
; CHECK-NEXT: pushq %r14
1313
; CHECK-NEXT: pushq %r12
1414
; CHECK-NEXT: pushq %rbx
15-
; CHECK-NEXT: movl %r8d, %ebp
16-
; CHECK-NEXT: movl %ecx, %r14d
17-
; CHECK-NEXT: movl %edx, %r15d
18-
; CHECK-NEXT: movq %rsi, %rbx
15+
; CHECK-NEXT: movl %r8d, %ebx
16+
; CHECK-NEXT: movl %ecx, %ebp
17+
; CHECK-NEXT: movl %edx, %r14d
18+
; CHECK-NEXT: movq %rsi, %r15
1919
; CHECK-NEXT: movq %rdi, %r12
2020
; CHECK-NEXT: movq (%rsi), %rdi
2121
; CHECK-NEXT: movq 8(%rsi), %rsi
22-
; CHECK-NEXT: movq %rbx, %rdx
22+
; CHECK-NEXT: movq %r15, %rdx
2323
; CHECK-NEXT: callq foo1@PLT
24-
; CHECK-NEXT: movq 8(%rbx), %rax
24+
; CHECK-NEXT: testl %ebx, %ebx
25+
; CHECK-NEXT: movq 8(%r15), %rax
2526
; CHECK-NEXT: movq (%rax), %rax
26-
; CHECK-NEXT: xorl %r10d, %r10d
27-
; CHECK-NEXT: movl %ebp, %ecx
28-
; CHECK-NEXT: negl %ecx
29-
; CHECK-NEXT: movl $0, %r11d
30-
; CHECK-NEXT: sbbq %r11, %r11
31-
; CHECK-NEXT: orq %rax, %r11
32-
; CHECK-NEXT: cmpl $1, %ebp
33-
; CHECK-NEXT: sbbq %r10, %r10
34-
; CHECK-NEXT: orq %rax, %r10
27+
; CHECK-NEXT: movq $-1, %rcx
28+
; CHECK-NEXT: movq $-1, %r10
29+
; CHECK-NEXT: cmoveq %rax, %r10
30+
; CHECK-NEXT: cmoveq %rcx, %rax
3531
; CHECK-NEXT: subq $8, %rsp
3632
; CHECK-NEXT: movq %r12, %rdi
37-
; CHECK-NEXT: movl %r15d, %esi
38-
; CHECK-NEXT: movl %r14d, %edx
33+
; CHECK-NEXT: movl %r14d, %esi
34+
; CHECK-NEXT: movl %ebp, %edx
3935
; CHECK-NEXT: xorl %ecx, %ecx
4036
; CHECK-NEXT: xorl %r8d, %r8d
4137
; CHECK-NEXT: xorl %r9d, %r9d
38+
; CHECK-NEXT: pushq %rax
4239
; CHECK-NEXT: pushq %r10
43-
; CHECK-NEXT: pushq %r11
44-
; CHECK-NEXT: pushq %rbx
40+
; CHECK-NEXT: pushq %r15
4541
; CHECK-NEXT: callq foo2@PLT
4642
; CHECK-NEXT: addq $32, %rsp
4743
; CHECK-NEXT: popq %rbx

0 commit comments

Comments
 (0)