Skip to content

Commit fe0dbe0

Browse files
authored
[CodeGen] More consistently expand float ops by default (#150597)
These float operations were expanded for scalar f32/f64/f128, but not for f16 and, more problematically, not for vectors. A small subset of them was separately set to expand for vectors. Change these to always expand by default, and adjust targets to mark these as legal where necessary instead. This is a much safer default, and avoids unnecessary legalization failures because a target failed to manually mark them as expand. Fixes #110753. Fixes #121390.
1 parent 3d99446 commit fe0dbe0

File tree

4 files changed

+145
-21
lines changed

4 files changed

+145
-21
lines changed

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() {
806806
ISD::SDIVFIX, ISD::SDIVFIXSAT,
807807
ISD::UDIVFIX, ISD::UDIVFIXSAT,
808808
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
809-
ISD::IS_FPCLASS},
809+
ISD::IS_FPCLASS, ISD::FCBRT,
810+
ISD::FLOG, ISD::FLOG2,
811+
ISD::FLOG10, ISD::FEXP,
812+
ISD::FEXP2, ISD::FEXP10,
813+
ISD::FFLOOR, ISD::FNEARBYINT,
814+
ISD::FCEIL, ISD::FRINT,
815+
ISD::FTRUNC, ISD::FROUNDEVEN,
816+
ISD::FTAN, ISD::FACOS,
817+
ISD::FASIN, ISD::FATAN,
818+
ISD::FCOSH, ISD::FSINH,
819+
ISD::FTANH, ISD::FATAN2},
810820
VT, Expand);
811821

812822
// Overflow operations default to expand
@@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() {
852862

853863
// These operations default to expand for vector types.
854864
if (VT.isVector())
855-
setOperationAction(
856-
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
857-
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
858-
ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
859-
ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
860-
ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
861-
VT, Expand);
865+
setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
866+
ISD::ANY_EXTEND_VECTOR_INREG,
867+
ISD::SIGN_EXTEND_VECTOR_INREG,
868+
ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR,
869+
ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND},
870+
VT, Expand);
862871

863872
// Constrained floating-point operations default to expand.
864873
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() {
914923
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
915924
Expand);
916925

917-
// These library functions default to expand.
918-
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
919-
ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
920-
ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
921-
ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
922-
ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
923-
ISD::FATAN2},
924-
{MVT::f32, MVT::f64, MVT::f128}, Expand);
925-
926926
// Insert custom handling default for llvm.canonicalize.*.
927927
setOperationAction(ISD::FCANONICALIZE,
928928
{MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand);

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
391391
// Library functions. These default to Expand, but we have instructions
392392
// for them.
393393
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
394-
ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
395-
MVT::f32, Legal);
394+
ISD::FROUNDEVEN, ISD::FTRUNC},
395+
{MVT::f16, MVT::f32}, Legal);
396+
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal);
396397

397398
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
398399
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
@@ -412,9 +413,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
412413

413414
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
414415

415-
if (Subtarget->has16BitInsts())
416+
if (Subtarget->has16BitInsts()) {
416417
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
417-
else {
418+
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal);
419+
} else {
418420
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
419421
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
420422
}

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
370370
setOperationAction(ISD::FMINNUM, VT, Legal);
371371
setOperationAction(ISD::FMAXNUM, VT, Legal);
372372
setOperationAction(ISD::FROUND, VT, Legal);
373+
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
374+
setOperationAction(ISD::FRINT, VT, Legal);
375+
setOperationAction(ISD::FTRUNC, VT, Legal);
376+
setOperationAction(ISD::FFLOOR, VT, Legal);
377+
setOperationAction(ISD::FCEIL, VT, Legal);
373378
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
374379
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
375380
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
15071512
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
15081513

15091514
setOperationAction(ISD::FROUND, MVT::f16, Legal);
1515+
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
1516+
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
1517+
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
1518+
setOperationAction(ISD::FRINT, MVT::f16, Legal);
1519+
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
1520+
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
15101521
}
15111522

15121523
if (Subtarget->hasNEON()) {
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
3+
4+
define void @test(ptr %p1, ptr %p2) nounwind {
5+
; CHECK-LABEL: test:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: mflr 0
8+
; CHECK-NEXT: stdu 1, -224(1)
9+
; CHECK-NEXT: li 5, 48
10+
; CHECK-NEXT: std 0, 240(1)
11+
; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill
12+
; CHECK-NEXT: li 27, 16
13+
; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill
14+
; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
15+
; CHECK-NEXT: li 29, 32
16+
; CHECK-NEXT: li 28, 48
17+
; CHECK-NEXT: stxvd2x 56, 1, 5 # 16-byte Folded Spill
18+
; CHECK-NEXT: li 5, 64
19+
; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
20+
; CHECK-NEXT: mr 30, 4
21+
; CHECK-NEXT: stxvd2x 57, 1, 5 # 16-byte Folded Spill
22+
; CHECK-NEXT: li 5, 80
23+
; CHECK-NEXT: stxvd2x 58, 1, 5 # 16-byte Folded Spill
24+
; CHECK-NEXT: li 5, 96
25+
; CHECK-NEXT: lxvd2x 58, 0, 3
26+
; CHECK-NEXT: stxvd2x 59, 1, 5 # 16-byte Folded Spill
27+
; CHECK-NEXT: li 5, 112
28+
; CHECK-NEXT: lxvd2x 59, 3, 27
29+
; CHECK-NEXT: stxvd2x 60, 1, 5 # 16-byte Folded Spill
30+
; CHECK-NEXT: li 5, 128
31+
; CHECK-NEXT: stxvd2x 61, 1, 5 # 16-byte Folded Spill
32+
; CHECK-NEXT: li 5, 144
33+
; CHECK-NEXT: stxvd2x 62, 1, 5 # 16-byte Folded Spill
34+
; CHECK-NEXT: li 5, 160
35+
; CHECK-NEXT: lxvd2x 62, 3, 28
36+
; CHECK-NEXT: stxvd2x 63, 1, 5 # 16-byte Folded Spill
37+
; CHECK-NEXT: lxvd2x 63, 3, 29
38+
; CHECK-NEXT: xxswapd 57, 58
39+
; CHECK-NEXT: xxswapd 1, 59
40+
; CHECK-NEXT: xxswapd 60, 62
41+
; CHECK-NEXT: xxswapd 61, 63
42+
; CHECK-NEXT: bl roundeven
43+
; CHECK-NEXT: nop
44+
; CHECK-NEXT: xxswapd 56, 1
45+
; CHECK-NEXT: xxlor 1, 59, 59
46+
; CHECK-NEXT: bl roundeven
47+
; CHECK-NEXT: nop
48+
; CHECK-NEXT: xxswapd 0, 1
49+
; CHECK-NEXT: xxlor 1, 60, 60
50+
; CHECK-NEXT: xxmrgld 59, 0, 56
51+
; CHECK-NEXT: bl roundeven
52+
; CHECK-NEXT: nop
53+
; CHECK-NEXT: xxswapd 60, 1
54+
; CHECK-NEXT: xxlor 1, 62, 62
55+
; CHECK-NEXT: bl roundeven
56+
; CHECK-NEXT: nop
57+
; CHECK-NEXT: xxswapd 0, 1
58+
; CHECK-NEXT: xxlor 1, 61, 61
59+
; CHECK-NEXT: xxmrgld 62, 0, 60
60+
; CHECK-NEXT: bl roundeven
61+
; CHECK-NEXT: nop
62+
; CHECK-NEXT: xxswapd 61, 1
63+
; CHECK-NEXT: xxlor 1, 63, 63
64+
; CHECK-NEXT: bl roundeven
65+
; CHECK-NEXT: nop
66+
; CHECK-NEXT: xxswapd 0, 1
67+
; CHECK-NEXT: xxlor 1, 57, 57
68+
; CHECK-NEXT: xxmrgld 63, 0, 61
69+
; CHECK-NEXT: bl roundeven
70+
; CHECK-NEXT: nop
71+
; CHECK-NEXT: xxswapd 61, 1
72+
; CHECK-NEXT: xxlor 1, 58, 58
73+
; CHECK-NEXT: bl roundeven
74+
; CHECK-NEXT: nop
75+
; CHECK-NEXT: li 3, 160
76+
; CHECK-NEXT: stxvd2x 63, 30, 29
77+
; CHECK-NEXT: xxswapd 0, 1
78+
; CHECK-NEXT: stxvd2x 62, 30, 28
79+
; CHECK-NEXT: stxvd2x 59, 30, 27
80+
; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload
81+
; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload
82+
; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload
83+
; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
84+
; CHECK-NEXT: li 3, 144
85+
; CHECK-NEXT: xxmrgld 0, 0, 61
86+
; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
87+
; CHECK-NEXT: li 3, 128
88+
; CHECK-NEXT: stxvd2x 0, 0, 30
89+
; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload
90+
; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
91+
; CHECK-NEXT: li 3, 112
92+
; CHECK-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload
93+
; CHECK-NEXT: li 3, 96
94+
; CHECK-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload
95+
; CHECK-NEXT: li 3, 80
96+
; CHECK-NEXT: lxvd2x 58, 1, 3 # 16-byte Folded Reload
97+
; CHECK-NEXT: li 3, 64
98+
; CHECK-NEXT: lxvd2x 57, 1, 3 # 16-byte Folded Reload
99+
; CHECK-NEXT: li 3, 48
100+
; CHECK-NEXT: lxvd2x 56, 1, 3 # 16-byte Folded Reload
101+
; CHECK-NEXT: addi 1, 1, 224
102+
; CHECK-NEXT: ld 0, 16(1)
103+
; CHECK-NEXT: mtlr 0
104+
; CHECK-NEXT: blr
105+
%v = load <8 x double>, ptr %p1, align 64
106+
%res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v)
107+
store <8 x double> %res, ptr %p2, align 64
108+
ret void
109+
}
110+
111+
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)

0 commit comments

Comments
 (0)