Skip to content

Commit ea0c778

Browse files
committed
re-enable direct bitcasts for Int/Float vector transmutes (but not ones involving pointers)
1 parent 231dddd commit ea0c778

File tree

2 files changed

+197
-0
lines changed

2 files changed

+197
-0
lines changed

compiler/rustc_codegen_ssa/src/mir/rvalue.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
253253
return OperandValue::poison(bx, cast);
254254
}
255255

256+
// Transmutes to or from pointers take different methods, so we use this to restrict
257+
// the SimdVector case to types which can be `bitcast` between each other.
258+
#[inline]
259+
fn vector_can_bitcast(x: abi::Scalar) -> bool {
260+
matches!(
261+
x,
262+
abi::Scalar::Initialized {
263+
value: abi::Primitive::Int(..) | abi::Primitive::Float(..),
264+
..
265+
}
266+
)
267+
}
268+
256269
let cx = bx.cx();
257270
match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
258271
_ if cast.is_zst() => OperandValue::ZeroSized,
@@ -269,6 +282,14 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
269282
) if from_scalar.size(cx) == to_scalar.size(cx) => {
270283
OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
271284
}
285+
(
286+
OperandValue::Immediate(imm),
287+
abi::BackendRepr::SimdVector { element: from_scalar, .. },
288+
abi::BackendRepr::SimdVector { element: to_scalar, .. },
289+
) if vector_can_bitcast(from_scalar) && vector_can_bitcast(to_scalar) => {
290+
let to_backend_ty = bx.cx().immediate_backend_type(cast);
291+
OperandValue::Immediate(bx.bitcast(imm, to_backend_ty))
292+
}
272293
(
273294
OperandValue::Pair(imm_a, imm_b),
274295
abi::BackendRepr::ScalarPair(in_a, in_b),
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
2+
//@ only-64bit (so I don't need to worry about usize)
3+
//@ revisions: aarch64 x86_64
4+
//@ [aarch64] only-aarch64
5+
//@ [aarch64] compile-flags: -C target-feature=+neon
6+
//@ [x86_64] only-x86_64
7+
//@ [x86_64] compile-flags: -C target-feature=+sse2
8+
9+
#![crate_type = "lib"]
10+
#![feature(core_intrinsics)]
11+
#![feature(portable_simd)]
12+
13+
use std::intrinsics::transmute;
14+
use std::simd::{Simd, f32x4, f64x2, i32x4, i64x2};
15+
type PtrX2 = Simd<*const (), 2>;
16+
17+
// These tests use the "C" ABI so that the vectors in question aren't passed and
18+
// returned through memory (as they are in the "Rust" ABI), which greatly
19+
// simplifies seeing the difference between the in-operand cases vs the ones
20+
// that fall back to just using the `LocalKind::Memory` path.
21+
22+
// CHECK-LABEL: <2 x i64> @mixed_int(<4 x i32> %v)
23+
#[no_mangle]
24+
pub extern "C" fn mixed_int(v: i32x4) -> i64x2 {
25+
// CHECK-NOT: alloca
26+
// CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x i64>
27+
// CHECK: ret <2 x i64> %[[RET]]
28+
unsafe { transmute(v) }
29+
}
30+
31+
// CHECK-LABEL: <2 x double> @mixed_float(<4 x float> %v)
32+
#[no_mangle]
33+
pub extern "C" fn mixed_float(v: f32x4) -> f64x2 {
34+
// CHECK-NOT: alloca
35+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x double>
36+
// CHECK: ret <2 x double> %[[RET]]
37+
unsafe { transmute(v) }
38+
}
39+
40+
// CHECK-LABEL: <4 x i32> @float_int_same_lanes(<4 x float> %v)
41+
#[no_mangle]
42+
pub extern "C" fn float_int_same_lanes(v: f32x4) -> i32x4 {
43+
// CHECK-NOT: alloca
44+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <4 x i32>
45+
// CHECK: ret <4 x i32> %[[RET]]
46+
unsafe { transmute(v) }
47+
}
48+
49+
// CHECK-LABEL: <2 x double> @int_float_same_lanes(<2 x i64> %v)
50+
#[no_mangle]
51+
pub extern "C" fn int_float_same_lanes(v: i64x2) -> f64x2 {
52+
// CHECK-NOT: alloca
53+
// CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <2 x double>
54+
// CHECK: ret <2 x double> %[[RET]]
55+
unsafe { transmute(v) }
56+
}
57+
58+
// CHECK-LABEL: <2 x i64> @float_int_widen(<4 x float> %v)
59+
#[no_mangle]
60+
pub extern "C" fn float_int_widen(v: f32x4) -> i64x2 {
61+
// CHECK-NOT: alloca
62+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x i64>
63+
// CHECK: ret <2 x i64> %[[RET]]
64+
unsafe { transmute(v) }
65+
}
66+
67+
// CHECK-LABEL: <2 x double> @int_float_widen(<4 x i32> %v)
68+
#[no_mangle]
69+
pub extern "C" fn int_float_widen(v: i32x4) -> f64x2 {
70+
// CHECK-NOT: alloca
71+
// CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x double>
72+
// CHECK: ret <2 x double> %[[RET]]
73+
unsafe { transmute(v) }
74+
}
75+
76+
// CHECK-LABEL: <4 x i32> @float_int_narrow(<2 x double> %v)
77+
#[no_mangle]
78+
pub extern "C" fn float_int_narrow(v: f64x2) -> i32x4 {
79+
// CHECK-NOT: alloca
80+
// CHECK: %[[RET:.+]] = bitcast <2 x double> %v to <4 x i32>
81+
// CHECK: ret <4 x i32> %[[RET]]
82+
unsafe { transmute(v) }
83+
}
84+
85+
// CHECK-LABEL: <4 x float> @int_float_narrow(<2 x i64> %v)
86+
#[no_mangle]
87+
pub extern "C" fn int_float_narrow(v: i64x2) -> f32x4 {
88+
// CHECK-NOT: alloca
89+
// CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <4 x float>
90+
// CHECK: ret <4 x float> %[[RET]]
91+
unsafe { transmute(v) }
92+
}
93+
94+
// CHECK-LABEL: <2 x ptr> @float_ptr_same_lanes(<2 x double> %v)
95+
#[no_mangle]
96+
pub extern "C" fn float_ptr_same_lanes(v: f64x2) -> PtrX2 {
97+
// CHECK-NOT: alloca
98+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
99+
// CHECK-NOT: alloca
100+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
101+
// CHECK: store <2 x double> %v, ptr %[[TEMP]]
102+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
103+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
104+
// CHECK: ret <2 x ptr> %[[RET]]
105+
unsafe { transmute(v) }
106+
}
107+
108+
// CHECK-LABEL: <2 x double> @ptr_float_same_lanes(<2 x ptr> %v)
109+
#[no_mangle]
110+
pub extern "C" fn ptr_float_same_lanes(v: PtrX2) -> f64x2 {
111+
// CHECK-NOT: alloca
112+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
113+
// CHECK-NOT: alloca
114+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
115+
// CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
116+
// CHECK: %[[RET:.+]] = load <2 x double>, ptr %[[TEMP]]
117+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
118+
// CHECK: ret <2 x double> %[[RET]]
119+
unsafe { transmute(v) }
120+
}
121+
122+
// CHECK-LABEL: <2 x ptr> @int_ptr_same_lanes(<2 x i64> %v)
123+
#[no_mangle]
124+
pub extern "C" fn int_ptr_same_lanes(v: i64x2) -> PtrX2 {
125+
// CHECK-NOT: alloca
126+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
127+
// CHECK-NOT: alloca
128+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
129+
// CHECK: store <2 x i64> %v, ptr %[[TEMP]]
130+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
131+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
132+
// CHECK: ret <2 x ptr> %[[RET]]
133+
unsafe { transmute(v) }
134+
}
135+
136+
// CHECK-LABEL: <2 x i64> @ptr_int_same_lanes(<2 x ptr> %v)
137+
#[no_mangle]
138+
pub extern "C" fn ptr_int_same_lanes(v: PtrX2) -> i64x2 {
139+
// CHECK-NOT: alloca
140+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
141+
// CHECK-NOT: alloca
142+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
143+
// CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
144+
// CHECK: %[[RET:.+]] = load <2 x i64>, ptr %[[TEMP]]
145+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
146+
// CHECK: ret <2 x i64> %[[RET]]
147+
unsafe { transmute(v) }
148+
}
149+
150+
// CHECK-LABEL: <2 x ptr> @float_ptr_widen(<4 x float> %v)
151+
#[no_mangle]
152+
pub extern "C" fn float_ptr_widen(v: f32x4) -> PtrX2 {
153+
// CHECK-NOT: alloca
154+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
155+
// CHECK-NOT: alloca
156+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
157+
// CHECK: store <4 x float> %v, ptr %[[TEMP]]
158+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
159+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
160+
// CHECK: ret <2 x ptr> %[[RET]]
161+
unsafe { transmute(v) }
162+
}
163+
164+
// CHECK-LABEL: <2 x ptr> @int_ptr_widen(<4 x i32> %v)
165+
#[no_mangle]
166+
pub extern "C" fn int_ptr_widen(v: i32x4) -> PtrX2 {
167+
// CHECK-NOT: alloca
168+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
169+
// CHECK-NOT: alloca
170+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
171+
// CHECK: store <4 x i32> %v, ptr %[[TEMP]]
172+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
173+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
174+
// CHECK: ret <2 x ptr> %[[RET]]
175+
unsafe { transmute(v) }
176+
}

0 commit comments

Comments
 (0)