Skip to content

Commit cb09508

Browse files
committed
[DAGCombiner] Add combine for vector interleave of splats
This patch adds two DAG combines: 1. vector_interleave(splat, splat, ...) -> {splat,splat,...} 2. concat_vectors(splat, splat, ...) -> wide_splat, where all the input splats are identical. Both of these together enable us to fold concat_vectors(vector_interleave(splat, splat, ...)) into a wide splat. Post-legalisation we must only do the concat_vectors combine if the wider type and splat operation are legal. For fixed-width vectors the DAG combine only occurs for interleave factors of 3 or more; however, it's not currently safe to test this for AArch64 since there isn't any lowering support for fixed-width interleaves. I've only added fixed-width tests for RISCV.
1 parent d8c97ca commit cb09508

File tree

7 files changed

+132
-556
lines changed

7 files changed

+132
-556
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,11 @@ namespace {
331331
return CombineTo(N, To, 2, AddTo);
332332
}
333333

334+
/// Convenience overload of CombineTo that takes the replacement values as a
/// SmallVectorImpl. It forwards to the (node, pointer, count, AddTo) overload
/// using the vector's data() and size().
/// \p To is dereferenced unconditionally, so it must not be null.
SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335+
bool AddTo = true) {
336+
return CombineTo(N, To->data(), To->size(), AddTo);
337+
}
338+
334339
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
335340

336341
private:
@@ -541,6 +546,7 @@ namespace {
541546
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
542547
SDValue visitBUILD_VECTOR(SDNode *N);
543548
SDValue visitCONCAT_VECTORS(SDNode *N);
549+
SDValue visitVECTOR_INTERLEAVE(SDNode *N);
544550
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
545551
SDValue visitVECTOR_SHUFFLE(SDNode *N);
546552
SDValue visitSCALAR_TO_VECTOR(SDNode *N);
@@ -2021,6 +2027,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
20212027
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
20222028
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
20232029
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2030+
case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
20242031
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
20252032
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
20262033
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
@@ -25274,6 +25281,28 @@ static SDValue combineConcatVectorOfShuffleAndItsOperands(
2527425281
return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
2527525282
}
2527625283

25284+
/// Fold concat_vectors(splat(X), splat(X), ...) into a single SPLAT_VECTOR of
/// X with the concatenation's result type.
/// \p N is the CONCAT_VECTORS node being combined. \p LegalTypes and
/// \p LegalOperations gate the fold on the legality of the wide type and of
/// SPLAT_VECTOR on that type respectively.
/// Returns the new SPLAT_VECTOR node, or an empty SDValue if the fold does
/// not apply.
static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25285+
const TargetLowering &TLI,
25286+
bool LegalTypes,
25287+
bool LegalOperations) {
25288+
EVT VT = N->getValueType(0);
25289+
25290+
// Post-legalization we can only create wider SPLAT_VECTOR operations if both
25291+
// the type and operation are legal. The Hexagon target has custom
25292+
// legalization for SPLAT_VECTOR that splits the operation into two parts and
25293+
// concatenates them. Therefore, custom lowering must also be rejected in
25294+
// order to avoid an infinite loop.
25295+
if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25296+
(LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25297+
return SDValue();
25298+
25299+
// Only fold when every operand is the same value and that value is a
// SPLAT_VECTOR; the wide splat then reuses that splat's scalar operand.
SDValue Op0 = N->getOperand(0);
25300+
if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25301+
return SDValue();
25302+
25303+
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25304+
}
25305+
2527725306
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
2527825307
// If we only have one input vector, we don't need to do any concatenation.
2527925308
if (N->getNumOperands() == 1)
@@ -25397,6 +25426,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
2539725426
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
2539825427
}
2539925428

25429+
if (SDValue V =
25430+
combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
25431+
return V;
25432+
2540025433
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
2540125434
// FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
2540225435
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
@@ -25465,6 +25498,22 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
2546525498
return SDValue();
2546625499
}
2546725500

25501+
/// Combine for VECTOR_INTERLEAVE nodes: when all operands are the same value
/// and that value is a splat, replace the node's results with its operands.
/// Returns an empty SDValue when the operands differ or are not a splat.
SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
25502+
// Check to see if all operands are identical.
25503+
if (!llvm::all_equal(N->op_values()))
25504+
return SDValue();
25505+
25506+
// Check to see if the identical operand is a splat.
// NOTE(review): only the presence of a splat value is tested here; the
// returned value itself is never used. Consider a boolean splat query if
// one is available -- TODO confirm.
SDValue Splat = DAG.getSplatValue(N->getOperand(0));
25508+
if (!Splat)
25509+
return SDValue();
25510+
25511+
// Every operand is the same splat, so result i is simply replaced with
// operand i (all of which equal the first operand).
SmallVector<SDValue, 4> Ops;
25513+
Ops.append(N->op_values().begin(), N->op_values().end());
25514+
return CombineTo(N, &Ops);
25515+
}
25516+
2546825517
// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
2546925518
// if the subvector can be sourced for free.
2547025519
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,19 @@ target triple = "aarch64"
1414
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
1515
; CHECK-LABEL: complex_mul_v2f64:
1616
; CHECK: // %bb.0: // %entry
17+
; CHECK-NEXT: movi v0.2d, #0000000000000000
1718
; CHECK-NEXT: movi v1.2d, #0000000000000000
1819
; CHECK-NEXT: mov w8, #100 // =0x64
19-
; CHECK-NEXT: cntd x9
2020
; CHECK-NEXT: whilelo p1.d, xzr, x8
21+
; CHECK-NEXT: cntd x9
2122
; CHECK-NEXT: rdvl x10, #2
22-
; CHECK-NEXT: mov x11, x9
2323
; CHECK-NEXT: ptrue p0.d
24-
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
25-
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
24+
; CHECK-NEXT: mov x11, x9
2625
; CHECK-NEXT: .LBB0_1: // %vector.body
2726
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
2827
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
29-
; CHECK-NEXT: mov z6.d, z1.d
30-
; CHECK-NEXT: mov z7.d, z0.d
28+
; CHECK-NEXT: mov z6.d, z0.d
29+
; CHECK-NEXT: mov z7.d, z1.d
3130
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
3231
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
3332
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
@@ -39,14 +38,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
3938
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
4039
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
4140
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
42-
; CHECK-NEXT: mov z0.d, p2/m, z7.d
43-
; CHECK-NEXT: mov z1.d, p1/m, z6.d
41+
; CHECK-NEXT: mov z1.d, p2/m, z7.d
42+
; CHECK-NEXT: mov z0.d, p1/m, z6.d
4443
; CHECK-NEXT: whilelo p1.d, x11, x8
4544
; CHECK-NEXT: add x11, x11, x9
4645
; CHECK-NEXT: b.mi .LBB0_1
4746
; CHECK-NEXT: // %bb.2: // %exit.block
48-
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
49-
; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
47+
; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
48+
; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
5049
; CHECK-NEXT: faddv d0, p0, z2.d
5150
; CHECK-NEXT: faddv d1, p0, z1.d
5251
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -111,21 +110,20 @@ exit.block: ; preds = %vector.body
111110
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
112111
; CHECK-LABEL: complex_mul_predicated_v2f64:
113112
; CHECK: // %bb.0: // %entry
113+
; CHECK-NEXT: movi v0.2d, #0000000000000000
114114
; CHECK-NEXT: movi v1.2d, #0000000000000000
115115
; CHECK-NEXT: cntd x9
116-
; CHECK-NEXT: mov w11, #100 // =0x64
117116
; CHECK-NEXT: neg x10, x9
117+
; CHECK-NEXT: mov w11, #100 // =0x64
118118
; CHECK-NEXT: ptrue p0.d
119119
; CHECK-NEXT: mov x8, xzr
120120
; CHECK-NEXT: and x10, x10, x11
121121
; CHECK-NEXT: rdvl x11, #2
122-
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
123-
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
124122
; CHECK-NEXT: .LBB1_1: // %vector.body
125123
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
126124
; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
127-
; CHECK-NEXT: mov z6.d, z1.d
128-
; CHECK-NEXT: mov z7.d, z0.d
125+
; CHECK-NEXT: mov z6.d, z0.d
126+
; CHECK-NEXT: mov z7.d, z1.d
129127
; CHECK-NEXT: add x8, x8, x9
130128
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
131129
; CHECK-NEXT: cmp x10, x8
@@ -141,12 +139,12 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
141139
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
142140
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
143141
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
144-
; CHECK-NEXT: mov z0.d, p2/m, z7.d
145-
; CHECK-NEXT: mov z1.d, p1/m, z6.d
142+
; CHECK-NEXT: mov z1.d, p2/m, z7.d
143+
; CHECK-NEXT: mov z0.d, p1/m, z6.d
146144
; CHECK-NEXT: b.ne .LBB1_1
147145
; CHECK-NEXT: // %bb.2: // %exit.block
148-
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
149-
; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
146+
; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
147+
; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
150148
; CHECK-NEXT: faddv d0, p0, z2.d
151149
; CHECK-NEXT: faddv d1, p0, z1.d
152150
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -213,21 +211,20 @@ exit.block: ; preds = %vector.body
213211
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
214212
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
215213
; CHECK: // %bb.0: // %entry
214+
; CHECK-NEXT: movi v0.2d, #0000000000000000
216215
; CHECK-NEXT: movi v1.2d, #0000000000000000
217216
; CHECK-NEXT: mov w8, #100 // =0x64
218-
; CHECK-NEXT: cntd x9
219217
; CHECK-NEXT: whilelo p1.d, xzr, x8
218+
; CHECK-NEXT: cntd x9
220219
; CHECK-NEXT: rdvl x10, #2
221-
; CHECK-NEXT: cnth x11
222220
; CHECK-NEXT: ptrue p0.d
221+
; CHECK-NEXT: cnth x11
223222
; CHECK-NEXT: mov x12, x9
224-
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
225-
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
226223
; CHECK-NEXT: .LBB2_1: // %vector.body
227224
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
228225
; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
229-
; CHECK-NEXT: mov z6.d, z1.d
230-
; CHECK-NEXT: mov z7.d, z0.d
226+
; CHECK-NEXT: mov z6.d, z0.d
227+
; CHECK-NEXT: mov z7.d, z1.d
231228
; CHECK-NEXT: add x2, x2, x11
232229
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
233230
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
@@ -243,14 +240,14 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
243240
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
244241
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
245242
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
246-
; CHECK-NEXT: mov z0.d, p2/m, z7.d
247-
; CHECK-NEXT: mov z1.d, p1/m, z6.d
243+
; CHECK-NEXT: mov z1.d, p2/m, z7.d
244+
; CHECK-NEXT: mov z0.d, p1/m, z6.d
248245
; CHECK-NEXT: whilelo p1.d, x12, x8
249246
; CHECK-NEXT: add x12, x12, x9
250247
; CHECK-NEXT: b.mi .LBB2_1
251248
; CHECK-NEXT: // %bb.2: // %exit.block
252-
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
253-
; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
249+
; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
250+
; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
254251
; CHECK-NEXT: faddv d0, p0, z2.d
255252
; CHECK-NEXT: faddv d1, p0, z1.d
256253
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,14 @@ target triple = "aarch64"
1414
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
1515
; CHECK-LABEL: complex_mul_v2f64:
1616
; CHECK: // %bb.0: // %entry
17+
; CHECK-NEXT: movi v0.2d, #0000000000000000
1718
; CHECK-NEXT: movi v1.2d, #0000000000000000
1819
; CHECK-NEXT: cntd x8
19-
; CHECK-NEXT: mov w10, #100 // =0x64
2020
; CHECK-NEXT: neg x9, x8
21+
; CHECK-NEXT: mov w10, #100 // =0x64
2122
; CHECK-NEXT: ptrue p0.d
2223
; CHECK-NEXT: and x9, x9, x10
2324
; CHECK-NEXT: rdvl x10, #2
24-
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
25-
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2625
; CHECK-NEXT: .LBB0_1: // %vector.body
2726
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
2827
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
@@ -32,14 +31,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
3231
; CHECK-NEXT: ldr z5, [x1]
3332
; CHECK-NEXT: add x1, x1, x10
3433
; CHECK-NEXT: add x0, x0, x10
35-
; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
36-
; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
37-
; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
38-
; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
34+
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
35+
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
36+
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
37+
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
3938
; CHECK-NEXT: b.ne .LBB0_1
4039
; CHECK-NEXT: // %bb.2: // %exit.block
41-
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
42-
; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
40+
; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
41+
; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
4342
; CHECK-NEXT: faddv d0, p0, z2.d
4443
; CHECK-NEXT: faddv d1, p0, z1.d
4544
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -183,17 +182,16 @@ exit.block: ; preds = %vector.body
183182
define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
184183
; CHECK-LABEL: complex_mul_v2f64_unrolled:
185184
; CHECK: // %bb.0: // %entry
185+
; CHECK-NEXT: movi v0.2d, #0000000000000000
186186
; CHECK-NEXT: movi v1.2d, #0000000000000000
187187
; CHECK-NEXT: cntw x8
188-
; CHECK-NEXT: mov w10, #1000 // =0x3e8
188+
; CHECK-NEXT: movi v2.2d, #0000000000000000
189+
; CHECK-NEXT: movi v3.2d, #0000000000000000
189190
; CHECK-NEXT: neg x9, x8
191+
; CHECK-NEXT: mov w10, #1000 // =0x3e8
190192
; CHECK-NEXT: ptrue p0.d
191193
; CHECK-NEXT: and x9, x9, x10
192194
; CHECK-NEXT: rdvl x10, #4
193-
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
194-
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
195-
; CHECK-NEXT: mov z2.d, z1.d
196-
; CHECK-NEXT: mov z3.d, z0.d
197195
; CHECK-NEXT: .LBB2_1: // %vector.body
198196
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
199197
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
@@ -207,20 +205,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
207205
; CHECK-NEXT: ldr z18, [x1, #3, mul vl]
208206
; CHECK-NEXT: ldr z19, [x1, #2, mul vl]
209207
; CHECK-NEXT: add x1, x1, x10
210-
; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
211-
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
208+
; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
209+
; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
212210
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
213211
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
214-
; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
215-
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
212+
; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
213+
; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
216214
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
217215
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
218216
; CHECK-NEXT: b.ne .LBB2_1
219217
; CHECK-NEXT: // %bb.2: // %exit.block
220218
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
221-
; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d
219+
; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
222220
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
223-
; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d
221+
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
224222
; CHECK-NEXT: fadd z1.d, z4.d, z5.d
225223
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
226224
; CHECK-NEXT: faddv d0, p0, z1.d
@@ -310,15 +308,15 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
310308
; CHECK-LABEL: reduction_mix:
311309
; CHECK: // %bb.0: // %entry
312310
; CHECK-NEXT: movi v2.2d, #0000000000000000
311+
; CHECK-NEXT: movi v0.2d, #0000000000000000
313312
; CHECK-NEXT: cntd x9
314-
; CHECK-NEXT: mov w11, #100 // =0x64
313+
; CHECK-NEXT: movi v1.2d, #0000000000000000
315314
; CHECK-NEXT: neg x10, x9
315+
; CHECK-NEXT: mov w11, #100 // =0x64
316316
; CHECK-NEXT: ptrue p0.d
317317
; CHECK-NEXT: mov x8, xzr
318318
; CHECK-NEXT: and x10, x10, x11
319319
; CHECK-NEXT: rdvl x11, #2
320-
; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
321-
; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
322320
; CHECK-NEXT: .LBB3_1: // %vector.body
323321
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
324322
; CHECK-NEXT: ldr z3, [x0]
@@ -327,13 +325,13 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
327325
; CHECK-NEXT: ld1w { z5.d }, p0/z, [x3, x8, lsl #2]
328326
; CHECK-NEXT: add x8, x8, x9
329327
; CHECK-NEXT: cmp x10, x8
330-
; CHECK-NEXT: fadd z0.d, z4.d, z0.d
331-
; CHECK-NEXT: fadd z1.d, z3.d, z1.d
328+
; CHECK-NEXT: fadd z1.d, z4.d, z1.d
329+
; CHECK-NEXT: fadd z0.d, z3.d, z0.d
332330
; CHECK-NEXT: add z2.d, z5.d, z2.d
333331
; CHECK-NEXT: b.ne .LBB3_1
334332
; CHECK-NEXT: // %bb.2: // %middle.block
335-
; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d
336-
; CHECK-NEXT: uzp1 z1.d, z1.d, z0.d
333+
; CHECK-NEXT: uzp2 z3.d, z0.d, z1.d
334+
; CHECK-NEXT: uzp1 z1.d, z0.d, z1.d
337335
; CHECK-NEXT: uaddv d2, p0, z2.d
338336
; CHECK-NEXT: faddv d0, p0, z3.d
339337
; CHECK-NEXT: faddv d1, p0, z1.d

llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -211,18 +211,3 @@ define <4 x i16> @interleave2_diff_nonconst_splat_v4i16(i16 %a, i16 %b) {
211211
ret <4 x i16> %retval
212212
}
213213

214-
; Float declarations
215-
declare <4 x half> @llvm.vector.interleave2.v4f16(<2 x half>, <2 x half>)
216-
declare <8 x half> @llvm.vector.interleave2.v8f16(<4 x half>, <4 x half>)
217-
declare <16 x half> @llvm.vector.interleave2.v16f16(<8 x half>, <8 x half>)
218-
declare <4 x float> @llvm.vector.interleave2.v4f32(<2 x float>, <2 x float>)
219-
declare <8 x float> @llvm.vector.interleave2.v8f32(<4 x float>, <4 x float>)
220-
declare <4 x double> @llvm.vector.interleave2.v4f64(<2 x double>, <2 x double>)
221-
222-
; Integer declarations
223-
declare <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8>, <16 x i8>)
224-
declare <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16>, <8 x i16>)
225-
declare <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32>, <4 x i32>)
226-
declare <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64>, <2 x i64>)
227-
declare <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16>, <2 x i16>)
228-
declare <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>)

0 commit comments

Comments
 (0)