Skip to content

Commit 82e5c49

Browse files
committed
[DAGCombiner] Add combine for vector interleave of splats
This patch adds a DAG combine that looks for concat_vectors(vector_interleave(splat, splat, ...)), where all the splats are identical. For fixed-width vectors the DAG combine only occurs for interleave factors of 3 or more; however, it's not currently safe to test this for AArch64 since there isn't any lowering support for fixed-width interleaves, so I've only added fixed-width tests for RISCV.
1 parent 5d1f01f commit 82e5c49

File tree

4 files changed

+67
-419
lines changed

4 files changed

+67
-419
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25173,6 +25173,38 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
2517325173
return DAG.getNode(CastOpcode, DL, VT, NewConcat);
2517425174
}
2517525175

25176+
/// Fold a CONCAT_VECTORS whose operands are, in order, every result of one
/// VECTOR_INTERLEAVE node that is itself fed by a single repeated splat:
///   concat_vectors(vector_interleave(splat(x), splat(x), ...))
/// Interleaving identical splats leaves x in every lane, so the whole
/// expression is rewritten as one splat of x in the wider vector type.
///
/// \param N   The CONCAT_VECTORS node being combined.
/// \param DAG The DAG used to query the splat value and build the new node.
/// \returns The wide splat on a match, otherwise an empty SDValue.
static SDValue combineConcatVectorInterleave(SDNode *N, SelectionDAG &DAG) {
  const unsigned NumConcatOps = N->getNumOperands();

  SDValue Interleave = N->getOperand(0);
  if (Interleave.getOpcode() != ISD::VECTOR_INTERLEAVE)
    return SDValue();
  if (Interleave.getNumOperands() != NumConcatOps)
    return SDValue();

  // Operand Idx of the concat must be result Idx of that same interleave
  // node; SDValue equality compares both the node and the result number.
  SDNode *InterleaveNode = Interleave.getNode();
  for (unsigned Idx = 0; Idx != NumConcatOps; ++Idx)
    if (N->getOperand(Idx) != SDValue(InterleaveNode, Idx))
      return SDValue();

  // Every input of the interleave has to be one and the same value.
  SDValue Src = Interleave.getOperand(0);
  if (llvm::any_of(Interleave->ops(),
                   [&Src](SDValue In) { return In != Src; }))
    return SDValue();

  // The concat covers all sequential results of a single interleave whose
  // inputs are identical. The fold only applies if that input is a splat.
  SDValue SplatScalar = DAG.getSplatValue(Src);
  if (!SplatScalar)
    return SDValue();

  // Build the result type: same scalar type as the input, with the element
  // count multiplied by the number of concatenated sub-vectors.
  EVT NarrowVT = Src.getValueType();
  EVT WideVT =
      EVT::getVectorVT(*DAG.getContext(), NarrowVT.getScalarType(),
                       NarrowVT.getVectorElementCount() * NumConcatOps);
  return DAG.getSplat(WideVT, SDLoc(N), SplatScalar);
}
25207+
2517625208
// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
2517725209
// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
2517825210
// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
@@ -25397,6 +25429,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
2539725429
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
2539825430
}
2539925431

25432+
if (SDValue V = combineConcatVectorInterleave(N, DAG))
25433+
return V;
25434+
2540025435
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
2540125436
// FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
2540225437
if (SDValue V = combineConcatVectorOfScalars(N, DAG))

llvm/test/CodeGen/AArch64/sve-vector-interleave.ll

Lines changed: 12 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -541,20 +541,10 @@ define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale
541541
}
542542

543543
define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
544-
; SVE-LABEL: interleave2_same_const_splat_nxv4i16:
545-
; SVE: // %bb.0:
546-
; SVE-NEXT: mov z0.d, #3 // =0x3
547-
; SVE-NEXT: zip2 z1.d, z0.d, z0.d
548-
; SVE-NEXT: zip1 z0.d, z0.d, z0.d
549-
; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
550-
; SVE-NEXT: ret
551-
;
552-
; SME2-LABEL: interleave2_same_const_splat_nxv4i16:
553-
; SME2: // %bb.0:
554-
; SME2-NEXT: mov z0.d, #3 // =0x3
555-
; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
556-
; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
557-
; SME2-NEXT: ret
544+
; CHECK-LABEL: interleave2_same_const_splat_nxv4i16:
545+
; CHECK: // %bb.0:
546+
; CHECK-NEXT: mov z0.s, #3 // =0x3
547+
; CHECK-NEXT: ret
558548
%retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
559549
ret <vscale x 4 x i16> %retval
560550
}
@@ -581,22 +571,10 @@ define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
581571
}
582572

583573
define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
584-
; SVE-LABEL: interleave2_same_nonconst_splat_nxv4i16:
585-
; SVE: // %bb.0:
586-
; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
587-
; SVE-NEXT: mov z0.d, x0
588-
; SVE-NEXT: zip2 z1.d, z0.d, z0.d
589-
; SVE-NEXT: zip1 z0.d, z0.d, z0.d
590-
; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
591-
; SVE-NEXT: ret
592-
;
593-
; SME2-LABEL: interleave2_same_nonconst_splat_nxv4i16:
594-
; SME2: // %bb.0:
595-
; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
596-
; SME2-NEXT: mov z0.d, x0
597-
; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
598-
; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
599-
; SME2-NEXT: ret
574+
; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16:
575+
; CHECK: // %bb.0:
576+
; CHECK-NEXT: mov z0.s, w0
577+
; CHECK-NEXT: ret
600578
%ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
601579
%splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
602580
%retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
@@ -633,75 +611,10 @@ define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %
633611
}
634612

635613
define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
636-
; SVE-LABEL: interleave4_same_const_splat_nxv8i16:
637-
; SVE: // %bb.0:
638-
; SVE-NEXT: mov z0.d, #3 // =0x3
639-
; SVE-NEXT: zip1 z1.d, z0.d, z0.d
640-
; SVE-NEXT: zip1 z2.d, z1.d, z1.d
641-
; SVE-NEXT: zip2 z1.d, z1.d, z1.d
642-
; SVE-NEXT: uzp1 z2.s, z2.s, z0.s
643-
; SVE-NEXT: uzp1 z2.h, z2.h, z0.h
644-
; SVE-NEXT: uunpklo z2.s, z2.h
645-
; SVE-NEXT: uunpklo z2.d, z2.s
646-
; SVE-NEXT: uzp1 z1.s, z2.s, z1.s
647-
; SVE-NEXT: uzp1 z2.h, z1.h, z0.h
648-
; SVE-NEXT: zip2 z0.d, z0.d, z0.d
649-
; SVE-NEXT: uunpkhi z2.s, z2.h
650-
; SVE-NEXT: zip1 z3.d, z0.d, z0.d
651-
; SVE-NEXT: zip2 z0.d, z0.d, z0.d
652-
; SVE-NEXT: uunpkhi z2.d, z2.s
653-
; SVE-NEXT: uzp1 z2.s, z3.s, z2.s
654-
; SVE-NEXT: uzp1 z2.h, z1.h, z2.h
655-
; SVE-NEXT: uunpkhi z2.s, z2.h
656-
; SVE-NEXT: uunpklo z2.d, z2.s
657-
; SVE-NEXT: uzp1 z0.s, z2.s, z0.s
658-
; SVE-NEXT: uzp1 z0.h, z1.h, z0.h
659-
; SVE-NEXT: ret
660-
;
661-
; SME-ALL-LABEL: interleave4_same_const_splat_nxv8i16:
662-
; SME-ALL: // %bb.0:
663-
; SME-ALL-NEXT: mov z0.d, #3 // =0x3
664-
; SME-ALL-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
665-
; SME-ALL-NEXT: zip { z2.d, z3.d }, z0.d, z0.d
666-
; SME-ALL-NEXT: uzp1 z4.s, z2.s, z0.s
667-
; SME-ALL-NEXT: uzp1 z4.h, z4.h, z0.h
668-
; SME-ALL-NEXT: uunpklo z4.s, z4.h
669-
; SME-ALL-NEXT: uunpklo z4.d, z4.s
670-
; SME-ALL-NEXT: uzp1 z2.s, z4.s, z3.s
671-
; SME-ALL-NEXT: uzp1 z3.h, z2.h, z0.h
672-
; SME-ALL-NEXT: zip { z0.d, z1.d }, z1.d, z1.d
673-
; SME-ALL-NEXT: uunpkhi z3.s, z3.h
674-
; SME-ALL-NEXT: uunpkhi z3.d, z3.s
675-
; SME-ALL-NEXT: uzp1 z3.s, z0.s, z3.s
676-
; SME-ALL-NEXT: uzp1 z3.h, z2.h, z3.h
677-
; SME-ALL-NEXT: uunpkhi z3.s, z3.h
678-
; SME-ALL-NEXT: uunpklo z3.d, z3.s
679-
; SME-ALL-NEXT: uzp1 z0.s, z3.s, z1.s
680-
; SME-ALL-NEXT: uzp1 z0.h, z2.h, z0.h
681-
; SME-ALL-NEXT: ret
682-
;
683-
; SME2-256-LABEL: interleave4_same_const_splat_nxv8i16:
684-
; SME2-256: // %bb.0:
685-
; SME2-256-NEXT: mov z0.d, #3 // =0x3
686-
; SME2-256-NEXT: mov z1.d, z0.d
687-
; SME2-256-NEXT: mov z2.d, z0.d
688-
; SME2-256-NEXT: mov z3.d, z0.d
689-
; SME2-256-NEXT: zip { z0.d - z3.d }, { z0.d - z3.d }
690-
; SME2-256-NEXT: uzp1 z4.s, z0.s, z0.s
691-
; SME2-256-NEXT: uzp1 z4.h, z4.h, z0.h
692-
; SME2-256-NEXT: uunpklo z4.s, z4.h
693-
; SME2-256-NEXT: uunpklo z4.d, z4.s
694-
; SME2-256-NEXT: uzp1 z4.s, z4.s, z1.s
695-
; SME2-256-NEXT: uzp1 z5.h, z4.h, z0.h
696-
; SME2-256-NEXT: uunpkhi z5.s, z5.h
697-
; SME2-256-NEXT: uunpkhi z5.d, z5.s
698-
; SME2-256-NEXT: uzp1 z5.s, z2.s, z5.s
699-
; SME2-256-NEXT: uzp1 z5.h, z4.h, z5.h
700-
; SME2-256-NEXT: uunpkhi z5.s, z5.h
701-
; SME2-256-NEXT: uunpklo z5.d, z5.s
702-
; SME2-256-NEXT: uzp1 z0.s, z5.s, z3.s
703-
; SME2-256-NEXT: uzp1 z0.h, z4.h, z0.h
704-
; SME2-256-NEXT: ret
614+
; CHECK-LABEL: interleave4_same_const_splat_nxv8i16:
615+
; CHECK: // %bb.0:
616+
; CHECK-NEXT: mov z0.h, #3 // =0x3
617+
; CHECK-NEXT: ret
705618
%retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
706619
ret <vscale x 8 x i16> %retval
707620
}

0 commit comments

Comments
 (0)