[LoongArch] Use xvperm.w for cross-lane access within a single vector #151634

zhaoqi5 · 2025-08-01T03:44:54Z

No description provided.

llvmbot · 2025-08-01T03:45:22Z

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/151634.diff

2 Files Affected:

(modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+44)
(modified) llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll (+4-14)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4f534f1666eaa..5f2512d33b96c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1832,6 +1832,48 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
 }
 
+/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+  // LoongArch LASX only have XVPERM_W.
+  if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
+    return SDValue();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfSize = NumElts / 2;
+  bool FrontLo = true, FrontHi = true;
+  bool BackLo = true, BackHi = true;
+
+  auto inRange = [](int val, int low, int high) {
+    return (val == -1) || (val >= low && val < high);
+  };
+
+  for (unsigned i = 0; i < HalfSize; ++i) {
+    int Fronti = Mask[i];
+    int Backi = Mask[i + HalfSize];
+
+    FrontLo &= inRange(Fronti, 0, HalfSize);
+    FrontHi &= inRange(Fronti, HalfSize, NumElts);
+    BackLo &= inRange(Backi, 0, HalfSize);
+    BackHi &= inRange(Backi, HalfSize, NumElts);
+  }
+
+  // If both the lower and upper 128-bit parts access only one half of the
+  // vector (either lower or upper), avoid using xvperm.w. The latency of
+  // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
+  if ((FrontLo && (BackLo || BackHi)) || (FrontHi && (BackLo || BackHi)))
+    return SDValue();
+
+  SmallVector<SDValue, 8> Masks;
+  for (unsigned i = 0; i < NumElts; ++i)
+    Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
+                                  : DAG.getConstant(Mask[i], DL, MVT::i64));
+  SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
+
+  return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
+}
+
 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                             MVT VT, SDValue V1, SDValue V2,
@@ -2235,6 +2277,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
       return Result;
     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
       return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
     if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
                                                              V1, V2, DAG)))
       return Result;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index fed085843485a..5f76d9951df9c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -61,13 +61,8 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
 ; CHECK-LABEL: shuffle_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI4_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    xvperm.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %shuffle
@@ -117,13 +112,8 @@ define <8 x float> @shuffle_v8f32(<8 x float> %a) {
 ; CHECK-LABEL: shuffle_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI8_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT:    xvperm.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
   ret <8 x float> %shuffle

tangaac · 2025-08-01T07:07:22Z

tangaac/loong-opt-cov-ts@51dae8b

zhaoqi5 requested a review from tangaac August 1, 2025 03:45

llvmbot added the backend:loongarch label Aug 1, 2025

zhaoqi5 requested a review from SixWeining August 1, 2025 03:45

Base automatically changed from users/zhaoqi5/test-permute-and-shuffle-samelane to users/zhaoqi5/opt-extractelement-idx August 9, 2025 10:11

zhaoqi5 force-pushed the users/zhaoqi5/opt-extractelement-idx branch from d764815 to f8b7d4c Compare August 9, 2025 11:07

zhaoqi5 added 2 commits August 9, 2025 19:09

[LoongArch] Use xvperm.w for cross-lane access within a single vector

f759464

opt code style

f934beb

zhaoqi5 force-pushed the users/zhaoqi5/opt-xvperm branch from 875f353 to f934beb Compare August 9, 2025 11:13

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[LoongArch] Use xvperm.w for cross-lane access within a single vector #151634

[LoongArch] Use xvperm.w for cross-lane access within a single vector #151634

zhaoqi5 commented Aug 1, 2025

Uh oh!

llvmbot commented Aug 1, 2025

Uh oh!

tangaac commented Aug 1, 2025

Uh oh!

Uh oh!

[LoongArch] Use xvperm.w for cross-lane access within a single vector #151634

Are you sure you want to change the base?

[LoongArch] Use xvperm.w for cross-lane access within a single vector #151634

Conversation

zhaoqi5 commented Aug 1, 2025

Uh oh!

llvmbot commented Aug 1, 2025

Uh oh!

tangaac commented Aug 1, 2025

Uh oh!

Uh oh!