@@ -1832,6 +1832,48 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1832
1832
return lowerVECTOR_SHUFFLE_VSHUF4I (DL, Mask, VT, V1, V2, DAG);
1833
1833
}
1834
1834
1835
+ // / Lower VECTOR_SHUFFLE into XVPERM (if possible).
1836
+ static SDValue lowerVECTOR_SHUFFLE_XVPERM (const SDLoc &DL, ArrayRef<int > Mask,
1837
+ MVT VT, SDValue V1, SDValue V2,
1838
+ SelectionDAG &DAG) {
1839
+ // LoongArch LASX only have XVPERM_W.
1840
+ if (Mask.size () != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
1841
+ return SDValue ();
1842
+
1843
+ unsigned NumElts = VT.getVectorNumElements ();
1844
+ unsigned HalfSize = NumElts / 2 ;
1845
+ bool FrontLo = true , FrontHi = true ;
1846
+ bool BackLo = true , BackHi = true ;
1847
+
1848
+ auto inRange = [](int val, int low, int high) {
1849
+ return (val == -1 ) || (val >= low && val < high);
1850
+ };
1851
+
1852
+ for (unsigned i = 0 ; i < HalfSize; ++i) {
1853
+ int Fronti = Mask[i];
1854
+ int Backi = Mask[i + HalfSize];
1855
+
1856
+ FrontLo &= inRange (Fronti, 0 , HalfSize);
1857
+ FrontHi &= inRange (Fronti, HalfSize, NumElts);
1858
+ BackLo &= inRange (Backi, 0 , HalfSize);
1859
+ BackHi &= inRange (Backi, HalfSize, NumElts);
1860
+ }
1861
+
1862
+ // If both the lower and upper 128-bit parts access only one half of the
1863
+ // vector (either lower or upper), avoid using xvperm.w. The latency of
1864
+ // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
1865
+ if ((FrontLo && (BackLo || BackHi)) || (FrontHi && (BackLo || BackHi)))
1866
+ return SDValue ();
1867
+
1868
+ SmallVector<SDValue, 8 > Masks;
1869
+ for (unsigned i = 0 ; i < NumElts; ++i)
1870
+ Masks.push_back (Mask[i] == -1 ? DAG.getUNDEF (MVT::i64 )
1871
+ : DAG.getConstant (Mask[i], DL, MVT::i64 ));
1872
+ SDValue MaskVec = DAG.getBuildVector (MVT::v8i32, DL, Masks);
1873
+
1874
+ return DAG.getNode (LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
1875
+ }
1876
+
1835
1877
// / Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
1836
1878
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV (const SDLoc &DL, ArrayRef<int > Mask,
1837
1879
MVT VT, SDValue V1, SDValue V2,
@@ -2235,6 +2277,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2235
2277
return Result;
2236
2278
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, NewMask, VT, V1, V2, DAG)))
2237
2279
return Result;
2280
+ if ((Result = lowerVECTOR_SHUFFLE_XVPERM (DL, NewMask, VT, V1, V2, DAG)))
2281
+ return Result;
2238
2282
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle (DL, NewMask, VT,
2239
2283
V1, V2, DAG)))
2240
2284
return Result;
0 commit comments