From 1ceacf55a0e207c08bbedfea422c0842a4983e3d Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 24 Jun 2025 20:19:10 +0800 Subject: [PATCH] LoongArch64 LSX fast-path for `str.contains(&str)` Benchmark results with LLVM 21 on LA664: ``` OLD: test bench_is_contained_in ... bench: 43.63 ns/iter (+/- 0.04) NEW: test bench_is_contained_in ... bench: 12.81 ns/iter (+/- 0.01) ``` --- library/core/src/str/pattern.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index bcbbb11c83b2f..e116b13838323 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -996,7 +996,10 @@ impl<'b> Pattern for &'b str { return haystack.as_bytes().contains(&self.as_bytes()[0]); } - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + #[cfg(any( + all(target_arch = "x86_64", target_feature = "sse2"), + all(target_arch = "loongarch64", target_feature = "lsx") + ))] if self.len() <= 32 { if let Some(result) = simd_contains(self, haystack) { return result; @@ -1770,11 +1773,18 @@ impl TwoWayStrategy for RejectAndMatch { /// If we ever ship std with for x86-64-v3 or adapt this for other platforms then wider vectors /// should be evaluated. /// +/// Similarly, on LoongArch the 128-bit LSX vector extension is the baseline, +/// so we also use `u8x16` there. Wider vector widths may be considered +/// for future LoongArch extensions (e.g., LASX). +/// /// For haystacks smaller than vector-size + needle length it falls back to /// a naive O(n*m) search so this implementation should not be called on larger needles. /// /// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2 -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +#[cfg(any( + all(target_arch = "x86_64", target_feature = "sse2"), + all(target_arch = "loongarch64", target_feature = "lsx") +))] #[inline] fn simd_contains(needle: &str, haystack: &str) -> Option { let needle = needle.as_bytes(); @@ -1906,7 +1916,10 @@ fn simd_contains(needle: &str, haystack: &str) -> Option { /// # Safety /// /// Both slices must have the same length. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86 +#[cfg(any( + all(target_arch = "x86_64", target_feature = "sse2"), + all(target_arch = "loongarch64", target_feature = "lsx") +))] #[inline] unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool { debug_assert_eq!(x.len(), y.len());