-
Notifications
You must be signed in to change notification settings - Fork 14.5k
Closed
Labels
backend:X86, question — A question, not bug report. Check out https://llvm.org/docs/GettingInvolved.html instead!
Description
The x86 backend incorrectly optimizes a sequence involving @llvm.x86.sse41.pblendvb
when only a subset of the vector elements is used, resulting in code that ignores the condition and returns one of the inputs unconditionally.
Input LLVM IR
; Reproducer: blends %t and %f with the SSE4.1 pblendvb intrinsic, using %cond
; widened to bytes as the mask. Every vector is first passed through a shuffle
; that keeps only lanes 0-2. %__mask is not used in the body.
define linkonce_odr <16 x i8> @select_partial(<16 x i1> %cond, <16 x i8> %t, <16 x i8> %f, <16 x i8> %__mask) local_unnamed_addr #2 {
; zext turns each i1 lane into the byte 0 or 1, so bit 7 of every mask byte is 0.
%cond_toStorageBool = zext <16 x i1> %cond to <16 x i8>
; The three shuffles below keep lanes 0-2 of their first operand (indices 0-2);
; lanes 3-15 use indices 19-31, i.e. the undef elements of the second operand.
%res.i = shufflevector <16 x i8> %cond_toStorageBool, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%res.i186 = shufflevector <16 x i8> %t, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%res.i187 = shufflevector <16 x i8> %f, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; NOTE(review): PBLENDVB picks the second source for a byte only when the most
; significant bit of the corresponding mask byte is set (see the Intel SDM).
; With a 0/1 mask that bit is never set, so the first operand (%res.i187, derived
; from %f) is selected in every lane. A sign-extended mask (0x00/0xFF) would be
; needed for this to act like a select on %cond -- confirm against the intrinsic's
; documentation.
%blend.i = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %res.i187, <16 x i8> %res.i186, <16 x i8> %res.i)
; Keep lanes 0-2 of the blend result; the remaining lanes are undef.
%blend.i189 = shufflevector <16 x i8> %blend.i, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %blend.i189
}
The backend incorrectly concludes that the blend operation can be eliminated, even though the first 3 elements should still be conditionally selected based on the condition vector.
# AT&T syntax (source first): copy xmm2 into the return register xmm0 and return.
# No blend instruction is emitted. xmm2 presumably holds %f, the third vector
# argument -- confirm against the target calling convention.
movaps %xmm2, %xmm0
retq
The equivalent (IMO) code using select
produces the expected instructions:
; Variant using the generic select instruction, which consumes the <16 x i1>
; condition directly instead of a byte mask. %__mask is not used in the body.
define linkonce_odr <16 x i8> @select_partial(<16 x i1> %cond, <16 x i8> %t, <16 x i8> %f, <16 x i8> %__mask) local_unnamed_addr #2 {
; Per lane: %t where %cond is true, %f otherwise.
%1 = select <16 x i1> %cond, <16 x i8> %t, <16 x i8> %f
; Keeps lanes 0-2 of %1; lanes 3-15 use indices 19-31, the undef elements of the
; second operand.
%2 = shufflevector <16 x i8> %1, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; NOTE(review): this returns %1, not %2, so the shufflevector above is unused and
; all 16 lanes of the select result are returned.
ret <16 x i8> %1
}
# Compiler output for the select-based variant, in Intel syntax (destination first).
select_partial: # @select_partial
# Shift every 16-bit word left by 7: bit 0 of each byte lands in bit 7 of that
# byte, the bit PBLENDVB tests.
psllw xmm0, 7
# Per byte: where the mask bit in xmm0 is set, replace the byte in xmm2 with the
# byte from xmm1.
pblendvb xmm2, xmm1, xmm0
# Move the blended vector into xmm0 for return.
movdqa xmm0, xmm2
ret
Compiler explorer link: https://ispc.godbolt.org/z/b64cT6MrK
Metadata
Metadata
Assignees
Labels
backend:X86, question — A question, not bug report. Check out https://llvm.org/docs/GettingInvolved.html instead!