Skip to content

broadcasts with assignment don't vectorize #1305

@matthias314

Description

@matthias314

I've tried out StaticArrays v1.9.13 on several processors (all amd64). Broadcasts without assignment vectorize, but broadcasts with assignment (such as `w .+= v` and `w .= v` below) do not: they compile to element-by-element scalar loads and stores, which makes them much slower.

julia> v = zero(SVector{8,Int32}); w = MVector(v);

julia> @code_native debuginfo=:none v + w
; Function Signature: +(StaticArraysCore.SArray{Tuple{8}, Int32, 1, 8}, StaticArraysCore.MArray{Tuple{8}, Int32, 1, 8})
# %bb.0:                                # %top
	#DEBUG_VALUE: +:a <- [DW_OP_deref] [$rsi+0]
	push	rbp
	mov	rbp, rsp
	mov	rax, rdi
	#DEBUG_VALUE: +:b <- [DW_OP_deref] undef
	#DEBUG_VALUE: +:b <- [DW_OP_deref] undef
	vmovdqu	ymm0, ymmword ptr [rsi]
	vpaddd	ymm0, ymm0, ymmword ptr [rdx]
	vmovdqu	ymmword ptr [rdi], ymm0
	pop	rbp
	vzeroupper
	ret

julia> f(v, w) = w .+= v;

julia> @code_native debuginfo=:none f(v, w)
; Function Signature: f(StaticArraysCore.SArray{Tuple{8}, Int32, 1, 8}, StaticArraysCore.MArray{Tuple{8}, Int32, 1, 8})
# %bb.0:                                # %top
	#DEBUG_VALUE: f:v <- [DW_OP_deref] [$rdi+0]
	push	rbp
	mov	rbp, rsp
	mov	rax, rsi
	#DEBUG_VALUE: f:w <- [DW_OP_deref] undef
	#DEBUG_VALUE: f:w <- [DW_OP_deref] undef
	mov	ecx, dword ptr [rdi]
	mov	edx, dword ptr [rdi + 4]
	mov	esi, dword ptr [rdi + 8]
	mov	r8d, dword ptr [rdi + 12]
	mov	r9d, dword ptr [rdi + 16]
	mov	r10d, dword ptr [rdi + 20]
	mov	r11d, dword ptr [rdi + 24]
	mov	edi, dword ptr [rdi + 28]
	add	dword ptr [rax], ecx
	add	dword ptr [rax + 4], edx
	add	dword ptr [rax + 8], esi
	add	dword ptr [rax + 12], r8d
	add	dword ptr [rax + 16], r9d
	add	dword ptr [rax + 20], r10d
	add	dword ptr [rax + 24], r11d
	add	dword ptr [rax + 28], edi
	pop	rbp
	ret

julia> g(v, w) = w .= v;

julia> @code_native debuginfo=:none g(v, w)
; Function Signature: g(StaticArraysCore.SArray{Tuple{8}, Int32, 1, 8}, StaticArraysCore.MArray{Tuple{8}, Int32, 1, 8})
# %bb.0:                                # %top
	#DEBUG_VALUE: g:v <- [DW_OP_deref] [$rdi+0]
	push	rbp
	mov	rbp, rsp
	mov	rax, rsi
	#DEBUG_VALUE: g:w <- [DW_OP_deref] undef
	#DEBUG_VALUE: g:w <- [DW_OP_deref] undef
	mov	ecx, dword ptr [rdi]
	mov	dword ptr [rsi], ecx
	mov	ecx, dword ptr [rdi + 4]
	mov	dword ptr [rsi + 4], ecx
	mov	ecx, dword ptr [rdi + 8]
	mov	dword ptr [rsi + 8], ecx
	mov	ecx, dword ptr [rdi + 12]
	mov	dword ptr [rsi + 12], ecx
	mov	ecx, dword ptr [rdi + 16]
	mov	dword ptr [rsi + 16], ecx
	mov	ecx, dword ptr [rdi + 20]
	mov	dword ptr [rsi + 20], ecx
	mov	ecx, dword ptr [rdi + 24]
	mov	dword ptr [rsi + 24], ecx
	mov	ecx, dword ptr [rdi + 28]
	mov	dword ptr [rsi + 28], ecx
	pop	rbp
	ret

julia> versioninfo()
Julia Version 1.11.4
Commit 8561cc3d68d (2025-03-10 11:36 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 4 × Intel(R) Core(TM) i3-10110U CPU @ 2.10GHz
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, skylake)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions