Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 25 additions & 38 deletions src/general/smoothing_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ struct SchoenbergCubicSplineKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end

# We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl.
# Use `//` to preserve the type of `q`.
result = 1 // 4 * (2 - q)^3
result = 1 * (2 - q)^3 / 4
result = result - (q < 1) * (1 - q)^3

# Zero out result if q >= 2
Expand All @@ -160,7 +160,7 @@ end

# We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
# Use `//` to preserve the type of `q`.
result = -3 // 4 * (2 - q)^2
result = -3 * (2 - q)^2 / 4
result = result + 3 * (q < 1) * (1 - q)^2

# Zero out result if q >= 2
Expand All @@ -172,8 +172,8 @@ end

@inline compact_support(::SchoenbergCubicSplineKernel, h) = 2 * h

@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / 3h
# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
# Note that `2 // 3 / h` saves one instruction but is significantly slower on GPUs (for now)
@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / (3 * h)
@inline normalization_factor(::SchoenbergCubicSplineKernel{2}, h) = 10 / (pi * h^2 * 7)
@inline normalization_factor(::SchoenbergCubicSplineKernel{3}, h) = 1 / (pi * h^3)

Expand Down Expand Up @@ -262,10 +262,9 @@ end
return result
end

@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 // 2 * h
@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 * h / 2

@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / 24h
# `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`.
@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / (24 * h)
@inline normalization_factor(::SchoenbergQuarticSplineKernel{2}, h) = 96 / (pi * h^2 * 1199)
@inline normalization_factor(::SchoenbergQuarticSplineKernel{3}, h) = 1 / (pi * h^3 * 20)

Expand Down Expand Up @@ -343,15 +342,14 @@ end

@inline compact_support(::SchoenbergQuinticSplineKernel, h) = 3 * h

@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / 120h
# `478 * pi` is always `Float64`. `pi * h^2 * 478` preserves the type of `h`.
@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / (120 * h)
@inline normalization_factor(::SchoenbergQuinticSplineKernel{2}, h) = 7 / (pi * h^2 * 478)
@inline normalization_factor(::SchoenbergQuinticSplineKernel{3}, h) = 1 / (pi * h^3 * 120)

abstract type AbstractWendlandKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end

# Compact support for all Wendland kernels
@inline compact_support(::AbstractWendlandKernel, h) = 2h
@inline compact_support(::AbstractWendlandKernel, h) = 2 * h

@doc raw"""
WendlandC2Kernel{NDIMS}()
Expand Down Expand Up @@ -390,24 +388,19 @@ struct WendlandC2Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
@fastpow @inline function kernel(kernel::WendlandC2Kernel, r::Real, h)
q = r / h

result = (1 - q / 2)^4 * (2q + 1)
result = (1 - q / 2)^4 * (2 * q + 1)

# Zero out result if q >= 2
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))

return result
end

@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC2Kernel, r::Real, h)
@inline function kernel_deriv(kernel::WendlandC2Kernel, r::Real, h)
inner_deriv = 1 / h
q = r * inner_deriv

q1_3 = (1 - q / 2)^3
q1_4 = (1 - q / 2)^4

# We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl
result = -2 * q1_3 * (2q + 1)
result = result + q1_4 * 2
result = -5 * (1 - q / 2)^3 * q

# Zero out result if q >= 2
result = ifelse(q < 2,
Expand All @@ -416,9 +409,9 @@ end
return result
end

@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2) / 4
# `2 * pi` is always `Float64`. `pi * h^3 * 2` preserves the type of `h`.
@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 2) / 8
# Note that `7 // 4` saves one instruction but is significantly slower on GPUs (for now)
@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2 * 4)
@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 16)

@doc raw"""
WendlandC4Kernel{NDIMS}()
Expand Down Expand Up @@ -457,21 +450,18 @@ struct WendlandC4Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
@fastpow @inline function kernel(kernel::WendlandC4Kernel, r::Real, h)
q = r / h

result = (1 - q / 2)^6 * (35q^2 / 12 + 3q + 1)
result = (1 - q / 2)^6 * (35 * q^2 / 12 + 3 * q + 1)

# Zero out result if q >= 2
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))

return result
end

@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
@fastpow @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
q = r / h

# Use `//` to preserve the type of `q`
term1 = (1 - q / 2)^6 * (3 + 35 // 6 * q)
term2 = 3 * (1 - q / 2)^5 * (1 + 3q + 35 // 12 * q^2)
derivative = term1 - term2
derivative = -7 * q / 3 * (2 + 5 * q) * (1 - q / 2)^5

# Zero out result if q >= 2
result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
Expand All @@ -480,9 +470,8 @@ end
return result
end

@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2) / 4
# `32 * pi` is always `Float64`. `pi * h^2 * 32` preserves the type of `h`.
@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 32) / 8
@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2 * 4)
@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 256)

@doc raw"""
WendlandC6Kernel{NDIMS}()
Expand Down Expand Up @@ -521,7 +510,7 @@ struct WendlandC6Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
@fastpow @inline function kernel(kernel::WendlandC6Kernel, r::Real, h)
q = r / h

result = (1 - q / 2)^8 * (4q^3 + 25q^2 / 4 + 4q + 1)
result = (1 - q / 2)^8 * (4 * q^3 + 25 * q^2 / 4 + 4 * q + 1)

# Zero out result if q >= 2
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
Expand All @@ -531,9 +520,8 @@ end

@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC6Kernel, r::Real, h)
q = r / h
term1 = -4 * (1 - q / 2)^7 * (4q^3 + 25q^2 / 4 + 4q + 1)
term2 = (1 - q / 2)^8 * (12q^2 + 50q / 4 + 4)
derivative = term1 + term2

derivative = -11 * q / 4 * (8 * q^2 + 7 * q + 2) * (1 - q / 2)^7

# Zero out result if q >= 2
result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
Expand All @@ -542,9 +530,8 @@ end
return result
end

# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
@inline normalization_factor(::WendlandC6Kernel{2}, h) = 78 / (pi * h^2 * 7) / 4
@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 64) / 8
@inline normalization_factor(::WendlandC6Kernel{2}, h) = 39 / (pi * h^2 * 14)
@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 512)

@doc raw"""
Poly6Kernel{NDIMS}()
Expand Down Expand Up @@ -609,8 +596,8 @@ end

@inline compact_support(::Poly6Kernel, h) = h

# Note that `315 // 64` saves one instruction but is significantly slower on GPUs (for now)
@inline normalization_factor(::Poly6Kernel{2}, h) = 4 / (pi * h^2)
# `64 * pi` is always `Float64`. `pi * h^3 * 64` preserves the type of `h`.
@inline normalization_factor(::Poly6Kernel{3}, h) = 315 / (pi * h^3 * 64)

@doc raw"""
Expand Down
10 changes: 6 additions & 4 deletions src/schemes/fluid/shifting_techniques.jl
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,13 @@ end
# means `compute_v_max=true`
function v_max(shifting::ParticleShiftingTechnique{<:Any, <:Any, <:Any, true},
v, system)
# This has similar performance to `maximum(..., eachparticle(system))`,
# This has similar performance as `maximum(..., eachparticle(system))`,
# but is GPU-compatible.
v_max = maximum(x -> sqrt(dot(x, x)),
reinterpret(reshape, SVector{ndims(system), eltype(v)},
current_velocity(v, system)))
v_max2 = maximum(x -> dot(x, x),
reinterpret(reshape, SVector{ndims(system), eltype(v)},
current_velocity(v, system)))
v_max = sqrt(v_max2)

return shifting.v_factor * v_max
end

Expand Down
Loading