Skip to content

Commit f7964d1

Browse files
committed
Optimize kernel normalization factors
1 parent 0852ab0 commit f7964d1

File tree

1 file changed

+21
-29
lines changed

1 file changed

+21
-29
lines changed

src/general/smoothing_kernels.jl

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ end
172172

173173
# Compact support of the Schoenberg cubic spline kernel: returns `2 * h`.
@inline function compact_support(::SchoenbergCubicSplineKernel, h)
    return 2 * h
end
174174

175-
@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / 3h
176-
# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
175+
# Note that `2 // 3 / h` saves one instruction but is significantly slower on GPUs (for now)
176+
# Normalization factor of `SchoenbergCubicSplineKernel{1}`: returns `2 / (3 * h)`.
@inline function normalization_factor(::SchoenbergCubicSplineKernel{1}, h)
    return 2 / (3 * h)
end
177177
# Normalization factor of `SchoenbergCubicSplineKernel{2}`.
# Keep the operand order: `7 * pi` is always `Float64`, whereas `pi * h^2 * 7`
# preserves the type of `h`.
@inline function normalization_factor(::SchoenbergCubicSplineKernel{2}, h)
    return 10 / (pi * h^2 * 7)
end
178178
# Normalization factor of `SchoenbergCubicSplineKernel{3}`: returns `1 / (pi * h^3)`.
@inline function normalization_factor(::SchoenbergCubicSplineKernel{3}, h)
    return 1 / (pi * h^3)
end
179179

@@ -262,10 +262,9 @@ end
262262
return result
263263
end
264264

265-
@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 // 2 * h
265+
# Compact support of the Schoenberg quartic spline kernel: returns `5 * h / 2`.
# Integer literals are used (no `//`), so the result keeps the type of `h`.
@inline function compact_support(::SchoenbergQuarticSplineKernel, h)
    return 5 * h / 2
end
266266

267-
@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / 24h
268-
# `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`.
267+
# Normalization factor of `SchoenbergQuarticSplineKernel{1}`: returns `1 / (24 * h)`.
@inline function normalization_factor(::SchoenbergQuarticSplineKernel{1}, h)
    return 1 / (24 * h)
end
269268
# Normalization factor of `SchoenbergQuarticSplineKernel{2}`.
# Keep the operand order: `1199 * pi` is always `Float64`, whereas `pi * h^2 * 1199`
# preserves the type of `h`.
@inline function normalization_factor(::SchoenbergQuarticSplineKernel{2}, h)
    return 96 / (pi * h^2 * 1199)
end
270269
# Normalization factor of `SchoenbergQuarticSplineKernel{3}`.
# `pi * h^3` is evaluated before multiplying by `20` so the type of `h` is preserved.
@inline function normalization_factor(::SchoenbergQuarticSplineKernel{3}, h)
    return 1 / (pi * h^3 * 20)
end
271270

@@ -343,15 +342,14 @@ end
343342

344343
# Compact support of the Schoenberg quintic spline kernel: returns `3 * h`.
@inline function compact_support(::SchoenbergQuinticSplineKernel, h)
    return 3 * h
end
345344

346-
@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / 120h
347-
# `478 * pi` is always `Float64`. `pi * h^2 * 478` preserves the type of `h`.
345+
# Normalization factor of `SchoenbergQuinticSplineKernel{1}`: returns `1 / (120 * h)`.
@inline function normalization_factor(::SchoenbergQuinticSplineKernel{1}, h)
    return 1 / (120 * h)
end
348346
# Normalization factor of `SchoenbergQuinticSplineKernel{2}`.
# Keep the operand order: `478 * pi` is always `Float64`, whereas `pi * h^2 * 478`
# preserves the type of `h`.
@inline function normalization_factor(::SchoenbergQuinticSplineKernel{2}, h)
    return 7 / (pi * h^2 * 478)
end
349347
# Normalization factor of `SchoenbergQuinticSplineKernel{3}`.
# `pi * h^3` is evaluated before multiplying by `120` so the type of `h` is preserved.
@inline function normalization_factor(::SchoenbergQuinticSplineKernel{3}, h)
    return 1 / (pi * h^3 * 120)
end
350348

351349
# Common supertype of the Wendland kernels. The `NDIMS` type parameter is passed on
# unchanged to `AbstractSmoothingKernel`.
abstract type AbstractWendlandKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end
352350

353351
# Compact support for all Wendland kernels
354-
@inline compact_support(::AbstractWendlandKernel, h) = 2h
352+
# Shared by every subtype of `AbstractWendlandKernel`: returns `2 * h`.
@inline function compact_support(::AbstractWendlandKernel, h)
    return 2 * h
end
355353

356354
@doc raw"""
357355
WendlandC2Kernel{NDIMS}()
@@ -390,7 +388,7 @@ struct WendlandC2Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
390388
@fastpow @inline function kernel(kernel::WendlandC2Kernel, r::Real, h)
391389
q = r / h
392390

393-
result = (1 - q / 2)^4 * (2q + 1)
391+
result = (1 - q / 2)^4 * (2 * q + 1)
394392

395393
# Zero out result if q >= 2
396394
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
@@ -411,9 +409,9 @@ end
411409
return result
412410
end
413411

414-
@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2) / 4
415-
# `2 * pi` is always `Float64`. `pi * h^3 * 2` preserves the type of `h`.
416-
@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 2) / 8
412+
# Note that `7 // 4` saves one instruction but is significantly slower on GPUs (for now)
413+
# Normalization factor of `WendlandC2Kernel{2}`.
# `pi * h^2` is evaluated before multiplying by `4` so the type of `h` is preserved.
@inline function normalization_factor(::WendlandC2Kernel{2}, h)
    return 7 / (pi * h^2 * 4)
end
414+
# Normalization factor of `WendlandC2Kernel{3}`.
# Keep the operand order: `16 * pi` is always `Float64`, whereas `pi * h^3 * 16`
# preserves the type of `h`.
@inline function normalization_factor(::WendlandC2Kernel{3}, h)
    return 21 / (pi * h^3 * 16)
end
417415

418416
@doc raw"""
419417
WendlandC4Kernel{NDIMS}()
@@ -452,21 +450,18 @@ struct WendlandC4Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
452450
# Evaluate the Wendland C4 kernel for the distance `r` and the length parameter `h`.
#
# With `q = r / h`, this returns
# `normalization_factor(kernel, h) * (1 - q / 2)^6 * (35 * q^2 / 12 + 3 * q + 1)`
# for `q < 2` and `zero(q)` otherwise.
@fastpow @inline function kernel(kernel::WendlandC4Kernel, r::Real, h)
    q = r / h

    # Only integer literals are used, which keeps the type of `q`. The rational form
    # `35 // 12 * q^2` is deliberately avoided: the notes next to the normalization
    # factors in this file report `//` as significantly slower on GPUs (for now).
    result = (1 - q / 2)^6 * (35 * q^2 / 12 + 3 * q + 1)

    # Zero out result if q >= 2
    result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))

    return result
end
462460

463-
@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
461+
@fastpow @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h)
464462
q = r / h
465463

466-
# Use `//` to preserve the type of `q`
467-
term1 = (1 - q / 2)^6 * (3 + 35 // 6 * q)
468-
term2 = 3 * (1 - q / 2)^5 * (1 + 3q + 35 // 12 * q^2)
469-
derivative = term1 - term2
464+
derivative = -7 * q / 3 * (2 + 5 * q) * (1 - q / 2)^5
470465

471466
# Zero out result if q >= 2
472467
result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
@@ -475,9 +470,8 @@ end
475470
return result
476471
end
477472

478-
@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2) / 4
479-
# `32 * pi` is always `Float64`. `pi * h^2 * 32` preserves the type of `h`.
480-
@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 32) / 8
473+
# Normalization factor of `WendlandC4Kernel{2}`.
# `pi * h^2` is evaluated before multiplying by `4` so the type of `h` is preserved.
@inline function normalization_factor(::WendlandC4Kernel{2}, h)
    return 9 / (pi * h^2 * 4)
end
474+
# Normalization factor of `WendlandC4Kernel{3}`.
# Keep the operand order: `256 * pi` is always `Float64`, whereas `pi * h^3 * 256`
# preserves the type of `h`.
@inline function normalization_factor(::WendlandC4Kernel{3}, h)
    return 495 / (pi * h^3 * 256)
end
481475

482476
@doc raw"""
483477
WendlandC6Kernel{NDIMS}()
@@ -516,7 +510,7 @@ struct WendlandC6Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end
516510
@fastpow @inline function kernel(kernel::WendlandC6Kernel, r::Real, h)
517511
q = r / h
518512

519-
result = (1 - q / 2)^8 * (4q^3 + 25q^2 / 4 + 4q + 1)
513+
result = (1 - q / 2)^8 * (4 * q^3 + 25 * q^2 / 4 + 4 * q + 1)
520514

521515
# Zero out result if q >= 2
522516
result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q))
@@ -526,9 +520,8 @@ end
526520

527521
@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC6Kernel, r::Real, h)
528522
q = r / h
529-
term1 = -4 * (1 - q / 2)^7 * (4q^3 + 25q^2 / 4 + 4q + 1)
530-
term2 = (1 - q / 2)^8 * (12q^2 + 50q / 4 + 4)
531-
derivative = term1 + term2
523+
524+
derivative = -11 * q / 4 * (8 * q^2 + 7 * q + 2) * (1 - q / 2)^7
532525

533526
# Zero out result if q >= 2
534527
result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h,
@@ -537,9 +530,8 @@ end
537530
return result
538531
end
539532

540-
# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`.
541-
@inline normalization_factor(::WendlandC6Kernel{2}, h) = 78 / (pi * h^2 * 7) / 4
542-
@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 64) / 8
533+
# Normalization factor of `WendlandC6Kernel{2}`.
# Keep the operand order: `14 * pi` is always `Float64`, whereas `pi * h^2 * 14`
# preserves the type of `h`.
@inline function normalization_factor(::WendlandC6Kernel{2}, h)
    return 39 / (pi * h^2 * 14)
end
534+
# Normalization factor of `WendlandC6Kernel{3}`.
# `pi * h^3` is evaluated before multiplying by `512` so the type of `h` is preserved.
@inline function normalization_factor(::WendlandC6Kernel{3}, h)
    return 1365 / (pi * h^3 * 512)
end
543535

544536
@doc raw"""
545537
Poly6Kernel{NDIMS}()
@@ -604,8 +596,8 @@ end
604596

605597
# Compact support of the Poly6 kernel: returns `h` unchanged.
@inline function compact_support(::Poly6Kernel, h)
    return h
end
606598

599+
# Note that using `315 // 64` in the `Poly6Kernel{3}` factor saves one instruction but is significantly slower on GPUs (for now)
607600
# Normalization factor of `Poly6Kernel{2}`: returns `4 / (pi * h^2)`.
@inline function normalization_factor(::Poly6Kernel{2}, h)
    return 4 / (pi * h^2)
end
608-
# `64 * pi` is always `Float64`. `pi * h^3 * 64` preserves the type of `h`.
609601
# Normalization factor of `Poly6Kernel{3}`.
# Keep the operand order: `64 * pi` is always `Float64`, whereas `pi * h^3 * 64`
# preserves the type of `h`.
@inline function normalization_factor(::Poly6Kernel{3}, h)
    return 315 / (pi * h^3 * 64)
end
610602

611603
@doc raw"""

0 commit comments

Comments
 (0)