|
172 | 172 |
|
173 | 173 | @inline compact_support(::SchoenbergCubicSplineKernel, h) = 2 * h |
174 | 174 |
|
175 | | -@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / 3h |
176 | | -# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`. |
| 175 | +# Note that `2 // 3 / h` saves one instruction but is significantly slower on GPUs (for now) |
| 176 | +@inline normalization_factor(::SchoenbergCubicSplineKernel{1}, h) = 2 / (3 * h) |
177 | 177 | @inline normalization_factor(::SchoenbergCubicSplineKernel{2}, h) = 10 / (pi * h^2 * 7) |
178 | 178 | @inline normalization_factor(::SchoenbergCubicSplineKernel{3}, h) = 1 / (pi * h^3) |
179 | 179 |
|
|
262 | 262 | return result |
263 | 263 | end |
264 | 264 |
|
265 | | -@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 // 2 * h |
| 265 | +@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 * h / 2 |
266 | 266 |
|
267 | | -@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / 24h |
268 | | -# `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`. |
| 267 | +@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / (24 * h) |
269 | 268 | @inline normalization_factor(::SchoenbergQuarticSplineKernel{2}, h) = 96 / (pi * h^2 * 1199) |
270 | 269 | @inline normalization_factor(::SchoenbergQuarticSplineKernel{3}, h) = 1 / (pi * h^3 * 20) |
271 | 270 |
|
@@ -343,15 +342,14 @@ end |
343 | 342 |
|
344 | 343 | @inline compact_support(::SchoenbergQuinticSplineKernel, h) = 3 * h |
345 | 344 |
|
346 | | -@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / 120h |
347 | | -# `478 * pi` is always `Float64`. `pi * h^2 * 478` preserves the type of `h`. |
| 345 | +@inline normalization_factor(::SchoenbergQuinticSplineKernel{1}, h) = 1 / (120 * h) |
348 | 346 | @inline normalization_factor(::SchoenbergQuinticSplineKernel{2}, h) = 7 / (pi * h^2 * 478) |
349 | 347 | @inline normalization_factor(::SchoenbergQuinticSplineKernel{3}, h) = 1 / (pi * h^3 * 120) |
350 | 348 |
|
351 | 349 | abstract type AbstractWendlandKernel{NDIMS} <: AbstractSmoothingKernel{NDIMS} end |
352 | 350 |
|
353 | 351 | # Compact support for all Wendland kernels |
354 | | -@inline compact_support(::AbstractWendlandKernel, h) = 2h |
| 352 | +@inline compact_support(::AbstractWendlandKernel, h) = 2 * h |
355 | 353 |
|
356 | 354 | @doc raw""" |
357 | 355 | WendlandC2Kernel{NDIMS}() |
@@ -390,7 +388,7 @@ struct WendlandC2Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end |
390 | 388 | @fastpow @inline function kernel(kernel::WendlandC2Kernel, r::Real, h) |
391 | 389 | q = r / h |
392 | 390 |
|
393 | | - result = (1 - q / 2)^4 * (2q + 1) |
| 391 | + result = (1 - q / 2)^4 * (2 * q + 1) |
394 | 392 |
|
395 | 393 | # Zero out result if q >= 2 |
396 | 394 | result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q)) |
|
411 | 409 | return result |
412 | 410 | end |
413 | 411 |
|
414 | | -@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2) / 4 |
415 | | -# `2 * pi` is always `Float64`. `pi * h^3 * 2` preserves the type of `h`. |
416 | | -@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 2) / 8 |
| 412 | +# Note that `7 // 4` saves one instruction but is significantly slower on GPUs (for now) |
| 413 | +@inline normalization_factor(::WendlandC2Kernel{2}, h) = 7 / (pi * h^2 * 4) |
| 414 | +@inline normalization_factor(::WendlandC2Kernel{3}, h) = 21 / (pi * h^3 * 16) |
417 | 415 |
|
418 | 416 | @doc raw""" |
419 | 417 | WendlandC4Kernel{NDIMS}() |
@@ -452,21 +450,18 @@ struct WendlandC4Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end |
452 | 450 | @fastpow @inline function kernel(kernel::WendlandC4Kernel, r::Real, h) |
453 | 451 | q = r / h |
454 | 452 |
|
455 | | - result = (1 - q / 2)^6 * (35q^2 / 12 + 3q + 1) |
| 453 | + result = (1 - q / 2)^6 * (35 // 12 * q^2 + 3 * q + 1) |
456 | 454 |
|
457 | 455 | # Zero out result if q >= 2 |
458 | 456 | result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q)) |
459 | 457 |
|
460 | 458 | return result |
461 | 459 | end |
462 | 460 |
|
463 | | -@fastpow @muladd @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h) |
| 461 | +@fastpow @inline function kernel_deriv(kernel::WendlandC4Kernel, r::Real, h) |
464 | 462 | q = r / h |
465 | 463 |
|
466 | | - # Use `//` to preserve the type of `q` |
467 | | - term1 = (1 - q / 2)^6 * (3 + 35 // 6 * q) |
468 | | - term2 = 3 * (1 - q / 2)^5 * (1 + 3q + 35 // 12 * q^2) |
469 | | - derivative = term1 - term2 |
| 464 | + derivative = -7 * q / 3 * (2 + 5 * q) * (1 - q / 2)^5 |
470 | 465 |
|
471 | 466 | # Zero out result if q >= 2 |
472 | 467 | result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h, |
|
475 | 470 | return result |
476 | 471 | end |
477 | 472 |
|
478 | | -@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2) / 4 |
479 | | -# `32 * pi` is always `Float64`. `pi * h^2 * 32` preserves the type of `h`. |
480 | | -@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 32) / 8 |
| 473 | +@inline normalization_factor(::WendlandC4Kernel{2}, h) = 9 / (pi * h^2 * 4) |
| 474 | +@inline normalization_factor(::WendlandC4Kernel{3}, h) = 495 / (pi * h^3 * 256) |
481 | 475 |
|
482 | 476 | @doc raw""" |
483 | 477 | WendlandC6Kernel{NDIMS}() |
@@ -516,7 +510,7 @@ struct WendlandC6Kernel{NDIMS} <: AbstractWendlandKernel{NDIMS} end |
516 | 510 | @fastpow @inline function kernel(kernel::WendlandC6Kernel, r::Real, h) |
517 | 511 | q = r / h |
518 | 512 |
|
519 | | - result = (1 - q / 2)^8 * (4q^3 + 25q^2 / 4 + 4q + 1) |
| 513 | + result = (1 - q / 2)^8 * (4 * q^3 + 25 * q^2 / 4 + 4 * q + 1) |
520 | 514 |
|
521 | 515 | # Zero out result if q >= 2 |
522 | 516 | result = ifelse(q < 2, normalization_factor(kernel, h) * result, zero(q)) |
|
526 | 520 |
|
527 | 521 | @fastpow @muladd @inline function kernel_deriv(kernel::WendlandC6Kernel, r::Real, h) |
528 | 522 | q = r / h |
529 | | - term1 = -4 * (1 - q / 2)^7 * (4q^3 + 25q^2 / 4 + 4q + 1) |
530 | | - term2 = (1 - q / 2)^8 * (12q^2 + 50q / 4 + 4) |
531 | | - derivative = term1 + term2 |
| 523 | + |
| 524 | + derivative = -11 * q / 4 * (8 * q^2 + 7 * q + 2) * (1 - q / 2)^7 |
532 | 525 |
|
533 | 526 | # Zero out result if q >= 2 |
534 | 527 | result = ifelse(q < 2, normalization_factor(kernel, h) * derivative / h, |
|
537 | 530 | return result |
538 | 531 | end |
539 | 532 |
|
540 | | -# `7 * pi` is always `Float64`. `pi * h^2 * 7` preserves the type of `h`. |
541 | | -@inline normalization_factor(::WendlandC6Kernel{2}, h) = 78 / (pi * h^2 * 7) / 4 |
542 | | -@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 64) / 8 |
| 533 | +@inline normalization_factor(::WendlandC6Kernel{2}, h) = 39 / (pi * h^2 * 14) |
| 534 | +@inline normalization_factor(::WendlandC6Kernel{3}, h) = 1365 / (pi * h^3 * 512) |
543 | 535 |
|
544 | 536 | @doc raw""" |
545 | 537 | Poly6Kernel{NDIMS}() |
|
604 | 596 |
|
605 | 597 | @inline compact_support(::Poly6Kernel, h) = h |
606 | 598 |
|
| 599 | +# Note that `315 // 64` saves one instruction but is significantly slower on GPUs (for now) |
607 | 600 | @inline normalization_factor(::Poly6Kernel{2}, h) = 4 / (pi * h^2) |
608 | | -# `64 * pi` is always `Float64`. `pi * h^3 * 64` preserves the type of `h`. |
609 | 601 | @inline normalization_factor(::Poly6Kernel{3}, h) = 315 / (pi * h^3 * 64) |
610 | 602 |
|
611 | 603 | @doc raw""" |
|
0 commit comments