
Commit 399422a

Authored by Julius Berner (juliusberner) and Charlelie Laurent (CharlelieLrt)
CorrDiff fixes (precond., patching, & cfg parsing) (#937)
* Add legacy scaling function
* Adapt warning for `scale_cond_input=True`
* Fix max_patch_per_gpu=1 behavior
* Avoid views for floats and simplify input_interp concat
* Added tests to check differentiability of patching and deterministic sampler
* Fixed error in new test

Signed-off-by: Julius Berner <jberner@nvidia.com>
Signed-off-by: Charlelie Laurent <claurent@nvidia.com>
Co-authored-by: Julius Berner <jberner@nvidia.com>
Co-authored-by: Charlelie Laurent <claurent@nvidia.com>
Co-authored-by: Charlelie Laurent <84199758+CharlelieLrt@users.noreply.github.com>
1 parent b39dd35 commit 399422a

File tree: 5 files changed, +211 −61 lines


examples/weather/corrdiff/train.py

Lines changed: 9 additions & 6 deletions
@@ -369,22 +369,25 @@ def main(cfg: DictConfig) -> None:
     batch_size_per_gpu = cfg.training.hp.batch_size_per_gpu
     logger0.info(f"Using {num_accumulation_rounds} gradient accumulation rounds")
 
-    patch_num = getattr(cfg.training.hp, "patch_num", 1)
-    max_patch_per_gpu = getattr(cfg.training.hp, "max_patch_per_gpu", 1)
-
     # calculate patch per iter
-    if hasattr(cfg.training.hp, "max_patch_per_gpu") and max_patch_per_gpu > 1:
+    patch_num = getattr(cfg.training.hp, "patch_num", 1)
+    if hasattr(cfg.training.hp, "max_patch_per_gpu"):
+        max_patch_per_gpu = cfg.training.hp.max_patch_per_gpu
+        if max_patch_per_gpu // batch_size_per_gpu < 1:
+            raise ValueError(
+                f"max_patch_per_gpu ({max_patch_per_gpu}) must be greater or equal to batch_size_per_gpu ({batch_size_per_gpu})."
+            )
         max_patch_num_per_iter = min(
             patch_num, (max_patch_per_gpu // batch_size_per_gpu)
-        )  # Ensure at least 1 patch per iter
+        )
         patch_iterations = (
             patch_num + max_patch_num_per_iter - 1
         ) // max_patch_num_per_iter
         patch_nums_iter = [
             min(max_patch_num_per_iter, patch_num - i * max_patch_num_per_iter)
             for i in range(patch_iterations)
         ]
-        print(
+        logger0.info(
            f"max_patch_num_per_iter is {max_patch_num_per_iter}, patch_iterations is {patch_iterations}, patch_nums_iter is {patch_nums_iter}"
         )
     else:
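To make the config-parsing change above concrete, here is a minimal, self-contained sketch of the patch-splitting arithmetic with hypothetical values (the numbers are illustrative, not taken from any shipped config). The new ValueError guards the case where the integer division max_patch_per_gpu // batch_size_per_gpu would otherwise be zero.

# Hypothetical values for illustration only (not from the CorrDiff configs).
patch_num = 16            # cfg.training.hp.patch_num
max_patch_per_gpu = 10    # cfg.training.hp.max_patch_per_gpu
batch_size_per_gpu = 2    # cfg.training.hp.batch_size_per_gpu

# Largest number of patches that fits in one iteration on a single GPU.
max_patch_num_per_iter = min(patch_num, max_patch_per_gpu // batch_size_per_gpu)  # 5

# Ceil division: number of accumulation iterations needed to cover all patches.
patch_iterations = (patch_num + max_patch_num_per_iter - 1) // max_patch_num_per_iter  # 4

# Per-iteration patch counts; the last iteration takes the remainder.
patch_nums_iter = [
    min(max_patch_num_per_iter, patch_num - i * max_patch_num_per_iter)
    for i in range(patch_iterations)
]
assert patch_nums_iter == [5, 5, 5, 1]
assert sum(patch_nums_iter) == patch_num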

physicsnemo/models/diffusion/preconditioning.py

Lines changed: 38 additions & 9 deletions
@@ -1031,15 +1031,6 @@ def __init__(
             stacklevel=2,
         )
 
-        if scale_cond_input:
-            warnings.warn(
-                "scale_cond_input=True does not properly scale the conditional input. "
-                "(see https://github.com/NVIDIA/modulus/issues/229). "
-                "This setup will be deprecated. "
-                "Please set scale_cond_input=False.",
-                DeprecationWarning,
-            )
-
         super().__init__(
             img_resolution=img_resolution,
             img_in_channels=img_in_channels,
@@ -1052,10 +1043,48 @@ def __init__(
             **model_kwargs,
         )
 
+        if scale_cond_input:
+            warnings.warn(
+                "The `scale_cond_input=True` option does not properly scale the conditional input "
+                "and is deprecated. It is highly recommended to set `scale_cond_input=False`. "
+                "However, for loading a checkpoint previously trained with `scale_cond_input=True`, "
+                "this flag must be set to `True` to ensure compatibility. "
+                "For more details, see https://github.com/NVIDIA/modulus/issues/229.",
+                DeprecationWarning,
+            )
+            self.scaling_fn = self._legacy_scaling_fn
+
         # Store deprecated parameters for backward compatibility
         self.img_channels = img_channels
         self.scale_cond_input = scale_cond_input
 
+    @staticmethod
+    def _legacy_scaling_fn(
+        x: torch.Tensor, img_lr: torch.Tensor, c_in: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        This function does not properly scale the conditional input
+        (see https://github.com/NVIDIA/modulus/issues/229)
+        and will be deprecated.
+
+        Concatenate and scale the high-resolution and low-resolution tensors.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Noisy high-resolution image of shape (B, C_hr, H, W).
+        img_lr : torch.Tensor
+            Low-resolution image of shape (B, C_lr, H, W).
+        c_in : torch.Tensor
+            Scaling factor of shape (B, 1, 1, 1).
+
+        Returns
+        -------
+        torch.Tensor
+            Scaled and concatenated tensor of shape (B, C_in+C_out, H, W).
+        """
+        return c_in * torch.cat([x, img_lr.to(x.dtype)], dim=1)
+
     def forward(
         self,
         x,
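As a quick illustration of why this path is retained only for checkpoint compatibility, the standalone sketch below (made-up shapes) reproduces what `_legacy_scaling_fn` computes: the `c_in` factor multiplies the conditional low-resolution channels as well as the noisy input, which is the improper scaling tracked in https://github.com/NVIDIA/modulus/issues/229. The non-legacy path is assumed to leave the conditional input unscaled; that code is not part of this diff.

import torch

# Made-up shapes for illustration.
x = torch.randn(1, 2, 4, 4)       # noisy high-resolution input (B, C_hr, H, W)
img_lr = torch.randn(1, 3, 4, 4)  # conditional low-resolution input (B, C_lr, H, W)
c_in = torch.full((1, 1, 1, 1), 0.5)  # EDM-style input scaling factor

# What the retained legacy function computes:
legacy = c_in * torch.cat([x, img_lr.to(x.dtype)], dim=1)

# The conditional channels end up multiplied by c_in as well,
# which is the behavior the DeprecationWarning points at.
assert torch.allclose(legacy[:, 2:], c_in * img_lr)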

physicsnemo/utils/patching.py

Lines changed: 26 additions & 34 deletions
@@ -591,14 +591,26 @@ def image_batching(
     )  # (padding_left,padding_right,padding_top,padding_bottom)
     input_padded = image_padding(input)
     patch_num = patch_num_x * patch_num_y
+
+    # Cast to float for unfold
+    if input.dtype == torch.int32:
+        input_padded = input_padded.view(torch.float32)
+    elif input.dtype == torch.int64:
+        input_padded = input_padded.view(torch.float64)
+
     x_unfold = torch.nn.functional.unfold(
-        input=input_padded.view(_cast_type(input_padded)),  # Cast to float
+        input=input_padded,
         kernel_size=(patch_shape_y, patch_shape_x),
         stride=(
             patch_shape_y - overlap_pix - boundary_pix,
             patch_shape_x - overlap_pix - boundary_pix,
         ),
-    ).view(input_padded.dtype)
+    )
+
+    # Cast back to original dtype
+    if input.dtype in [torch.int32, torch.int64]:
+        x_unfold = x_unfold.view(input.dtype)
+
     x_unfold = rearrange(
         x_unfold,
         "b (c p_h p_w) (nb_p_h nb_p_w) -> (nb_p_w nb_p_h b) c p_h p_w",
@@ -608,16 +620,7 @@ def image_batching(
         nb_p_w=patch_num_x,
     )
     if input_interp is not None:
-        input_interp_repeated = rearrange(
-            torch.repeat_interleave(
-                input=input_interp,
-                repeats=patch_num,
-                dim=0,
-                output_size=x_unfold.shape[0],
-            ),
-            "(b p) c h w -> (p b) c h w",
-            p=patch_num,
-        )
+        input_interp_repeated = input_interp.repeat(patch_num, 1, 1, 1)
         return torch.cat((x_unfold, input_interp_repeated), dim=1)
     else:
         return x_unfold
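The `input_interp` simplification above replaces a `repeat_interleave` plus einops rearrange with a single `repeat`. The snippet below is a small, self-contained check (toy shapes, illustrative only) that the two formulations place the conditioning images in the same patch-major order.

import torch
from einops import rearrange

input_interp = torch.arange(2 * 1 * 2 * 2, dtype=torch.float32).reshape(2, 1, 2, 2)
patch_num = 3  # toy value

# Previous formulation: interleave along the batch dim, then reorder (b p) -> (p b).
old = rearrange(
    torch.repeat_interleave(input_interp, repeats=patch_num, dim=0),
    "(b p) c h w -> (p b) c h w",
    p=patch_num,
)

# Simplified formulation from this commit: tile the whole batch patch_num times.
new = input_interp.repeat(patch_num, 1, 1, 1)

assert torch.equal(old, new)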
@@ -722,6 +725,13 @@ def image_fuse(
         nb_p_h=patch_num_y,
         nb_p_w=patch_num_x,
     )
+
+    # Cast to float for fold
+    if input.dtype == torch.int32:
+        x = x.view(torch.float32)
+    elif input.dtype == torch.int64:
+        x = x.view(torch.float64)
+
     # Stitch patches together (by summing over overlapping patches)
     x_folded = torch.nn.functional.fold(
         input=x,
@@ -733,6 +743,10 @@ def image_fuse(
         ),
     )
 
+    # Cast back to original dtype
+    if input.dtype in [torch.int32, torch.int64]:
+        x_folded = x_folded.view(input.dtype)
+
     # Remove padding
     x_no_padding = x_folded[
         ..., pad[2] : pad[2] + img_shape_y, pad[0] : pad[0] + img_shape_x
@@ -743,25 +757,3 @@ def image_fuse(
 
     # Normalize by overlap count
     return x_no_padding / overlap_count_no_padding
-
-
-def _cast_type(input: Tensor) -> torch.dtype:
-    """Return float type based on input tensor type.
-
-    Parameters
-    ----------
-    input : Tensor
-        Input tensor to determine float type from
-
-    Returns
-    -------
-    torch.dtype
-        Float type corresponding to input tensor type for int32/64,
-        otherwise returns original dtype
-    """
-    if input.dtype == torch.int32:
-        return torch.float32
-    elif input.dtype == torch.int64:
-        return torch.float64
-    else:
-        return input.dtype
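The integer handling in `image_batching` and `image_fuse` above relies on `Tensor.view(dtype)` reinterpreting the underlying bytes rather than converting values, so unfold (which only gathers elements) round-trips integer data exactly. A minimal standalone check of that trick, with illustrative shapes:

import torch
import torch.nn.functional as F

# An int32 "index" image; unfold does not accept integer inputs directly.
idx = torch.arange(36, dtype=torch.int32).reshape(1, 1, 6, 6)

# Reinterpret the same bytes as float32 (no value conversion), extract patches,
# then reinterpret back to int32.
patches = F.unfold(idx.view(torch.float32), kernel_size=(3, 3), stride=(3, 3))
patches = patches.view(torch.int32)

# The first patch reproduces the top-left 3x3 block of the original integers.
assert patches.dtype == torch.int32
assert torch.equal(patches[0, :, 0].reshape(3, 3), idx[0, 0, :3, :3])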

test/utils/generative/test_stochastic_sampler.py

Lines changed: 96 additions & 4 deletions
@@ -16,6 +16,7 @@
 
 from typing import Callable, Optional
 
+import pytest
 import torch
 from pytest_utils import import_or_fail
 from torch import Tensor
@@ -118,7 +119,8 @@ def test_stochastic_sampler(pytestconfig):
 
 # The test function for edm_sampler with rectangular domain and patching
 @import_or_fail("cftime")
-def test_stochastic_sampler_rectangle_patching(pytestconfig):
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_stochastic_sampler_rectangle_patching(device, pytestconfig):
     from physicsnemo.utils.generative import stochastic_sampler
     from physicsnemo.utils.patching import GridPatching2D
 
@@ -127,8 +129,10 @@ def test_stochastic_sampler_rectangle_patching(pytestconfig):
     img_shape_y, img_shape_x = 256, 64
     patch_shape_y, patch_shape_x = 16, 10
 
-    latents = torch.randn(2, 3, img_shape_y, img_shape_x)  # Mock latents
-    img_lr = torch.randn(2, 3, img_shape_y, img_shape_x)  # Mock low-res image
+    latents = torch.randn(2, 3, img_shape_y, img_shape_x, device=device)  # Mock latents
+    img_lr = torch.randn(
+        2, 3, img_shape_y, img_shape_x, device=device
+    )  # Mock low-res image
 
     # Test with patching
     patching = GridPatching2D(
@@ -139,7 +143,7 @@ def test_stochastic_sampler_rectangle_patching(pytestconfig):
     )
 
     # Test with mean_hr conditioning
-    mean_hr = torch.randn(2, 3, img_shape_y, img_shape_x)
+    mean_hr = torch.randn(2, 3, img_shape_y, img_shape_x, device=device)
     result_mean_hr = stochastic_sampler(
         net=net,
         latents=latents,
@@ -159,3 +163,91 @@ def test_stochastic_sampler_rectangle_patching(pytestconfig):
     assert (
         result_mean_hr.shape == latents.shape
     ), "Mean HR conditioned output shape does not match expected shape"
+
+
+# Test that the stochastic sampler is differentiable with rectangular patching
+# (tests differentiation through the patching and fusing)
+@import_or_fail("cftime")
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_stochastic_sampler_patching_differentiable(device, pytestconfig):
+    from physicsnemo.utils.generative import stochastic_sampler
+    from physicsnemo.utils.patching import GridPatching2D
+
+    # Mock network class
+    class MockNet:
+        def __init__(self, sigma_min=0.1, sigma_max=1000):
+            self.sigma_min = sigma_min
+            self.sigma_max = sigma_max
+
+        def round_sigma(self, t: Tensor) -> Tensor:
+            return t
+
+        def __call__(
+            self,
+            x: Tensor,
+            x_lr: Tensor,
+            t: Tensor,
+            class_labels: Optional[Tensor],
+            global_index: Optional[Tensor] = None,
+            embedding_selector: Optional[Callable] = None,
+        ) -> Tensor:
+            # Mock behavior: return input tensor for testing purposes
+            return x * 0.9 + x_lr[:, : x.shape[1], :, :] * 0.1
+
+    net = MockNet()
+
+    img_shape_y, img_shape_x = 256, 64
+    patch_shape_y, patch_shape_x = 16, 10
+
+    latents = torch.randn(2, 3, img_shape_y, img_shape_x, device=device)  # Mock latents
+    img_lr = torch.randn(
+        2, 3, img_shape_y, img_shape_x, device=device
+    )  # Mock low-res image
+
+    # Tensors with requires grad
+    a = torch.randn(1, requires_grad=True, device=device)
+    b = torch.randn(1, requires_grad=True, device=device)
+    c = torch.randn(1, requires_grad=True, device=device)
+    d = torch.randn(1, requires_grad=True, device=device)
+    e = torch.randn(1, requires_grad=True, device=device)
+    f = torch.randn(1, requires_grad=True, device=device)
+
+    # Test with patching
+    patching = GridPatching2D(
+        img_shape=(img_shape_y, img_shape_x),
+        patch_shape=(patch_shape_y, patch_shape_x),
+        overlap_pix=4,
+        boundary_pix=2,
+    )
+
+    # Test with mean_hr conditioning
+    mean_hr = torch.randn(2, 3, img_shape_y, img_shape_x, device=device)
+    result_mean_hr = stochastic_sampler(
+        net=net,
+        latents=a * latents + b,
+        img_lr=c * img_lr + d,
+        patching=patching,
+        mean_hr=e * mean_hr + f,
+        num_steps=2,
+        sigma_min=0.002,
+        sigma_max=800,
+        rho=7,
+        S_churn=0,
+        S_min=0,
+        S_max=float("inf"),
+        S_noise=1,
+    )
+
+    assert (
+        result_mean_hr.shape == latents.shape
+    ), "Mean HR conditioned output shape does not match expected shape"
+
+    loss = result_mean_hr.sum()
+    loss.backward()
+
+    assert a.grad is not None
+    assert b.grad is not None
+    assert c.grad is not None
+    assert d.grad is not None
+    assert e.grad is not None
+    assert f.grad is not None
