
Commit 73073a0

🔄 refactor(model): memory usage optimisation (#2813)
* initial commit: don't re-init buffer, in-place fill
* implement PatchCore memory savings
* deprecation warning for internal subsample_embedding of PatchCore
* PaDiM GPU memory usage reduction
* refactor memory bank tensor assignment for readability; DFKDE refactor
* unify memory-bank models' trainer arguments; refactor DFM to fit the new memory-bank framework
* bugfix: PaDiM device count set to zero
* ensure buffer is not replaced but instead resized and filled
* give memory bank type
* revert memory bank mixin back
* to memory bank type and device
* update return type, add comment about deprecation, duplicated copyright
* new deprecation wrapper, now handles arg deprecation
* add flexible tests for new deprecation warning
* update warnings to deprecations; resolve comments re fit vs fit_gaussian; test for None replacements for args
* add type hint for DFKDE
* remove dataset arg from DFM model fit
* Update src/anomalib/models/image/dfm/torch_model.py

---------

Signed-off-by: Alfie Roddan <51797647+alfieroddan@users.noreply.github.com>
Signed-off-by: alfieroddan <51797647+alfieroddan@users.noreply.github.com>
Signed-off-by: Samet Akcay <samet.akcay@intel.com>
Co-authored-by: Samet Akcay <samet.akcay@intel.com>
1 parent dcf0820 commit 73073a0

10 files changed: +452 −134 lines changed
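The common thread across the diffs below is that each torch model now owns a memory_bank tensor that starts empty, grows as training batches pass through forward(), and is consumed and then cleared by a model-level fit(). The following is a minimal, self-contained sketch of that pattern; the toy module and dimensions are invented for illustration and are not code from this commit:

    # Minimal sketch of the memory-bank pattern introduced in this commit (toy example, not PR code).
    import torch
    from torch import nn


    class ToyMemoryBankModel(nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.memory_bank = torch.empty(0)  # starts empty; filled during training-mode forward passes

        def forward(self, batch: torch.Tensor) -> torch.Tensor:
            features = batch.flatten(1)  # stand-in for real feature extraction
            if self.training:
                if self.memory_bank.size(0) == 0:
                    self.memory_bank = features
                else:
                    # .to(self.memory_bank) keeps the growing bank on the bank's existing device/dtype
                    self.memory_bank = torch.cat((self.memory_bank, features), dim=0).to(self.memory_bank)
            return features

        def fit(self) -> None:
            if self.memory_bank.size(0) == 0:
                msg = "Memory bank is empty."
                raise ValueError(msg)
            # ... fit a density model / Gaussian / coreset on self.memory_bank here ...
            self.memory_bank = torch.empty(0).to(self.memory_bank)  # clear to release GPU memory


    model = ToyMemoryBankModel().train()
    for _ in range(3):
        _ = model(torch.rand(4, 3, 8, 8))
    print(model.memory_bank.shape)  # torch.Size([12, 192])
    model.fit()
    print(model.memory_bank.shape)  # torch.Size([0])

Because the bank lives on the torch model rather than as a Python list on the Lightning module, the embeddings stay in a single tensor and can be dropped as soon as the model is fitted.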

src/anomalib/models/image/dfkde/lightning_model.py

Lines changed: 9 additions & 10 deletions

@@ -104,7 +104,7 @@ def __init__(
             visualizer=visualizer,
         )
 
-        self.model = DfkdeModel(
+        self.model: DfkdeModel = DfkdeModel(
             layers=layers,
             backbone=backbone,
             pre_trained=pre_trained,
@@ -113,8 +113,6 @@ def __init__(
             max_training_points=max_training_points,
         )
 
-        self.embeddings: list[torch.Tensor] = []
-
     @staticmethod
     def configure_optimizers() -> None:  # pylint: disable=arguments-differ
         """DFKDE doesn't require optimization, therefore returns no optimizers."""
@@ -133,18 +131,15 @@ def training_step(self, batch: Batch, *args, **kwargs) -> None:
         """
         del args, kwargs  # These variables are not used.
 
-        embedding = self.model(batch.image)
-        self.embeddings.append(embedding)
+        _ = self.model(batch.image)
 
         # Return a dummy loss tensor
         return torch.tensor(0.0, requires_grad=True, device=self.device)
 
     def fit(self) -> None:
         """Fit KDE model to collected embeddings from the training set."""
-        embeddings = torch.vstack(self.embeddings)
-
         logger.info("Fitting a KDE model to the embedding collected from the training set.")
-        self.model.classifier.fit(embeddings)
+        self.model.fit()
 
     def validation_step(self, batch: Batch, *args, **kwargs) -> STEP_OUTPUT:
         """Perform validation by computing anomaly scores.
@@ -167,9 +162,13 @@ def trainer_arguments(self) -> dict[str, Any]:
         """Get DFKDE-specific trainer arguments.
 
         Returns:
-            dict[str, Any]: Dictionary of trainer arguments.
+            dict[str, Any]: Trainer arguments
+                - ``gradient_clip_val``: ``0`` (no gradient clipping needed)
+                - ``max_epochs``: ``1`` (single pass through training data)
+                - ``num_sanity_val_steps``: ``0`` (skip validation sanity checks)
+                - ``devices``: ``1`` (only single gpu supported)
         """
-        return {"gradient_clip_val": 0, "max_epochs": 1, "num_sanity_val_steps": 0}
+        return {"gradient_clip_val": 0, "max_epochs": 1, "num_sanity_val_steps": 0, "devices": 1}
 
     @property
     def learning_type(self) -> LearningType:
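From the user's side the training workflow is unchanged; only the internals moved. A hedged usage sketch, not part of the diff (the Dfkde, Engine, and MVTecAD names are assumed from recent anomalib releases):

    # Usage sketch: training Dfkde end to end with the Engine.
    from anomalib.data import MVTecAD
    from anomalib.engine import Engine
    from anomalib.models import Dfkde

    datamodule = MVTecAD(category="bottle")
    model = Dfkde()
    engine = Engine()

    # training_step streams features into the torch model's memory bank;
    # Dfkde.fit() (invoked by anomalib's memory-bank handling before validation)
    # then delegates to DfkdeModel.fit().
    engine.fit(model=model, datamodule=datamodule)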

src/anomalib/models/image/dfkde/torch_model.py

Lines changed: 27 additions & 0 deletions

@@ -89,6 +89,7 @@ def __init__(
             feature_scaling_method=feature_scaling_method,
             max_training_points=max_training_points,
         )
+        self.memory_bank = torch.empty(0)
 
     def get_features(self, batch: torch.Tensor) -> torch.Tensor:
         """Extract features from the pre-trained backbone network.
@@ -141,8 +142,34 @@ def forward(self, batch: torch.Tensor) -> torch.Tensor | InferenceBatch:
         # 1. apply feature extraction
         features = self.get_features(batch)
         if self.training:
+            if self.memory_bank.size(0) == 0:
+                self.memory_bank = features
+            else:
+                new_bank = torch.cat((self.memory_bank, features), dim=0).to(self.memory_bank)
+                self.memory_bank = new_bank
             return features
 
         # 2. apply density estimation
         scores = self.classifier(features)
         return InferenceBatch(pred_score=scores)
+
+    def fit(self) -> None:
+        """Fits the classifier using the current contents of the memory bank.
+
+        This method is typically called after the memory bank has been populated
+        during training.
+
+        After fitting, the memory bank is cleared to reduce GPU memory usage.
+
+        Raises:
+            ValueError: If the memory bank is empty.
+        """
+        if self.memory_bank.size(0) == 0:
+            msg = "Memory bank is empty. Cannot perform coreset selection."
+            raise ValueError(msg)
+
+        # fit gaussian
+        self.classifier.fit(self.memory_bank)
+
+        # clear memory bank, redcues gpu size
+        self.memory_bank = torch.empty(0).to(self.memory_bank)
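The new fit() refuses to run on an empty bank. A small sketch of that guard, not from the PR (constructor keywords taken from the lightning diff above; pre_trained=False only to keep the illustration light):

    # Sketch: DfkdeModel.fit() raises if nothing has been collected yet.
    from anomalib.models.image.dfkde.torch_model import DfkdeModel

    model = DfkdeModel(layers=["layer4"], backbone="resnet18", pre_trained=False)
    try:
        model.fit()  # memory bank is still torch.empty(0)
    except ValueError as err:
        print(err)   # "Memory bank is empty. Cannot perform coreset selection."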

src/anomalib/models/image/dfm/lightning_model.py

Lines changed: 8 additions & 12 deletions

@@ -112,7 +112,6 @@ def __init__(
             n_comps=pca_level,
             score_type=score_type,
         )
-        self.embeddings: list[torch.Tensor] = []
         self.score_type = score_type
 
     @staticmethod
@@ -137,8 +136,7 @@ def training_step(self, batch: Batch, *args, **kwargs) -> None:
         """
         del args, kwargs  # These variables are not used.
 
-        embedding = self.model.get_features(batch.image).squeeze()
-        self.embeddings.append(embedding)
+        _ = self.model(batch.image)
 
         # Return a dummy loss tensor
         return torch.tensor(0.0, requires_grad=True, device=self.device)
@@ -149,11 +147,8 @@ def fit(self) -> None:
         The method aggregates embeddings collected during training and fits
         both the PCA transformation and Gaussian model used for scoring.
         """
-        logger.info("Aggregating the embedding extracted from the training set.")
-        embeddings = torch.vstack(self.embeddings)
-
         logger.info("Fitting a PCA and a Gaussian model to dataset.")
-        self.model.fit(embeddings)
+        self.model.fit()
 
     def validation_step(self, batch: Batch, *args, **kwargs) -> STEP_OUTPUT:
         """Compute predictions for the input batch during validation.
@@ -176,12 +171,13 @@ def trainer_arguments(self) -> dict[str, Any]:
         """Get DFM-specific trainer arguments.
 
         Returns:
-            dict[str, Any]: Dictionary of trainer arguments:
-                - ``gradient_clip_val`` (int): Disable gradient clipping
-                - ``max_epochs`` (int): Train for one epoch only
-                - ``num_sanity_val_steps`` (int): Skip validation sanity checks
+            dict[str, Any]: Trainer arguments
+                - ``gradient_clip_val``: ``0`` (no gradient clipping needed)
+                - ``max_epochs``: ``1`` (single pass through training data)
+                - ``num_sanity_val_steps``: ``0`` (skip validation sanity checks)
+                - ``devices``: ``1`` (only single gpu supported)
         """
-        return {"gradient_clip_val": 0, "max_epochs": 1, "num_sanity_val_steps": 0}
+        return {"gradient_clip_val": 0, "max_epochs": 1, "num_sanity_val_steps": 0, "devices": 1}
 
     @property
     def learning_type(self) -> LearningType:
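All of the memory-bank models in this commit now return the same trainer arguments, including devices: 1. Roughly speaking, these are the keyword arguments anomalib's Engine forwards to the Lightning Trainer; the sketch below shows that downstream effect and rests on an assumption about Engine internals, which are not part of this diff:

    # Sketch: the practical effect of trainer_arguments, shown as a plain Lightning Trainer.
    from lightning.pytorch import Trainer

    trainer_kwargs = {
        "gradient_clip_val": 0,     # no gradients to clip for these one-pass models
        "max_epochs": 1,            # a single pass over the training data fills the memory bank
        "num_sanity_val_steps": 0,  # validation needs a fitted model, so skip sanity checks
        "devices": 1,               # memory-bank accumulation assumes a single device
    }
    trainer = Trainer(**trainer_kwargs)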

src/anomalib/models/image/dfm/torch_model.py

Lines changed: 27 additions & 19 deletions

@@ -153,18 +153,18 @@ def __init__(
             layers=[layer],
         ).eval()
 
-    def fit(self, dataset: torch.Tensor) -> None:
-        """Fit PCA and Gaussian model to dataset.
+        self.memory_bank = torch.empty(0)
 
-        Args:
-            dataset (torch.Tensor): Input dataset with shape
-                ``(n_samples, n_features)``.
-        """
-        self.pca_model.fit(dataset)
+    def fit(self) -> None:
+        """Fit PCA and Gaussian model to dataset."""
+        self.pca_model.fit(self.memory_bank)
         if self.score_type == "nll":
-            features_reduced = self.pca_model.transform(dataset)
+            features_reduced = self.pca_model.transform(self.memory_bank)
             self.gaussian_model.fit(features_reduced.T)
 
+        # clear memory bank, reduces GPU size
+        self.memory_bank = torch.empty(0).to(self.memory_bank)
+
     def score(self, features: torch.Tensor, feature_shapes: tuple) -> torch.Tensor:
         """Compute anomaly scores.
@@ -194,25 +194,24 @@ def score(self, features: torch.Tensor, feature_shapes: tuple) -> torch.Tensor:
 
         return (score, None) if self.score_type == "nll" else (score, score_map)
 
-    def get_features(self, batch: torch.Tensor) -> torch.Tensor:
+    def get_features(self, batch: torch.Tensor) -> tuple[torch.Tensor, torch.Size]:
         """Extract features from the pretrained network.
 
         Args:
             batch (torch.Tensor): Input images with shape
                 ``(batch_size, channels, height, width)``.
 
         Returns:
-            Union[torch.Tensor, Tuple[torch.Tensor, torch.Size]]: Features during
-                training, or tuple of (features, feature_shapes) during inference.
+            tuple of (features, feature_shapes).
         """
-        self.feature_extractor.eval()
-        features = self.feature_extractor(batch)[self.layer]
-        batch_size = len(features)
-        if self.pooling_kernel_size > 1:
-            features = F.avg_pool2d(input=features, kernel_size=self.pooling_kernel_size)
-        feature_shapes = features.shape
-        features = features.view(batch_size, -1).detach()
-        return features if self.training else (features, feature_shapes)
+        with torch.no_grad():
+            features = self.feature_extractor(batch)[self.layer]
+            batch_size = len(features)
+            if self.pooling_kernel_size > 1:
+                features = F.avg_pool2d(input=features, kernel_size=self.pooling_kernel_size)
+            feature_shapes = features.shape
+            features = features.view(batch_size, -1)
+        return features, feature_shapes
 
     def forward(self, batch: torch.Tensor) -> torch.Tensor | InferenceBatch:
         """Compute anomaly predictions from input images.
@@ -227,6 +226,15 @@ def forward(self, batch: torch.Tensor) -> torch.Tensor | InferenceBatch:
             ``InferenceBatch`` with prediction scores and anomaly maps.
         """
         feature_vector, feature_shapes = self.get_features(batch)
+
+        if self.training:
+            if self.memory_bank.size(0) == 0:
+                self.memory_bank = feature_vector
+            else:
+                new_bank = torch.cat((self.memory_bank, feature_vector), dim=0).to(self.memory_bank)
+                self.memory_bank = new_bank
+            return feature_vector
+
         pred_score, anomaly_map = self.score(feature_vector.view(feature_vector.shape[:2]), feature_shapes)
         if anomaly_map is not None:
             anomaly_map = F.interpolate(anomaly_map, size=batch.shape[-2:], mode="bilinear", align_corners=False)
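One behavioural change worth calling out: get_features previously returned a bare tensor in training mode and a tuple in eval mode; it now always returns (features, feature_shapes) and runs under torch.no_grad(). A sketch for direct callers, not from the PR (constructor keywords taken from the Dfm lightning module; pre_trained=False only to keep the illustration light):

    # Sketch: get_features now has one return shape in both training and eval mode.
    import torch
    from anomalib.models.image.dfm.torch_model import DFMModel

    model = DFMModel(backbone="resnet50", layer="layer3", pre_trained=False)
    model.train()
    features, feature_shapes = model.get_features(torch.rand(2, 3, 256, 256))
    print(features.shape)   # (batch_size, flattened_features); produced under torch.no_grad()
    print(feature_shapes)   # spatial shape before flattening, used later for the anomaly map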

src/anomalib/models/image/padim/lightning_model.py

Lines changed: 9 additions & 16 deletions

@@ -131,9 +131,6 @@ def __init__(
             n_features=n_features,
         )
 
-        self.stats: list[torch.Tensor] = []
-        self.embeddings: list[torch.Tensor] = []
-
     @staticmethod
     def configure_optimizers() -> None:
         """PADIM doesn't require optimization, therefore returns no optimizers."""
@@ -154,19 +151,15 @@ def training_step(self, batch: Batch, *args, **kwargs) -> None:
         """
         del args, kwargs  # These variables are not used.
 
-        embedding = self.model(batch.image)
-        self.embeddings.append(embedding)
+        _ = self.model(batch.image)
 
         # Return a dummy loss tensor
        return torch.tensor(0.0, requires_grad=True, device=self.device)
 
     def fit(self) -> None:
         """Fit a Gaussian to the embedding collected from the training set."""
-        logger.info("Aggregating the embedding extracted from the training set.")
-        embeddings = torch.vstack(self.embeddings)
-
         logger.info("Fitting a Gaussian to the embedding collected from the training set.")
-        self.stats = self.model.gaussian.fit(embeddings)
+        self.model.fit()
 
     def validation_step(self, batch: Batch, *args, **kwargs) -> STEP_OUTPUT:
         """Perform a validation step of PADIM.
@@ -190,16 +183,16 @@ def validation_step(self, batch: Batch, *args, **kwargs) -> STEP_OUTPUT:
 
     @property
     def trainer_arguments(self) -> dict[str, int | float]:
-        """Return PADIM trainer arguments.
-
-        Since the model does not require training, we limit the max_epochs to 1.
-        Since we need to run training epoch before validation, we also set the
-        sanity steps to 0.
+        """Get default trainer arguments for Padim.
 
         Returns:
-            dict[str, int | float]: Dictionary of trainer arguments
+            dict[str, Any]: Trainer arguments
+                - ``max_epochs``: ``1`` (single pass through training data)
+                - ``val_check_interval``: ``1.0`` (check validation every 1 step)
+                - ``num_sanity_val_steps``: ``0`` (skip validation sanity checks)
+                - ``devices``: ``1`` (only single gpu supported)
         """
-        return {"max_epochs": 1, "val_check_interval": 1.0, "num_sanity_val_steps": 0}
+        return {"max_epochs": 1, "val_check_interval": 1.0, "num_sanity_val_steps": 0, "devices": 1}
 
     @property
     def learning_type(self) -> LearningType:
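The removed self.stats list is not replaced by anything on the Lightning module: after fitting, the Gaussian parameters live on the torch model itself. A hedged sketch, not from the PR (the MVTecAD/Padim/Engine names and the mean/inv_covariance buffer names are assumptions from recent anomalib releases, not shown in this diff):

    # Sketch: reading the fitted Gaussian statistics from the torch model after training.
    from anomalib.data import MVTecAD
    from anomalib.engine import Engine
    from anomalib.models import Padim

    datamodule = MVTecAD(category="bottle")
    model = Padim()
    Engine().fit(model=model, datamodule=datamodule)

    # Previously mirrored in Padim.stats; now read from the MultiVariateGaussian module directly.
    print(model.model.gaussian.mean.shape)
    print(model.model.gaussian.inv_covariance.shape)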

src/anomalib/models/image/padim/torch_model.py

Lines changed: 26 additions & 0 deletions

@@ -147,6 +147,7 @@ def __init__(
         self.anomaly_map_generator = AnomalyMapGenerator()
 
         self.gaussian = MultiVariateGaussian()
+        self.memory_bank = torch.empty(0)
 
     def forward(self, input_tensor: torch.Tensor) -> torch.Tensor | InferenceBatch:
         """Forward-pass image-batch (N, C, H, W) into model to extract features.
@@ -182,6 +183,11 @@ def forward(self, input_tensor: torch.Tensor) -> torch.Tensor | InferenceBatch:
             embeddings = self.tiler.untile(embeddings)
 
         if self.training:
+            if self.memory_bank.size(0) == 0:
+                self.memory_bank = embeddings
+            else:
+                new_bank = torch.cat((self.memory_bank, embeddings), dim=0).to(self.memory_bank)
+                self.memory_bank = new_bank
             return embeddings
 
         anomaly_map = self.anomaly_map_generator(
@@ -217,3 +223,23 @@ def generate_embedding(self, features: dict[str, torch.Tensor]) -> torch.Tensor:
         # subsample embeddings
         idx = self.idx.to(embeddings.device)
         return torch.index_select(embeddings, 1, idx)
+
+    def fit(self) -> None:
+        """Fits a Gaussian model to the current contents of the memory bank.
+
+        This method is typically called after the memory bank has been filled during training.
+
+        After fitting, the memory bank is cleared to free GPU memory before validation or testing.
+
+        Raises:
+            ValueError: If the memory bank is empty.
+        """
+        if self.memory_bank.size(0) == 0:
+            msg = "Memory bank is empty. Cannot perform coreset selection."
+            raise ValueError(msg)
+
+        # fit gaussian
+        self.gaussian.fit(self.memory_bank)
+
+        # clear memory bank, redcues gpu usage
+        self.memory_bank = torch.empty(0).to(self.memory_bank)
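At the torch-model level, a training-mode forward pass now has a side effect: it grows memory_bank until fit() consumes and clears it. A sketch, not from the PR (constructor keywords as used by the Padim lightning module; pre_trained=False only to keep the illustration light):

    # Sketch: PadimModel accumulates embeddings during training-mode forwards, then fit() clears them.
    import torch
    from anomalib.models.image.padim.torch_model import PadimModel

    model = PadimModel(backbone="resnet18", layers=["layer1", "layer2", "layer3"], pre_trained=False)
    model.train()
    with torch.no_grad():
        for _ in range(2):
            _ = model(torch.rand(2, 3, 256, 256))

    print(model.memory_bank.shape[0])  # 4: embeddings from both batches, concatenated along dim 0
    model.fit()                        # fits the multivariate Gaussian, then empties the bank
    print(model.memory_bank.shape[0])  # 0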

src/anomalib/models/image/patchcore/lightning_model.py

Lines changed: 5 additions & 11 deletions

@@ -159,7 +159,6 @@ def __init__(
             num_neighbors=num_neighbors,
         )
         self.coreset_sampling_ratio = coreset_sampling_ratio
-        self.embeddings: list[torch.Tensor] = []
 
     @classmethod
     def configure_pre_processor(
@@ -235,24 +234,18 @@ def training_step(self, batch: Batch, *args, **kwargs) -> None:
             ``fit()``.
         """
         del args, kwargs  # These variables are not used.
-
-        embedding = self.model(batch.image)
-        self.embeddings.append(embedding)
+        _ = self.model(batch.image)
         # Return a dummy loss tensor
         return torch.tensor(0.0, requires_grad=True, device=self.device)
 
     def fit(self) -> None:
         """Apply subsampling to the embedding collected from the training set.
 
         This method:
-        1. Aggregates embeddings from all training batches
-        2. Applies coreset subsampling to reduce memory requirements
+        1. Applies coreset subsampling to reduce memory requirements
         """
-        logger.info("Aggregating the embedding extracted from the training set.")
-        embeddings = torch.vstack(self.embeddings)
-
         logger.info("Applying core-set subsampling to get the embedding.")
-        self.model.subsample_embedding(embeddings, self.coreset_sampling_ratio)
+        self.model.subsample_embedding(self.coreset_sampling_ratio)
 
     def validation_step(self, batch: Batch, *args, **kwargs) -> STEP_OUTPUT:
         """Generate predictions for a batch of images.
@@ -286,8 +279,9 @@ def trainer_arguments(self) -> dict[str, Any]:
             - ``gradient_clip_val``: ``0`` (no gradient clipping needed)
             - ``max_epochs``: ``1`` (single pass through training data)
             - ``num_sanity_val_steps``: ``0`` (skip validation sanity checks)
+            - ``devices``: ``1`` (only single gpu supported)
         """
-        return {"gradient_clip_val": 0, "max_epochs": 1, "num_sanity_val_steps": 0}
+        return {"gradient_clip_val": 0, "max_epochs": 1, "num_sanity_val_steps": 0, "devices": 1}
 
     @property
     def learning_type(self) -> LearningType:
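The PatchCore torch-model side of this change is not included in this excerpt; per the commit message, the old subsample_embedding(embeddings, ratio) form is kept behind a deprecation wrapper. A sketch of how a direct caller migrates, based on the new call site shown above (constructor keywords taken from the lightning module defaults; pre_trained=False only to keep the illustration light):

    # Sketch: PatchcoreModel now subsamples its own memory bank; callers pass only the ratio.
    import torch
    from anomalib.models.image.patchcore.torch_model import PatchcoreModel

    model = PatchcoreModel(layers=["layer2", "layer3"], backbone="wide_resnet50_2", pre_trained=False)
    model.train()
    with torch.no_grad():
        _ = model(torch.rand(2, 3, 256, 256))   # training-mode forward fills model.memory_bank

    model.subsample_embedding(0.1)               # new call site, as in Patchcore.fit() above
    # model.subsample_embedding(embeddings, 0.1) # old form; now deprecated per the commit message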
