diff --git a/examples/programmatic/llpr/llpr.py b/examples/programmatic/llpr/llpr.py
index bc1ec41b8..d2634da33 100644
--- a/examples/programmatic/llpr/llpr.py
+++ b/examples/programmatic/llpr/llpr.py
@@ -29,25 +29,17 @@
 
 import torch
 
-from metatrain.utils.io import load_model
-
-
 # %%
 #
 # Models can be loaded using the :func:`metatrain.utils.io.load_model` function from
 # the. For already exported models The function requires the path to the exported model
 # and, for many models, also the path to the respective extensions directory. Both are
 # produced during the training process.
-
-
-model = load_model("model.pt", extensions_directory="extensions/")
-
 # %%
 #
 # In metatrain, a Dataset is composed of a list of systems and a dictionary of targets.
 # The following lines illustrate how to read systems and targets from xyz files, and
 # how to create a Dataset object from them.
-
 from metatrain.utils.data import Dataset, read_systems, read_targets  # noqa: E402
 from metatrain.utils.neighbor_lists import (  # noqa: E402
     get_requested_neighbor_lists,
@@ -74,7 +66,13 @@
 }
 targets, _ = read_targets(target_config)
 
-requested_neighbor_lists = get_requested_neighbor_lists(model)
+
+from metatrain.utils.llpr import LLPRUncertaintyModel  # noqa: E402
+
+
+llpr_model = LLPRUncertaintyModel("model.ckpt")
+
+requested_neighbor_lists = get_requested_neighbor_lists(llpr_model)
 qm9_systems = [
     get_system_with_neighbor_lists(system, requested_neighbor_lists)
     for system in qm9_systems
@@ -111,27 +109,16 @@
 # to compute prediction rigidity metrics, which are useful for uncertainty
 # quantification and model introspection.
 
-from metatensor.torch.atomistic import (  # noqa: E402
-    MetatensorAtomisticModel,
-    ModelMetadata,
-)
-
-from metatrain.utils.llpr import LLPRUncertaintyModel  # noqa: E402
-
 
-llpr_model = LLPRUncertaintyModel(model)
 llpr_model.compute_covariance(dataloader)
 llpr_model.compute_inverse_covariance(regularizer=1e-4)
 
 # calibrate on the same dataset for simplicity. In reality, a separate
 # calibration/validation dataset should be used.
+print("Calibrate")
 llpr_model.calibrate(dataloader)
 
-exported_model = MetatensorAtomisticModel(
-    llpr_model.eval(),
-    ModelMetadata(),
-    llpr_model.capabilities,
-)
+exported_model = llpr_model.export()
 
 # %%
 #
@@ -148,7 +135,7 @@
     outputs={
         # request the uncertainty in the atomic energy predictions
         "energy": ModelOutput(per_atom=True),  # needed to request the uncertainties
-        "mtt::aux::energy_uncertainty": ModelOutput(per_atom=True),
+        "energy_uncertainty": ModelOutput(per_atom=True),
         # `per_atom=False` would return the total uncertainty for the system,
         # or (the inverse of) the TPR (total prediction rigidity)
         # you also can request other outputs from the model here, for example:
@@ -158,7 +145,7 @@
 )
 
 outputs = exported_model([ethanol_system], evaluation_options, check_consistency=False)
-lpr = outputs["mtt::aux::energy_uncertainty"].block().values.detach().cpu().numpy()
+lpr = outputs["energy_uncertainty"].block().values.detach().cpu().numpy()
 
 # %%
 #
diff --git a/examples/programmatic/llpr_forces/force_llpr.py b/examples/programmatic/llpr_forces/force_llpr.py
index 793e10fc4..862da85e8 100644
--- a/examples/programmatic/llpr_forces/force_llpr.py
+++ b/examples/programmatic/llpr_forces/force_llpr.py
@@ -6,17 +6,19 @@
     ModelEvaluationOptions,
     ModelMetadata,
     ModelOutput,
-    load_atomistic_model,
 )
 
 from metatrain.utils.data import Dataset, collate_fn, read_systems, read_targets
 from metatrain.utils.llpr import LLPRUncertaintyModel
 from metatrain.utils.loss import TensorMapDictLoss
-from metatrain.utils.neighbor_lists import get_system_with_neighbor_lists
+from metatrain.utils.neighbor_lists import (
+    get_requested_neighbor_lists,
+    get_system_with_neighbor_lists,
+)
 
 
-model = load_atomistic_model("model.pt", extensions_directory="extensions/")
-model = model.to("cuda")
+dtype = torch.float64  # matching the model that was trained
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 train_systems = read_systems("train.xyz")
 train_target_config = {
@@ -105,7 +107,10 @@
 }
 test_targets, target_info = read_targets(test_target_config)
 
-requested_neighbor_lists = model.requested_neighbor_lists()
+llpr_model = LLPRUncertaintyModel("model.ckpt")
+llpr_model.to(device)
+
+requested_neighbor_lists = get_requested_neighbor_lists(llpr_model)
 train_systems = [
     get_system_with_neighbor_lists(system, requested_neighbor_lists)
     for system in train_systems
@@ -148,8 +153,6 @@
 }
 loss_fn = TensorMapDictLoss(loss_weight_dict)
 
-llpr_model = LLPRUncertaintyModel(model)
-
 print("Last layer parameters:")
 parameters = []
 for name, param in llpr_model.named_parameters():
@@ -173,7 +176,7 @@
     length_unit="angstrom",
     outputs={
         "mtt::aux::energy_last_layer_features": ModelOutput(per_atom=False),
-        "mtt::aux::energy_uncertainty": ModelOutput(per_atom=False),
+        "energy_uncertainty": ModelOutput(per_atom=False),
         "energy": ModelOutput(per_atom=False),
     },
     selected_atoms=None,
@@ -183,12 +186,11 @@
 force_uncertainties = []
 
 for batch in test_dataloader:
-    dtype = getattr(torch, model.capabilities().dtype)
     systems, targets = batch
-    systems = [system.to("cuda", dtype) for system in systems]
+    systems = [system.to(device, dtype) for system in systems]
     for system in systems:
         system.positions.requires_grad = True
-    targets = {name: tmap.to("cuda", dtype) for name, tmap in targets.items()}
+    targets = {name: tmap.to(device, dtype) for name, tmap in targets.items()}
 
     outputs = exported_model(systems, evaluation_options, check_consistency=True)
     energy = outputs["energy"].block().values
@@ -221,7 +223,7 @@
     force_uncertainty = torch.einsum(
         "if, fg, ig -> i",
         ll_feature_grads,
-        exported_model.module.inv_covariances["mtt::aux::energy_uncertainty"],
+        exported_model.module._get_inv_covariance("energy_uncertainty"),
         ll_feature_grads,
     )
     force_uncertainties.append(force_uncertainty.detach().clone().cpu().numpy())
diff --git a/src/metatrain/utils/llpr.py b/src/metatrain/utils/llpr.py
index ff25e13b3..ce340a931 100644
--- a/src/metatrain/utils/llpr.py
+++ b/src/metatrain/utils/llpr.py
@@ -1,19 +1,23 @@
-from typing import Callable, Dict, List, Optional
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, Union
 
 import metatensor.torch
 import numpy as np
 import torch
 from metatensor.torch import Labels, TensorBlock, TensorMap
 from metatensor.torch.atomistic import (
+    MetatensorAtomisticModel,
     ModelCapabilities,
-    ModelEvaluationOptions,
+    ModelMetadata,
     ModelOutput,
     System,
 )
 from torch.utils.data import DataLoader
 
 from metatrain.utils.data.target_info import is_auxiliary_output
+from metatrain.utils.io import check_file_extension
 
+from .architectures import import_architecture
 from .data import DatasetInfo, TargetInfo, get_atomic_types
 from .evaluate_model import evaluate_model
 from .per_atom import average_by_num_atoms
@@ -30,29 +34,37 @@ class LLPRUncertaintyModel(torch.nn.Module):
     :param model: The model to wrap.
     """
 
-    def __init__(
-        self,
-        model: torch.jit._script.RecursiveScriptModule,
-    ) -> None:
+    def __init__(self, checkpoint_path) -> None:
         super().__init__()
 
-        self.model = model
-        self.ll_feat_size = self.model.module.last_layer_feature_size
+        architecture_name = torch.load(checkpoint_path, weights_only=False)[
+            "architecture_name"
+        ]
+        architecture = import_architecture(architecture_name)
+        Model = architecture.__model__
+
+        self.model = Model.load_checkpoint(checkpoint_path)
+        self.ll_feat_size = self.model.last_layer_feature_size
+
+        # we need the capabilities of the model to be able to infer the capabilities
+        # of the LLPR model. Here, we do a trick: we call export on the model to to make
+        # it handle the conversion from dataset_info to capabilities
+        old_capabilities = self.model.export().capabilities()
+        dtype = getattr(torch, old_capabilities.dtype)
 
         # update capabilities: now we have additional outputs for the uncertainty
-        old_capabilities = self.model.capabilities()
         additional_capabilities = {}
-        self.uncertainty_multipliers = {}
+        self.outputs_list = []
         for name, output in old_capabilities.outputs.items():
             if is_auxiliary_output(name):
                 continue  # auxiliary output
-            uncertainty_name = f"mtt::aux::{name.replace('mtt::', '')}_uncertainty"
+            self.outputs_list.append(name)
+            uncertainty_name = _get_uncertainty_name(name)
             additional_capabilities[uncertainty_name] = ModelOutput(
-                quantity="",
-                unit=f"({output.unit})^2",
+                quantity=output.quantity,
+                unit=output.unit,
                 per_atom=True,
             )
-            self.uncertainty_multipliers[uncertainty_name] = 1.0
         self.capabilities = ModelCapabilities(
             outputs={**old_capabilities.outputs, **additional_capabilities},
             atomic_types=old_capabilities.atomic_types,
@@ -63,24 +75,26 @@ def __init__(
         )
 
         # register covariance and inverse covariance buffers
-        device = next(self.model.parameters()).device
-        dtype = getattr(torch, old_capabilities.dtype)
-        self.covariances = {
-            uncertainty_name: torch.zeros(
-                (self.ll_feat_size, self.ll_feat_size),
-                device=device,
-                dtype=dtype,
+        for name in self.outputs_list:
+            uncertainty_name = _get_uncertainty_name(name)
+            self.register_buffer(
+                f"covariance_{uncertainty_name}",
+                torch.zeros(
+                    (self.ll_feat_size, self.ll_feat_size),
+                    dtype=dtype,
+                ),
             )
-            for uncertainty_name in self.uncertainty_multipliers.keys()
-        }
-        self.inv_covariances = {
-            uncertainty_name: torch.zeros(
-                (self.ll_feat_size, self.ll_feat_size),
-                device=device,
-                dtype=dtype,
+            self.register_buffer(
+                f"inv_covariance_{uncertainty_name}",
+                torch.zeros(
+                    (self.ll_feat_size, self.ll_feat_size),
+                    dtype=dtype,
+                ),
+            )
+            self.register_buffer(
+                f"multiplier_{uncertainty_name}",
+                torch.tensor([1.0], dtype=dtype),
             )
-            for uncertainty_name in self.uncertainty_multipliers.keys()
-        }
 
         # flags
         self.covariance_computed = False
@@ -93,13 +107,9 @@ def forward(
         outputs: Dict[str, ModelOutput],
         selected_atoms: Optional[Labels] = None,
     ) -> Dict[str, TensorMap]:
-        device = systems[0].positions.device
-        if list(self.covariances.values())[0].device != device:
-            for name in self.covariances.keys():
-                self.covariances[name] = self.covariances[name].to(device=device)
-                self.inv_covariances[name] = self.inv_covariances[name].to(
-                    device=device
-                )
+        if all("_uncertainty" not in output for output in outputs):
+            # no uncertainties requested
+            return self.model(systems, outputs, selected_atoms)
 
         if not self.inv_covariance_computed:
             raise ValueError(
@@ -107,15 +117,6 @@ def forward(
                 "been computed yet."
             )
 
-        if all("_uncertainty" not in output for output in outputs):
-            # no uncertainties requested
-            options = ModelEvaluationOptions(
-                length_unit="",
-                outputs=outputs,
-                selected_atoms=selected_atoms,
-            )
-            return self.model(systems, options, check_consistency=False)
-
         per_atom_all_targets = [output.per_atom for output in outputs.values()]
         # impose either all per atom or all not per atom
         if not all(per_atom_all_targets) and any(per_atom_all_targets):
@@ -151,32 +152,36 @@ def forward(
                 continue
             outputs_for_model[name] = output
 
-        options = ModelEvaluationOptions(
-            length_unit="",
-            outputs=outputs_for_model,
-            selected_atoms=selected_atoms,
-        )
-        return_dict = self.model(systems, options, check_consistency=False)
+        return_dict = self.model(systems, outputs_for_model, selected_atoms)
 
         requested_uncertainties: List[str] = []
         for name in outputs.keys():
-            if name.startswith("mtt::aux::") and name.endswith("_uncertainty"):
+            if (name.startswith("mtt::aux::") and name.endswith("_uncertainty")) or (
+                name == "energy_uncertainty"
+            ):
                 requested_uncertainties.append(name)
 
-        for name in requested_uncertainties:
-            ll_features = return_dict[
-                name.replace("_uncertainty", "_last_layer_features")
-            ]
+        for uncertainty_name in requested_uncertainties:
+            ll_features_name = uncertainty_name.replace(
+                "_uncertainty", "_last_layer_features"
+            )
+            if ll_features_name == "energy_last_layer_features":
+                # special case for energy_ensemble
+                ll_features_name = "mtt::aux::energy_last_layer_features"
+            ll_features = return_dict[ll_features_name]
+            property_name = (
+                "energy" if uncertainty_name == "energy_uncertainty" else "_"
+            )
 
             # compute PRs
             # the code is the same for PR and LPR
             one_over_pr_values = torch.einsum(
                 "ij, jk, ik -> i",
                 ll_features.block().values,
-                self.inv_covariances[name],
+                self._get_inv_covariance(uncertainty_name),
                 ll_features.block().values,
             ).unsqueeze(1)
-            one_over_pr = TensorMap(
+            uncertainty = TensorMap(
                 keys=Labels(
                     names=["_"],
                     values=torch.tensor(
@@ -185,11 +190,11 @@ def forward(
                 ),
                 blocks=[
                     TensorBlock(
-                        values=one_over_pr_values,
+                        values=torch.sqrt(one_over_pr_values),
                         samples=ll_features.block().samples,
                         components=ll_features.block().components,
                         properties=Labels(
-                            names=["_"],
+                            names=[property_name],
                             values=torch.tensor(
                                 [[0]], device=ll_features.block().values.device
                             ),
@@ -198,8 +203,8 @@ def forward(
                 ],
             )
 
-            return_dict[name] = metatensor.torch.multiply(
-                one_over_pr, self.uncertainty_multipliers[name]
+            return_dict[uncertainty_name] = metatensor.torch.multiply(
+                uncertainty, float(self._get_multiplier(uncertainty_name).item())
             )
 
         # now deal with potential ensembles (see generate_ensemble method)
@@ -286,15 +291,15 @@ def compute_covariance(self, train_loader: DataLoader) -> None:
             The individual samples need to be compatible with the ``Dataset``
             class in ``metatrain``.
         """
-        device = next(iter(self.covariances.values())).device
-        dtype = next(iter(self.covariances.values())).dtype
+        device = next(iter(self.buffers())).device
+        dtype = next(iter(self.buffers())).dtype
         for batch in train_loader:
             systems, targets = batch
             n_atoms = torch.tensor(
                 [len(system.positions) for system in systems], device=device
             )
             systems = [system.to(device=device, dtype=dtype) for system in systems]
-            options_for_targets = {
+            outputs_for_targets = {
                 name: ModelOutput(
                     quantity="",
                     unit="",
@@ -311,19 +316,17 @@ class in ``metatrain``.
                 )
                 for name in targets.keys()
             }
-            options = ModelEvaluationOptions(
-                length_unit="",
-                outputs={**options_for_targets, **outputs_for_features},
+            output = self.forward(
+                systems, {**outputs_for_targets, **outputs_for_features}
             )
-            output = self.model(systems, options, check_consistency=False)
             for name in targets.keys():
                 ll_feat_tmap = output[
                     f"mtt::aux::{name.replace('mtt::', '')}_last_layer_features"
                 ]
                 ll_feats = ll_feat_tmap.block().values.detach() / n_atoms.unsqueeze(1)
-                self.covariances[
-                    f"mtt::aux::{name.replace('mtt::', '')}_uncertainty"
-                ] += ll_feats.T @ ll_feats
+                uncertainty_name = _get_uncertainty_name(name)
+                covariance = self._get_covariance(uncertainty_name)
+                covariance += ll_feats.T @ ll_feats
 
         self.covariance_computed = True
 
@@ -380,8 +383,8 @@ class in ``metatrain``.
             targets=target_infos,
         )
         train_targets = dataset_info.targets
-        device = next(iter(self.covariances.values())).device
-        dtype = next(iter(self.covariances.values())).dtype
+        device = next(iter(self.buffers())).device
+        dtype = next(iter(self.buffers())).dtype
         for batch in train_loader:
             systems, targets = batch
             systems = [system.to(device=device, dtype=dtype) for system in systems]
@@ -418,9 +421,9 @@ class in ``metatrain``.
                     if name == output_name
                 ]
                 grads = torch.cat(grads, dim=1)
-                self.covariances[
-                    "mtt::aux::" + output_name.replace("mtt::", "") + "_uncertainty"
-                ] += grads.T @ grads
+                uncertainty_name = _get_uncertainty_name(output_name)
+                covariance = self._get_covariance(uncertainty_name)
+                covariance += grads.T @ grads
 
             for parameter in all_parameters_that_require_grad:
                 parameter.grad = None  # reset the gradients
@@ -442,12 +445,15 @@ def compute_inverse_covariance(self, regularizer: Optional[float] = None):
             inverse without regularization and increase the regularization
             parameter until the matrix is invertible.
         """
-        for name in self.covariances.keys():
+        for name in self.outputs_list:
+            uncertainty_name = _get_uncertainty_name(name)
+            covariance = self._get_covariance(uncertainty_name)
+            inv_covariance = self._get_inv_covariance(uncertainty_name)
             if regularizer is not None:
-                self.inv_covariances[name] = torch.inverse(
-                    self.covariances[name]
+                inv_covariance[:] = torch.inverse(
+                    covariance
                     + regularizer
-                    * torch.eye(self.ll_feat_size, device=self.covariances[name].device)
+                    * torch.eye(self.ll_feat_size, device=covariance.device)
                 )
             else:
                 # Try with an increasingly high regularization parameter until
@@ -457,22 +463,18 @@ def is_psd(x):
 
                 for log10_sigma_squared in torch.linspace(-20.0, 16.0, 33):
                     if not is_psd(
-                        self.covariances[name]
+                        covariance
                         + 10**log10_sigma_squared
-                        * torch.eye(
-                            self.ll_feat_size, device=self.covariances[name].device
-                        )
+                        * torch.eye(self.ll_feat_size, device=covariance.device)
                     ):
                         continue
                     else:
                         inverse = torch.inverse(
-                            self.covariances[name]
-                            + 10 ** (log10_sigma_squared + 0.0)
-                            * torch.eye(
-                                self.ll_feat_size, device=self.covariances[name].device
-                            )
+                            covariance
+                            + 10 ** (log10_sigma_squared + 2.0)  # for good conditioning
+                            * torch.eye(self.ll_feat_size, device=covariance.device)
                         )
-                        self.inv_covariances[name] = (inverse + inverse.T) / 2.0
+                        inv_covariance[:] = (inverse + inverse.T) / 2.0
                         break
 
         self.inv_covariance_computed = True
@@ -494,8 +496,8 @@ def calibrate(self, valid_loader: DataLoader):
         # calibrate the LLPR
         # TODO: in the future, we might want to have one calibration factor per
         # property for outputs with multiple properties
-        device = next(iter(self.covariances.values())).device
-        dtype = next(iter(self.covariances.values())).dtype
+        device = next(iter(self.buffers())).device
+        dtype = next(iter(self.buffers())).dtype
         all_predictions = {}  # type: ignore
         all_targets = {}  # type: ignore
         all_uncertainties = {}  # type: ignore
@@ -514,7 +516,7 @@ def calibrate(self, valid_loader: DataLoader):
                     unit="",
                     per_atom=False,
                 )
-                uncertainty_name = f"mtt::aux::{name.replace('mtt::', '')}_uncertainty"
+                uncertainty_name = _get_uncertainty_name(name)
                 requested_outputs[uncertainty_name] = ModelOutput(
                     quantity="",
                     unit="",
@@ -522,7 +524,7 @@ def calibrate(self, valid_loader: DataLoader):
                 )
             outputs = self.forward(systems, requested_outputs)
             for name, target in targets.items():
-                uncertainty_name = f"mtt::aux::{name.replace('mtt::', '')}_uncertainty"
+                uncertainty_name = _get_uncertainty_name(name)
                 if name not in all_predictions:
                     all_predictions[name] = []
                     all_targets[name] = []
@@ -536,7 +538,7 @@ def calibrate(self, valid_loader: DataLoader):
         for name in all_predictions:
             all_predictions[name] = torch.cat(all_predictions[name], dim=0)
             all_targets[name] = torch.cat(all_targets[name], dim=0)
-            uncertainty_name = f"mtt::aux::{name.replace('mtt::', '')}_uncertainty"
+            uncertainty_name = _get_uncertainty_name(name)
             all_uncertainties[uncertainty_name] = torch.cat(
                 all_uncertainties[uncertainty_name], dim=0
             )
@@ -544,11 +546,10 @@ def calibrate(self, valid_loader: DataLoader):
         for name in all_predictions:
             # compute the uncertainty multiplier
             residuals = all_predictions[name] - all_targets[name]
-            uncertainty_name = f"mtt::aux::{name.replace('mtt::', '')}_uncertainty"
+            uncertainty_name = _get_uncertainty_name(name)
             uncertainties = all_uncertainties[uncertainty_name]
-            self.uncertainty_multipliers[uncertainty_name] = torch.mean(
-                residuals**2 / uncertainties
-            ).item()
+            multiplier = self._get_multiplier(uncertainty_name)
+            multiplier[:] = torch.sqrt(torch.mean(residuals**2 / uncertainties**2))
 
         self.is_calibrated = True
 
@@ -582,15 +583,19 @@ def generate_ensemble(
         # sampling; each member is sampled from a multivariate normal distribution
         # with mean given by the input weights and covariance given by the inverse
         # covariance matrix
+        device = next(iter(self.buffers())).device
+        dtype = next(iter(self.buffers())).dtype
         for name, weights in weight_tensors.items():
-            uncertainty_name = "mtt::aux::" + name.replace("mtt::", "") + "_uncertainty"
-            device = self.inv_covariances[uncertainty_name].device
-            dtype = self.inv_covariances[uncertainty_name].dtype
+            uncertainty_name = _get_uncertainty_name(name)
             rng = np.random.default_rng()
             ensemble_weights = rng.multivariate_normal(
                 weights.clone().detach().cpu().numpy(),
-                self.inv_covariances[uncertainty_name].clone().detach().cpu().numpy()
-                * self.uncertainty_multipliers[uncertainty_name],
+                self._get_inv_covariance(uncertainty_name)
+                .clone()
+                .detach()
+                .cpu()
+                .numpy()
+                * self._get_multiplier(uncertainty_name).item() ** 2,
                 size=n_members,
                 method="svd",
             ).T
@@ -627,3 +632,115 @@ def generate_ensemble(
             supported_devices=self.capabilities.supported_devices,
             dtype=self.capabilities.dtype,
         )
+
+    def save_checkpoint(self, model, path: Union[str, Path]):
+        checkpoint = {
+            "architecture_name": "llpr_wrapper",
+            "model_data": {
+                "model_hypers": model.hypers,
+                "dataset_info": model.dataset_info,
+            },
+            "model_state_dict": model.state_dict(),
+            "train_hypers": self.hypers,
+            "epoch": self.epoch,
+            "optimizer_state_dict": self.optimizer_state_dict,
+            "scheduler_state_dict": self.scheduler_state_dict,
+            "best_metric": self.best_metric,
+            "best_model_state_dict": self.best_model_state_dict,
+            "best_optimizer_state_dict": self.best_optimizer_state_dict,
+        }
+        torch.save(
+            checkpoint,
+            check_file_extension(path, ".ckpt"),
+        )
+
+    @classmethod
+    def load_checkpoint(cls, path: Union[str, Path]) -> "LLPRUncertaintyModel":
+        checkpoint = torch.load(path, weights_only=False, map_location="cpu")
+        model_data = checkpoint["model_data"]
+        model_state_dict = checkpoint["model_state_dict"]
+
+        # Create the model
+        model = cls(**model_data)
+        state_dict_iter = iter(model_state_dict.values())
+        next(state_dict_iter)  # skip some integer buffer for some architectures
+        dtype = next(state_dict_iter).dtype
+        model.to(dtype).load_state_dict(model_state_dict)
+
+        # TODO: remove this thing... unfortunately, for now, this NEEDS to be in the
+        # top-level module
+        try:
+            model.model.additive_models[0].sync_tensor_maps()
+            print("Syncing tensor maps")
+        except Exception as e:
+            print(e)
+            print("No tensor maps to sync")
+            # pass
+
+        # If we load a LLPR checkpoint, these will already be ready:
+        model.covariance_computed = False
+        model.inv_covariance_computed = False
+        model.is_calibrated = False
+
+        return model
+
+    def export(
+        self, metadata: Optional[ModelMetadata] = None
+    ) -> MetatensorAtomisticModel:
+        dtype = next(self.parameters()).dtype
+
+        # Make sure the model is all in the same dtype
+        # For example, after training, the additive models could still be in
+        # float64
+        self.to(dtype)
+
+        # Additionally, the composition model contains some `TensorMap`s that cannot
+        # be registered correctly with Pytorch. This function moves them:
+        try:
+            self.model.additive_models[0]._move_weights_to_device_and_dtype(
+                torch.device("cpu"), torch.float64
+            )
+            print("Moving weights to CPU and float64")
+        except Exception as e:
+            print(e)
+            print("No weights to move")
+
+        if metadata is None:
+            metadata = ModelMetadata()
+
+        # append_metadata_references(metadata, self.__default_metadata__)
+        # TODO: LLPR references
+
+        return MetatensorAtomisticModel(self.eval(), metadata, self.capabilities)
+
+    def _get_covariance(self, name: str):
+        name = "covariance_" + name
+        requested_buffer = torch.tensor(0)
+        for n, buffer in self.named_buffers():
+            if n == name:
+                requested_buffer = buffer
+        return requested_buffer
+
+    def _get_inv_covariance(self, name: str):
+        name = "inv_covariance_" + name
+        requested_buffer = torch.tensor(0)
+        for n, buffer in self.named_buffers():
+            if n == name:
+                requested_buffer = buffer
+        return requested_buffer
+
+    def _get_multiplier(self, name: str):
+        name = "multiplier_" + name
+        requested_buffer = torch.tensor(0)
+        for n, buffer in self.named_buffers():
+            if n == name:
+                requested_buffer = buffer
+        return requested_buffer
+
+
+def _get_uncertainty_name(name: str):
+    if name == "energy":
+        uncertainty_name = "energy_uncertainty"
+    else:
+        uncertainty_name = f"mtt::aux::{name.replace('mtt::', '')}_uncertainty"
+    return uncertainty_name
diff --git a/tests/utils/test_llpr.py b/tests/utils/test_llpr.py
index 00c722e2b..3c86cf3a6 100644
--- a/tests/utils/test_llpr.py
+++ b/tests/utils/test_llpr.py
@@ -22,10 +22,8 @@
 
 
 def test_llpr(tmpdir):
-    model = load_model(
-        str(RESOURCES_PATH / "model-64-bit.pt"),
-        extensions_directory=str(RESOURCES_PATH / "extensions/"),
-    )
+    llpr_model = LLPRUncertaintyModel(str(RESOURCES_PATH / "model-64-bit.ckpt"))
+
     qm9_systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz")
     target_config = {
         "energy": {
@@ -43,7 +41,7 @@ def test_llpr(tmpdir):
         },
     }
     targets, _ = read_targets(target_config)
-    requested_neighbor_lists = get_requested_neighbor_lists(model)
+    requested_neighbor_lists = get_requested_neighbor_lists(llpr_model)
     qm9_systems = [
         get_system_with_neighbor_lists(system, requested_neighbor_lists)
         for system in qm9_systems
@@ -56,7 +54,6 @@ def test_llpr(tmpdir):
         collate_fn=collate_fn,
     )
 
-    llpr_model = LLPRUncertaintyModel(model)
     llpr_model.compute_covariance(dataloader)
     llpr_model.compute_inverse_covariance()
 
@@ -69,7 +66,7 @@ def test_llpr(tmpdir):
     evaluation_options = ModelEvaluationOptions(
         length_unit="angstrom",
         outputs={
-            "mtt::aux::energy_uncertainty": ModelOutput(per_atom=True),
+            "energy_uncertainty": ModelOutput(per_atom=True),
             "energy": ModelOutput(per_atom=True),
             "mtt::aux::energy_last_layer_features": ModelOutput(per_atom=True),
         },
@@ -80,11 +77,11 @@ def test_llpr(tmpdir):
         qm9_systems[:5], evaluation_options, check_consistency=True
     )
 
-    assert "mtt::aux::energy_uncertainty" in outputs
+    assert "energy_uncertainty" in outputs
     assert "energy" in outputs
     assert "mtt::aux::energy_last_layer_features" in outputs
 
-    assert outputs["mtt::aux::energy_uncertainty"].block().samples.names == [
+    assert outputs["energy_uncertainty"].block().samples.names == [
         "system",
         "atom",
     ]
@@ -101,7 +98,7 @@ def test_llpr(tmpdir):
             params.append(param.squeeze())
     weights = torch.cat(params)
 
-    n_ensemble_members = 10000
+    n_ensemble_members = 1000000  # converges slowly...
     llpr_model.calibrate(dataloader)
     llpr_model.generate_ensemble({"energy": weights}, n_ensemble_members)
     assert "energy_ensemble" in llpr_model.capabilities.outputs
@@ -120,7 +117,7 @@ def test_llpr(tmpdir):
         length_unit="angstrom",
         outputs={
             "energy": ModelOutput(per_atom=False),
-            "mtt::aux::energy_uncertainty": ModelOutput(per_atom=False),
+            "energy_uncertainty": ModelOutput(per_atom=False),
             "energy_ensemble": ModelOutput(per_atom=False),
         },
         selected_atoms=None,
@@ -129,24 +126,27 @@ def test_llpr(tmpdir):
         qm9_systems[:5], evaluation_options, check_consistency=True
     )
 
-    assert "mtt::aux::energy_uncertainty" in outputs
+    assert "energy_uncertainty" in outputs
     assert "energy_ensemble" in outputs
 
-    analytical_uncertainty = outputs["mtt::aux::energy_uncertainty"].block().values
-    ensemble_uncertainty = torch.var(
+    analytical_uncertainty = outputs["energy_uncertainty"].block().values
+    ensemble_uncertainty = torch.std(
         outputs["energy_ensemble"].block().values, dim=1, keepdim=True
     )
 
+    print(analytical_uncertainty)
+    print(ensemble_uncertainty)
+
+    print(analytical_uncertainty / ensemble_uncertainty)
+
     torch.testing.assert_close(
-        analytical_uncertainty, ensemble_uncertainty, rtol=1e-2, atol=1e-2
+        analytical_uncertainty, ensemble_uncertainty, rtol=5e-3, atol=0.0
     )
 
 
 def test_llpr_covariance_as_pseudo_hessian(tmpdir):
-    model = load_model(
-        str(RESOURCES_PATH / "model-64-bit.pt"),
-        extensions_directory=str(RESOURCES_PATH / "extensions/"),
-    )
+    llpr_model = LLPRUncertaintyModel(str(RESOURCES_PATH / "model-64-bit.ckpt"))
+
     qm9_systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz")
     target_config = {
         "energy": {
@@ -164,7 +164,7 @@ def test_llpr_covariance_as_pseudo_hessian(tmpdir):
         },
     }
     targets, target_info = read_targets(target_config)
-    requested_neighbor_lists = model.requested_neighbor_lists()
+    requested_neighbor_lists = get_requested_neighbor_lists(llpr_model)
     qm9_systems = [
         get_system_with_neighbor_lists(system, requested_neighbor_lists)
         for system in qm9_systems
@@ -177,8 +177,6 @@ def test_llpr_covariance_as_pseudo_hessian(tmpdir):
         collate_fn=collate_fn,
     )
 
-    llpr_model = LLPRUncertaintyModel(model)
-
     parameters = []
     for name, param in llpr_model.named_parameters():
         if "last_layers" in name:
@@ -205,7 +203,7 @@ def test_llpr_covariance_as_pseudo_hessian(tmpdir):
     evaluation_options = ModelEvaluationOptions(
         length_unit="angstrom",
         outputs={
-            "mtt::aux::energy_uncertainty": ModelOutput(per_atom=True),
+            "energy_uncertainty": ModelOutput(per_atom=True),
             "energy": ModelOutput(per_atom=True),
             "mtt::aux::energy_last_layer_features": ModelOutput(per_atom=True),
         },
@@ -216,11 +214,11 @@ def test_llpr_covariance_as_pseudo_hessian(tmpdir):
         qm9_systems[:5], evaluation_options, check_consistency=True
     )
 
-    assert "mtt::aux::energy_uncertainty" in outputs
+    assert "energy_uncertainty" in outputs
     assert "energy" in outputs
     assert "mtt::aux::energy_last_layer_features" in outputs
 
-    assert outputs["mtt::aux::energy_uncertainty"].block().samples.names == [
+    assert outputs["energy_uncertainty"].block().samples.names == [
         "system",
         "atom",
     ]
@@ -256,7 +254,7 @@ def test_llpr_covariance_as_pseudo_hessian(tmpdir):
         length_unit="angstrom",
         outputs={
             "energy": ModelOutput(per_atom=False),
-            "mtt::aux::energy_uncertainty": ModelOutput(per_atom=False),
+            "energy_uncertainty": ModelOutput(per_atom=False),
             "energy_ensemble": ModelOutput(per_atom=False),
         },
         selected_atoms=None,
@@ -265,22 +263,18 @@ def test_llpr_covariance_as_pseudo_hessian(tmpdir):
         qm9_systems[:5], evaluation_options, check_consistency=True
     )
 
-    assert "mtt::aux::energy_uncertainty" in outputs
+    assert "energy_uncertainty" in outputs
     assert "energy_ensemble" in outputs
 
     predictions = outputs["energy"].block().values
-    analytical_uncertainty = outputs["mtt::aux::energy_uncertainty"].block().values
+    analytical_uncertainty = outputs["energy_uncertainty"].block().values
     ensemble_mean = torch.mean(
         outputs["energy_ensemble"].block().values, dim=1, keepdim=True
     )
-    ensemble_uncertainty = torch.var(
+    ensemble_uncertainty = torch.std(
         outputs["energy_ensemble"].block().values, dim=1, keepdim=True
     )
 
-    print(predictions)
-    print(ensemble_mean)
-    print(predictions - ensemble_mean)
-
     torch.testing.assert_close(predictions, ensemble_mean, rtol=5e-3, atol=0.0)
     torch.testing.assert_close(
         analytical_uncertainty, ensemble_uncertainty, rtol=5e-3, atol=0.0