
Commit 3b1d05d

fix lorenz/rossler export and infer (#805)
* add embedding model to PhysformerGPT2 for infer
* modify export and inference code of lorenz
* fix export command
* fix export and infer of rossler
* fix doc
* fix error message in generate method
* fix docstring
1 parent 237ff01 commit 3b1d05d

5 files changed (+45, -36 lines)

docs/zh/examples/lorenz.md

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@
 === "模型导出命令"
 
     ``` sh
-    python train_enn.py mode=export
+    python train_transformer.py mode=export EMBEDDING_MODEL_PATH=https://paddle-org.bj.bcebos.com/paddlescience/models/lorenz/lorenz_pretrained.pdparams
     ```
 
 === "模型推理命令"
@@ -43,7 +43,7 @@
     # windows
     # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_training_rk.hdf5 --output ./datasets/lorenz_training_rk.hdf5
     # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/lorenz_valid_rk.hdf5 --output ./datasets/lorenz_valid_rk.hdf5
-    python train_transformer.py mode=infer EMBEDDING_MODEL_PATH=https://paddle-org.bj.bcebos.com/paddlescience/models/lorenz/lorenz_pretrained.pdparams
+    python train_transformer.py mode=infer
     ```
 
 | 模型 | MSE |

docs/zh/examples/rossler.md

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@
 === "模型导出命令"
 
     ``` sh
-    python train_transformer.py mode=export
+    python train_transformer.py mode=export EMBEDDING_MODEL_PATH=https://paddle-org.bj.bcebos.com/paddlescience/models/rossler/rossler_pretrained.pdparams
     ```
 
 === "模型推理命令"
@@ -43,7 +43,7 @@
     # windows
     # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_training.hdf5 --output ./datasets/rossler_training.hdf5
     # curl https://paddle-org.bj.bcebos.com/paddlescience/datasets/transformer_physx/rossler_valid.hdf5 --output ./datasets/rossler_valid.hdf5
-    python train_transformer.py mode=infer EMBEDDING_MODEL_PATH=https://paddle-org.bj.bcebos.com/paddlescience/models/rossler/rossler_pretrained.pdparams
+    python train_transformer.py mode=infer
     ```
 
 | 模型 | MSE |

examples/lorenz/train_transformer.py

Lines changed: 14 additions & 16 deletions
@@ -247,7 +247,14 @@ def evaluate(cfg: DictConfig):
 
 def export(cfg: DictConfig):
     # set model
-    model = ppsci.arch.PhysformerGPT2(**cfg.MODEL)
+    embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH)
+    model_cfg = {
+        **cfg.MODEL,
+        "embedding_model": embedding_model,
+        "input_keys": ["states"],
+        "output_keys": ["pred_states"],
+    }
+    model = ppsci.arch.PhysformerGPT2(**model_cfg)
 
     # initialize solver
     solver = ppsci.solver.Solver(
@@ -259,7 +266,7 @@ def export(cfg: DictConfig):
 
     input_spec = [
         {
-            key: InputSpec([None, 256, 32], "float32", name=key)
+            key: InputSpec([None, 255, 3], "float32", name=key)
             for key in model.input_keys
         },
     ]
@@ -272,42 +279,33 @@ def inference(cfg: DictConfig):
 
     predictor = pinn_predictor.PINNPredictor(cfg)
 
-    embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH)
-    output_transform = OutputTransform(embedding_model)
     dataset_cfg = {
         "name": "LorenzDataset",
         "file_path": cfg.VALID_FILE_PATH,
         "input_keys": cfg.MODEL.input_keys,
         "label_keys": cfg.MODEL.output_keys,
         "block_size": cfg.VALID_BLOCK_SIZE,
         "stride": 1024,
-        "embedding_model": embedding_model,
     }
 
     dataset = ppsci.data.dataset.build_dataset(dataset_cfg)
 
     input_dict = {
-        "embeds": dataset.embedding_data[: cfg.VIS_DATA_NUMS, :-1, :],
+        "states": dataset.data[: cfg.VIS_DATA_NUMS, :-1, :],
     }
-
-    output_dict = predictor.predict(
-        {key: input_dict[key] for key in cfg.MODEL.input_keys}, cfg.INFER.batch_size
-    )
+    output_dict = predictor.predict(input_dict, cfg.INFER.batch_size)
 
     # mapping data to cfg.INFER.output_keys
+    output_keys = ["pred_states"]
     output_dict = {
-        store_key: paddle.to_tensor(output_dict[infer_key])
-        for store_key, infer_key in zip(cfg.MODEL.output_keys, output_dict.keys())
+        store_key: output_dict[infer_key]
+        for store_key, infer_key in zip(output_keys, output_dict.keys())
     }
 
     input_dict = {
         "states": dataset.data[: cfg.VIS_DATA_NUMS, 1:, :],
     }
 
-    output_dict = {
-        "pred_states": output_transform(output_dict).numpy(),
-    }
-
     data_dict = {**input_dict, **output_dict}
     for i in range(cfg.VIS_DATA_NUMS):
         ppsci.visualize.save_plot_from_3d_dict(
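
Since the embedding model is now part of the exported graph, the Lorenz inference path feeds raw states and gets predictions back in physical space, so the separate OutputTransform step is gone. Below is a minimal sketch (not PaddleScience code) of the new input/output contract implied by this hunk; DummyPredictor and the "output_0" key are placeholders standing in for pinn_predictor.PINNPredictor and the exported graph's native output name.

```python
import numpy as np

class DummyPredictor:
    """Placeholder for pinn_predictor.PINNPredictor (illustration only)."""

    def predict(self, input_dict, batch_size):
        # The real predictor runs the exported graph; here we just echo shapes.
        return {"output_0": np.zeros_like(input_dict["states"])}

predictor = DummyPredictor()

# Raw physical states, shaped to match the new InputSpec([None, 255, 3]).
input_dict = {"states": np.zeros((8, 255, 3), dtype="float32")}
raw_out = predictor.predict(input_dict, batch_size=8)

# Rename the graph's native output keys to the documented ones, as the diff does.
output_keys = ["pred_states"]
output_dict = {
    store_key: raw_out[infer_key]
    for store_key, infer_key in zip(output_keys, raw_out.keys())
}
assert output_dict["pred_states"].shape == (8, 255, 3)
```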

examples/rossler/train_transformer.py

Lines changed: 14 additions & 15 deletions
@@ -246,7 +246,14 @@ def evaluate(cfg: DictConfig):
 
 def export(cfg: DictConfig):
     # set model
-    model = ppsci.arch.PhysformerGPT2(**cfg.MODEL)
+    embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH)
+    model_cfg = {
+        **cfg.MODEL,
+        "embedding_model": embedding_model,
+        "input_keys": ["states"],
+        "output_keys": ["pred_states"],
+    }
+    model = ppsci.arch.PhysformerGPT2(**model_cfg)
 
     # initialize solver
     solver = ppsci.solver.Solver(
@@ -258,7 +265,7 @@ def export(cfg: DictConfig):
 
     input_spec = [
         {
-            key: InputSpec([None, 256, 32], "float32", name=key)
+            key: InputSpec([None, 255, 3], "float32", name=key)
             for key in model.input_keys
         },
     ]
@@ -271,42 +278,34 @@ def inference(cfg: DictConfig):
 
     predictor = pinn_predictor.PINNPredictor(cfg)
 
-    embedding_model = build_embedding_model(cfg.EMBEDDING_MODEL_PATH)
-    output_transform = OutputTransform(embedding_model)
     dataset_cfg = {
         "name": "RosslerDataset",
         "file_path": cfg.VALID_FILE_PATH,
         "input_keys": cfg.MODEL.input_keys,
         "label_keys": cfg.MODEL.output_keys,
         "block_size": cfg.VALID_BLOCK_SIZE,
         "stride": 1024,
-        "embedding_model": embedding_model,
     }
 
     dataset = ppsci.data.dataset.build_dataset(dataset_cfg)
 
     input_dict = {
-        "embeds": dataset.embedding_data[: cfg.VIS_DATA_NUMS, :-1, :],
+        "states": dataset.data[: cfg.VIS_DATA_NUMS, :-1, :],
     }
 
-    output_dict = predictor.predict(
-        {key: input_dict[key] for key in cfg.MODEL.input_keys}, cfg.INFER.batch_size
-    )
+    output_dict = predictor.predict(input_dict, cfg.INFER.batch_size)
 
     # mapping data to cfg.INFER.output_keys
+    output_keys = ["pred_states"]
     output_dict = {
-        store_key: paddle.to_tensor(output_dict[infer_key])
-        for store_key, infer_key in zip(cfg.MODEL.output_keys, output_dict.keys())
+        store_key: output_dict[infer_key]
+        for store_key, infer_key in zip(output_keys, output_dict.keys())
    }
 
     input_dict = {
         "states": dataset.data[: cfg.VIS_DATA_NUMS, 1:, :],
     }
 
-    output_dict = {
-        "pred_states": output_transform(output_dict).numpy(),
-    }
-
     data_dict = {**input_dict, **output_dict}
     for i in range(cfg.VIS_DATA_NUMS):
         ppsci.visualize.save_plot_from_3d_dict(
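
The InputSpec change from [None, 256, 32] to [None, 255, 3] follows from moving the encoder into the exported model: the graph now takes the three raw Rossler (or Lorenz) state variables over a 255-step window rather than 32-dimensional embeddings. A quick shape sketch, assuming a validation block size of 256 (an assumption inferred from the old spec's sequence length; the slicing mirrors the inference() code above):

```python
import numpy as np

# (batch, block_size, state_dim); block_size = 256 is an assumption here.
block = np.zeros((4, 256, 3), dtype="float32")

states = block[:, :-1, :]   # model input, like dataset.data[..., :-1, :]
targets = block[:, 1:, :]   # ground truth, like dataset.data[..., 1:, :]

assert states.shape == (4, 255, 3)   # matches InputSpec([None, 255, 3])
assert targets.shape == (4, 255, 3)
```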

ppsci/arch/physx_transformer.py

Lines changed: 13 additions & 1 deletion
@@ -251,6 +251,9 @@ class PhysformerGPT2(base.Arch):
         attn_pdrop (float, optional): The dropout probability used on attention weights. Defaults to 0.0.
         resid_pdrop (float, optional): The dropout probability used on block outputs. Defaults to 0.0.
         initializer_range (float, optional): Initializer range of linear layer. Defaults to 0.05.
+        embedding_model (Optional[base.Arch]): Embedding model. If this parameter is set,
+            the embedding model will map the input data to the embedding space and the
+            output data to the physical space. Defaults to None.
 
     Examples:
         >>> import ppsci
@@ -269,6 +272,7 @@ def __init__(
         attn_pdrop: float = 0.0,
         resid_pdrop: float = 0.0,
         initializer_range: float = 0.05,
+        embedding_model: Optional[base.Arch] = None,
     ):
         super().__init__()
         self.input_keys = input_keys
@@ -296,6 +300,7 @@ def __init__(
         self.linear = nn.Linear(embed_size, embed_size)
 
         self.apply(self._init_weights)
+        self.embedding_model = embedding_model
 
     def _init_weights(self, module):
         if isinstance(module, nn.Linear):
@@ -344,7 +349,7 @@ def _generate_time_series(self, x, max_length):
     def generate(self, x, max_length=256):
         if max_length <= 0:
             raise ValueError(
-                "max_length({max_length}) should be a strictly positive integer."
+                f"max_length({max_length}) should be a strictly positive integer."
             )
         outputs = self._generate_time_series(x, max_length)
         return outputs
@@ -375,10 +380,17 @@ def forward(self, x):
         if self._input_transform is not None:
             x = self._input_transform(x)
         x_tensor = self.concat_to_tensor(x, self.input_keys, axis=-1)
+        if self.embedding_model is not None:
+            x_tensor = self.embedding_model.encoder(x_tensor)
+
         if self.training:
             y = self.forward_tensor(x_tensor)
         else:
             y = self.forward_eval(x_tensor)
+
+        if self.embedding_model is not None:
+            y = (self.embedding_model.decoder(y[0]),)
+
         y = self.split_to_dict(y, self.output_keys)
         if self._output_transform is not None:
             y = self._output_transform(x, y)
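
The forward() change is a straightforward wrap: when an embedding model is attached, the transformer keeps working in the learned embedding space while callers see physical-space tensors at both ends, which is what lets export() bake everything into one graph and lets the docs drop EMBEDDING_MODEL_PATH from the infer command. A minimal sketch of that pattern in plain Python (not the actual PhysformerGPT2 classes; transformer and embedding_model are stand-in callables assumed to expose encoder/decoder):

```python
class LatentRollout:
    """Sketch of the encode -> rollout -> decode wrapping added to forward()."""

    def __init__(self, transformer, embedding_model=None):
        self.transformer = transformer          # operates on embeddings
        self.embedding_model = embedding_model  # exposes .encoder / .decoder

    def __call__(self, states):
        x = states
        if self.embedding_model is not None:
            x = self.embedding_model.encoder(x)   # physical -> embedding space
        y = self.transformer(x)                   # rollout in embedding space
        if self.embedding_model is not None:
            y = self.embedding_model.decoder(y)   # embedding -> physical space
        return y
```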
