pyiron · jan-janssen · Jul 15, 2025 · Jul 15, 2025 · Jul 15, 2025 · Jul 15, 2025
@@ -402,7 +402,7 @@ jobs:
       run: echo -e "channels:\n  - conda-forge\n" > .condarc
     - uses: conda-incubator/setup-miniconda@v3
       with:
-        python-version: '3.9'
+        python-version: '3.10'
         miniforge-version: latest
         condarc-file: .condarc
         environment-file: .ci_support/environment-old.yml

@@ -4,6 +4,7 @@
 
 import cloudpickle
 
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.task_scheduler.file.backend import (
     backend_load_file,
     backend_write_file,
@@ -53,6 +54,10 @@ def main() -> None:
                 output={"error": error},
                 runtime=time.time() - time_start,
             )
+            backend_write_error_file(
+                error=error,
+                apply_dict=apply_dict,
+            )
     else:
         if mpi_rank_zero:
             backend_write_file(

@@ -6,6 +6,7 @@
 import cloudpickle
 import zmq
 
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.standalone.interactive.backend import call_funct, parse_arguments
 from executorlib.standalone.interactive.communication import (
     interface_connect,
@@ -82,6 +83,10 @@ def main() -> None:
                         socket=socket,
                         result_dict={"error": error},
                     )
+                    backend_write_error_file(
+                        error=error,
+                        apply_dict=input_dict,
+                    )
             else:
                 # Send output
                 if mpi_rank_zero:

@@ -2,6 +2,7 @@
 from os.path import abspath
 from typing import Optional
 
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.standalone.interactive.backend import call_funct, parse_arguments
 from executorlib.standalone.interactive.communication import (
     interface_connect,
@@ -17,6 +18,7 @@ def main(argument_lst: Optional[list[str]] = None):
 
     Args:
         argument_lst (Optional[List[str]]): List of command line arguments. If None, sys.argv will be used.
+        write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
     Returns:
         None
@@ -58,6 +60,10 @@ def main(argument_lst: Optional[list[str]] = None):
                     socket=socket,
                     result_dict={"error": error},
                 )
+                backend_write_error_file(
+                    error=error,
+                    apply_dict=input_dict,
+                )
             else:
                 # Send output
                 interface_send(socket=socket, result_dict={"result": output})

@@ -63,6 +63,7 @@ class FluxJobExecutor(BaseExecutor):
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
         log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
     Examples:
         ```
@@ -103,6 +104,7 @@ def __init__(
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
         log_obj_size: bool = False,
+        write_error_file: bool = False,
     ):
         """
         The executorlib.FluxJobExecutor leverages either the message passing interface (MPI), the SLURM workload manager
@@ -148,6 +150,7 @@ def __init__(
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
             log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+            write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
         """
         default_resource_dict: dict = {
@@ -157,6 +160,7 @@ def __init__(
             "cwd": None,
             "openmpi_oversubscribe": False,
             "slurm_cmd_args": [],
+            "write_error_file": write_error_file,
         }
         if resource_dict is None:
             resource_dict = {}
@@ -248,6 +252,7 @@ class FluxClusterExecutor(BaseExecutor):
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
         log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
     Examples:
         ```
@@ -285,6 +290,7 @@ def __init__(
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
         log_obj_size: bool = False,
+        write_error_file: bool = False,
     ):
         """
         The executorlib.FluxClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -327,6 +333,7 @@ def __init__(
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
             log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+            write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
         """
         default_resource_dict: dict = {
@@ -336,6 +343,7 @@ def __init__(
             "cwd": None,
             "openmpi_oversubscribe": False,
             "slurm_cmd_args": [],
+            "write_error_file": write_error_file,
         }
         if resource_dict is None:
             resource_dict = {}

@@ -57,6 +57,7 @@ class SingleNodeExecutor(BaseExecutor):
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
         log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
     Examples:
         ```
@@ -93,6 +94,7 @@ def __init__(
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
         log_obj_size: bool = False,
+        write_error_file: bool = False,
     ):
         """
         The executorlib.SingleNodeExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -134,6 +136,7 @@ def __init__(
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
             log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+            write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
         """
         default_resource_dict: dict = {
@@ -143,6 +146,7 @@ def __init__(
             "cwd": None,
             "openmpi_oversubscribe": False,
             "slurm_cmd_args": [],
+            "write_error_file": write_error_file,
         }
         if resource_dict is None:
             resource_dict = {}
@@ -220,6 +224,7 @@ class TestClusterExecutor(BaseExecutor):
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
         log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
     Examples:
         ```
@@ -256,6 +261,7 @@ def __init__(
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
         log_obj_size: bool = False,
+        write_error_file: bool = False,
     ):
         """
         The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
@@ -291,6 +297,7 @@ def __init__(
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
             log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+            write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
         """
         default_resource_dict: dict = {
@@ -299,6 +306,7 @@ def __init__(
             "gpus_per_core": 0,
             "cwd": None,
             "openmpi_oversubscribe": False,
+            "write_error_file": write_error_file,
         }
         if resource_dict is None:
             resource_dict = {}

@@ -60,6 +60,7 @@ class SlurmClusterExecutor(BaseExecutor):
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
         log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
     Examples:
         ```
@@ -97,6 +98,7 @@ def __init__(
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
         log_obj_size: bool = False,
+        write_error_file: bool = False,
     ):
         """
         The executorlib.SlurmClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -139,6 +141,7 @@ def __init__(
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
             log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+            write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
         """
         default_resource_dict: dict = {
@@ -148,6 +151,7 @@ def __init__(
             "cwd": None,
             "openmpi_oversubscribe": False,
             "slurm_cmd_args": [],
+            "write_error_file": write_error_file,
         }
         if resource_dict is None:
             resource_dict = {}
@@ -244,6 +248,7 @@ class SlurmJobExecutor(BaseExecutor):
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
         log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
     Examples:
         ```
@@ -280,6 +285,7 @@ def __init__(
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
         log_obj_size: bool = False,
+        write_error_file: bool = False,
     ):
         """
         The executorlib.SlurmJobExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -325,6 +331,7 @@ def __init__(
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
             log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+            write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
 
         """
         default_resource_dict: dict = {
@@ -334,6 +341,7 @@ def __init__(
             "cwd": None,
             "openmpi_oversubscribe": False,
             "slurm_cmd_args": [],
+            "write_error_file": write_error_file,
         }
         if resource_dict is None:
             resource_dict = {}

@@ -10,6 +10,7 @@
     "error": "error",
     "runtime": "runtime",
     "queue_id": "queue_id",
+    "write_error_file": "write_error_file",
 }
 
 

@@ -0,0 +1,20 @@
+import traceback
+
+
+def backend_write_error_file(error: Exception, apply_dict: dict) -> None:
+    """
+    Append a failure report to an error file if requested in the apply_dict.
+
+    Nothing is written unless apply_dict["write_error_file"] is truthy. The report
+    is appended to the file named by apply_dict["error_file_name"] (default
+    "error.out") and lists the function, its positional and keyword arguments,
+    followed by the formatted traceback of the error.
+
+    Args:
+        error (Exception): The error to be written.
+        apply_dict (dict): Dictionary describing the failed call. The keys "fn",
+            "args" and "kwargs" are read when writing is enabled; the keys
+            "write_error_file" and "error_file_name" are optional.
+
+    Returns:
+        None
+
+    Raises:
+        KeyError: If writing is enabled and "fn", "args" or "kwargs" is missing.
+    """
+    if apply_dict.get("write_error_file", False):
+        with open(apply_dict.get("error_file_name", "error.out"), "a") as f:
+            f.write("function: " + str(apply_dict["fn"]) + "\n")
+            f.write("args: " + str(apply_dict["args"]) + "\n")
+            f.write("kwargs: " + str(apply_dict["kwargs"]) + "\n")
+            # Pass type, value and traceback explicitly: the single-argument form
+            # print_exception(exc) is only accepted on Python >= 3.10, while this
+            # three-argument form works on older interpreters as well.
+            traceback.print_exception(
+                type(error), error, error.__traceback__, file=f
+            )
@@ -2,6 +2,7 @@
 import time
 from typing import Any
 
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.task_scheduler.file.hdf import dump, load
 from executorlib.task_scheduler.file.shared import FutureItem
 
@@ -77,6 +78,10 @@ def backend_execute_task_in_file(file_name: str) -> None:
         }
     except Exception as error:
         result = {"error": error}
+        backend_write_error_file(
+            error=error,
+            apply_dict=apply_dict,
+        )
 
     backend_write_file(
         file_name=file_name,

@@ -52,6 +52,10 @@ def load(file_name: str) -> dict:
             data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"]))
         else:
             data_dict["kwargs"] = {}
+        if "write_error_file" in hdf:
+            data_dict["write_error_file"] = cloudpickle.loads(
+                np.void(hdf["/write_error_file"])
+            )
         return data_dict
 
 

@@ -126,13 +126,15 @@ def execute_tasks_h5(
             )
             cache_key = task_resource_dict.pop("cache_key", None)
             cache_directory = os.path.abspath(task_resource_dict.pop("cache_directory"))
+            write_error_file = task_resource_dict.pop("write_error_file", False)
             task_key, data_dict = serialize_funct_h5(
                 fn=task_dict["fn"],
                 fn_args=task_args,
                 fn_kwargs=task_kwargs,
                 resource_dict=task_resource_dict,
                 cache_key=cache_key,
             )
+            data_dict["write_error_file"] = write_error_file
             if task_key not in memory_dict:
                 if os.path.join(
                     cache_directory, task_key + "_o.h5"

@@ -36,6 +36,7 @@ def __init__(
         pysqa_config_directory: Optional[str] = None,
         backend: Optional[str] = None,
         disable_dependencies: bool = False,
+        write_error_file: bool = False,
     ):
         """
         Initialize the FileExecutor.
@@ -50,12 +51,14 @@ def __init__(
             pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
             backend (str, optional): name of the backend used to spawn tasks.
             disable_dependencies (boolean): Disable resolving future objects during the submission.
+            write_error_file (boolean): Enable writing error.out files when the computation of a Python function fails
         """
         super().__init__(max_cores=None)
         default_resource_dict = {
             "cores": 1,
             "cwd": None,
             "cache_directory": "executorlib_cache",
+            "write_error_file": write_error_file,
         }
         if resource_dict is None:
             resource_dict = {}
@@ -95,6 +98,7 @@ def create_file_executor(
     init_function: Optional[Callable] = None,
     disable_dependencies: bool = False,
     execute_function: Callable = execute_with_pysqa,
+    write_error_file: bool = False,
 ):
     if block_allocation:
         raise ValueError(
@@ -123,4 +127,5 @@ def create_file_executor(
         disable_dependencies=disable_dependencies,
         execute_function=execute_function,
         terminate_function=terminate_function,
+        write_error_file=write_error_file,
     )