automl
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
Lines changed: 17 additions & 49 deletions
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
Lines changed: 29 additions & 32 deletions
@@ -199,6 +199,8 @@ def _model_predict(
 class AutoML(BaseEstimator):
     """Base class for handling the AutoML procedure"""
 
+    _task_mapping: dict[str, int]
+
     def __init__(
         self,
         time_left_for_this_task: int,
@@ -243,12 +245,10 @@ def __init__(
                 )
 
         # Validate dataset_compression and set its values
-        self._dataset_compression: DatasetCompressionSpec | None
+        self._dataset_compression: DatasetCompressionSpec | None = None
         if isinstance(dataset_compression, bool):
             if dataset_compression is True:
                 self._dataset_compression = default_dataset_compression_arg
-            else:
-                self._dataset_compression = None
         else:
             self._dataset_compression = validate_dataset_compression_arg(
                 dataset_compression,
@@ -307,18 +307,18 @@ def __init__(
 
         # Create the backend
         self._backend: Backend = create(
-            temporary_directory=temporary_directory,
+            # TODO: update the backend, as it does accept an optional str
+            temporary_directory=temporary_directory,  # type: ignore
             output_directory=None,
             prefix="auto-sklearn",
             delete_output_folder_after_terminate=delete_tmp_folder_after_terminate,
         )
 
-        self._data_memory_limit = None  # TODO: dead variable? Always None
         self._datamanager = None
         self._dataset_name = None
         self._feat_type = None
         self._logger: PicklableClientLogger | None = None
-        self._task = None
+        self._task: int | None = None
         self._label_num = None
         self._parser = None
         self._can_predict = False
@@ -330,10 +330,11 @@ def __init__(
         self.InputValidator: InputValidator | None = None
         self.configuration_space = None
 
-        # The ensemble performance history through time
         self._stopwatch = StopWatch()
         self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT
-        self.ensemble_performance_history = []
+
+        # The ensemble performance history through time
+        self.ensemble_performance_history: list[dict[str, Any]] = []
 
         # Num_run tell us how many runs have been launched. It can be seen as an
         # identifier for each configuration saved to disk
@@ -480,14 +481,6 @@ def _do_dummy_prediction(self) -> None:
 
         return
 
-    @classmethod
-    def _task_type_id(cls, task_type: str) -> int:
-        raise NotImplementedError
-
-    @classmethod
-    def _supports_task_type(cls, task_type: str) -> bool:
-        raise NotImplementedError
-
     def fit(
         self,
         X: SUPPORTED_FEAT_TYPES,
@@ -594,16 +587,12 @@ def fit(
         y = convert_if_sparse(y)
         y_test = convert_if_sparse(y_test) if y_test is not None else None
 
-        # Get the task if it doesn't exist
         if task is None:
-            y_task = type_of_target(y)
-            if not self._supports_task_type(y_task):
-                raise ValueError(
-                    f"{self.__class__.__name__} does not support" f" task {y_task}"
-                )
-            self._task = self._task_type_id(y_task)
-        else:
-            self._task = task
+            y_task = type_of_target(y)  # named so the error can report it
+            task = self._task_mapping.get(y_task)
+            if task is None:
+                raise ValueError(f"{self.__class__.__name__} does not support {y_task}")
+        self._task = task
 
         # Assign a metric if it doesnt exist
         if self._metrics is None:
@@ -613,9 +602,6 @@ def fit(
         if dataset_name is None:
             dataset_name = str(uuid.uuid1(clock_seq=os.getpid()))
 
-        # By default try to use the TCP logging port or get a new port
-        self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT
-
         # Once we start the logging server, it starts in a new process
         # If an error occurs then we want to make sure that we exit cleanly
         # and shut it down, else it might hang
@@ -1272,11 +1258,9 @@ def fit_pipeline(
         # Get the task if it doesn't exist
         if task is None:
             y_task = type_of_target(y)
-            if not self._supports_task_type(y_task):
-                raise ValueError(
-                    f"{self.__class__.__name__} does not support" f" task {y_task}"
-                )
-            self._task = self._task_type_id(y_task)
+            self._task = self._task_mapping.get(y_task, None)
+            if self._task is None:
+                raise ValueError(f"{self.__class__.__name__} does not support {y_task}")
         else:
             self._task = task
 
@@ -2271,14 +2255,6 @@ class AutoMLClassifier(AutoML):
         "binary": BINARY_CLASSIFICATION,
     }
 
-    @classmethod
-    def _task_type_id(cls, task_type: str) -> int:
-        return cls._task_mapping[task_type]
-
-    @classmethod
-    def _supports_task_type(cls, task_type: str) -> bool:
-        return task_type in cls._task_mapping.keys()
-
     def fit(
         self,
         X: SUPPORTED_FEAT_TYPES,
@@ -2361,14 +2337,6 @@ class AutoMLRegressor(AutoML):
         "multiclass": REGRESSION,
     }
 
-    @classmethod
-    def _task_type_id(cls, task_type: str) -> int:
-        return cls._task_mapping[task_type]
-
-    @classmethod
-    def _supports_task_type(cls, task_type: str) -> bool:
-        return task_type in cls._task_mapping.keys()
-
     def fit(
         self,
         X: SUPPORTED_FEAT_TYPES,
 
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from abc import ABC, abstractmethod
+from abc import ABC
 from typing import Any, Generic, Iterable, Sequence, TypeVar
 
 import warnings
@@ -21,7 +21,7 @@
 )
 from sklearn.utils.multiclass import type_of_target
 from smac.runhistory.runhistory import RunInfo, RunValue
-from typing_extensions import Literal, TypeAlias
+from typing_extensions import Literal
 
 from autosklearn.automl import AutoML, AutoMLClassifier, AutoMLRegressor
 from autosklearn.data.validation import convert_if_sparse
@@ -31,34 +31,23 @@
 from autosklearn.pipeline.base import BasePipeline
 from autosklearn.util.smac_wrap import SMACCallback
 
-# Used to indicate what type the underlying AutoML instance is
-TAutoML = TypeVar("TAutoML", bound=AutoML)
-TParetoModel = TypeVar("TParetoModel", VotingClassifier, VotingRegressor)
-
 # Used to return self and give correct type information from subclasses,
 # see `fit(self: Self) -> Self`
 Self = TypeVar("Self", bound="AutoSklearnEstimator")
 
-ResampleOptions: TypeAlias = Literal[
-    "holdout",
-    "cv",
-    "holdout-iterative-fit",
-    "cv-iterative-fit",
-    "partial-cv",
-]
-DisableEvaluatorOptions: TypeAlias = Literal["y_optimization", "model"]
-
+# Used to indicate what type the underlying AutoML instance is
+TParetoModel = TypeVar("TParetoModel", VotingClassifier, VotingRegressor)
+TAutoML = TypeVar("TAutoML", bound=AutoML)
 
-class AutoSklearnEstimator(ABC, Generic[TAutoML, TParetoModel], BaseEstimator):
 
-    # List of target types supported by the estimator class
-    supported_target_types: list[str]
+class AutoSklearnEstimator(ABC, BaseEstimator, Generic[TAutoML, TParetoModel]):
 
-    # The automl class used by the estimator class
-    _automl_class: type[TAutoML]
+    supported_target_types: list[str]  # Supported target types for the estimator
+    _automl_class: type[TAutoML]  # The automl class used by the estimator class
 
     def __init__(
         self,
+        *,
         time_left_for_this_task: int = 3600,
         per_run_time_limit: int | None = None,  # TODO: allow percentage
         initial_configurations_via_metalearning: int = 25,  # TODO validate
@@ -71,7 +60,9 @@ def __init__(
         memory_limit: int | None = 3072,
         include: dict[str, list[str]] | None = None,
         exclude: dict[str, list[str]] | None = None,
-        resampling_strategy: ResampleOptions
+        resampling_strategy: Literal[
+            "holdout", "cv", "holdout-iterative-fit", "cv-iterative-fit", "partial-cv"
+        ]
         | BaseCrossValidator
         | _RepeatedSplits
         | BaseShuffleSplit = "holdout",
@@ -81,7 +72,7 @@ def __init__(
         n_jobs: int = 1,
         dask_client: dask.distributed.Client | None = None,
         disable_evaluator_output: bool
-        | Sequence[DisableEvaluatorOptions] = False,  # TODO fill in
+        | Sequence[Literal["y_optimization", "model"]] = False,  # TODO: fill in
         get_smac_object_callback: SMACCallback | None = None,
         smac_scenario_args: dict[str, Any] | None = None,
         logging_config: dict[str, Any] | None = None,
@@ -490,7 +481,7 @@ def __init__(
         self.allow_string_features = allow_string_features
 
         # Cached
-        self.automl_: AutoML | None = None
+        self.automl_: TAutoML | None = None
 
         # Handle the number of jobs and the time for them
         # Made private by `_n_jobs` to keep with sklearn compliance
@@ -504,21 +495,19 @@ def __init__(
             self.per_run_time_limit = self._n_jobs * self.time_left_for_this_task // 10
 
     @property
-    @abstractmethod
     def automl(self) -> TAutoML:
         """Get the underlying Automl instance
 
         Returns
         -------
         AutoML
-            The underlying AutoML instanec
+            The underlying AutoML instance
         """
         if self.automl_ is not None:
             return self.automl_
 
         initial_configs = self.initial_configurations_via_metalearning
-        cls = self._get_automl_class()
-        automl = cls(
+        automl = self._automl_class(
             temporary_directory=self.tmp_folder,
             delete_tmp_folder_after_terminate=self.delete_tmp_folder_after_terminate,
             time_left_for_this_task=self.time_left_for_this_task,
@@ -568,16 +557,14 @@ def ensemble(self) -> AbstractEnsemble:
         NotFittedError
             If there this estimator has not been fitted
         """
-
-    def __getstate__(self) -> dict[str, Any]:
-        # Cannot serialize a client!
-        self.dask_client = None
-        return self.__dict__
+        # TODO
+        raise NotImplementedError()
 
     def fit(
         self: Self,
         X: np.ndarray | pd.DataFrame | list | spmatrix,
         y: np.ndarray | pd.DataFrame | pd.Series | list,
+        *,
         X_test: np.ndarray | pd.DataFrame | list | spmatrix | None = None,
         y_test: np.ndarray | pd.DataFrame | pd.Series | list | None = None,
         feat_type: list[str] | None = None,
@@ -697,6 +684,7 @@ def fit_pipeline(
         self,
         X: np.ndarray | pd.DataFrame | list | spmatrix,
         y: np.ndarray | pd.DataFrame | pd.Series | list,
+        *,
         config: Configuration | dict[str, Any],
         dataset_name: str | None = None,
         X_test: np.ndarray | pd.DataFrame | list | spmatrix | None = None,
@@ -767,6 +755,7 @@ def fit_pipeline(
     def fit_ensemble(
         self: Self,
         y: np.ndarray | pd.DataFrame | pd.Series | list,
+        *,
         task: int | None = None,
         precision: Literal[16, 32, 64] = 32,
         dataset_name: str | None = None,
@@ -913,6 +902,7 @@ def refit(
     def predict(
         self,
         X: np.ndarray | pd.DataFrame | list | spmatrix,
+        *,
         batch_size: int | None = None,
         n_jobs: int = 1,
     ) -> np.ndarray:
@@ -1088,6 +1078,7 @@ def sprint_statistics(self) -> str:
 
     def leaderboard(
         self,
+        *,
         detailed: bool = False,
         ensemble_only: bool = True,
         top_k: int | Literal["all"] = "all",
@@ -1501,6 +1492,7 @@ def get_configuration_space(
         self,
         X: np.ndarray | pd.DataFrame | list | spmatrix,
         y: np.ndarray | pd.DataFrame | pd.Series | list,
+        *,
         X_test: np.ndarray | pd.DataFrame | list | spmatrix | None = None,
         y_test: np.ndarray | pd.DataFrame | pd.Series | list | None = None,
         dataset_name: str | None = None,
@@ -1549,6 +1541,11 @@ def get_pareto_set(self) -> Sequence[TParetoModel]:
         """
         return self.automl._load_pareto_set()
 
+    def __getstate__(self) -> dict[str, Any]:
+        """Return the state to pickle, leaving the live estimator untouched."""
+        # Cannot serialize a client! Blank it in the pickled copy only.
+        return {**self.__dict__, "dask_client": None}
+
     def __sklearn_is_fitted__(self) -> bool:
         return self.automl_ is not None and self.automl.fitted