diff --git a/airbyte/_executors/docker.py b/airbyte/_executors/docker.py index c395eaf3..eba14f79 100644 --- a/airbyte/_executors/docker.py +++ b/airbyte/_executors/docker.py @@ -8,6 +8,7 @@ from airbyte import exceptions as exc from airbyte._executors.base import Executor +from airbyte.constants import TEMP_DIR logger = logging.getLogger("airbyte") @@ -76,12 +77,18 @@ def map_cli_args(self, args: list[str]) -> list[str]: # This is a file path and we need to map it to the same file within the # relative path of the file within the container's volume. for local_volume, container_path in self.volumes.items(): - if Path(arg).is_relative_to(local_volume): + # If the container path corresponds to the container's temporary directory, + # then the local temporary directory path is used (as the local volume + # path can be overridden). + local_path = local_volume + if container_path == DEFAULT_AIRBYTE_CONTAINER_TEMP_DIR: + local_path = TEMP_DIR + if Path(arg).is_relative_to(local_path): logger.debug( f"Found file input path `{arg}` " - f"relative to container-mapped volume: {local_volume}" + f"relative to container-mapped volume: {local_path}" ) - mapped_path = Path(container_path) / Path(arg).relative_to(local_volume) + mapped_path = Path(container_path) / Path(arg).relative_to(local_path) logger.debug(f"Mapping `{arg}` -> `{mapped_path}`") new_args.append(str(mapped_path)) break diff --git a/airbyte/_executors/util.py b/airbyte/_executors/util.py index 6f9295a7..1219f907 100644 --- a/airbyte/_executors/util.py +++ b/airbyte/_executors/util.py @@ -1,7 +1,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. from __future__ import annotations -import tempfile from pathlib import Path from typing import TYPE_CHECKING, Literal, cast @@ -16,7 +15,7 @@ from airbyte._executors.python import VenvExecutor from airbyte._util.meta import which from airbyte._util.telemetry import EventState, log_install_state # Non-public API -from airbyte.constants import AIRBYTE_OFFLINE_MODE, TEMP_DIR_OVERRIDE +from airbyte.constants import AIRBYTE_OFFLINE_MODE, TEMP_DIR from airbyte.sources.registry import ConnectorMetadata, InstallType, get_connector_metadata from airbyte.version import get_version @@ -132,6 +131,7 @@ def get_connector_executor( # noqa: PLR0912, PLR0913, PLR0915 # Too many branch local_executable: Path | str | None = None, docker_image: bool | str | None = None, use_host_network: bool = False, + host_temp_dir: Path | str | None = None, source_manifest: bool | dict | Path | str | None = None, install_if_missing: bool = True, install_root: Path | None = None, @@ -226,7 +226,9 @@ def get_connector_executor( # noqa: PLR0912, PLR0913, PLR0915 # Too many branch if ":" not in docker_image: docker_image = f"{docker_image}:{version or 'latest'}" - host_temp_dir = TEMP_DIR_OVERRIDE or Path(tempfile.gettempdir()) + if not host_temp_dir: + host_temp_dir = TEMP_DIR + container_temp_dir = DEFAULT_AIRBYTE_CONTAINER_TEMP_DIR local_mount_dir = Path().absolute() / name diff --git a/airbyte/_util/temp_files.py b/airbyte/_util/temp_files.py index ce2a65cf..109c0c1b 100644 --- a/airbyte/_util/temp_files.py +++ b/airbyte/_util/temp_files.py @@ -12,7 +12,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any -from airbyte.constants import TEMP_DIR_OVERRIDE +from airbyte.constants import TEMP_DIR if TYPE_CHECKING: @@ -30,7 +30,7 @@ def as_temp_files(files_contents: list[dict | str]) -> Generator[list[str], Any, mode="w+t", delete=False, encoding="utf-8", - dir=TEMP_DIR_OVERRIDE or None, + dir=TEMP_DIR, suffix=".json" if use_json else ".txt", ) temp_file.write( diff --git a/airbyte/constants.py b/airbyte/constants.py index 03318f2d..f2a10026 100644 --- a/airbyte/constants.py +++ b/airbyte/constants.py @@ -4,6 +4,7 @@ from __future__ import annotations import os +import tempfile from pathlib import Path @@ -64,9 +65,8 @@ def _str_to_bool(value: str) -> bool: """Convert a string value of an environment values to a boolean value.""" return bool(value) and value.lower() not in {"", "0", "false", "f", "no", "n", "off"} - -TEMP_DIR_OVERRIDE: Path | None = ( - Path(os.environ["AIRBYTE_TEMP_DIR"]) if os.getenv("AIRBYTE_TEMP_DIR") else None +TEMP_DIR: Path = Path( + os.environ["AIRBYTE_TEMP_DIR"] if os.getenv("AIRBYTE_TEMP_DIR") else tempfile.gettempdir() ) """The directory to use for temporary files. diff --git a/airbyte/destinations/util.py b/airbyte/destinations/util.py index a27e7596..96987c35 100644 --- a/airbyte/destinations/util.py +++ b/airbyte/destinations/util.py @@ -28,6 +28,7 @@ def get_destination( # noqa: PLR0913 # Too many arguments local_executable: Path | str | None = None, docker_image: str | bool | None = None, use_host_network: bool = False, + host_temp_dir: Path | str | None = None, install_if_missing: bool = True, ) -> Destination: """Get a connector by name and version. @@ -56,6 +57,9 @@ def get_destination( # noqa: PLR0913 # Too many arguments the host network. This is useful for connectors that need to access resources on the host machine, such as a local database. This parameter is ignored when `docker_image` is not set. + host_temp_dir: If set, along with docker_image, this replaces the volume exposing the + temporary files directory, ensuring compatibility when the Docker engine runs on a + different host (e.g., Docker in Docker), where paths may differ. install_if_missing: Whether to install the connector if it is not available locally. This parameter is ignored when local_executable is set. """ @@ -70,6 +74,7 @@ def get_destination( # noqa: PLR0913 # Too many arguments local_executable=local_executable, docker_image=docker_image, use_host_network=use_host_network, + host_temp_dir=host_temp_dir, install_if_missing=install_if_missing, ), ) diff --git a/airbyte/sources/util.py b/airbyte/sources/util.py index dbdc5668..1016aea3 100644 --- a/airbyte/sources/util.py +++ b/airbyte/sources/util.py @@ -55,6 +55,7 @@ def get_source( # noqa: PLR0913 # Too many arguments local_executable: Path | str | None = None, docker_image: bool | str | None = None, use_host_network: bool = False, + host_temp_dir: Path | str | None = None, source_manifest: bool | dict | Path | str | None = None, install_if_missing: bool = True, install_root: Path | None = None, @@ -95,6 +96,9 @@ def get_source( # noqa: PLR0913 # Too many arguments the host network. This is useful for connectors that need to access resources on the host machine, such as a local database. This parameter is ignored when `docker_image` is not set. + host_temp_dir: If set, along with docker_image, this replaces the volume exposing the + temporary files directory, ensuring compatibility when the Docker engine runs on a + different host (e.g., Docker in Docker), where paths may differ. source_manifest: If set, the connector will be executed based on a declarative YAML source definition. This input can be `True` to attempt to auto-download a YAML spec, `dict` to accept a Python dictionary as the manifest, `Path` to pull a manifest from @@ -116,6 +120,7 @@ def get_source( # noqa: PLR0913 # Too many arguments local_executable=local_executable, docker_image=docker_image, use_host_network=use_host_network, + host_temp_dir=host_temp_dir, source_manifest=source_manifest, install_if_missing=install_if_missing, install_root=install_root,