Skip to content

Commit ce1eeb1

Browse files
committed
resolve write function during import time
1 parent 0c8c4ec commit ce1eeb1

File tree

1 file changed

+51
-32
lines changed

1 file changed

+51
-32
lines changed

src/crawlee/_utils/file.py

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,52 @@
1717

1818
from crawlee._types import ExportDataCsvKwargs, ExportDataJsonKwargs
1919

20+
if sys.platform == 'win32':
21+
22+
def _write_file(
23+
path: Path,
24+
data: str | bytes,
25+
*,
26+
is_binary: bool,
27+
) -> str | None:
28+
"""Windows-specific file write implementation.
29+
30+
This implementation writes directly to the file without using a temporary file, because
31+
they are problematic due to permissions issues on Windows.
32+
"""
33+
if is_binary:
34+
path.write_bytes(data) # type: ignore[arg-type]
35+
else:
36+
path.write_text(data, encoding='utf-8') # type: ignore[arg-type]
37+
return None
38+
else:
39+
40+
def _write_file(
41+
path: Path,
42+
data: str | bytes,
43+
*,
44+
is_binary: bool,
45+
) -> str | None:
46+
"""Linux/Unix-specific file write implementation using temporary files."""
47+
dir_path = path.parent
48+
fd, tmp_path = tempfile.mkstemp(
49+
suffix=f'{path.suffix}.tmp',
50+
prefix=f'{path.name}.',
51+
dir=str(dir_path),
52+
)
53+
54+
try:
55+
if is_binary:
56+
with os.fdopen(fd, 'wb') as tmp_file:
57+
tmp_file.write(data) # type: ignore[arg-type]
58+
else:
59+
with os.fdopen(fd, 'w', encoding='utf-8') as tmp_file:
60+
tmp_file.write(data) # type: ignore[arg-type]
61+
except Exception:
62+
Path(tmp_path).unlink(missing_ok=True)
63+
raise
64+
return tmp_path
65+
2066

2167
def infer_mime_type(value: Any) -> str:
2268
"""Infer the MIME content type from the value.
@@ -95,41 +141,14 @@ async def atomic_write(
95141
retry_count: Internal parameter to track the number of retry attempts (default: 0).
96142
"""
97143
max_retries = 3
98-
dir_path = path.parent
99144
tmp_path: str | None = None
100145

101-
def _write_windows() -> None:
102-
if is_binary:
103-
path.write_bytes(data) # type: ignore[arg-type]
104-
else:
105-
path.write_text(data, encoding='utf-8') # type: ignore[arg-type]
106-
107-
def _write_linux() -> str:
108-
fd, tmp_path = tempfile.mkstemp(
109-
suffix=f'{path.suffix}.tmp',
110-
prefix=f'{path.name}.',
111-
dir=str(dir_path),
112-
)
113-
114-
try:
115-
if is_binary:
116-
with os.fdopen(fd, 'wb') as tmp_file:
117-
tmp_file.write(data) # type: ignore[arg-type]
118-
else:
119-
with os.fdopen(fd, 'w', encoding='utf-8') as tmp_file:
120-
tmp_file.write(data) # type: ignore[arg-type]
121-
except Exception:
122-
Path(tmp_path).unlink(missing_ok=True)
123-
raise
124-
return tmp_path
125-
126146
try:
127-
# We have to differentiate between Windows and Linux due to the permissions errors
128-
# in Windows when working with temporary files.
129-
if sys.platform == 'win32':
130-
await asyncio.to_thread(_write_windows)
131-
else:
132-
tmp_path = await asyncio.to_thread(_write_linux)
147+
# Use the platform-specific write function resolved at import time.
148+
tmp_path = await asyncio.to_thread(_write_file, path, data, is_binary=is_binary)
149+
150+
# On Linux/Unix, replace the destination file with tmp file.
151+
if tmp_path is not None:
133152
await asyncio.to_thread(os.replace, tmp_path, str(path))
134153
except (FileNotFoundError, PermissionError):
135154
if retry_count < max_retries:

0 commit comments

Comments
 (0)