Skip to content

Commit 20f156f

Browse files
committed
SafeTarfile: Update relative symlinks instead of dropping
1 parent 2e6a7ae commit 20f156f

File tree

1 file changed

+80
-10
lines changed

1 file changed

+80
-10
lines changed

unblob/handlers/archive/_safe_tarfile.py

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,23 +76,93 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
7676

7777
# prevent traversal attempts through links
7878
if tarinfo.islnk() or tarinfo.issym():
79-
if Path(tarinfo.linkname).is_absolute():
79+
rel_target = Path(tarinfo.linkname)
80+
if rel_target.is_absolute():
81+
# If target is absolute, we'll rewrite to be relative to the symlink
82+
83+
# Strip leading '/' to make the path relative to extract directory
84+
rel_target = (extract_root / rel_target.relative_to("/")).relative_to(
85+
extract_root
86+
)
87+
88+
# Now we need to find the path from the symlink to the target
89+
# If the symlink is in a directory like /foo/bar and the target is at /target
90+
# we need to make rel_target ../target (relative to /foo) instead of just being /target (relative to /)
91+
92+
# Let's calculate depth of the symlink itself and the depth of the target
93+
symlink_depth = len(tarinfo.name.split("/"))
94+
target_depth = len(rel_target.parts)
95+
96+
# If the symlink is deeper than the target, we need to go up by the difference in depth
97+
if symlink_depth > target_depth:
98+
# We need to go up by the difference in depth
99+
rel_target = (
100+
Path(
101+
"/".join(
102+
[".." for _ in range(symlink_depth - target_depth)]
103+
)
104+
)
105+
/ rel_target
106+
)
107+
80108
self.record_problem(
81109
tarinfo,
82110
"Absolute path as link target.",
83111
"Converted to extraction relative path.",
84112
)
85-
tarinfo.linkname = f"./{tarinfo.linkname}"
86-
if not is_safe_path(
87-
basedir=extract_root,
88-
path=extract_root / tarinfo.linkname,
89-
):
90-
self.record_problem(
91-
tarinfo,
113+
114+
# The symlink will point to our relative target (may be updated below if unsafe)
115+
tarinfo.linkname = rel_target
116+
logger.info(
117+
"Link target is relative", linkname=tarinfo.linkname, name=tarinfo.name
118+
)
119+
120+
# Resolve the link target to an absolute path
121+
resolved_path = (extract_root / tarinfo.name).parent / rel_target
122+
123+
# If the resolved path points outside of extract_root, we need to fix it!
124+
if not is_safe_path(extract_root, resolved_path):
125+
logger.warning(
92126
"Traversal attempt through link path.",
93-
"Skipped.",
127+
src=tarinfo.name,
128+
dest=tarinfo.linkname,
129+
basedir=extract_root,
130+
resovled_path=resolved_path,
94131
)
95-
return
132+
133+
for drop_count in range(len(str(rel_target).split("/"))):
134+
new_path = (
135+
(extract_root / tarinfo.name).parent
136+
/ Path("/".join(["placeholder"] * drop_count))
137+
/ rel_target
138+
)
139+
resolved_path = new_path.resolve()
140+
if str(resolved_path).startswith(str(extract_root)):
141+
break
142+
else:
143+
# We didn't hit the break, we couldn't resolve the path safely
144+
self.record_problem(
145+
tarinfo,
146+
"Traversal attempt through link path.",
147+
"Skipped.",
148+
)
149+
return
150+
151+
# Double check that it's safe now
152+
if not is_safe_path(extract_root, resolved_path):
153+
self.record_problem(
154+
tarinfo,
155+
"Traversal attempt through link path.",
156+
"Skipped.",
157+
)
158+
return
159+
160+
# Prepend placeholder directories before rel_target to get a valid path
161+
# within extract_root. This is the relative version of resolved_path.
162+
rel_target = Path("/".join(["placeholder"] * drop_count)) / rel_target
163+
tarinfo.linkname = rel_target
164+
165+
logger.debug("Creating symlink", points_to=resolved_path, name=tarinfo.name)
96166

97167
target_path = extract_root / tarinfo.name
98168
# directories are special: we can not set their metadata now + they might also be already existing

0 commit comments

Comments
 (0)