@@ -76,23 +76,93 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
76
76
77
77
# prevent traversal attempts through links
78
78
if tarinfo .islnk () or tarinfo .issym ():
79
- if Path (tarinfo .linkname ).is_absolute ():
79
+ rel_target = Path (tarinfo .linkname )
80
+ if rel_target .is_absolute ():
81
+ # If target is absolute, we'll rewrite to be relative to the symlink
82
+
83
+ # Strip leading '/' to make the path relative to extract directory
84
+ rel_target = (extract_root / rel_target .relative_to ("/" )).relative_to (
85
+ extract_root
86
+ )
87
+
88
+ # Now we need to find the path from the symlink to the target
89
+ # If the symlink is in a directory like /foo/bar and the target is at /target
90
+ # we need to make rel_target ../target (relative to /foo) instead of just being /target (relative to /)
91
+
92
+ # Let's calculate depth of the symlink itself and the depth of the target
93
+ symlink_depth = len (tarinfo .name .split ("/" ))
94
+ target_depth = len (rel_target .parts )
95
+
96
+ # If the symlink is deeper than the target, we need to go up by the difference in depth
97
+ if symlink_depth > target_depth :
98
+ # We need to go up by the difference in depth
99
+ rel_target = (
100
+ Path (
101
+ "/" .join (
102
+ [".." for _ in range (symlink_depth - target_depth )]
103
+ )
104
+ )
105
+ / rel_target
106
+ )
107
+
80
108
self .record_problem (
81
109
tarinfo ,
82
110
"Absolute path as link target." ,
83
111
"Converted to extraction relative path." ,
84
112
)
85
- tarinfo .linkname = f"./{ tarinfo .linkname } "
86
- if not is_safe_path (
87
- basedir = extract_root ,
88
- path = extract_root / tarinfo .linkname ,
89
- ):
90
- self .record_problem (
91
- tarinfo ,
113
+
114
+ # The symlink will point to our relative target (may be updated below if unsafe)
115
+ tarinfo .linkname = rel_target
116
+ logger .info (
117
+ "Link target is relative" , linkname = tarinfo .linkname , name = tarinfo .name
118
+ )
119
+
120
+ # Resolve the link target to an absolute path
121
+ resolved_path = (extract_root / tarinfo .name ).parent / rel_target
122
+
123
+ # If the resolved path points outside of extract_root, we need to fix it!
124
+ if not is_safe_path (extract_root , resolved_path ):
125
+ logger .warning (
92
126
"Traversal attempt through link path." ,
93
- "Skipped." ,
127
+ src = tarinfo .name ,
128
+ dest = tarinfo .linkname ,
129
+ basedir = extract_root ,
130
+ resovled_path = resolved_path ,
94
131
)
95
- return
132
+
133
+ for drop_count in range (len (str (rel_target ).split ("/" ))):
134
+ new_path = (
135
+ (extract_root / tarinfo .name ).parent
136
+ / Path ("/" .join (["placeholder" ] * drop_count ))
137
+ / rel_target
138
+ )
139
+ resolved_path = new_path .resolve ()
140
+ if str (resolved_path ).startswith (str (extract_root )):
141
+ break
142
+ else :
143
+ # We didn't hit the break, we couldn't resolve the path safely
144
+ self .record_problem (
145
+ tarinfo ,
146
+ "Traversal attempt through link path." ,
147
+ "Skipped." ,
148
+ )
149
+ return
150
+
151
+ # Double check that it's safe now
152
+ if not is_safe_path (extract_root , resolved_path ):
153
+ self .record_problem (
154
+ tarinfo ,
155
+ "Traversal attempt through link path." ,
156
+ "Skipped." ,
157
+ )
158
+ return
159
+
160
+ # Prepend placeholder directories before rel_target to get a valid path
161
+ # within extract_root. This is the relative version of resolved_path.
162
+ rel_target = Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
163
+ tarinfo .linkname = rel_target
164
+
165
+ logger .debug ("Creating symlink" , points_to = resolved_path , name = tarinfo .name )
96
166
97
167
target_path = extract_root / tarinfo .name
98
168
# directories are special: we can not set their metadata now + they might also be already existing
0 commit comments