From 235f9d47714eb49f1ab849f619403bf3c6e75c1a Mon Sep 17 00:00:00 2001 From: aeurielesn Date: Wed, 23 Jul 2025 00:17:56 +0200 Subject: [PATCH 1/3] gh-130577: tarfile now validates archives to ensure member offsets are non-negative --- Doc/whatsnew/3.15.rst | 2 + Lib/tarfile.py | 3 + Lib/test/test_tarfile.py | 156 ++++++++++++++++++ ...-07-23-00-35-29.gh-issue-130577.c7EITy.rst | 2 + 4 files changed, 163 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index e8e2c1ed6047bf..cfd93aa2a97bec 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -310,6 +310,8 @@ tarfile :func:`~tarfile.TarFile.errorlevel` is zero. (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` and :cve:`2025-4435`.) +* :mod:`tarfile` now validates archives to ensure member offsets are non-negative. + (Contributed by Alexander Enrique Urieles Nieto in :gh:`130577`.) types diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 80d8644af86f74..45f58eb8ac93cf 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1647,6 +1647,9 @@ def _block(self, count): """Round up a byte count by BLOCKSIZE and return it, e.g. _block(834) => 1024. """ + # Only non-negative offsets are allowed + if count < 0: + raise InvalidHeaderError("invalid offset") blocks, remainder = divmod(count, BLOCKSIZE) if remainder: blocks += 1 diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 7055e1ed147a9e..7b7ad791e568fc 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -55,6 +55,7 @@ def sha256sum(data): zstname = os.path.join(TEMPDIR, "testtar.tar.zst") tmpname = os.path.join(TEMPDIR, "tmp.tar") dotlessname = os.path.join(TEMPDIR, "testtar") +SPACE = b" " sha256_regtype = ( "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce" @@ -4602,6 +4603,161 @@ def extractall(self, ar): ar.extractall(self.testdir, filter='fully_trusted') +class OffsetValidationTests(unittest.TestCase): + tarname = tmpname + invalid_posix_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 spaces + + b"0011407" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 6 bytes, version: 2 bytes + + tarfile.POSIX_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # devminor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # prefix: 155 bytes + + tarfile.NUL * tarfile.LENGTH_PREFIX + # padding: 12 bytes + + tarfile.NUL * 12 + ) + invalid_gnu_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, null terminator: 8 bytes + + b"0000755" + tarfile.NUL + # uid, null terminator: 8 bytes + + b"0000001" + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"0000001" + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 spaces + + b"0011327" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 8 bytes + + tarfile.GNU_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, null terminator: 8 bytes + + tarfile.NUL * 8 + # devminor, null terminator: 8 bytes + + tarfile.NUL * 8 + # padding: 167 bytes + + tarfile.NUL * 167 + ) + invalid_v7_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 spaces + + b"0010070" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # padding: 255 bytes + + tarfile.NUL * 255 + ) + valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT) + data_block = b"\xff" * tarfile.BLOCKSIZE + + def _write_buffer(self, buffer): + with open(self.tarname, "wb") as f: + f.write(buffer) + + def _get_members(self, ignore_zeros=None): + with open(self.tarname, "rb") as f: + with tarfile.open( + mode="r", fileobj=f, ignore_zeros=ignore_zeros + ) as tar: + return tar.getmembers() + + def _assert_raises_read_error_exception(self): + with self.assertRaisesRegex( + tarfile.ReadError, "file could not be opened successfully" + ): + self._get_members() + + def test_invalid_offset_header_validations(self): + for tar_format, invalid_header in ( + ("posix", self.invalid_posix_header), + ("gnu", self.invalid_gnu_header), + ("v7", self.invalid_v7_header), + ): + with self.subTest(format=tar_format): + self._write_buffer(invalid_header) + self._assert_raises_read_error_exception() + + def test_early_stop_at_invalid_offset_header(self): + buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header + self._write_buffer(buffer) + members = self._get_members() + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, 0) + + def test_ignore_invalid_archive(self): + # 3 invalid headers with their respective data + buffer = (self.invalid_gnu_header + self.data_block) * 3 + self._write_buffer(buffer) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 0) + + def test_ignore_invalid_offset_headers(self): + for first_block, second_block, expected_offset in ( + ( + (self.valid_gnu_header), + (self.invalid_gnu_header + self.data_block), + 0, + ), + ( + (self.invalid_gnu_header + self.data_block), + (self.valid_gnu_header), + 1024, + ), + ): + self._write_buffer(first_block + second_block) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, expected_offset) + + def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst new file mode 100644 index 00000000000000..2d6968c12027fe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst @@ -0,0 +1,2 @@ +:mod:`tarfile` now validates archives to ensure member offsets are +non-negative. From 04fd0852147363b118b1241cfbd4c843443a0ffd Mon Sep 17 00:00:00 2001 From: aeurielesn Date: Fri, 25 Jul 2025 12:15:04 +0200 Subject: [PATCH 2/3] Typo --- Lib/test/test_tarfile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 7b7ad791e568fc..28914df6b010d0 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4618,7 +4618,7 @@ class OffsetValidationTests(unittest.TestCase): + b"\xff" * 11 + SPACE # mtime, space: 12 bytes + tarfile.NUL * 11 + SPACE - # chksum: 8 spaces + # chksum: 8 bytes + b"0011407" + tarfile.NUL # type: 1 byte + tarfile.REGTYPE @@ -4652,7 +4652,7 @@ class OffsetValidationTests(unittest.TestCase): + b"\xff" * 11 + SPACE # mtime, space: 12 bytes + tarfile.NUL * 11 + SPACE - # chksum: 8 spaces + # chksum: 8 bytes + b"0011327" + tarfile.NUL # type: 1 byte + tarfile.REGTYPE @@ -4684,7 +4684,7 @@ class OffsetValidationTests(unittest.TestCase): + b"\xff" * 11 + SPACE # mtime, space: 12 bytes + tarfile.NUL * 11 + SPACE - # chksum: 8 spaces + # chksum: 8 bytes + b"0010070" + tarfile.NUL # type: 1 byte + tarfile.REGTYPE From 6d794444416f44da7230b47a46fe5966f9533cba Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 25 Jul 2025 20:20:39 +0000 Subject: [PATCH 3/3] move credit to NEWS, and remove the whatsnew entry (painful for backports) --- Doc/whatsnew/3.15.rst | 2 -- .../Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index cfd93aa2a97bec..e8e2c1ed6047bf 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -310,8 +310,6 @@ tarfile :func:`~tarfile.TarFile.errorlevel` is zero. (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` and :cve:`2025-4435`.) -* :mod:`tarfile` now validates archives to ensure member offsets are non-negative. - (Contributed by Alexander Enrique Urieles Nieto in :gh:`130577`.) types diff --git a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst index 2d6968c12027fe..342cabbc865dc4 100644 --- a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst +++ b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst @@ -1,2 +1,3 @@ :mod:`tarfile` now validates archives to ensure member offsets are -non-negative. +non-negative. (Contributed by Alexander Enrique Urieles Nieto in +:gh:`130577`.)