Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docs/formats.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ hide:
unblob supports more than 30 formats. You can see their code in
[`unblob/handlers/`](https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/__init__.py).

✅: Some or all metadata is preserved for the format.
✅: Some or all metadata is preserved for the format.
❌: Metadata is not preserved (limitation of the format).

## Archives
Expand All @@ -22,6 +22,7 @@ unblob supports more than 30 formats. You can see their code in
| CAB | ❌ | ❌ | ❌ | [archive/cab.py][cab-handler] | [`7z`][cab-extractor] |
| CPIO | ✅ | ✅ | ✅ | [archive/cpio.py][cpio-handler] | unblob extractor |
| DMG | ❌ | ❌ | ❌ | [archive/dmg.py][dmg-handler] | [`7z`][dmg-extractor] |
| PARTCLONE | ✅ | ❌ | ❌ | [archive/partclone.py][partclone-handler] | [`partclone`][partclone-extractor] |
| RAR | ❌ | ❌ | ❌ | [archive/rar.py][rar-handler] | [`unar`][rar-extractor] |
| 7ZIP | ❌ | ❌ | ❌ | [archive/sevenzip.py][7zip-handler] | [`7z`][7zip-extractor] |
| StuffIt | ❌ | ❌ | ❌ | [archive/stuffit.py][stuffit-handler] | [`unar`][stuffit-extractor] |
Expand All @@ -39,6 +40,8 @@ unblob supports more than 30 formats. You can see their code in
[cpio-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/cpio.py
[dmg-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/dmg.py
[dmg-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/dmg.py#L67-L69
[partclone-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/partclone.py
[partclone-extractor]: https://github.com/onekey-sec/unblob/blob/b21b6dc291583af6b7ec9b7c3d63ee8302328841/python/unblob/handlers/archive/partclone.py#L44
[rar-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/rar.py
[rar-extractor]: https://github.com/onekey-sec/unblob/blob/3008039881a0434deb75962e7999b7e35aca8271/unblob/handlers/archive/rar.py#L32
[7zip-handler]: https://github.com/onekey-sec/unblob/blob/main/unblob/handlers/archive/sevenzip.py
Expand Down Expand Up @@ -97,7 +100,7 @@ For compression formats, metadata cannot be preserved, as this information in mo
| ---------------------- | ---------------------------------- | ----------------------------------------------- | ----------------------------------------------- |
| Android sparse image | ❌ | [filesystem/android/sparse.py][android-handler] | [`simg2img`][android-extractor] |
| CRAMFS | ✅ | [filesystem/cramfs.py][cramfs-handler] | [`7z`][cramfs-extractor] |
| EROFS | ✅ | [filesystem/android/erofs.py][erofs-handler] | [`fsck.erofs`][erofs-extractor] |
| EROFS | ✅ | [filesystem/android/erofs.py][erofs-handler] | [`fsck.erofs`][erofs-extractor] |
| ExtFS | ✅ | [filesystem/extfs.py][extfs-handler] | [`debugfs`][extfs-extractor] |
| FAT | ✅ | [filesystem/fat.py][fat-handler] | [`7z`][fat-extractor] |
| ISO9660 | ✅ | [filesystem/iso9660.py][iso9660-handler] | [`7z`][iso9660-extractor] |
Expand Down
1 change: 1 addition & 0 deletions install-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ apt-get install --no-install-recommends -y \
lziprecover \
lzop \
p7zip-full \
partclone \
unar \
xz-utils \
libmagic1 \
Expand Down
4 changes: 3 additions & 1 deletion package.nix
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
lib,
stdenv,
python3,
makeWrapper,
e2fsprogs-nofortify,
Expand All @@ -9,6 +10,7 @@
lziprecover,
lzop,
p7zip16,
partclone,
nix-filter,
sasquatch,
sasquatch-v4be,
Expand All @@ -22,7 +24,7 @@

let
# These dependencies are only added to PATH
runtimeDeps = [
runtimeDeps = lib.optional stdenv.isLinux partclone ++ [
e2fsprogs-nofortify
erofs-utils
jefferson
Expand Down
18 changes: 16 additions & 2 deletions python/unblob/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,8 @@ def parse(
def get_endian(file: File, big_endian_magic: int) -> Endian:
"""Read a four bytes magic and derive endianness from it.

It compares the read data with the big endian magic. It reads
four bytes and seeks back after that.
It compares the read data with the big endian magic and then seeks back
the amount of read bytes.
"""
if big_endian_magic > 0xFF_FF_FF_FF:
raise ValueError("big_endian_magic is larger than a 32 bit integer.")
Expand All @@ -369,6 +369,20 @@ def get_endian(file: File, big_endian_magic: int) -> Endian:
return Endian.BIG if magic == big_endian_magic else Endian.LITTLE


def get_endian_short(file: File, big_endian_magic: int) -> Endian:
    """Derive endianness from a two-byte magic at the current file position.

    The bytes read are interpreted as a big endian integer and compared with
    ``big_endian_magic``. The file position is moved back by the number of
    bytes that were actually read before the comparison takes place.

    Raises ``ValueError`` when ``big_endian_magic`` is larger than 0xFFFF.
    """
    if big_endian_magic > 0xFF_FF:
        raise ValueError("big_endian_magic is larger than a 16 bit integer.")

    raw = file.read(2)
    # Rewind by what was really consumed, so a short read is undone as well.
    file.seek(-len(raw), io.SEEK_CUR)

    if convert_int16(raw, Endian.BIG) == big_endian_magic:
        return Endian.BIG
    return Endian.LITTLE


def get_endian_multi(file: File, big_endian_magics: list[int]) -> Endian:
"""Read a four bytes magic and derive endianness from it.

Expand Down
2 changes: 2 additions & 0 deletions python/unblob/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
cab,
cpio,
dmg,
partclone,
rar,
sevenzip,
stuffit,
Expand Down Expand Up @@ -119,6 +120,7 @@
ecc.AutelECCHandler,
uzip.UZIPHandler,
erofs.EROFSHandler,
partclone.PartcloneHandler,
)

BUILTIN_DIR_HANDLERS: DirectoryHandlers = (
Expand Down
81 changes: 81 additions & 0 deletions python/unblob/handlers/archive/partclone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import binascii
import io
from math import ceil
from typing import Optional

from unblob.extractors import Command
from unblob.file_utils import File, InvalidInputFormat, get_endian_short
from unblob.models import Regex, StructHandler, ValidChunk

# C declaration of the partclone image header parsed by PartcloneHandler.
# The last field (crc32) is checked against a CRC computed over all the
# header bytes that precede it.
C_DEFINITIONS = r"""
typedef struct partclone_header{
char magic[16];
char partclone_version[14];
char image_version_txt[4];
char endian[2];
char fs_type[16];
uint64 fs_size;
uint64 fs_total_block_count;
uint64 fs_used_block_count_superblock;
uint64 fs_used_block_count_bitmap;
uint32 fs_block_size;
uint32 feature_size;
uint16 image_version;
uint16 number_of_bits_for_CPU;
uint16 checksum_mode;
uint16 checksum_size;
uint32 blocks_per_checksum;
uint8 reseed_checksum;
uint8 bitmap_mode;
uint32 crc32;
} partclone_header_t;
"""

# Name of the struct (declared in C_DEFINITIONS) used to parse the header.
HEADER_STRUCT = "partclone_header_t"
# Value of the two-byte "endian" field, read as big endian, that marks a
# big endian image; any other value is treated as little endian.
BIG_ENDIAN_MAGIC = 0xC0DE
# Byte offset of the "endian" field from the start of the header:
# magic (16) + partclone_version (14) + image_version_txt (4).
ENDIAN_OFFSET = 34


class PartcloneHandler(StructHandler):
    """Handler that detects partclone images and computes their extent.

    The header is parsed with the endianness announced by the image itself,
    validated through its CRC32 field, and the chunk end is derived from the
    sizes stored in the header. Extraction is delegated to the
    ``partclone.restore`` command.
    """

    NAME = "partclone"
    PATTERNS = [
        Regex(r"partclone-image\x00\d+\.\d+\.\d+.*?0002(\xde\xc0|\xc0\xde)")
    ]
    HEADER_STRUCT = HEADER_STRUCT
    C_DEFINITIONS = C_DEFINITIONS
    EXTRACTOR = Command(
        "partclone.restore",
        "-W",
        "-s",
        "{inpath}",
        "-o",
        "{outdir}/partclone.restored",
        "-L",
        "/dev/stdout",
    )

    @staticmethod
    def _ceil_div(numerator: int, denominator: int) -> int:
        """Return ``numerator / denominator`` rounded up, using integers only.

        Integer arithmetic avoids the rounding errors that float division can
        introduce for large 64 bit header values.
        """
        return -(-numerator // denominator)

    def is_valid_header(self, header) -> bool:
        """Return True when the stored CRC32 matches the header content.

        The CRC is computed over every header byte except the trailing four
        bytes holding the ``crc32`` field itself.
        """
        calculated_crc = binascii.crc32(header.dumps()[0:-4])
        return (
            header.crc32 ^ 0xFFFFFFFF
        ) == calculated_crc  # partclone does not final XOR

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Compute the chunk covered by the partclone image at ``start_offset``.

        Raises ``InvalidInputFormat`` when the header CRC does not match or
        when the header requests per-block checksums with a zero
        ``blocks_per_checksum``.
        """
        file.seek(start_offset + ENDIAN_OFFSET, io.SEEK_SET)  # go to endian
        endian = get_endian_short(file, BIG_ENDIAN_MAGIC)
        file.seek(start_offset, io.SEEK_SET)  # go to beginning of file
        header = self.parse_header(file, endian)

        if not self.is_valid_header(header):
            raise InvalidInputFormat("Invalid partclone header.")

        end_offset = start_offset + len(header)  # header
        end_offset += header.checksum_size  # checksum size
        # bitmap, as bytes (one bit per block, rounded up)
        end_offset += self._ceil_div(header.fs_total_block_count, 8)

        if header.checksum_mode != 0:
            # The field comes straight from the file: a zero here would
            # otherwise surface as a ZeroDivisionError instead of a format error.
            if header.blocks_per_checksum == 0:
                raise InvalidInputFormat(
                    "Invalid partclone header: blocks_per_checksum is zero."
                )
            checksum_blocks = self._ceil_div(
                header.fs_used_block_count_bitmap, header.blocks_per_checksum
            )
            end_offset += checksum_blocks * header.checksum_size

        end_offset += header.fs_used_block_count_bitmap * header.fs_block_size  # Data
        return ValidChunk(start_offset=start_offset, end_offset=end_offset)
3 changes: 3 additions & 0 deletions tests/integration/archive/partclone/__input__/floppy-144m.img
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
50 changes: 48 additions & 2 deletions tests/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
convert_int64,
decode_multibyte_integer,
get_endian,
get_endian_short,
is_safe_path,
iterate_file,
iterate_patterns,
Expand Down Expand Up @@ -345,7 +346,10 @@ class TestGetEndian:
"content, big_endian_magic, expected",
[
pytest.param(
b"\xff\x00\x00\x10", 0x100000FF, Endian.LITTLE, id="valid_little_endian"
b"\xff\x00\x00\x10",
0x100000FF,
Endian.LITTLE,
id="valid_little_endian",
),
pytest.param(
b"\x10\x00\x00\xff", 0x100000FF, Endian.BIG, id="valid_big_endian"
Expand All @@ -356,10 +360,27 @@ def test_get_endian(self, content: bytes, big_endian_magic: int, expected: Endia
file = File.from_bytes(content)
assert get_endian(file, big_endian_magic) == expected

@pytest.mark.parametrize(
    "content, big_endian_magic, expected",
    [
        pytest.param(
            b"\xff\x00",
            0x00FF,
            Endian.LITTLE,
            id="valid_little_endian",
        ),
        pytest.param(
            b"\x10\x00",
            0x1000,
            Endian.BIG,
            id="valid_big_endian",
        ),
    ],
)
def test_get_endian_short(
    self, content: bytes, big_endian_magic: int, expected: Endian
):
    """The two-byte magic is classified as big or little endian."""
    stream = File.from_bytes(content)
    detected = get_endian_short(stream, big_endian_magic)
    assert detected == expected

@pytest.mark.parametrize(
"content, big_endian_magic",
[
pytest.param(b"\x00\x00\x00\x01", 0xFF_FF_FF_FF_FF, id="larger_than_32bit"),
pytest.param(
b"\x00\x00\x00\x01",
0xFF_FF_FF_FF_FF,
id="larger_than_32bit",
),
],
)
def test_get_endian_errors(self, content: bytes, big_endian_magic: int):
Expand All @@ -369,6 +390,23 @@ def test_get_endian_errors(self, content: bytes, big_endian_magic: int):
):
get_endian(file, big_endian_magic)

@pytest.mark.parametrize(
    "content, big_endian_magic",
    [
        pytest.param(b"\x00\x00\x00\x01", 0xFF_FF_FF, id="larger_than_16bit"),
    ],
)
def test_get_endian_short_errors(self, content: bytes, big_endian_magic: int):
    """A magic that does not fit in 16 bits is rejected with ValueError."""
    stream = File.from_bytes(content)
    expected_message = "big_endian_magic is larger than a 16 bit integer"
    with pytest.raises(ValueError, match=expected_message):
        get_endian_short(stream, big_endian_magic)

def test_get_endian_resets_the_file_pointer(self):
file = File.from_bytes(bytes.fromhex("FFFF 0000"))
file.seek(-1, io.SEEK_END)
Expand All @@ -377,6 +415,14 @@ def test_get_endian_resets_the_file_pointer(self):
get_endian(file, 0xFFFF_0000)
assert file.tell() == pos

def test_get_endian_short_resets_the_file_pointer(self):
    """A failing read with one byte left keeps the file position unchanged."""
    stream = File.from_bytes(bytes.fromhex("FFFF"))
    stream.seek(-1, io.SEEK_END)
    position_before = stream.tell()

    with pytest.raises(InvalidInputFormat):
        get_endian_short(stream, 0xFFFF)

    assert stream.tell() == position_before


@pytest.mark.parametrize(
"input_path, expected",
Expand Down
7 changes: 7 additions & 0 deletions tests/test_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""

import inspect
import sys
from pathlib import Path

import pytest
Expand Down Expand Up @@ -35,6 +36,12 @@ def test_all_handlers(
extraction_config: ExtractionConfig,
request: pytest.FixtureRequest,
):
handler_name = input_dir.parent.name
if (sys.platform, handler_name) == ("darwin", "partclone"):
pytest.skip(
f"Handler '{handler_name}' not supported on platform '{sys.platform}'"
)

log_path = Path("/dev/null") # no logging
report_file = None # no reporting

Expand Down
Loading