From 40448f20f1384f25ad923fa0039f0527fde1033d Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Sun, 19 Oct 2025 22:13:03 +0100 Subject: [PATCH 1/6] arm64 support Signed-off-by: Prabhu Subramanian --- blint/config.py | 17 +- blint/lib/disassembler.py | 313 +++++++++++++++++++++++++------------ tests/test_disassembler.py | 14 +- 3 files changed, 238 insertions(+), 106 deletions(-) diff --git a/blint/config.py b/blint/config.py index 01fc406..81bee23 100644 --- a/blint/config.py +++ b/blint/config.py @@ -1402,14 +1402,14 @@ def __post_init__(self): 'movdqa', 'movdqu', 'vmovdqa', 'vmovdqu', 'padd', 'psub', 'pmul', 'psll', 'psrl', 'psra', - 'vpaddd', 'vpsubd', 'vpmulld', + 'vpaddd', 'vpsubd', 'vpmulld', 'pmaddwd' ] GPU_INDICATORS = [ 'glbind', 'glvertex', 'glcolor', 'glbegin', 'glend', 'glenable', 'gldisable', 'glget', 'glset', 'glload', 'glsave', 'gluniform', 'gluseprogram', 'glattachshader', 'gldraw', 'glclear', 'glviewport', 'glmatrix', 'glpushmatrix', 'glpopmatrix', 'cuda', 'cuinit', 'cucontext', 'cudriver', 'cugpu', 'cudevice', 'cumem', 'cuptr', 'cukernel', 'culaunch', 'cugrid', 'cublock', 'cuthread', 'cufree', 'cucopy', 'clgetplatform', 'clgetdevice', 'clcreatecontext', 'clcreatecommandqueue', 'clcreateshared', 'clcreatekernel', 'clsetkernelarg', 'clenqueuendrange', 'clfinish', 'clrelease', 'clbuildprogram', 'd3d', 'd3d11', 'd3d12', 'create', 'device', 'swapchain', 'rendertarget', 'shaders', 'ps_', 'vs_', 'gs_', 'cs_', 'hs_', 'ds_', - 'vk', 'vkcreate', 'vkdestroy', 'vkallocate', 'vkfree', 'vkqueue', 'vksubmit', 'vkwait', 'vkacquire', 'vkpresent', 'vkcmd', 'vkbegin', 'vkend', 'vkbind', 'vkdraw', 'vkdispatch', 'vkcopy', 'vkblit', 'vkclear', 'vkfill', 'vkupdate', + 'vkcreate', 'vkdestroy', 'vkallocate', 'vkfree', 'vkqueue', 'vksubmit', 'vkwait', 'vkacquire', 'vkpresent', 'vkcmd', 'vkbegin', 'vkend', 'vkbind', 'vkdraw', 'vkdispatch', 'vkcopy', 'vkblit', 'vkclear', 'vkfill', 'vkupdate', 'mtl', 'metal', 'mtldevice', 'mtlcommand', 'mtlrender', 'mtlcompute', 
'mtlbuffer', 'mtltexture', 'mtlfunction', 'mtllibrary', 'mtlencoder', 'mtlpass', 'gpu', 'compute', 'shader', 'vertex', 'fragment', 'pixel', 'kernel', 'workgroup', 'local', 'global', 'buffer', 'texture', 'surface', ] @@ -1476,3 +1476,16 @@ def __post_init__(self): 'cpuid': {'read': {'eax', 'ecx'}, 'write': {'eax', 'ebx', 'ecx', 'edx'}}, 'syscall':{'read': {'rcx', 'r11'}, 'write': {'rcx', 'r11'}}, } + +IMPLICIT_REGS_ARM64 = { + 'bl': {'write': {'x30'}}, + 'blr': {'read': {'x30'}, 'write': {'x30'}}, + 'ret': {'read': {'x30'}}, + 'ret x0': {'read': {'x0'}}, + 'mrs': {'write': {'x0'}}, + 'msr': {'read': {'x0'}}, + 'svc': {'read': {'x8', 'sp'}}, + 'hvc': {'read': {'x8', 'sp'}}, + 'smc': {'read': {'x8', 'sp'}}, + 'brk': {'read': {'x0'}}, +} \ No newline at end of file diff --git a/blint/lib/disassembler.py b/blint/lib/disassembler.py index 393a953..067361c 100644 --- a/blint/lib/disassembler.py +++ b/blint/lib/disassembler.py @@ -2,7 +2,7 @@ import lief import hashlib import re -from blint.config import CRYPTO_INDICATORS, GPU_INDICATORS, SECURITY_INDICATORS, SYSCALL_INDICATORS, IMPLICIT_REGS_X86, IMPLICIT_REGS_X64 +from blint.config import CRYPTO_INDICATORS, GPU_INDICATORS, SECURITY_INDICATORS, SYSCALL_INDICATORS, IMPLICIT_REGS_X86, IMPLICIT_REGS_X64, IMPLICIT_REGS_ARM64 OPERAND_DELIMITERS_PATTERN = re.compile(r'[^a-zA-Z0-9_]+') @@ -11,6 +11,13 @@ CONDITIONAL_JMP_INST = ['je', 'jne', 'jz', 'jnz', 'jg', 'jge', 'jl', 'jle', 'ja', 'jae', 'jb', 'jbe', 'jp', 'jnp', 'jo', 'jno', 'js', 'jns', 'loop', 'loopz', 'loopnz', 'jcxz', 'jecxz', 'jrcxz'] +ARM64_GENERAL_REGS_64 = {f'x{i}' for i in range(31)} +ARM64_GENERAL_REGS_32 = {f'w{i}' for i in range(31)} +ARM64_SPECIAL_REGS = {'sp', 'xzr', 'wzr'} +ARM64_ALL_REGS = ( + ARM64_GENERAL_REGS_64 | ARM64_GENERAL_REGS_32 | ARM64_SPECIAL_REGS +) + COMMON_REGS_64 = {'rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'rbp', 'rsp', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'} COMMON_REGS_32 = {'eax', 'ebx', 'ecx', 'edx', 'esi', 'edi', 'ebp', 
'esp', @@ -42,8 +49,8 @@ YMM_REGS = {f'ymm{i}' for i in range(32)} ZMM_REGS = {f'zmm{i}' for i in range(32)} ALL_SIMD_REGS = FPU_REGS | MMX_REGS | XMM_REGS | YMM_REGS | ZMM_REGS -ALL_REGS = COMMON_REGS_64 | COMMON_REGS_32 | COMMON_REGS_16 | COMMON_REGS_8l | COMMON_REGS_8h | ALL_SIMD_REGS | SEGMENT_REGS -SORTED_ALL_REGS = sorted(ALL_REGS, key=len, reverse=True) +ALL_REGS_X86 = (COMMON_REGS_64 | COMMON_REGS_32 | COMMON_REGS_16 | COMMON_REGS_8l | COMMON_REGS_8h | ALL_SIMD_REGS | SEGMENT_REGS) +SORTED_ALL_REGS_X86 = sorted(ALL_REGS_X86, key=len, reverse=True) WIN_X64_VOLATILE_REGS = frozenset({'rax', 'rcx', 'rdx', 'r8', 'r9', 'r10', 'r11'}) SYSV_X64_VOLATILE_REGS = frozenset({'rax', 'rcx', 'rdx', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11'}) @@ -58,10 +65,21 @@ LOG.debug("Nyxstone not found. Disassembly features will be unavailable. Install with 'pip install blint[extended]'.") NYXSTONE_AVAILABLE = False +def get_arch_reg_set(arch_target): + """Returns the appropriate set of registers based on the architecture.""" + is_aarch64 = "aarch64" in arch_target.lower() or "arm64" in arch_target.lower() + if is_aarch64: + combined_regs = ALL_REGS_X86 | ARM64_ALL_REGS + return sorted(combined_regs, key=len, reverse=True) + else: + return SORTED_ALL_REGS_X86 + def _get_implicit_regs_map(arch_target): """Selects the appropriate implicit registers map based on architecture.""" - if "64" in arch_target: + if "64" in arch_target and "aarch64" not in arch_target.lower(): return IMPLICIT_REGS_X64 + elif "aarch64" in arch_target.lower() or "arm64" in arch_target.lower(): + return IMPLICIT_REGS_ARM64 return IMPLICIT_REGS_X86 def _find_function_end_index(instr_list): @@ -193,14 +211,23 @@ def _get_disasm_range(func_addr, sec_obj, parsed_obj, section_func_map): size_to_disasm = max(0, min(size_to_disasm, len(sec_content_bytes) - func_offset_in_sec)) return func_offset_in_sec, size_to_disasm, sec_content_bytes -def extract_regs_from_operand(op): +def extract_regs_from_operand(op, 
sorted_arch_regs=SORTED_ALL_REGS_X86): found_regs = set() if not op: return found_regs potential_tokens = filter(None, OPERAND_DELIMITERS_PATTERN.split(op.lower())) for token in potential_tokens: - if token in SORTED_ALL_REGS: + if token in sorted_arch_regs: found_regs.add(token) + cleaned_token = token.strip('[]!') + if cleaned_token in sorted_arch_regs: + found_regs.add(cleaned_token) + if ' ' in cleaned_token: + sub_tokens = cleaned_token.split() + for sub_t in sub_tokens: + sub_cleaned = sub_t.strip('[]!') + if sub_cleaned in sorted_arch_regs: + found_regs.add(sub_cleaned) return found_regs def _extract_register_usage(instr_assembly, parsed_obj=None, arch_target=""): @@ -213,6 +240,8 @@ def _extract_register_usage(instr_assembly, parsed_obj=None, arch_target=""): regs_written = set() if not instr_assembly: return list(regs_read), list(regs_written) + is_aarch64 = "aarch64" in arch_target.lower() or "arm64" in arch_target.lower() + sorted_arch_regs = get_arch_reg_set(arch_target) first_space_idx = instr_assembly.find(' ') if first_space_idx == -1: mnemonic = instr_assembly.strip().lower().rstrip(':') @@ -231,111 +260,186 @@ def _extract_register_usage(instr_assembly, parsed_obj=None, arch_target=""): num_operands = len(operands) if num_operands > 0: operands = [op.rstrip(',') for op in operands] - has_rep_prefix = False if mnemonic.startswith(('rep', 'repe', 'repne')): has_rep_prefix = True mnemonic = mnemonic[4:] if len(mnemonic) > 3 and mnemonic[3] == 'e' else mnemonic[3:] - if mnemonic in implicit_regs_map: regs_read.update(implicit_regs_map[mnemonic].get('read', set())) regs_written.update(implicit_regs_map[mnemonic].get('write', set())) - if has_rep_prefix: is_64bit = "64" in arch_target counter_reg = 'rcx' if is_64bit else 'ecx' regs_read.add(counter_reg) regs_written.add(counter_reg) - - if mnemonic in WRITE_DST_READ_SRC_INST or mnemonic.startswith('cmov'): - if num_operands >= 2: - dst_regs = extract_regs_from_operand(operands[0].lower()) - src_regs = 
extract_regs_from_operand(operands[1].lower()) - regs_written.update(dst_regs) - regs_read.update(src_regs) - if mnemonic not in ['mov', 'movzx', 'movsx', 'movsxd', 'lea'] and not mnemonic.startswith('cmov'): - regs_read.update(dst_regs) - - elif mnemonic in READ_WRITE_BOTH_OPS_INST: - if num_operands >= 2: - op1_regs = extract_regs_from_operand(operands[0].lower()) - op2_regs = extract_regs_from_operand(operands[1].lower()) - regs_read.update(op1_regs) - regs_written.update(op1_regs) - regs_read.update(op2_regs) - if mnemonic != 'cmpxchg': - regs_written.update(op2_regs) - - elif mnemonic in BIT_MANIPULATION_INST: - if num_operands >= 2: - dst_regs = extract_regs_from_operand(operands[0].lower()) - src_regs = extract_regs_from_operand(operands[1].lower()) - regs_written.update(dst_regs) - regs_read.update(src_regs) - if mnemonic not in ['bsf', 'bsr', 'lzcnt', 'tzcnt', 'popcnt']: - regs_read.update(dst_regs) - - elif mnemonic in READ_WRITE_ONE_OP_INST: - if num_operands >= 1: - op_regs = extract_regs_from_operand(operands[0].lower()) - regs_read.update(op_regs) - regs_written.update(op_regs) - - elif mnemonic in ['cmp', 'test']: - if num_operands >= 2: - regs_read.update(extract_regs_from_operand(operands[0].lower())) - regs_read.update(extract_regs_from_operand(operands[1].lower())) - - elif mnemonic in ['push', 'pop']: - is_64bit = "64" in arch_target - stack_reg = 'rsp' if is_64bit else 'esp' - regs_read.add(stack_reg) - regs_written.add(stack_reg) - if num_operands >= 1: - op_regs = extract_regs_from_operand(operands[0].lower()) - if mnemonic == 'push': + if is_aarch64: + if mnemonic in ['add', 'adds', 'sub', 'subs', 'neg', 'negs', 'mul', 'umull', 'smull', 'smulh', 'umulh', 'div', 'udiv']: + if num_operands >= 2: + dst_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + src1_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_written.update(dst_regs) + regs_read.update(src1_regs) + if num_operands >= 3: + 
src2_regs = extract_regs_from_operand(operands[2].lower(), sorted_arch_regs) + regs_read.update(src2_regs) + elif mnemonic in ['mov', 'movz', 'movk', 'movn', 'fmov', 'fmov immediate']: + if num_operands >= 1: + dst_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + regs_written.update(dst_regs) + if num_operands >= 2 and not operands[1].lower().startswith('#'): + src_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_read.update(src_regs) + elif mnemonic in ['csel', 'csinc', 'csinv', 'cset', 'csetm', 'cinc', 'cinv']: + if num_operands >= 3: + dst_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + src1_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + src2_regs = extract_regs_from_operand(operands[2].lower(), sorted_arch_regs) + regs_written.update(dst_regs) + regs_read.update(src1_regs) + regs_read.update(src2_regs) + if mnemonic in ['cinc', 'cinv']: + regs_read.update(dst_regs) + elif mnemonic in ['cmp', 'cmn', 'tst']: + if num_operands >= 2: + src1_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + src2_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_read.update(src1_regs) + regs_read.update(src2_regs) + elif mnemonic.startswith('ldr') or mnemonic.startswith('str'): + if num_operands >= 2: + data_reg = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + addr_parts = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + if 'str' in mnemonic: + regs_read.update(data_reg) + regs_read.update(addr_parts) + else: # ldr + regs_written.update(data_reg) + regs_read.update(addr_parts) + elif mnemonic.startswith('ldp') or mnemonic.startswith('stp'): + if num_operands >= 3: + data_reg1 = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + data_reg2 = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + addr_parts = extract_regs_from_operand(operands[2].lower(), 
sorted_arch_regs) + if 'str' in mnemonic: + regs_read.update(data_reg1) + regs_read.update(data_reg2) + regs_read.update(addr_parts) + else: + regs_written.update(data_reg1) + regs_written.update(data_reg2) + regs_read.update(addr_parts) + elif mnemonic.startswith('cb') or mnemonic.startswith('tb'): + if num_operands >= 1: + src_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + regs_read.update(src_regs) + elif mnemonic.startswith('b') and mnemonic not in ['bl', 'blr', 'br']: + pass + elif mnemonic in ['bl', 'blr', 'br']: + if num_operands >= 1 and mnemonic != 'bl': + target_op = operands[0].lower() + if not target_op.startswith('#') and not target_op.isdigit(): + target_regs = extract_regs_from_operand(target_op, sorted_arch_regs) + regs_read.update(target_regs) + elif mnemonic in ['ret']: + pass + elif mnemonic in ['and', 'orr', 'eor', 'bic', 'tst']: + if num_operands >= 2: + dst_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + src1_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_written.update(dst_regs) + regs_read.update(src1_regs) + if num_operands >= 3: + src2_regs = extract_regs_from_operand(operands[2].lower(), sorted_arch_regs) + regs_read.update(src2_regs) + elif mnemonic in ['lsl', 'lsr', 'asr', 'ror', 'uxtw', 'sxtw', 'sxtx', 'uxtb', 'uxth', 'sxtb', 'sxth']: + if num_operands >= 2: + dst_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + src1_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_written.update(dst_regs) + regs_read.update(src1_regs) + if num_operands >= 3: + src2_regs = extract_regs_from_operand(operands[2].lower(), sorted_arch_regs) + regs_read.update(src2_regs) + else: + if mnemonic in WRITE_DST_READ_SRC_INST or mnemonic.startswith('cmov'): + if num_operands >= 2: + dst_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + src_regs = extract_regs_from_operand(operands[1].lower(), 
sorted_arch_regs) + regs_written.update(dst_regs) + regs_read.update(src_regs) + if mnemonic not in ['mov', 'movzx', 'movsx', 'movsxd', 'lea'] and not mnemonic.startswith('cmov'): + regs_read.update(dst_regs) + elif mnemonic in READ_WRITE_BOTH_OPS_INST: + if num_operands >= 2: + op1_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + op2_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_read.update(op1_regs) + regs_written.update(op1_regs) + regs_read.update(op2_regs) + if mnemonic != 'cmpxchg': + regs_written.update(op2_regs) + elif mnemonic in BIT_MANIPULATION_INST: + if num_operands >= 2: + dst_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + src_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_written.update(dst_regs) + regs_read.update(src_regs) + if mnemonic not in ['bsf', 'bsr', 'lzcnt', 'tzcnt', 'popcnt']: + regs_read.update(dst_regs) + elif mnemonic in READ_WRITE_ONE_OP_INST: + if num_operands >= 1: + op_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) regs_read.update(op_regs) - else: regs_written.update(op_regs) - - elif mnemonic == 'call': - volatile_regs = _get_abi_volatile_regs(parsed_obj, arch_target) - regs_written.update(volatile_regs) - if num_operands >= 1: - op = operands[0].lower() - if not op.startswith('0x') and not op.isdigit(): - op_regs = extract_regs_from_operand(op) - regs_read.update(op_regs) - - elif mnemonic in TERMINATING_INST: - is_64bit = "64" in arch_target or "aarch64" in arch_target - if is_64bit: - regs_read.update(X64_RETURN_REGS) - else: - regs_read.update(X86_RETURN_REGS) - stack_reg = 'rsp' if is_64bit else 'esp' - regs_read.add(stack_reg) - regs_written.add(stack_reg) - - elif mnemonic.startswith('j'): - if num_operands >= 1: - op = operands[0].lower() - if not op.startswith('0x') and not op.isdigit(): - op_regs = extract_regs_from_operand(op) - regs_read.update(op_regs) - - elif mnemonic == 
'xchg': - if num_operands >= 2: - op1_regs = extract_regs_from_operand(operands[0].lower()) - op2_regs = extract_regs_from_operand(operands[1].lower()) - regs_read.update(op1_regs) - regs_written.update(op1_regs) - regs_read.update(op2_regs) - regs_written.update(op2_regs) - - if mnemonic in ['mul', 'imul', 'div', 'idiv'] and num_operands == 1: - op_regs = extract_regs_from_operand(operands[0].lower()) - regs_read.update(op_regs) + elif mnemonic in ['cmp', 'test']: + if num_operands >= 2: + regs_read.update(extract_regs_from_operand(operands[0].lower(), sorted_arch_regs)) + regs_read.update(extract_regs_from_operand(operands[1].lower(), sorted_arch_regs)) + elif mnemonic in ['push', 'pop']: + is_64bit = "64" in arch_target + stack_reg = 'rsp' if is_64bit else 'esp' + regs_read.add(stack_reg) + regs_written.add(stack_reg) + if num_operands >= 1: + op_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + if mnemonic == 'push': + regs_read.update(op_regs) + else: + regs_written.update(op_regs) + elif mnemonic == 'call': + volatile_regs = _get_abi_volatile_regs(parsed_obj, arch_target) + regs_written.update(volatile_regs) + if num_operands >= 1: + op = operands[0].lower() + if not op.startswith('0x') and not op.isdigit(): + op_regs = extract_regs_from_operand(op, sorted_arch_regs) + regs_read.update(op_regs) + elif mnemonic in TERMINATING_INST: + is_64bit = "64" in arch_target or "aarch64" in arch_target + if is_64bit: + regs_read.update(X64_RETURN_REGS) + else: + regs_read.update(X86_RETURN_REGS) + stack_reg = 'rsp' if is_64bit else 'esp' + regs_read.add(stack_reg) + regs_written.add(stack_reg) + elif mnemonic.startswith('j'): + if num_operands >= 1: + op = operands[0].lower() + if not op.startswith('0x') and not op.isdigit(): + op_regs = extract_regs_from_operand(op, sorted_arch_regs) + regs_read.update(op_regs) + elif mnemonic == 'xchg': + if num_operands >= 2: + op1_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + 
op2_regs = extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + regs_read.update(op1_regs) + regs_written.update(op1_regs) + regs_read.update(op2_regs) + regs_written.update(op2_regs) + if mnemonic in ['mul', 'imul', 'div', 'idiv'] and num_operands == 1: + op_regs = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) + regs_read.update(op_regs) return list(regs_read), list(regs_written) @@ -362,7 +466,7 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad instr_assembly = instr.assembly mnemonic = instr.assembly.split(None, 1)[0].lower() instruction_mnemonics.append(mnemonic) - if mnemonic == 'call': + if mnemonic in ('call'): instruction_metrics["call_count"] += 1 elif mnemonic in CONDITIONAL_JMP_INST: instruction_metrics["conditional_jump_count"] += 1 @@ -392,9 +496,20 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad operand = parts[1].lower().strip() if operand.startswith('[') and operand.endswith(']'): has_indirect_call = True - elif any(operand.startswith(reg) for reg in SORTED_ALL_REGS): + elif any(operand.startswith(reg) for reg in SORTED_ALL_REGS_X86): if operand.isalnum() or '_' in operand: has_indirect_call = True + # Check for ARM64 indirect calls and jumps + elif instr_assembly.startswith(('bl ', 'blr ', 'br ')): + parts = instr_assembly.split(None, 1) + if len(parts) > 1: + operand = parts[1].lower().strip() + if any(operand.startswith(reg) for reg in SORTED_ALL_REGS_X86): + has_indirect_call = True + elif '[' in operand and ']' in operand: + has_indirect_call = True + elif operand.startswith('#') or operand.startswith(('+', '-')) or operand.startswith('0x'): + instruction_metrics["call_count"] += 1 regs_read, regs_written = _extract_register_usage(instr_assembly, parsed_obj, arch_target) all_instr_regs = set(regs_read) | set(regs_written) is_simd_fpu = False diff --git a/tests/test_disassembler.py b/tests/test_disassembler.py index 475fe7a..ce50188 100644 --- 
a/tests/test_disassembler.py +++ b/tests/test_disassembler.py @@ -18,7 +18,7 @@ def test_extract_register_usage_arith(): def test_extract_register_usage_cmp(): instr_asm = "cmp rdi, 5" - regs_read, regs_written = _extract_register_usage(instr_asm) + regs_read, regs_written = _extract_register_usage(instr_asm, None, 'x86_64') assert set(regs_read) == {"rdi"} assert set(regs_written) == set() @@ -44,10 +44,10 @@ def test_extract_register_usage_call(): cc_regs = {'rsi', 'rcx', 'r9', 'r10', 'rax', 'rdi', 'r8', 'r11', 'rdx'} assert set(regs_written) == cc_regs assert set(regs_read) == set() - instr_asm_indirect = "call r12" + instr_asm_indirect = "blr x12" regs_read_indirect, regs_written_indirect = _extract_register_usage(instr_asm_indirect, {}, "aarch64") - assert "r12" in regs_read_indirect - assert set(regs_written_indirect) == cc_regs + assert "x12" in regs_read_indirect + assert set(regs_written_indirect) == {'x30'} instr_asm_pop = "pop ebx" regs_read_pop, regs_written_pop = _extract_register_usage(instr_asm_pop, {}, "x86") assert "esp" in regs_read_pop @@ -93,6 +93,10 @@ def mock_instructions(): instr6.assembly = "je 0xFF0" instr6.address = 0x1011 instrs.append(instr6) + instr7 = MagicMock() + instr7.assembly = "bl #977140" + instr7.address = 0x1017 + instrs.append(instr7) return instrs def test_analyze_instructions_basic(mock_instructions): @@ -103,7 +107,7 @@ def test_analyze_instructions_basic(mock_instructions): regs_read, regs_written, instrs_with_regs, _) = _analyze_instructions( mock_instructions, func_addr, next_func_addr_in_sec, instr_addresses, {}, "x86_64" ) - assert metrics["call_count"] == 2 + assert metrics["call_count"] == 3 assert metrics["arith_count"] == 1 assert metrics["ret_count"] == 1 assert metrics["conditional_jump_count"] == 1 From 71367708c3a2c7f8cce7ab6955a020127e3b2353 Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Mon, 20 Oct 2025 13:06:55 +0100 Subject: [PATCH 2/6] arm64 support Signed-off-by: Prabhu Subramanian --- 
blint/config.py | 9 +++-- blint/lib/disassembler.py | 73 ++++++++++++++++++++++----------------- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/blint/config.py b/blint/config.py index 81bee23..b9f1abf 100644 --- a/blint/config.py +++ b/blint/config.py @@ -1479,13 +1479,12 @@ def __post_init__(self): IMPLICIT_REGS_ARM64 = { 'bl': {'write': {'x30'}}, - 'blr': {'read': {'x30'}, 'write': {'x30'}}, + 'blr': {'write': {'x30'}}, 'ret': {'read': {'x30'}}, - 'ret x0': {'read': {'x0'}}, 'mrs': {'write': {'x0'}}, 'msr': {'read': {'x0'}}, - 'svc': {'read': {'x8', 'sp'}}, - 'hvc': {'read': {'x8', 'sp'}}, - 'smc': {'read': {'x8', 'sp'}}, + 'svc': {'read': {'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x8'}, 'write': {'x0'}}, + 'hvc': {'read': {'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x8'}, 'write': {'x0'}}, + 'smc': {'read': {'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x8'}, 'write': {'x0'}}, 'brk': {'read': {'x0'}}, } \ No newline at end of file diff --git a/blint/lib/disassembler.py b/blint/lib/disassembler.py index 067361c..ddc1291 100644 --- a/blint/lib/disassembler.py +++ b/blint/lib/disassembler.py @@ -14,9 +14,14 @@ ARM64_GENERAL_REGS_64 = {f'x{i}' for i in range(31)} ARM64_GENERAL_REGS_32 = {f'w{i}' for i in range(31)} ARM64_SPECIAL_REGS = {'sp', 'xzr', 'wzr'} +ARM64_VFP_NEON_REGS = {f'v{i}' for i in range(32)} | \ + {f's{i}' for i in range(32)} | \ + {f'd{i}' for i in range(32)} | \ + {f'q{i}' for i in range(32)} ARM64_ALL_REGS = ( - ARM64_GENERAL_REGS_64 | ARM64_GENERAL_REGS_32 | ARM64_SPECIAL_REGS + ARM64_GENERAL_REGS_64 | ARM64_GENERAL_REGS_32 | ARM64_SPECIAL_REGS | ARM64_VFP_NEON_REGS ) +SORTED_ARM64_ALL_REGS = sorted(ARM64_ALL_REGS, key=len, reverse=True) COMMON_REGS_64 = {'rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'rbp', 'rsp', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'} @@ -69,8 +74,7 @@ def get_arch_reg_set(arch_target): """Returns the appropriate set of registers based on the architecture.""" is_aarch64 = "aarch64" in arch_target.lower() or "arm64" in 
arch_target.lower() if is_aarch64: - combined_regs = ALL_REGS_X86 | ARM64_ALL_REGS - return sorted(combined_regs, key=len, reverse=True) + return SORTED_ARM64_ALL_REGS else: return SORTED_ALL_REGS_X86 @@ -230,7 +234,7 @@ def extract_regs_from_operand(op, sorted_arch_regs=SORTED_ALL_REGS_X86): found_regs.add(sub_cleaned) return found_regs -def _extract_register_usage(instr_assembly, parsed_obj=None, arch_target=""): +def _extract_register_usage(instr_assembly, parsed_obj=None, arch_target="", sorted_arch_regs=None): """ Performs a first-pass analysis to extract approximate register read/write usage from the instruction assembly string. @@ -241,22 +245,18 @@ def _extract_register_usage(instr_assembly, parsed_obj=None, arch_target=""): if not instr_assembly: return list(regs_read), list(regs_written) is_aarch64 = "aarch64" in arch_target.lower() or "arm64" in arch_target.lower() - sorted_arch_regs = get_arch_reg_set(arch_target) + if not sorted_arch_regs: + sorted_arch_regs = get_arch_reg_set(arch_target) first_space_idx = instr_assembly.find(' ') + operands = [] if first_space_idx == -1: mnemonic = instr_assembly.strip().lower().rstrip(':') - operands = [] else: mnemonic_part = instr_assembly[:first_space_idx].strip().lower().rstrip(':') operands_part = instr_assembly[first_space_idx + 1:].strip() mnemonic = mnemonic_part.rstrip(':') - comma_idx = operands_part.find(',') - if comma_idx != -1: - op1 = operands_part[:comma_idx].strip() - op2 = operands_part[comma_idx + 1:].strip() - operands = [op1, op2] - else: - operands = [operands_part] if operands_part else [] + if operands_part: + operands = [op.strip() for op in operands_part.split(',')] num_operands = len(operands) if num_operands > 0: operands = [op.rstrip(',') for op in operands] @@ -319,7 +319,12 @@ def _extract_register_usage(instr_assembly, parsed_obj=None, arch_target=""): if num_operands >= 3: data_reg1 = extract_regs_from_operand(operands[0].lower(), sorted_arch_regs) data_reg2 = 
extract_regs_from_operand(operands[1].lower(), sorted_arch_regs) + mem_operand = operands[2].lower() addr_parts = extract_regs_from_operand(operands[2].lower(), sorted_arch_regs) + if '!' in mem_operand: + base_reg = next(iter(addr_parts), None) + if base_reg: + regs_written.add(base_reg) if 'str' in mnemonic: regs_read.update(data_reg1) regs_read.update(data_reg2) @@ -462,6 +467,7 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad all_regs_written = set() used_simd_reg_types = set() instructions_with_registers = [] + sorted_arch_regs = get_arch_reg_set(arch_target) for instr in instr_list: instr_assembly = instr.assembly mnemonic = instr.assembly.split(None, 1)[0].lower() @@ -496,7 +502,7 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad operand = parts[1].lower().strip() if operand.startswith('[') and operand.endswith(']'): has_indirect_call = True - elif any(operand.startswith(reg) for reg in SORTED_ALL_REGS_X86): + elif any(operand.startswith(reg) for reg in sorted_arch_regs): if operand.isalnum() or '_' in operand: has_indirect_call = True # Check for ARM64 indirect calls and jumps @@ -504,30 +510,35 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad parts = instr_assembly.split(None, 1) if len(parts) > 1: operand = parts[1].lower().strip() - if any(operand.startswith(reg) for reg in SORTED_ALL_REGS_X86): + if any(operand.startswith(reg) for reg in sorted_arch_regs): has_indirect_call = True elif '[' in operand and ']' in operand: has_indirect_call = True elif operand.startswith('#') or operand.startswith(('+', '-')) or operand.startswith('0x'): instruction_metrics["call_count"] += 1 - regs_read, regs_written = _extract_register_usage(instr_assembly, parsed_obj, arch_target) + regs_read, regs_written = _extract_register_usage(instr_assembly, parsed_obj, arch_target, sorted_arch_regs) all_instr_regs = set(regs_read) | set(regs_written) is_simd_fpu = False - if 
any(reg in FPU_REGS for reg in all_instr_regs): - used_simd_reg_types.add("FPU") - is_simd_fpu = True - if any(reg in MMX_REGS for reg in all_instr_regs): - used_simd_reg_types.add("MMX") - is_simd_fpu = True - if any(reg in XMM_REGS for reg in all_instr_regs): - used_simd_reg_types.add("SSE/AVX") - is_simd_fpu = True - if any(reg in YMM_REGS for reg in all_instr_regs): - used_simd_reg_types.add("AVX/AVX2") - is_simd_fpu = True - if any(reg in ZMM_REGS for reg in all_instr_regs): - used_simd_reg_types.add("AVX-512") - is_simd_fpu = True + if "aarch64" in arch_target.lower() or "arm64" in arch_target.lower(): + if any(reg in ARM64_VFP_NEON_REGS for reg in all_instr_regs): + used_simd_reg_types.add("NEON/VFP") + is_simd_fpu = True + else: + if any(reg in FPU_REGS for reg in all_instr_regs): + used_simd_reg_types.add("FPU") + is_simd_fpu = True + if any(reg in MMX_REGS for reg in all_instr_regs): + used_simd_reg_types.add("MMX") + is_simd_fpu = True + if any(reg in XMM_REGS for reg in all_instr_regs): + used_simd_reg_types.add("SSE/AVX") + is_simd_fpu = True + if any(reg in YMM_REGS for reg in all_instr_regs): + used_simd_reg_types.add("AVX/AVX2") + is_simd_fpu = True + if any(reg in ZMM_REGS for reg in all_instr_regs): + used_simd_reg_types.add("AVX-512") + is_simd_fpu = True if is_simd_fpu: instruction_metrics["simd_fpu_count"] += 1 all_regs_read.update(regs_read) From 858fd2ccdcc86b89482607bab4546f9b95be9e5a Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Mon, 20 Oct 2025 13:24:34 +0100 Subject: [PATCH 3/6] apple M proprietary instructions support Signed-off-by: Prabhu Subramanian --- blint/config.py | 11 ++++++++++- blint/lib/disassembler.py | 17 +++++++++++++---- docs/DISASSEMBLE.md | 3 ++- tests/test_disassembler.py | 26 +++++++++++++++++++++++--- 4 files changed, 48 insertions(+), 9 deletions(-) diff --git a/blint/config.py b/blint/config.py index b9f1abf..84dd750 100644 --- a/blint/config.py +++ b/blint/config.py @@ -1487,4 +1487,13 @@ def 
__post_init__(self): 'hvc': {'read': {'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x8'}, 'write': {'x0'}}, 'smc': {'read': {'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x8'}, 'write': {'x0'}}, 'brk': {'read': {'x0'}}, -} \ No newline at end of file +} + +# https://github.com/AsahiLinux/docs/blob/main/docs/hw/cpu/apple-instructions.md +APPLE_PROPRIETARY_INSTRUCTION_RANGES = { + "AMX": (0x00201000, 0x002012df), + "WKDM": (0x00200800, 0x00200cff), + "GuardedMode": (0x00201400, 0x00201420), + "AddressTranslation": (0x00201440, 0x00201440), + "SyncBarrier": (0x00201460, 0x00201463), +} diff --git a/blint/lib/disassembler.py b/blint/lib/disassembler.py index ddc1291..c0c2d55 100644 --- a/blint/lib/disassembler.py +++ b/blint/lib/disassembler.py @@ -2,7 +2,7 @@ import lief import hashlib import re -from blint.config import CRYPTO_INDICATORS, GPU_INDICATORS, SECURITY_INDICATORS, SYSCALL_INDICATORS, IMPLICIT_REGS_X86, IMPLICIT_REGS_X64, IMPLICIT_REGS_ARM64 +from blint.config import CRYPTO_INDICATORS, GPU_INDICATORS, SECURITY_INDICATORS, SYSCALL_INDICATORS, IMPLICIT_REGS_X86, IMPLICIT_REGS_X64, IMPLICIT_REGS_ARM64, APPLE_PROPRIETARY_INSTRUCTION_RANGES OPERAND_DELIMITERS_PATTERN = re.compile(r'[^a-zA-Z0-9_]+') @@ -468,8 +468,16 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad used_simd_reg_types = set() instructions_with_registers = [] sorted_arch_regs = get_arch_reg_set(arch_target) + proprietary_instr_found = set() + is_apple_silicon = "aarch64" in arch_target.lower() and isinstance(parsed_obj, lief.MachO.Binary) for instr in instr_list: instr_assembly = instr.assembly + if is_apple_silicon and len(instr.bytes) == 4: + opcode = int.from_bytes(instr.bytes, 'little') + for name, (start, end) in APPLE_PROPRIETARY_INSTRUCTION_RANGES.items(): + if start <= opcode <= end: + proprietary_instr_found.add(name) + break mnemonic = instr.assembly.split(None, 1)[0].lower() instruction_mnemonics.append(mnemonic) if mnemonic in ('call'): @@ -549,7 +557,7 @@ def 
_analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad }) instruction_metrics["unique_regs_read_count"] = len(all_regs_read) instruction_metrics["unique_regs_written_count"] = len(all_regs_written) - return instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, list(all_regs_read), list(all_regs_written), instructions_with_registers, list(used_simd_reg_types) + return instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, list(all_regs_read), list(all_regs_written), instructions_with_registers, list(used_simd_reg_types), list(proprietary_instr_found) def _build_addr_to_name_map(metadata): """Builds a lookup map from address (int) to name from metadata functions.""" @@ -729,7 +737,7 @@ def disassemble_functions(parsed_obj, metadata, arch_target="", cpu="", features assembly_hash = hashlib.sha256(plain_assembly_text.encode('utf-8')).hexdigest() instruction_count = len(truncated_instr_list) instr_addresses = [instr.address for instr in truncated_instr_list] - instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, regs_read, regs_written, instructions_with_registers, used_simd_reg_types = _analyze_instructions(truncated_instr_list, func_addr, func_addr + size_to_disasm, instr_addresses, parsed_obj, arch_target) + instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, regs_read, regs_written, instructions_with_registers, used_simd_reg_types, proprietary_instructions = _analyze_instructions(truncated_instr_list, func_addr, func_addr + size_to_disasm, instr_addresses, parsed_obj, arch_target) direct_calls = _resolve_direct_calls(truncated_instr_list, addr_to_name_map) joined_mnemonics = "\n".join(instruction_mnemonics) instruction_hash = hashlib.sha256(joined_mnemonics.encode('utf-8')).hexdigest() @@ -757,7 +765,8 @@ def disassemble_functions(parsed_obj, metadata, arch_target="", cpu="", features "regs_written": regs_written, "used_simd_reg_types": used_simd_reg_types, 
"instructions_with_registers": instructions_with_registers, - "function_type": function_type + "function_type": function_type, + "proprietary_instructions": proprietary_instructions } if inst_count == 0: num_success += 1 diff --git a/docs/DISASSEMBLE.md b/docs/DISASSEMBLE.md index 6c367a6..84bda3d 100644 --- a/docs/DISASSEMBLE.md +++ b/docs/DISASSEMBLE.md @@ -15,7 +15,7 @@ The `disassembled_functions` attribute is an optional output of the `blint` bina The `disassembled_functions` attribute is a dictionary where each key is a unique string identifying the function by its virtual address and name, in the format "0xADDRESS::FUNCTION_NAME" (e.g., "0x140012345::simple_add"). Using both address and name prevents collisions in cases where multiple functions might share the same name (e.g., in different modules or due to symbol stripping). The value for each key is another dictionary containing the following fields: | Field Name | Type | Description | -| :---------------------------- | :----------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +|:------------------------------|:-------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `name` | String | The name of the function. | | `address` | String | The virtual address of the function entry point (hexadecimal string, e.g., "0x12345"). | | `assembly` | String | The full disassembled code of the function, with instructions separated by newlines. 
| @@ -35,6 +35,7 @@ The `disassembled_functions` attribute is a dictionary where each key is a uniqu | `used_simd_reg_types` | List of Strings | A list of SIMD register types such as FPU, MMX, SSE/AVX etc. | | `instructions_with_registers` | List of Dictionary | A detailed list providing register usage information for _each individual instruction_ within the function. | | `function_type` | String | A classification of the function based on heuristics. Possible values include: "PLT_Thunk", "Simple_Return", "Has_Syscalls", "Has_Indirect_Calls", or "Has_Conditional_Jumps". If a function doesn't fit these specific categories but is not a simple return, this field will be an empty string. | +| `proprietary_instructions` | List of Strings | List of proprietary instructions such as Apple. Eg: `GuardedMode`, `SyncBarrier` | ### `instruction_metrics` Sub-structure diff --git a/tests/test_disassembler.py b/tests/test_disassembler.py index ce50188..d00c5a9 100644 --- a/tests/test_disassembler.py +++ b/tests/test_disassembler.py @@ -1,4 +1,5 @@ import pytest +import lief from unittest.mock import MagicMock from blint.lib.disassembler import _extract_register_usage, _analyze_instructions, _classify_function @@ -104,7 +105,7 @@ def test_analyze_instructions_basic(mock_instructions): func_addr = 0x1000 next_func_addr_in_sec = 0x2000 (metrics, mnemonics, has_indirect_call, has_loop, - regs_read, regs_written, instrs_with_regs, _) = _analyze_instructions( + regs_read, regs_written, instrs_with_regs, _, _) = _analyze_instructions( mock_instructions, func_addr, next_func_addr_in_sec, instr_addresses, {}, "x86_64" ) assert metrics["call_count"] == 3 @@ -135,7 +136,7 @@ def test_analyze_instructions_loop_detection(): target_instr = MagicMock() target_instr.address = 0x0FFF instr_addresses_with_target = instr_addresses + [target_instr.address] - (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _) = _analyze_instructions( + (metrics, mnemonics, has_indirect_call, has_loop, _, 
_, _, _, _) = _analyze_instructions( instrs, func_addr, next_func_addr_in_sec, instr_addresses_with_target ) instrs_corrected = [] @@ -144,11 +145,30 @@ def test_analyze_instructions_loop_detection(): instr1_corrected.address = 0x1000 instrs_corrected.append(instr1_corrected) instr_addresses_corrected = [0x0FFE, 0x0FFF, 0x1000] - (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _) = _analyze_instructions( + (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, _) = _analyze_instructions( instrs_corrected, func_addr, next_func_addr_in_sec, instr_addresses_corrected ) assert has_loop == True +def test_apple_proprietary_instruction_detection(): + func_addr = 0x1000 + next_func_addr_in_sec = 0x2000 + instr1_corrected = MagicMock() + instr1_corrected.assembly = ".inst 0x00201420" + instr1_corrected.address = 0x1000 + instr1_corrected.bytes = (0x00201420).to_bytes(4, 'little') + instrs_corrected = [instr1_corrected] + instr_addresses_corrected = [0x1000] + mock_macho = MagicMock(spec=lief.MachO.Binary) + (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, proprietary_instructions) = _analyze_instructions( + instrs_corrected, + func_addr, + next_func_addr_in_sec, + instr_addresses_corrected, + parsed_obj=mock_macho, + arch_target="aarch64" + ) + assert proprietary_instructions == ['GuardedMode'] def test_classify_function_plt_thunk(): metrics = {"jump_count": 1, "conditional_jump_count": 0, "call_count": 0, "ret_count": 0, "arith_count": 0, "shift_count": 0, "xor_count": 0} From a5e4898b804555a2485e137352f8f9285424cbdd Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Mon, 20 Oct 2025 13:42:52 +0100 Subject: [PATCH 4/6] Tweaks Signed-off-by: Prabhu Subramanian --- blint/lib/binary.py | 77 +++++++++++++++++++-------------------- blint/lib/disassembler.py | 51 +++++++++++++++----------- blint/lib/utils.py | 4 ++ 3 files changed, 72 insertions(+), 60 deletions(-) diff --git a/blint/lib/binary.py b/blint/lib/binary.py index bb1d2ba..3d539f2 
100644 --- a/blint/lib/binary.py +++ b/blint/lib/binary.py @@ -19,6 +19,7 @@ cleanup_dict_lief_errors, decode_base64, demangle_symbolic_name, + enum_to_str, ) from blint.lib.disassembler import disassemble_functions @@ -98,7 +99,7 @@ def extract_note_data(idx, note): if note.type == lief.ELF.Note.TYPE.GNU_BUILD_ID: build_id = description_str.replace(" ", "") type_str = note.type - type_str = str(type_str).rsplit(".", maxsplit=1)[-1] + type_str = enum_to_str(type_str) note_details = "" sdk_version = "" ndk_version = "" @@ -258,10 +259,10 @@ def parse_symbols(symbols): symbols_list.append( { "name": symbol_name, - "type": str(symbol.type).rsplit(".", maxsplit=1)[-1], + "type": enum_to_str(symbol.type), "value": symbol.value, - "visibility": str(symbol.visibility).rsplit(".", maxsplit=1)[-1], - "binding": str(symbol.binding).rsplit(".", maxsplit=1)[-1], + "visibility": enum_to_str(symbol.visibility), + "binding": enum_to_str(symbol.binding), "is_imported": is_imported, "is_exported": is_exported, "information": symbol.information, @@ -342,13 +343,13 @@ def parse_pe_data(parsed_obj): section_name = "" section_chars = "" section_entropy = "" - dir_type = str(directory.type).rsplit(".", maxsplit=1)[-1] + dir_type = enum_to_str(directory.type) if not dir_type.startswith("?") and directory.size: if directory.has_section: if directory.section.has_characteristic: section_chars = ", ".join( [ - str(chara).rsplit(".", maxsplit=1)[-1] + enum_to_str(chara) for chara in directory.section.characteristics_lists ] ) @@ -433,10 +434,10 @@ def process_pe_signature(parsed_obj): ci = sig.content_info signature_obj = { "version": sig.version, - "digest_algorithm": str(sig.digest_algorithm).rsplit(".", maxsplit=1)[-1], + "digest_algorithm": enum_to_str(sig.digest_algorithm), "content_info": { "content_type": lief.PE.oid_to_string(ci.content_type), - "digest_algorithm": str(ci.digest_algorithm).rsplit(".", maxsplit=1)[-1], + "digest_algorithm": enum_to_str(ci.digest_algorithm), "digest": 
ci.digest.hex(), }, } @@ -446,7 +447,7 @@ def process_pe_signature(parsed_obj): "version": signer.version, "serial_number": signer.serial_number.hex(), "issuer": str(signer.issuer), - "digest_algorithm": str(signer.digest_algorithm).rsplit(".", maxsplit=1)[-1], + "digest_algorithm": enum_to_str(signer.digest_algorithm), "encryption_algorithm": str(signer.encryption_algorithm).rsplit( ".", maxsplit=1 )[-1], @@ -475,7 +476,7 @@ def parse_pe_authenticode(parsed_obj): "sha256_hash": parsed_obj.authentihash_sha256.hex(*sep), "sha512_hash": parsed_obj.authentihash_sha512.hex(*sep), "sha1_hash": parsed_obj.authentihash(lief.PE.ALGORITHMS.SHA_1).hex(*sep), - "verification_flags": str(parsed_obj.verify_signature()).rsplit(".", maxsplit=1)[-1], + "verification_flags": enum_to_str(parsed_obj.verify_signature()), } if signatures := parsed_obj.signatures: if not isinstance(signatures, lief.lief_errors) and signatures[0].signers: @@ -535,9 +536,9 @@ def parse_pe_symbols(symbols): "name": demangle_symbolic_name(symbol.name), "value": symbol.value, "id": section_nb_str, - "base_type": str(symbol.base_type).rsplit(".", maxsplit=1)[-1], - "complex_type": str(symbol.complex_type).rsplit(".", maxsplit=1)[-1], - "storage_class": str(symbol.storage_class).rsplit(".", maxsplit=1)[-1], + "base_type": enum_to_str(symbol.base_type), + "complex_type": enum_to_str(symbol.complex_type), + "storage_class": enum_to_str(symbol.storage_class), } ) except (IndexError, AttributeError, ValueError, RuntimeError): @@ -790,16 +791,14 @@ def add_elf_header(header, metadata): return metadata try: eflags_str = determine_elf_flags(header) - metadata["class"] = str(header.identity_class).rsplit(".", maxsplit=1)[-1] - metadata["endianness"] = str(header.identity_data).rsplit(".", maxsplit=1)[-1] - metadata["identity_version"] = str(header.identity_version).rsplit(".", maxsplit=1)[-1] - metadata["identity_os_abi"] = str(header.identity_os_abi).rsplit(".", maxsplit=1)[-1] - metadata["identity_abi_version"] = 
header.identity_abi_version - metadata["file_type"] = str(header.file_type).rsplit(".", maxsplit=1)[-1] - metadata["machine_type"] = str(header.machine_type).rsplit(".", maxsplit=1)[-1] - metadata["object_file_version"] = str(header.object_file_version).rsplit(".", maxsplit=1)[ - -1 - ] + metadata["class"] = enum_to_str(header.identity_class) + metadata["endianness"] = enum_to_str(header.identity_data) + metadata["identity_version"] = enum_to_str(header.identity_version) + metadata["identity_os_abi"] = enum_to_str(header.identity_os_abi) + metadata["identity_abi_version"] = enum_to_str(header.identity_abi_version) + metadata["file_type"] = enum_to_str(header.file_type) + metadata["machine_type"] = enum_to_str(header.machine_type) + metadata["object_file_version"] = enum_to_str(header.object_file_version) metadata["entrypoint"] = header.entrypoint metadata["processor_flag"] = str(header.processor_flag) + eflags_str except (AttributeError, TypeError, ValueError) as e: @@ -864,7 +863,7 @@ def add_elf_dynamic_entries(dynamic_entries, metadata): metadata["dynamic_entries"].append( { "name": demangle_symbolic_name(entry.name), - "tag": str(entry.tag).rsplit(".", maxsplit=1)[-1], + "tag": enum_to_str(entry.tag), "value": entry.value, } ) @@ -876,7 +875,7 @@ def add_elf_dynamic_entries(dynamic_entries, metadata): metadata["dynamic_entries"].append( { "name": "runpath", - "tag": str(entry.tag).rsplit(".", maxsplit=1)[-1], + "tag": enum_to_str(entry.tag), "value": entry.runpath, } ) @@ -886,7 +885,7 @@ def add_elf_dynamic_entries(dynamic_entries, metadata): metadata["dynamic_entries"].append( { "name": "rpath", - "tag": str(entry.tag).rsplit(".", maxsplit=1)[-1], + "tag": enum_to_str(entry.tag), "value": entry.rpath, } ) @@ -904,7 +903,7 @@ def determine_elf_flags(header): eflags_str = "" if header.machine_type == lief.ELF.ARCH.ARM and hasattr(header, "arm_flags_list"): eflags_str = " - ".join( - [str(s).rsplit(".", maxsplit=1)[-1] for s in header.arm_flags_list] + 
[enum_to_str(s) for s in header.arm_flags_list] ) if header.machine_type in [ lief.ELF.ARCH.MIPS, @@ -912,15 +911,15 @@ def determine_elf_flags(header): lief.ELF.ARCH.MIPS_X, ]: eflags_str = " - ".join( - [str(s).rsplit(".", maxsplit=1)[-1] for s in header.mips_flags_list] + [enum_to_str(s) for s in header.mips_flags_list] ) if header.machine_type == lief.ELF.ARCH.PPC64: eflags_str = " - ".join( - [str(s).rsplit(".", maxsplit=1)[-1] for s in header.ppc64_flags_list] + [enum_to_str(s) for s in header.ppc64_flags_list] ) if header.machine_type == lief.ELF.ARCH.HEXAGON: eflags_str = " - ".join( - [str(s).rsplit(".", maxsplit=1)[-1] for s in header.hexagon_flags_list] + [enum_to_str(s) for s in header.hexagon_flags_list] ) return eflags_str @@ -1308,7 +1307,7 @@ def add_pe_header_data(metadata, parsed_obj): dos_header.addressof_new_exeheader ).strip() metadata["characteristics"] = ", ".join( - [str(chara).rsplit(".", maxsplit=1)[-1] for chara in header.characteristics_list] + [enum_to_str(chara) for chara in header.characteristics_list] ) metadata["num_sections"] = header.numberof_sections metadata["time_date_stamps"] = header.time_date_stamps @@ -1336,14 +1335,14 @@ def add_pe_optional_headers(metadata, optional_header): with contextlib.suppress(IndexError, TypeError): metadata["dll_characteristics"] = ", ".join( [ - str(chara).rsplit(".", maxsplit=1)[-1] + enum_to_str(chara) for chara in optional_header.dll_characteristics_lists ] ) # Detect if this binary is a driver if "WDM_DRIVER" in metadata["dll_characteristics"]: metadata["is_driver"] = True - metadata["subsystem"] = str(optional_header.subsystem).rsplit(".", maxsplit=1)[-1] + metadata["subsystem"] = enum_to_str(optional_header.subsystem) metadata["is_gui"] = metadata["subsystem"] == "WINDOWS_GUI" metadata["exe_type"] = "PE32" if optional_header.magic == lief.PE.PE_TYPE.PE32 else "PE64" metadata["major_linker_version"] = optional_header.major_linker_version @@ -1557,13 +1556,13 @@ def 
add_mach0_build_metadata(exe_file, metadata, parsed_obj): build_version = parsed_obj.build_version if not build_version: return metadata - metadata["platform"] = str(build_version.platform).rsplit(".", maxsplit=1)[-1] + metadata["platform"] = enum_to_str(build_version.platform) metadata["minos"] = "{:d}.{:d}.{:d}".format(*build_version.minos) metadata["sdk"] = "{:d}.{:d}.{:d}".format(*build_version.sdk) if tools := build_version.tools: metadata["tools"] = [] for tool in tools: - tool_str = str(tool.tool).rsplit(".", maxsplit=1)[-1] + tool_str = enum_to_str(tool.tool) metadata["tools"].append( { "tool": tool_str, @@ -1619,12 +1618,12 @@ def add_mach0_header_data(exe_file, metadata, parsed_obj): """ try: header = parsed_obj.header - flags_str = ", ".join([str(s).rsplit(".", maxsplit=1)[-1] for s in header.flags_list]) - metadata["magic"] = str(header.magic).rsplit(".", maxsplit=1)[-1] + flags_str = ", ".join([enum_to_str(s) for s in header.flags_list]) + metadata["magic"] = enum_to_str(header.magic) metadata["is_neural_model"] = header.magic == lief.MachO.MACHO_TYPES.NEURAL_MODEL - metadata["cpu_type"] = str(header.cpu_type).rsplit(".", maxsplit=1)[-1] + metadata["cpu_type"] = enum_to_str(header.cpu_type) metadata["cpu_subtype"] = header.cpu_subtype - metadata["file_type"] = str(header.file_type).rsplit(".", maxsplit=1)[-1] + metadata["file_type"] = enum_to_str(header.file_type) metadata["flags"] = flags_str metadata["number_commands"] = header.nb_cmds metadata["size_commands"] = header.sizeof_cmds diff --git a/blint/lib/disassembler.py b/blint/lib/disassembler.py index c0c2d55..b1168e9 100644 --- a/blint/lib/disassembler.py +++ b/blint/lib/disassembler.py @@ -562,7 +562,7 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad def _build_addr_to_name_map(metadata): """Builds a lookup map from address (int) to name from metadata functions.""" addr_to_name_map = {} - for func_list_key in ["functions", "ctor_functions", 
"exception_functions", "unwind_functions", "exports"]: + for func_list_key in ["functions", "ctor_functions", "exception_functions", "unwind_functions", "exports", "imports", "symtab_symbols", "dynamic_symbols"]: for func_entry in metadata.get(func_list_key, []): addr_str = func_entry.get("address", "") name = func_entry.get("name", "") @@ -574,31 +574,40 @@ def _build_addr_to_name_map(metadata): continue return addr_to_name_map -def _resolve_direct_calls(instr_list, addr_to_name_map): +def _resolve_direct_calls(instr_list, addr_to_name_map, arch_target=""): """Identifies direct calls in instructions and resolves target addresses to function names. Handles both immediate absolute addresses (0x...) and relative offsets.""" potential_callees = [] + is_aarch64 = "aarch64" in arch_target.lower() or "arm64" in arch_target.lower() for instr in instr_list: instr_assembly = instr.assembly - if instr_assembly.startswith('call '): - parts = instr_assembly.split(None, 1) - if len(parts) > 1: - operand = parts[1] - target_addr = None + parts = instr_assembly.split(None, 1) + if not parts: + continue + mnemonic = parts[0].lower() + is_direct_call = False + if is_aarch64 and mnemonic == 'bl': + is_direct_call = True + elif not is_aarch64 and mnemonic == 'call': + is_direct_call = True + if is_direct_call and len(parts) > 1: + operand = parts[1] + target_addr = None + try: if operand.startswith('0x'): - try: - target_addr = int(operand, 16) - except ValueError: - continue - elif operand.isdigit(): - target_addr = operand - elif operand.startswith(('+', '-')): - offset = int(operand, 10) - target_addr = instr.address + offset - if target_addr is not None: - target_name = addr_to_name_map.get(target_addr) - if target_name: - potential_callees.append(target_name) + target_addr = int(operand, 16) + elif operand.startswith('#'): + offset = int(operand.lstrip('#')) + target_addr = instr.address + offset + elif operand.isdigit() or operand.startswith(('+', '-')): + offset = int(operand) 
+ target_addr = instr.address + len(instr.bytes) + offset + except (ValueError, IndexError): + continue + if target_addr is not None: + target_name = addr_to_name_map.get(target_addr) + if target_name: + potential_callees.append(target_name) return potential_callees def _classify_function(instruction_metrics, instruction_count, plain_assembly_text, has_system_call, has_indirect_call): @@ -738,7 +747,7 @@ def disassemble_functions(parsed_obj, metadata, arch_target="", cpu="", features instruction_count = len(truncated_instr_list) instr_addresses = [instr.address for instr in truncated_instr_list] instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, regs_read, regs_written, instructions_with_registers, used_simd_reg_types, proprietary_instructions = _analyze_instructions(truncated_instr_list, func_addr, func_addr + size_to_disasm, instr_addresses, parsed_obj, arch_target) - direct_calls = _resolve_direct_calls(truncated_instr_list, addr_to_name_map) + direct_calls = _resolve_direct_calls(truncated_instr_list, addr_to_name_map, arch_target) joined_mnemonics = "\n".join(instruction_mnemonics) instruction_hash = hashlib.sha256(joined_mnemonics.encode('utf-8')).hexdigest() has_system_call = any(syscall_pattern in lower_assembly for syscall_pattern in SYSCALL_INDICATORS) diff --git a/blint/lib/utils.py b/blint/lib/utils.py index 21a6765..505bb97 100644 --- a/blint/lib/utils.py +++ b/blint/lib/utils.py @@ -682,3 +682,7 @@ def json_serializer(obj): return "" return obj + +def enum_to_str(enum_obj) -> str: + """Converts a lief enum object to its string name.""" + return str(enum_obj).rsplit(".", maxsplit=1)[-1] From afa25dd14274f2ca26e2859c4f353fa2e7706302 Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Mon, 20 Oct 2025 14:20:14 +0100 Subject: [PATCH 5/6] apple M proprietary instructions support Signed-off-by: Prabhu Subramanian --- blint/config.py | 48 ++++++++++++++++++++++++ blint/lib/disassembler.py | 34 ++++++++++++++--- docs/DISASSEMBLE.md | 
76 +++++++++++++++++++++++++++++++++++++- tests/test_disassembler.py | 50 +++++++++++++++++++++++-- 4 files changed, 197 insertions(+), 11 deletions(-) diff --git a/blint/config.py b/blint/config.py index 84dd750..261cb78 100644 --- a/blint/config.py +++ b/blint/config.py @@ -1497,3 +1497,51 @@ def __post_init__(self): "AddressTranslation": (0x00201440, 0x00201440), "SyncBarrier": (0x00201460, 0x00201463), } + +APPLE_PROPRIETARY_SREGS = { + "GXF_CONTROL": { + "S3_6_C15_C1_2", + "S3_6_C15_C8_1", + }, + "SPRR_CONTROL": { + "S3_6_C15_C1_0", + "S3_6_C15_C1_5", + "S3_6_C15_C1_6", + }, + "PAC_KEYS": { + "S3_4_C15_C1_0", + "S3_4_C15_C1_1", + }, + "JIT_HARDENING": { + "S3_4_C15_C2_6", + "S3_4_C15_C2_7", + }, + "PERF_COUNTERS": { + "S3_1_C15_C0_0", + "S3_1_C15_C1_0", + "S3_1_C15_C2_0", + "S3_1_C15_C3_0", + "S3_1_C15_C4_0", + "S3_1_C15_C5_0", + "S3_1_C15_C6_0", + "S3_1_C15_C13_0", + "S3_2_C15_C0_0", + "S3_2_C15_C1_0", + "S3_2_C15_C2_0", + "S3_2_C15_C3_0", + "S3_2_C15_C4_0", + "S3_2_C15_C5_0", + "S3_2_C15_C6_0", + "S3_2_C15_C7_0", + "S3_2_C15_C9_0", + "S3_2_C15_C10_0", + }, + "IPI_CONTROL": { + "S3_5_C15_C1_1", + "S3_5_C15_C3_1", + }, + "VIRTUALIZATION": { + "S3_5_C15_C1_2", + "S3_5_C15_C1_3", + }, +} diff --git a/blint/lib/disassembler.py b/blint/lib/disassembler.py index b1168e9..b016e9f 100644 --- a/blint/lib/disassembler.py +++ b/blint/lib/disassembler.py @@ -2,7 +2,7 @@ import lief import hashlib import re -from blint.config import CRYPTO_INDICATORS, GPU_INDICATORS, SECURITY_INDICATORS, SYSCALL_INDICATORS, IMPLICIT_REGS_X86, IMPLICIT_REGS_X64, IMPLICIT_REGS_ARM64, APPLE_PROPRIETARY_INSTRUCTION_RANGES +from blint.config import CRYPTO_INDICATORS, GPU_INDICATORS, SECURITY_INDICATORS, SYSCALL_INDICATORS, IMPLICIT_REGS_X86, IMPLICIT_REGS_X64, IMPLICIT_REGS_ARM64, APPLE_PROPRIETARY_INSTRUCTION_RANGES, APPLE_PROPRIETARY_SREGS OPERAND_DELIMITERS_PATTERN = re.compile(r'[^a-zA-Z0-9_]+') @@ -63,6 +63,12 @@ X64_RETURN_REGS = frozenset({'rax'}) X86_RETURN_REGS = frozenset({'eax'}) 
+_SREG_TO_CATEGORY_MAP = { + sreg.lower(): category + for category, sregs in APPLE_PROPRIETARY_SREGS.items() + for sreg in sregs +} + try: from nyxstone import Nyxstone NYXSTONE_AVAILABLE = True @@ -469,22 +475,37 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad instructions_with_registers = [] sorted_arch_regs = get_arch_reg_set(arch_target) proprietary_instr_found = set() + sreg_interactions = set() is_apple_silicon = "aarch64" in arch_target.lower() and isinstance(parsed_obj, lief.MachO.Binary) for instr in instr_list: instr_assembly = instr.assembly + parts = instr_assembly.split() if is_apple_silicon and len(instr.bytes) == 4: opcode = int.from_bytes(instr.bytes, 'little') for name, (start, end) in APPLE_PROPRIETARY_INSTRUCTION_RANGES.items(): if start <= opcode <= end: proprietary_instr_found.add(name) break - mnemonic = instr.assembly.split(None, 1)[0].lower() + mnemonic_and_operands = instr_assembly.split(None, 1) + mnemonic = mnemonic_and_operands[0].lower() + if is_apple_silicon and mnemonic in ('mrs', 'msr') and len(parts) > 1: + operands_str = mnemonic_and_operands[1] + operands = [op.strip().lower() for op in operands_str.split(',')] + sreg_operand = None + try: + if mnemonic == 'mrs': + sreg_operand = operands[1] + elif mnemonic == 'msr': + sreg_operand = operands[0] + except IndexError: + pass + if sreg_operand and sreg_operand in _SREG_TO_CATEGORY_MAP: + sreg_interactions.add(_SREG_TO_CATEGORY_MAP[sreg_operand]) instruction_mnemonics.append(mnemonic) if mnemonic in ('call'): instruction_metrics["call_count"] += 1 elif mnemonic in CONDITIONAL_JMP_INST: instruction_metrics["conditional_jump_count"] += 1 - parts = instr_assembly.split() if len(parts) >= 2: target_part = parts[1] if target_part.startswith('0x'): @@ -557,7 +578,7 @@ def _analyze_instructions(instr_list, func_addr, next_func_addr_in_sec, instr_ad }) instruction_metrics["unique_regs_read_count"] = len(all_regs_read) 
instruction_metrics["unique_regs_written_count"] = len(all_regs_written) - return instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, list(all_regs_read), list(all_regs_written), instructions_with_registers, list(used_simd_reg_types), list(proprietary_instr_found) + return instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, list(all_regs_read), list(all_regs_written), instructions_with_registers, list(used_simd_reg_types), list(proprietary_instr_found), list(sreg_interactions) def _build_addr_to_name_map(metadata): """Builds a lookup map from address (int) to name from metadata functions.""" @@ -746,7 +767,7 @@ def disassemble_functions(parsed_obj, metadata, arch_target="", cpu="", features assembly_hash = hashlib.sha256(plain_assembly_text.encode('utf-8')).hexdigest() instruction_count = len(truncated_instr_list) instr_addresses = [instr.address for instr in truncated_instr_list] - instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, regs_read, regs_written, instructions_with_registers, used_simd_reg_types, proprietary_instructions = _analyze_instructions(truncated_instr_list, func_addr, func_addr + size_to_disasm, instr_addresses, parsed_obj, arch_target) + instruction_metrics, instruction_mnemonics, has_indirect_call, has_loop, regs_read, regs_written, instructions_with_registers, used_simd_reg_types, proprietary_instructions, sreg_interactions = _analyze_instructions(truncated_instr_list, func_addr, func_addr + size_to_disasm, instr_addresses, parsed_obj, arch_target) direct_calls = _resolve_direct_calls(truncated_instr_list, addr_to_name_map, arch_target) joined_mnemonics = "\n".join(instruction_mnemonics) instruction_hash = hashlib.sha256(joined_mnemonics.encode('utf-8')).hexdigest() @@ -775,7 +796,8 @@ def disassemble_functions(parsed_obj, metadata, arch_target="", cpu="", features "used_simd_reg_types": used_simd_reg_types, "instructions_with_registers": instructions_with_registers, 
"function_type": function_type, - "proprietary_instructions": proprietary_instructions + "proprietary_instructions": proprietary_instructions, + "sreg_interactions": sreg_interactions } if inst_count == 0: num_success += 1 diff --git a/docs/DISASSEMBLE.md b/docs/DISASSEMBLE.md index 84bda3d..87a7b97 100644 --- a/docs/DISASSEMBLE.md +++ b/docs/DISASSEMBLE.md @@ -35,7 +35,8 @@ The `disassembled_functions` attribute is a dictionary where each key is a uniqu | `used_simd_reg_types` | List of Strings | A list of SIMD register types such as FPU, MMX, SSE/AVX etc. | | `instructions_with_registers` | List of Dictionary | A detailed list providing register usage information for _each individual instruction_ within the function. | | `function_type` | String | A classification of the function based on heuristics. Possible values include: "PLT_Thunk", "Simple_Return", "Has_Syscalls", "Has_Indirect_Calls", or "Has_Conditional_Jumps". If a function doesn't fit these specific categories but is not a simple return, this field will be an empty string. | -| `proprietary_instructions` | List of Strings | List of proprietary instructions such as Apple. Eg: `GuardedMode`, `SyncBarrier` | +| `proprietary_instructions` | List of Strings | (Apple Silicon Only) A list of categories for proprietary instructions found (e.g., "GuardedMode", "AMX"). This indicates the use of non-standard hardware features. | +| `sreg_interactions` | List of Strings | (Apple Silicon Only) A list of categories for interactions with proprietary System Registers (e.g., "SPRR_CONTROL", "PAC_KEYS"). This signals manipulation of low-level security and hardware configuration. | | ### `instruction_metrics` Sub-structure @@ -77,6 +78,8 @@ The `regs_read` and `regs_written` fields (both globally for the function and pe - **Memory Operands**: Instructions accessing memory via addresses calculated from registers (e.g., `mov rax, [rbx + rcx*2]`) indicate that `rbx` and `rcx` are read (used for address calculation). 
The destination `rax` is written. - **Limitations**: This analysis is based on parsing the assembly text string. It provides a good approximation for common instructions but might be inaccurate for highly complex or obfuscated code, or for instructions not explicitly handled in the parsing logic. +----- + ## Use Cases 1. **Binary Fingerprinting and Diffing:** Compare binaries by matching functions based on their `assembly_hash` or `instruction_hash`. This helps identify identical or modified functions between different versions or variants of a binary. @@ -91,6 +94,27 @@ The `regs_read` and `regs_written` fields (both globally for the function and pe - **Track Data Flow:** By examining `instructions_with_registers`, you can trace how data moves through registers within a function. For example, seeing `rax` written by one instruction and then read by a subsequent one. - **Detect Register Preservation:** Check if a function modifies callee-saved registers (like `rbx`, `rbp`, `r12-r15` on x64) without restoring them, which might violate calling conventions or indicate specific behavior. - **Spot Unusual Register Patterns:** Functions that read or write an unusually large number of registers might be complex, perform context switching, or manipulate state extensively. +9. **Analyzing Proprietary Hardware Features (Apple Silicon):** + +The `proprietary_instructions` and `sreg_interactions` fields provide powerful insights into how software leverages Apple's custom silicon features. This is critical for security research, anti-tampering analysis, and performance tuning on macOS and iOS. + - **Detecting Advanced Security Hardening:** + - Use Case: A kernel extension or system daemon uses hardware-enforced memory permissions that are stronger than standard ARM features. + - blint Findings: The `sreg_interactions` list contains "SPRR_CONTROL" or "GXF_CONTROL".
+ - Analysis: This indicates the function is setting up or entering a "Guarded Execution" mode (GXF) or manipulating SPRR, Apple's page-permission remapping mechanism (the name is undocumented; it is commonly expanded as "Shadow Permission Remap Registers"). This code is highly security-sensitive and is likely part of Apple's core operating system defenses, such as protecting kernel memory or DRM components. + - **Identifying Anti-Debugging and Anti-Emulation:** + - Use Case: A protected application wants to detect if it's being run under a debugger or in an emulator. It does this by reading hardware performance counters, which behave differently in virtualized environments. + - blint Findings: The sreg_interactions list contains "PERF_COUNTERS". + - Analysis: This is a strong indicator of an anti-analysis technique. The function is likely measuring execution time or specific hardware events to detect anomalies caused by debuggers or emulators. + - **Finding Performance-Critical Code:** + - Use Case: A high-performance application uses Apple's custom matrix co-processor for machine learning or signal processing tasks. + - blint Findings: The proprietary_instructions list contains "AMX" (Apple Matrix Coprocessor). + - Analysis: This function is a candidate for performance analysis. It directly leverages specialized hardware, and any changes to it could have significant performance implications. + - **Locating Kernel-Level Pointer Authentication Logic:** + - Use Case: The kernel is configuring Pointer Authentication (PAC) keys to protect its own function pointers from being overwritten in an attack. + - blint Findings: The sreg_interactions list contains "PAC_KEYS". + - Analysis: This function is manipulating the hardware keys used for pointer signing and authentication. It is a critical part of the system's control-flow integrity and a high-value target for security researchers. + +------ ## Examples @@ -169,6 +193,56 @@ simple_add: - `pop rbp`: Reads `rbp` (from stack, implicitly using `rsp`), Writes `rsp` (stack pointer incremented).
- `ret`: Typically doesn't directly read/write general-purpose registers listed here (though it implicitly uses `rsp` to get the return address and `rip` to set the next instruction). + +Example 2: Analyzing an Apple Silicon Security Function + +Consider a hypothetical function on macOS that configures memory permissions. + +``` +_configure_secure_memory: + stp x29, x30, [sp, #-16]! + mov x29, sp + mrs x0, s3_6_c15_c1_0 // Read SPRR_CONFIG_EL1 + orr x0, x0, #1 // Set the SPRR_CONFIG_EN bit + msr s3_6_c15_c1_0, x0 // Write back to enable SPRR + ldp x29, x30, [sp], #16 + ret +``` + +Corresponding `disassembled_functions` attribute: + +```json +{ + "0x1000abcde::_configure_secure_memory": { + "name": "_configure_secure_memory", + "address": "0x1000abcde", + "assembly": "stp x29, x30, [sp, #-16]!\nmov x29, sp\nmrs x0, s3_6_c15_c1_0\norr x0, x0, #1\nmsr s3_6_c15_c1_0, x0\nldp x29, x30, [sp], #16\nret", + "proprietary_instructions": [], + "sreg_interactions": [ + "SPRR_CONTROL" + ], + "regs_read": ["x29", "x30", "sp", "x0"], + "regs_written": ["x29", "x30", "sp", "x0"], + "instructions_with_registers": [ + // ... + { + "regs_read": [], + "regs_written": ["x0"] + }, + // ... + ] + ... + } +} +``` + +**Explanation:** + +1. sreg_interactions: The analysis detects that the code reads (mrs) and writes (msr) to the s3_6_c15_c1_0 system register. It looks this up in its internal map and correctly identifies it as a control register for the SPRR hardware feature, adding "SPRR_CONTROL" to the list. +2. Analyst Conclusion: An analyst can immediately conclude that this function is not a typical application function but is instead part of a low-level system component responsible for configuring hardware memory security. This allows them to prioritize it for further investigation. + +------ + ## Function Boundary Detection The disassembler determines the end of a function using a "linear sweep" heuristic. 
Disassembly begins at the function's entry point and stops when it encounters a terminating instruction (like ret or an unconditional jmp) or when it reaches the address of the next known function in the same section. diff --git a/tests/test_disassembler.py b/tests/test_disassembler.py index d00c5a9..8da1600 100644 --- a/tests/test_disassembler.py +++ b/tests/test_disassembler.py @@ -105,7 +105,7 @@ def test_analyze_instructions_basic(mock_instructions): func_addr = 0x1000 next_func_addr_in_sec = 0x2000 (metrics, mnemonics, has_indirect_call, has_loop, - regs_read, regs_written, instrs_with_regs, _, _) = _analyze_instructions( + regs_read, regs_written, instrs_with_regs, _, _, _) = _analyze_instructions( mock_instructions, func_addr, next_func_addr_in_sec, instr_addresses, {}, "x86_64" ) assert metrics["call_count"] == 3 @@ -136,7 +136,7 @@ def test_analyze_instructions_loop_detection(): target_instr = MagicMock() target_instr.address = 0x0FFF instr_addresses_with_target = instr_addresses + [target_instr.address] - (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, _) = _analyze_instructions( + (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, _, _) = _analyze_instructions( instrs, func_addr, next_func_addr_in_sec, instr_addresses_with_target ) instrs_corrected = [] @@ -145,7 +145,7 @@ def test_analyze_instructions_loop_detection(): instr1_corrected.address = 0x1000 instrs_corrected.append(instr1_corrected) instr_addresses_corrected = [0x0FFE, 0x0FFF, 0x1000] - (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, _) = _analyze_instructions( + (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, _, _) = _analyze_instructions( instrs_corrected, func_addr, next_func_addr_in_sec, instr_addresses_corrected ) assert has_loop == True @@ -160,7 +160,7 @@ def test_apple_proprietary_instruction_detection(): instrs_corrected = [instr1_corrected] instr_addresses_corrected = [0x1000] mock_macho = MagicMock(spec=lief.MachO.Binary) - 
(metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, proprietary_instructions) = _analyze_instructions( + (metrics, mnemonics, has_indirect_call, has_loop, _, _, _, _, proprietary_instructions, _) = _analyze_instructions( instrs_corrected, func_addr, next_func_addr_in_sec, @@ -170,6 +170,48 @@ def test_apple_proprietary_instruction_detection(): ) assert proprietary_instructions == ['GuardedMode'] +def test_apple_sreg_interaction_msr(): + func_addr = 0x1000 + next_func_addr_in_sec = 0x2000 + instr = MagicMock() + instr.assembly = "msr s3_6_c15_c1_0, x0" + instr.address = 0x1004 + instr.bytes = b'\x00\x00\x00\x00' + instructions = [instr] + instr_addresses = [instr.address] + mock_macho = MagicMock(spec=lief.MachO.Binary) + (_, _, _, _, _, _, _, _, proprietary_instructions, sreg_interactions) = _analyze_instructions( + instructions, + func_addr, + next_func_addr_in_sec, + instr_addresses, + parsed_obj=mock_macho, + arch_target="aarch64" + ) + assert proprietary_instructions == [] + assert sreg_interactions == ['SPRR_CONTROL'] + +def test_apple_sreg_interaction_mrs(): + func_addr = 0x1000 + next_func_addr_in_sec = 0x2000 + instr = MagicMock() + instr.assembly = "mrs x1, s3_6_c15_c1_0" + instr.address = 0x1008 + instr.bytes = b'\x00\x00\x00\x00' + instructions = [instr] + instr_addresses = [instr.address] + mock_macho = MagicMock(spec=lief.MachO.Binary) + (_, _, _, _, _, _, _, _, proprietary_instructions, sreg_interactions) = _analyze_instructions( + instructions, + func_addr, + next_func_addr_in_sec, + instr_addresses, + parsed_obj=mock_macho, + arch_target="aarch64" + ) + assert proprietary_instructions == [] + assert sreg_interactions == ['SPRR_CONTROL'] + def test_classify_function_plt_thunk(): metrics = {"jump_count": 1, "conditional_jump_count": 0, "call_count": 0, "ret_count": 0, "arith_count": 0, "shift_count": 0, "xor_count": 0} instruction_count = 3 From 610995362a7bc447a8795726ab121854793aa00a Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: 
Mon, 20 Oct 2025 14:22:07 +0100 Subject: [PATCH 6/6] bump version Signed-off-by: Prabhu Subramanian --- Info.plist | 2 +- file_version_info.txt | 4 ++-- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Info.plist b/Info.plist index a2cea57..2b8e463 100644 --- a/Info.plist +++ b/Info.plist @@ -9,6 +9,6 @@ CFBundleName blint CFBundleVersion - 3.0.1 + 3.0.2 diff --git a/file_version_info.txt b/file_version_info.txt index 5bb2498..2dac6bd 100644 --- a/file_version_info.txt +++ b/file_version_info.txt @@ -32,12 +32,12 @@ VSVersionInfo( u'040904B0', [StringStruct(u'CompanyName', u'OWASP Foundation'), StringStruct(u'FileDescription', u'blint - The Binary Linter'), - StringStruct(u'FileVersion', u'3.0.1.0'), + StringStruct(u'FileVersion', u'3.0.2.0'), StringStruct(u'InternalName', u'blint'), StringStruct(u'LegalCopyright', u'© OWASP Foundation. All rights reserved.'), StringStruct(u'OriginalFilename', u'blint.exe'), StringStruct(u'ProductName', u'blint'), - StringStruct(u'ProductVersion', u'3.0.1.0')]) + StringStruct(u'ProductVersion', u'3.0.2.0')]) ]), VarFileInfo([VarStruct(u'Translation', [1033, 1200])]) ] diff --git a/pyproject.toml b/pyproject.toml index 8e255c3..578f9df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "blint" -version = "3.0.1" +version = "3.0.2" description = "Linter and SBOM generator for binary files." authors = [ {name= "Team AppThreat", email = "cloud@appthreat.com"},