From 248bf052641de01a2f3d065ae2526dba2d289c74 Mon Sep 17 00:00:00 2001 From: Divyansh Date: Wed, 26 Mar 2025 19:13:53 +0530 Subject: [PATCH 1/3] Fixes issue #937: Show Offset and VirtAddr for language-specific strings --- .gitignore | 2 ++ floss/language/go/extract.py | 19 +++++++++++++++++++ floss/language/rust/extract.py | 15 +++++++++++++++ floss/main.py | 7 +++++++ floss/render/default.py | 33 +++++++++++++++++++++++++++------ floss/results.py | 3 ++- 6 files changed, 72 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 32cf7f990..4a98369aa 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ lib/ # Pyenv file .python-version .venv +floss-venv/ # Test executables bin/ @@ -33,3 +34,4 @@ flare_floss.egg-info .direnv/ .env/ .envrc +*.code-workspace diff --git a/floss/language/go/extract.py b/floss/language/go/extract.py index 95e5e95ac..bfb8fbf8c 100644 --- a/floss/language/go/extract.py +++ b/floss/language/go/extract.py @@ -424,6 +424,25 @@ def get_static_strings_from_blob_range(sample: pathlib.Path, static_strings: Lis return list(filter(lambda s: string_blob_start <= s.offset < string_blob_end, static_strings)) +def get_file_offset_in_blob(sample: pathlib.Path) -> int: + pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True) + + struct_strings = list(sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address)) + if not struct_strings: + return -1 + + try: + string_blob_start, _ = find_string_blob_range(pe, struct_strings) + except ValueError: + return -1 + + image_base = pe.OPTIONAL_HEADER.ImageBase + virtual_address = string_blob_start - image_base + pointer_to_raw_data = pe.get_offset_from_rva(string_blob_start - image_base) + + return image_base + virtual_address - pointer_to_raw_data + + def main(argv=None): parser = argparse.ArgumentParser(description="Get Go strings") parser.add_argument("path", help="file or path to analyze") diff --git a/floss/language/rust/extract.py b/floss/language/rust/extract.py index b31c4b96c..59dc2d866 100644 --- a/floss/language/rust/extract.py +++ b/floss/language/rust/extract.py @@ -148,6 +148,21 @@ def get_static_strings_from_rdata(sample, static_strings) -> List[StaticString]: return list(filter(lambda s: start_rdata <= s.offset < end_rdata, static_strings)) +def get_file_offset_in_rdata(sample: pathlib.Path) -> int: + pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True) + + try: + rdata_section = get_rdata_section(pe) + except ValueError: + return -1 + + image_base = pe.OPTIONAL_HEADER.ImageBase + virtual_address = rdata_section.VirtualAddress + pointer_to_raw_data = rdata_section.PointerToRawData + + return image_base + virtual_address - pointer_to_raw_data + + def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]: image_base = pe.OPTIONAL_HEADER.ImageBase diff --git a/floss/main.py b/floss/main.py index f428b4e65..fce935c5f 100644 --- a/floss/main.py +++ b/floss/main.py @@ -691,6 +691,9 @@ def main(argv=None) -> int: string_blob_strings, results.strings.language_strings, args.min_length ) + if args.verbose: + results.metadata.file_offset = floss.language.go.extract.get_file_offset_in_blob(sample) + elif results.metadata.language == Language.RUST.value: logger.info("extracting language-specific Rust strings") @@ -703,6 +706,10 @@ def main(argv=None) -> int: results.strings.language_strings_missed = floss.language.utils.get_missed_strings( rdata_strings, results.strings.language_strings, args.min_length ) + + if args.verbose: + results.metadata.file_offset = floss.language.rust.extract.get_file_offset_in_rdata(sample) + if ( results.analysis.enable_decoded_strings or results.analysis.enable_stack_strings diff --git a/floss/render/default.py b/floss/render/default.py index 9526c3f9b..85da2d673 100644 --- a/floss/render/default.py +++ b/floss/render/default.py @@ -171,16 +171,36 @@ def strtime(seconds): return f"{m:02.0f}:{s:02.0f}" -def render_language_strings(language, language_strings, language_strings_missed, console, verbose, disable_headers): +def render_language_strings(language, language_strings, language_strings_missed, file_offset, console, verbose, disable_headers): strings = sorted(language_strings + language_strings_missed, key=lambda s: s.offset) render_heading(f"FLOSS {language.upper()} STRINGS ({len(strings)})", console, verbose, disable_headers) offset_len = len(f"{strings[-1].offset}") - for s in strings: - if verbose == Verbosity.DEFAULT: + va_offset_len = len(f"{strings[-1].offset + file_offset}") + + if verbose != Verbosity.DEFAULT: + # add column headers + table = Table( + "Offset", + "VirtAddr", + "String", + show_header=not (disable_headers), + box=box.ASCII2, + show_edge=False, + ) + + # add rows + for s in strings: + table.add_row( + f"0x{s.offset:>0{offset_len}x}", + f"0x{s.offset + file_offset:>0{va_offset_len}x}", + string_style(sanitize(s.string, is_ascii_only=False)), + ) + + console.print(table) + + else: + for s in strings: console.print(sanitize(s.string, is_ascii_only=False), markup=False) - else: - colored_string = string_style(sanitize(s.string, is_ascii_only=False)) - console.print(f"0x{s.offset:>0{offset_len}x} {colored_string}") def render_static_substrings(strings, encoding, offset_len, console, verbose, disable_headers): @@ -353,6 +373,7 @@ def render(results: floss.results.ResultDocument, verbose, disable_headers, colo results.metadata.language, results.strings.language_strings, results.strings.language_strings_missed, + results.metadata.file_offset, console, verbose, disable_headers, diff --git a/floss/results.py b/floss/results.py index c56ad4765..37dec87e1 100644 --- a/floss/results.py +++ b/floss/results.py @@ -17,7 +17,7 @@ import json import datetime from enum import Enum -from typing import Dict, List +from typing import Dict, List, Optional from pathlib import Path from dataclasses import field @@ -207,6 +207,7 @@ class Metadata: language: str = "" language_version: str = "" language_selected: str = "" # configured by user + file_offset: Optional[int] = None @dataclass From 67b12ffbe52150a83d7a1f0d21262eb7da87ab14 Mon Sep 17 00:00:00 2001 From: Divyansh Date: Wed, 26 Mar 2025 19:53:11 +0530 Subject: [PATCH 2/3] Fixes issue #937: Show Offset and VirtAddr for language-specific strings: Go and Rust --- floss/language/go/extract.py | 4 ++-- floss/main.py | 2 +- floss/render/default.py | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/floss/language/go/extract.py b/floss/language/go/extract.py index bfb8fbf8c..2fa16aea0 100644 --- a/floss/language/go/extract.py +++ b/floss/language/go/extract.py @@ -435,11 +435,11 @@ def get_file_offset_in_blob(sample: pathlib.Path) -> int: string_blob_start, _ = find_string_blob_range(pe, struct_strings) except ValueError: return -1 - + image_base = pe.OPTIONAL_HEADER.ImageBase virtual_address = string_blob_start - image_base pointer_to_raw_data = pe.get_offset_from_rva(string_blob_start - image_base) - + return image_base + virtual_address - pointer_to_raw_data diff --git a/floss/main.py b/floss/main.py index fce935c5f..65716658a 100644 --- a/floss/main.py +++ b/floss/main.py @@ -709,7 +709,7 @@ def main(argv=None) -> int: if args.verbose: results.metadata.file_offset = floss.language.rust.extract.get_file_offset_in_rdata(sample) - + if ( results.analysis.enable_decoded_strings or results.analysis.enable_stack_strings diff --git a/floss/render/default.py b/floss/render/default.py index 85da2d673..a65563f5c 100644 --- a/floss/render/default.py +++ b/floss/render/default.py @@ -171,12 +171,14 @@ def strtime(seconds): return f"{m:02.0f}:{s:02.0f}" -def render_language_strings(language, language_strings, language_strings_missed, file_offset, console, verbose, disable_headers): +def render_language_strings( + language, language_strings, language_strings_missed, file_offset, console, verbose, disable_headers +): strings = sorted(language_strings + language_strings_missed, key=lambda s: s.offset) render_heading(f"FLOSS {language.upper()} STRINGS ({len(strings)})", console, verbose, disable_headers) offset_len = len(f"{strings[-1].offset}") va_offset_len = len(f"{strings[-1].offset + file_offset}") - + if verbose != Verbosity.DEFAULT: # add column headers table = Table( From d49438d9eeb327fddcd31f0af1dedd86a1c0e58f Mon Sep 17 00:00:00 2001 From: Divyansh Date: Wed, 26 Mar 2025 20:35:32 +0530 Subject: [PATCH 3/3] Applied changes suggested by gemini-code-assist --- floss/language/go/extract.py | 10 +++++----- floss/language/rust/extract.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/floss/language/go/extract.py b/floss/language/go/extract.py index 2fa16aea0..219e1e3b8 100644 --- a/floss/language/go/extract.py +++ b/floss/language/go/extract.py @@ -429,18 +429,18 @@ def get_file_offset_in_blob(sample: pathlib.Path) -> int: struct_strings = list(sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address)) if not struct_strings: - return -1 + return [] try: string_blob_start, _ = find_string_blob_range(pe, struct_strings) - except ValueError: - return -1 + except ValueError as e: + raise ValueError("Failed to find string blob range") from e image_base = pe.OPTIONAL_HEADER.ImageBase virtual_address = string_blob_start - image_base - pointer_to_raw_data = pe.get_offset_from_rva(string_blob_start - image_base) + raw_data_offset = pe.get_offset_from_rva(string_blob_start - image_base) - return image_base + virtual_address - pointer_to_raw_data + return image_base + virtual_address - raw_data_offset def main(argv=None): diff --git a/floss/language/rust/extract.py b/floss/language/rust/extract.py index 59dc2d866..6e1ac3983 100644 --- a/floss/language/rust/extract.py +++ b/floss/language/rust/extract.py @@ -153,14 +153,14 @@ def get_file_offset_in_rdata(sample: pathlib.Path) -> int: try: rdata_section = get_rdata_section(pe) - except ValueError: - return -1 + except ValueError as e: + raise ValueError("Failed to find .rdata section") from e image_base = pe.OPTIONAL_HEADER.ImageBase virtual_address = rdata_section.VirtualAddress - pointer_to_raw_data = rdata_section.PointerToRawData + raw_data_offset = rdata_section.PointerToRawData - return image_base + virtual_address - pointer_to_raw_data + return image_base + virtual_address - raw_data_offset def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]: