diff --git a/plugins/warp/src/cache/guid.rs b/plugins/warp/src/cache/guid.rs index f1788a33e6..df2155546a 100644 --- a/plugins/warp/src/cache/guid.rs +++ b/plugins/warp/src/cache/guid.rs @@ -4,28 +4,35 @@ use crate::function_guid; use binaryninja::binary_view::BinaryViewExt; use binaryninja::function::Function as BNFunction; use binaryninja::low_level_il::function::{FunctionMutability, LowLevelILFunction, NonSSA}; +use binaryninja::rc::Ref as BNRef; use binaryninja::symbol::Symbol as BNSymbol; use std::collections::HashSet; use uuid::Uuid; use warp::signature::constraint::Constraint; use warp::signature::function::FunctionGUID; +/// Try to get the cached function GUID from the metadata. +/// +/// If not cached, we will use `lifted_il_accessor` to retrieve the lifted IL and create the GUID. +/// +/// `lifted_il_accessor` exists as it is to allow the retrieval of a cached GUID without incurring +/// the cost of building the IL (if it no longer exists). pub fn cached_function_guid( function: &BNFunction, - lifted_il: &LowLevelILFunction, -) -> FunctionGUID { + lifted_il_accessor: impl Fn() -> Option>>, +) -> Option { let cached_guid = try_cached_function_guid(function); if let Some(cached_guid) = cached_guid { - return cached_guid; + return Some(cached_guid); } - let function_guid = function_guid(function, lifted_il); + let function_guid = function_guid(function, lifted_il_accessor()?.as_ref()); function.store_metadata( "warp_function_guid", &function_guid.as_bytes().to_vec(), false, ); - function_guid + Some(function_guid) } pub fn try_cached_function_guid(function: &BNFunction) -> Option { diff --git a/plugins/warp/src/lib.rs b/plugins/warp/src/lib.rs index be6f8bc398..50ad388958 100644 --- a/plugins/warp/src/lib.rs +++ b/plugins/warp/src/lib.rs @@ -75,44 +75,51 @@ pub fn user_signature_dir() -> PathBuf { pub fn build_variables(func: &BNFunction) -> Vec { let func_start = func.start(); - let mut variables = vec![]; - if let Ok(mlil) = func.medium_level_il() { - let bn_vars = func.variables(); - for bn_var in &bn_vars { - if mlil.is_var_user_defined(&bn_var.variable) { - // TODO: live_instruction_for_variable only works for register types. - if let Some(first_instr) = mlil - .live_instruction_for_variable(&bn_var.variable, true) - .iter() - .sorted_by_key(|i| i.instr_index) - .next() - { - if let Some(var_loc) = bn_var_to_location(bn_var.variable) { - let var_name = bn_var.name; - let var_type = - from_bn_type(&func.view(), &bn_var.ty.contents, bn_var.ty.confidence); - variables.push(FunctionVariable { - offset: (first_instr.address as i64) - (func_start as i64), - location: var_loc, - name: Some(var_name), - ty: Some(var_type), - }) - } - } - } - } - } - variables + // It is important that we only retrieve the medium-level IL if the function has + // any user-defined variables, otherwise, we will possibly be generating MLIL for no reason. + // For the above reason, we do a filter on user-defined variables first. + func.variables() + .iter() + .filter(|var| func.is_variable_user_defined(&var.variable)) + .filter_map(|var| { + // Get the first instruction that uses the variable, this is the "placement" location we store. + // TODO: live_instruction_for_variable only works for register types. + let first_instr = func + .medium_level_il() + .ok()? + .live_instruction_for_variable(&var.variable, true) + .iter() + .sorted_by_key(|i| i.instr_index) + .next()?; + Some((var, first_instr)) + }) + .filter_map(|(var, instr)| { + // Build the WARP function variable using the placement location, and the variable itself. + let var_loc = bn_var_to_location(var.variable)?; + let var_type = from_bn_type(&func.view(), &var.ty.contents, var.ty.confidence); + Some(FunctionVariable { + offset: (instr.address as i64) - (func_start as i64), + location: var_loc, + name: Some(var.name), + ty: Some(var_type), + }) + }) + .collect() } // TODO: Get rid of the minimal bool. +/// Build the WARP [`Function`] from the Binary Ninja [`BNFunction`]. +/// +/// The `lifted_il_accessor` is passed in such that a function with a guid already cached will not +/// require us to regenerate the IL. This is important in the event of someone generating signatures +/// off of an existing BNDB or when the IL is no longer present. pub fn build_function( func: &BNFunction, - lifted_il: &LowLevelILFunction, + lifted_il_accessor: impl Fn() -> Option>>, minimal: bool, -) -> Function { +) -> Option { let mut function = Function { - guid: cached_function_guid(func, lifted_il), + guid: cached_function_guid(func, lifted_il_accessor)?, symbol: from_bn_symbol(&func.symbol()), // NOTE: Adding adjacent only works if analysis is complete. // NOTE: We do not filter out adjacent functions here. @@ -123,7 +130,7 @@ pub fn build_function( }; if minimal { - return function; + return Some(function); } // Currently we only store the type if its a user type. @@ -142,7 +149,7 @@ pub fn build_function( .map(|c| bn_comment_to_comment(func, c)) .collect(); function.variables = build_variables(func); - function + Some(function) } /// Basic blocks sorted from high to low. diff --git a/plugins/warp/src/matcher.rs b/plugins/warp/src/matcher.rs index c6ab7f9f06..52a5b04ffb 100644 --- a/plugins/warp/src/matcher.rs +++ b/plugins/warp/src/matcher.rs @@ -47,6 +47,18 @@ impl Matcher { return None; } + // The number of possible functions is too high, skip. + // This can happen if the function is extremely common, in cases like that we are already unlikely to match. + // It is unfortunate that we have to do this, but it is the best we can do. In the future we + // may find a way to chunk up the possible functions and only match on a subset of them. + if self + .settings + .maximum_possible_functions + .is_some_and(|max| max < matched_functions.len() as u64) + { + return None; + } + // If we have a single possible match than that must be our function. // We must also not be a trivial function, as those will likely be artifacts of an incomplete dataset if matched_functions.len() == 1 && !is_function_trivial { @@ -296,6 +308,10 @@ pub struct MatcherSettings { /// /// This is set to [MatcherSettings::TRIVIAL_FUNCTION_ADJACENT_ALLOWED_DEFAULT] by default. pub trivial_function_adjacent_allowed: bool, + /// The maximum number of WARP functions that can be used to match a Binary Ninja function. + /// + /// This is set to [MatcherSettings::MAXIMUM_POSSIBLE_FUNCTIONS_DEFAULT] by default. + pub maximum_possible_functions: Option, } impl MatcherSettings { @@ -311,6 +327,9 @@ impl MatcherSettings { pub const TRIVIAL_FUNCTION_ADJACENT_ALLOWED_DEFAULT: bool = false; pub const TRIVIAL_FUNCTION_ADJACENT_ALLOWED_SETTING: &'static str = "analysis.warp.trivialFunctionAdjacentAllowed"; + pub const MAXIMUM_POSSIBLE_FUNCTIONS_SETTING: &'static str = + "analysis.warp.maximumPossibleFunctions"; + pub const MAXIMUM_POSSIBLE_FUNCTIONS_DEFAULT: u64 = 1000; /// Populates the [MatcherSettings] to the current Binary Ninja settings instance. /// @@ -378,6 +397,18 @@ impl MatcherSettings { Self::TRIVIAL_FUNCTION_ADJACENT_ALLOWED_SETTING, &trivial_function_adjacent_allowed_props.to_string(), ); + + let maximum_possible_functions_props = json!({ + "title" : "Maximum Possible Functions", + "type" : "number", + "default" : Self::MAXIMUM_POSSIBLE_FUNCTIONS_DEFAULT, + "description" : "When matching any function that has a list of possible functions greater than this number will be skipped. A value of 0 will disable this check.", + "ignore" : [] + }); + bn_settings.register_setting_json( + Self::MAXIMUM_POSSIBLE_FUNCTIONS_SETTING, + &maximum_possible_functions_props.to_string(), + ); } /// Retrieve matcher settings from [`BNSettings`]. @@ -407,6 +438,14 @@ impl MatcherSettings { settings.trivial_function_adjacent_allowed = bn_settings .get_bool_with_opts(Self::TRIVIAL_FUNCTION_ADJACENT_ALLOWED_SETTING, query_opts); } + if bn_settings.contains(Self::MAXIMUM_POSSIBLE_FUNCTIONS_SETTING) { + match bn_settings + .get_integer_with_opts(Self::MAXIMUM_POSSIBLE_FUNCTIONS_SETTING, query_opts) + { + 0 => settings.maximum_possible_functions = None, + len => settings.maximum_possible_functions = Some(len), + } + } settings } } @@ -420,6 +459,7 @@ impl Default for MatcherSettings { minimum_matched_constraints: MatcherSettings::MINIMUM_MATCHED_CONSTRAINTS_DEFAULT, trivial_function_adjacent_allowed: MatcherSettings::TRIVIAL_FUNCTION_ADJACENT_ALLOWED_DEFAULT, + maximum_possible_functions: Some(MatcherSettings::MAXIMUM_POSSIBLE_FUNCTIONS_DEFAULT), } } } diff --git a/plugins/warp/src/plugin/create.rs b/plugins/warp/src/plugin/create.rs index 0410e74a0e..c9bd32ce4b 100644 --- a/plugins/warp/src/plugin/create.rs +++ b/plugins/warp/src/plugin/create.rs @@ -164,8 +164,8 @@ impl CreateFromCurrentView { if let Err(err) = file { binaryninja::interaction::show_message_box( - "Error", - &format!("Failed to create signature file: {}", err), + "Failed to create signature file", + &err.to_string(), MessageBoxButtonSet::OKButtonSet, MessageBoxIcon::ErrorIcon, ); @@ -173,16 +173,30 @@ impl CreateFromCurrentView { return None; } + let background_task = BackgroundTask::new("Creating WARP File...", false); let mut file = file.unwrap(); // Add back the existing chunks if the user selected to keep them. - file.chunks.extend(existing_chunks); - // TODO: Make merging optional? - file.chunks = Chunk::merge(&file.chunks, compression_type.into()); + if !existing_chunks.is_empty() { + file.chunks.extend(existing_chunks); + // TODO: Make merging optional? + // TODO: Merging can lose chunk data if it goes above the maximum table count. + // TODO: We should probably solve that in the warp crate itself? + file.chunks = Chunk::merge(&file.chunks, compression_type.into()); - if std::fs::write(&file_path, file.to_bytes()).is_err() { + // After merging, we should have at least one chunk. If not, merging actually removed data. + if file.chunks.len() < 1 { + log::error!("Failed to merge chunks! Please report this, it should not happen."); + return None; + } + } + + let file_bytes = file.to_bytes(); + let file_size = file_bytes.len(); + if std::fs::write(&file_path, file_bytes).is_err() { log::error!("Failed to write data to signature file!"); } log::info!("Saved signature file to: '{}'", file_path.display()); + background_task.finish(); // Show a report of the generate signatures, if desired. let report_generator = ReportGenerator::new(); @@ -195,16 +209,25 @@ impl CreateFromCurrentView { let _ = std::fs::write(report_path, &report_string); } - match report_kind { - ReportKindField::None => {} - ReportKindField::Html => { - view.show_html_report("Generated WARP File", report_string.as_str(), ""); - } - ReportKindField::Markdown => { - view.show_markdown_report("Generated WARP File", report_string.as_str(), ""); - } - ReportKindField::Json => { - view.show_plaintext_report("Generated WARP File", report_string.as_str()); + // The ReportWidget uses a QTextBrowser, which cannot render large files very well. + if file_size > 10000000 { + log::warn!("WARP report file is too large to show in the UI. Please see the report file on disk."); + } else { + match report_kind { + ReportKindField::None => {} + ReportKindField::Html => { + view.show_html_report("Generated WARP File", report_string.as_str(), ""); + } + ReportKindField::Markdown => { + view.show_markdown_report( + "Generated WARP File", + report_string.as_str(), + "", + ); + } + ReportKindField::Json => { + view.show_plaintext_report("Generated WARP File", report_string.as_str()); + } } } } diff --git a/plugins/warp/src/plugin/debug.rs b/plugins/warp/src/plugin/debug.rs index 24cdf1c3d2..399dba50bf 100644 --- a/plugins/warp/src/plugin/debug.rs +++ b/plugins/warp/src/plugin/debug.rs @@ -9,9 +9,10 @@ pub struct DebugFunction; impl FunctionCommand for DebugFunction { fn action(&self, _view: &BinaryView, func: &Function) { - if let Ok(lifted_il) = func.lifted_il() { - log::info!("{:#?}", build_function(func, &lifted_il, false)); - } + log::info!( + "{:#?}", + build_function(func, || func.lifted_il().ok(), false) + ); } fn valid(&self, _view: &BinaryView, _func: &Function) -> bool { diff --git a/plugins/warp/src/plugin/ffi.rs b/plugins/warp/src/plugin/ffi.rs index d78ca3c581..c3a8051cb0 100644 --- a/plugins/warp/src/plugin/ffi.rs +++ b/plugins/warp/src/plugin/ffi.rs @@ -124,12 +124,12 @@ pub unsafe extern "C" fn BNWARPGetAnalysisFunctionGUID( result: *mut BNWARPFunctionGUID, ) -> bool { let function = unsafe { Function::from_raw(analysis_function) }; - match function.lifted_il() { - Ok(lifted_il) => { - *result = cached_function_guid(&function, &lifted_il); + match cached_function_guid(&function, || function.lifted_il().ok()) { + Some(guid) => { + *result = guid; true } - Err(_) => false, + None => false, } } diff --git a/plugins/warp/src/plugin/ffi/function.rs b/plugins/warp/src/plugin/ffi/function.rs index 7563bd969f..b37d613bb1 100644 --- a/plugins/warp/src/plugin/ffi/function.rs +++ b/plugins/warp/src/plugin/ffi/function.rs @@ -37,11 +37,10 @@ pub unsafe extern "C" fn BNWARPGetFunction( analysis_function: *mut BNFunction, ) -> *mut BNWARPFunction { let function = Function::from_raw(analysis_function); - let Ok(lifted_il) = function.lifted_il() else { - return std::ptr::null_mut(); - }; - let function = build_function(&function, &lifted_il, false); - Arc::into_raw(Arc::new(function)) as *mut BNWARPFunction + match build_function(&function, || function.lifted_il().ok(), false) { + Some(function) => Arc::into_raw(Arc::new(function)) as *mut BNWARPFunction, + None => std::ptr::null_mut(), + } } #[no_mangle] diff --git a/plugins/warp/src/plugin/function.rs b/plugins/warp/src/plugin/function.rs index 130e8582d9..a4fd2c101a 100644 --- a/plugins/warp/src/plugin/function.rs +++ b/plugins/warp/src/plugin/function.rs @@ -47,11 +47,10 @@ pub struct CopyFunctionGUID; impl FunctionCommand for CopyFunctionGUID { fn action(&self, _view: &BinaryView, func: &Function) { - let Ok(lifted_il) = func.lifted_il() else { - log::error!("Could not get lifted il for copied function"); + let Some(guid) = cached_function_guid(func, || func.lifted_il().ok()) else { + log::error!("Could not get guid for copied function"); return; }; - let guid = cached_function_guid(func, &lifted_il); log::info!( "Function GUID for {:?}... {}", func.symbol().short_name(), diff --git a/plugins/warp/src/plugin/workflow.rs b/plugins/warp/src/plugin/workflow.rs index 735706cf8b..47f4b2a76f 100644 --- a/plugins/warp/src/plugin/workflow.rs +++ b/plugins/warp/src/plugin/workflow.rs @@ -13,6 +13,8 @@ use binaryninja::command::Command; use binaryninja::settings::{QueryOptions, Settings}; use binaryninja::workflow::{activity, Activity, AnalysisContext, Workflow, WorkflowBuilder}; use itertools::Itertools; +use rayon::iter::IntoParallelIterator; +use rayon::iter::ParallelIterator; use std::collections::HashMap; use std::time::Instant; use warp::r#type::class::function::{Location, RegisterLocation, StackLocation}; @@ -112,31 +114,47 @@ pub fn run_matcher(view: &BinaryView) { .sources_with_function_guids(target, guids) .unwrap_or_default(); - for (guid, sources) in &function_guid_with_sources { - let matched_functions: Vec = sources - .iter() - .flat_map(|source| { - container - .functions_with_guid(target, source, guid) - .unwrap_or_default() - }) - .collect(); + function_guid_with_sources + .into_par_iter() + .for_each(|(guid, sources)| { + let matched_functions: Vec = sources + .iter() + .flat_map(|source| { + container + .functions_with_guid(target, source, &guid) + .unwrap_or_default() + }) + .collect(); - let functions = functions_by_target_and_guid - .get(&(*guid, target.clone())) - .expect("Function guid not found"); - - for function in functions { - // Match on all the possible functions - if let Some(matched_function) = - matcher.match_function_from_constraints(function, &matched_functions) + // NOTE: See the comment in `match_function_from_constraints` about this fast fail. + if matcher + .settings + .maximum_possible_functions + .is_some_and(|max| max < matched_functions.len() as u64) { - // We were able to find a match, add it to the match cache and then mark the function - // as requiring updates; this is so that we know about it in the applier activity. - insert_cached_function_match(function, Some(matched_function.clone())); + log::warn!( + "Skipping {}, too many possible functions: {}", + guid, + matched_functions.len() + ); + return; } - } - } + + let functions = functions_by_target_and_guid + .get(&(guid, target.clone())) + .expect("Function guid not found"); + + for function in functions { + // Match on all the possible functions + if let Some(matched_function) = + matcher.match_function_from_constraints(function, &matched_functions) + { + // We were able to find a match, add it to the match cache and then mark the function + // as requiring updates; this is so that we know about it in the applier activity. + insert_cached_function_match(function, Some(matched_function.clone())); + } + } + }); } }); @@ -220,9 +238,7 @@ pub fn insert_workflow() -> Result<(), ()> { let guid_activity = |ctx: &AnalysisContext| { let function = ctx.function(); - if let Some(lifted_il) = unsafe { ctx.lifted_il_function() } { - cached_function_guid(&function, &lifted_il); - } + cached_function_guid(&function, || unsafe { ctx.lifted_il_function() }); }; let guid_config = activity::Config::action( diff --git a/plugins/warp/src/processor.rs b/plugins/warp/src/processor.rs index 75e8569e76..86ac5d0292 100644 --- a/plugins/warp/src/processor.rs +++ b/plugins/warp/src/processor.rs @@ -11,6 +11,7 @@ use ar::Archive; use dashmap::DashMap; use rayon::iter::IntoParallelIterator; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use rayon::prelude::ParallelSlice; use regex::Regex; use serde_json::{json, Value}; use tempdir::TempDir; @@ -36,6 +37,11 @@ use crate::cache::cached_type_references; use crate::convert::platform_to_target; use crate::{build_function, INCLUDE_TAG_ICON, INCLUDE_TAG_NAME}; +/// Ensure we never exceed these many functions per signature chunk. +/// +/// This was added to fix running into the max table limit on certain files. +const MAX_FUNCTIONS_PER_CHUNK: usize = 1_000_000; + #[derive(Error, Debug)] pub enum ProcessingError { #[error("Failed to open archive: {0}")] @@ -618,6 +624,15 @@ impl WarpFileProcessor { } } + // In the future we may want to do something with a view that has no functions, but for now + // we do not care to create any chunks. By skipping this we can avoid merging of empty chunks, + // which is quick, but it still requires some allocations that we can avoid. + if view.functions().is_empty() { + self.state + .set_file_state(path.clone(), ProcessingFileState::Processed); + return Err(ProcessingError::SkippedFile(path)); + } + // Process the view let warp_file = self.process_view(path, &view); // Close the view manually, see comment in [`BinaryView`]. @@ -655,6 +670,10 @@ impl WarpFileProcessor { }) .filter_map(|res| match res { Ok(result) => Some(Ok(result)), + Err(ProcessingError::SkippedFile(path)) => { + log::debug!("Skipping directory file: {:?}", path); + None + } Err(ProcessingError::Cancelled) => Some(Err(ProcessingError::Cancelled)), Err(e) => { log::error!("Directory file processing error: {:?}", e); @@ -739,21 +758,23 @@ impl WarpFileProcessor { let mut chunks = Vec::new(); if self.file_data != FileDataKindField::Types { let mut signature_chunks = self.create_signature_chunks(view)?; - for (target, signature_chunk) in signature_chunks.drain() { - if signature_chunk.raw_functions().count() != 0 { - let chunk = Chunk::new_with_target( - ChunkKind::Signature(signature_chunk), - self.compression_type.into(), - target, - ); - chunks.push(chunk) + for (target, mut target_chunks) in signature_chunks.drain() { + for signature_chunk in target_chunks.drain(..) { + if signature_chunk.raw_functions().next().is_some() { + let chunk = Chunk::new_with_target( + ChunkKind::Signature(signature_chunk), + self.compression_type.into(), + target.clone(), + ); + chunks.push(chunk) + } } } } if self.file_data != FileDataKindField::Signatures { let type_chunk = self.create_type_chunk(view)?; - if type_chunk.raw_types().count() != 0 { + if type_chunk.raw_types().next().is_some() { chunks.push(Chunk::new( ChunkKind::Type(type_chunk), self.compression_type.into(), @@ -773,7 +794,7 @@ impl WarpFileProcessor { pub fn create_signature_chunks( &self, view: &BinaryView, - ) -> Result>, ProcessingError> { + ) -> Result>>, ProcessingError> { let is_function_named = |f: &Guard| { self.included_functions == IncludedFunctionsField::All || view.symbol_by_address(f.start()).is_some() @@ -814,13 +835,12 @@ impl WarpFileProcessor { .filter(is_function_named) .filter(|f| !f.analysis_skipped()) .filter_map(|func| { - let lifted_il = func.lifted_il().ok()?; let target = platform_to_target(&func.platform()); let built_function = build_function( &func, - &lifted_il, + || func.lifted_il().ok(), self.file_data == FileDataKindField::Symbols, - ); + )?; Some((target, built_function)) }) .fold( @@ -837,15 +857,19 @@ impl WarpFileProcessor { acc }); - let chunks: Result>, ProcessingError> = + // Split into multiple chunks if a target has more than MAX_FUNCTIONS_PER_CHUNK functions. + // We do this because otherwise some chunks may have too many flatbuffer tables for the verifier to handle. + let chunks: Result>>, ProcessingError> = built_functions - .into_iter() + .into_par_iter() .map(|(target, functions)| { - Ok(( - target, - SignatureChunk::new(&functions) - .ok_or(ProcessingError::ChunkCreationFailed)?, - )) + let chunks: Result, _> = functions + .par_chunks(MAX_FUNCTIONS_PER_CHUNK) + .map(|f| { + SignatureChunk::new(&f).ok_or(ProcessingError::ChunkCreationFailed) + }) + .collect(); + Ok((target, chunks?)) }) .collect();