@@ -11,6 +11,7 @@ use ar::Archive;
1111use dashmap:: DashMap ;
1212use rayon:: iter:: IntoParallelIterator ;
1313use rayon:: iter:: { IntoParallelRefIterator , ParallelIterator } ;
14+ use rayon:: prelude:: ParallelSlice ;
1415use regex:: Regex ;
1516use serde_json:: { json, Value } ;
1617use tempdir:: TempDir ;
@@ -36,6 +37,11 @@ use crate::cache::cached_type_references;
3637use crate :: convert:: platform_to_target;
3738use crate :: { build_function, INCLUDE_TAG_ICON , INCLUDE_TAG_NAME } ;
3839
40+ /// Ensure we never exceed these many functions per signature chunk.
41+ ///
42+ /// This was added to fix running into the max table limit on certain files.
43+ const MAX_FUNCTIONS_PER_CHUNK : usize = 1_000_000 ;
44+
3945#[ derive( Error , Debug ) ]
4046pub enum ProcessingError {
4147 #[ error( "Failed to open archive: {0}" ) ]
@@ -618,6 +624,15 @@ impl WarpFileProcessor {
618624 }
619625 }
620626
627+ // In the future we may want to do something with a view that has no functions, but for now
628+ // we do not care to create any chunks. By skipping this we can avoid merging of empty chunks,
629+ // which is quick, but it still requires some allocations that we can avoid.
630+ if view. functions ( ) . is_empty ( ) {
631+ self . state
632+ . set_file_state ( path. clone ( ) , ProcessingFileState :: Processed ) ;
633+ return Err ( ProcessingError :: SkippedFile ( path) ) ;
634+ }
635+
621636 // Process the view
622637 let warp_file = self . process_view ( path, & view) ;
623638 // Close the view manually, see comment in [`BinaryView`].
@@ -655,6 +670,10 @@ impl WarpFileProcessor {
655670 } )
656671 . filter_map ( |res| match res {
657672 Ok ( result) => Some ( Ok ( result) ) ,
673+ Err ( ProcessingError :: SkippedFile ( path) ) => {
674+ log:: debug!( "Skipping directory file: {:?}" , path) ;
675+ None
676+ }
658677 Err ( ProcessingError :: Cancelled ) => Some ( Err ( ProcessingError :: Cancelled ) ) ,
659678 Err ( e) => {
660679 log:: error!( "Directory file processing error: {:?}" , e) ;
@@ -739,21 +758,23 @@ impl WarpFileProcessor {
739758 let mut chunks = Vec :: new ( ) ;
740759 if self . file_data != FileDataKindField :: Types {
741760 let mut signature_chunks = self . create_signature_chunks ( view) ?;
742- for ( target, signature_chunk) in signature_chunks. drain ( ) {
743- if signature_chunk. raw_functions ( ) . count ( ) != 0 {
744- let chunk = Chunk :: new_with_target (
745- ChunkKind :: Signature ( signature_chunk) ,
746- self . compression_type . into ( ) ,
747- target,
748- ) ;
749- chunks. push ( chunk)
761+ for ( target, mut target_chunks) in signature_chunks. drain ( ) {
762+ for signature_chunk in target_chunks. drain ( ..) {
763+ if signature_chunk. raw_functions ( ) . next ( ) . is_some ( ) {
764+ let chunk = Chunk :: new_with_target (
765+ ChunkKind :: Signature ( signature_chunk) ,
766+ self . compression_type . into ( ) ,
767+ target. clone ( ) ,
768+ ) ;
769+ chunks. push ( chunk)
770+ }
750771 }
751772 }
752773 }
753774
754775 if self . file_data != FileDataKindField :: Signatures {
755776 let type_chunk = self . create_type_chunk ( view) ?;
756- if type_chunk. raw_types ( ) . count ( ) != 0 {
777+ if type_chunk. raw_types ( ) . next ( ) . is_some ( ) {
757778 chunks. push ( Chunk :: new (
758779 ChunkKind :: Type ( type_chunk) ,
759780 self . compression_type . into ( ) ,
@@ -773,7 +794,7 @@ impl WarpFileProcessor {
773794 pub fn create_signature_chunks (
774795 & self ,
775796 view : & BinaryView ,
776- ) -> Result < HashMap < Target , SignatureChunk < ' static > > , ProcessingError > {
797+ ) -> Result < HashMap < Target , Vec < SignatureChunk < ' static > > > , ProcessingError > {
777798 let is_function_named = |f : & Guard < BNFunction > | {
778799 self . included_functions == IncludedFunctionsField :: All
779800 || view. symbol_by_address ( f. start ( ) ) . is_some ( )
@@ -836,15 +857,19 @@ impl WarpFileProcessor {
836857 acc
837858 } ) ;
838859
839- let chunks: Result < HashMap < Target , SignatureChunk < ' static > > , ProcessingError > =
860+ // Split into multiple chunks if a target has more than MAX_FUNCTIONS_PER_CHUNK functions.
861+ // We do this because otherwise some chunks may have too many flatbuffer tables for the verifier to handle.
862+ let chunks: Result < HashMap < Target , Vec < SignatureChunk < ' static > > > , ProcessingError > =
840863 built_functions
841- . into_iter ( )
864+ . into_par_iter ( )
842865 . map ( |( target, functions) | {
843- Ok ( (
844- target,
845- SignatureChunk :: new ( & functions)
846- . ok_or ( ProcessingError :: ChunkCreationFailed ) ?,
847- ) )
866+ let chunks: Result < Vec < _ > , _ > = functions
867+ . par_chunks ( MAX_FUNCTIONS_PER_CHUNK )
868+ . map ( |f| {
869+ SignatureChunk :: new ( & f) . ok_or ( ProcessingError :: ChunkCreationFailed )
870+ } )
871+ . collect ( ) ;
872+ Ok ( ( target, chunks?) )
848873 } )
849874 . collect ( ) ;
850875
0 commit comments