@@ -82,6 +82,7 @@ def __init__(self, download_dir, base_url, cache_dir=None, status_q=None,
8282 def run_analysis (self , manifest : Manifest , old_manifest : Manifest = None ,
8383 patch = True , resume = True , file_prefix_filter = None ,
8484 file_exclude_filter = None , file_install_tag = None ,
85+ read_files = False ,
8586 processing_optimization = False ) -> AnalysisResult :
8687 """
8788 Run analysis on manifest and old manifest (if not None) and return a result
@@ -94,6 +95,7 @@ def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
9495 :param file_prefix_filter: Only download files that start with this prefix
9596 :param file_exclude_filter: Exclude files with this prefix from download
9697 :param file_install_tag: Only install files with the specified tag
98+ :param read_files: Allow reading from already finished files
9799 :param processing_optimization: Attempt to optimize processing order and RAM usage
98100 :return: AnalysisResult
99101 """
@@ -320,25 +322,27 @@ def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
320322
321323 # determine whether a chunk part is currently in written files
322324 reusable_written = defaultdict (dict )
323- cur_written_cps = defaultdict (list )
324- for cur_file in fmlist :
325- cur_file_cps = dict ()
326- cur_file_offset = 0
327- for cp in cur_file .chunk_parts :
328- key = (cp .guid_num , cp .offset , cp .size )
329- for wr_file_name , wr_file_offset , wr_cp_offset , wr_cp_end_offset in cur_written_cps [cp .guid_num ]:
330- # check if new chunk part is wholly contained in a written chunk part
331- cur_cp_end_offset = cp .offset + cp .size
332- if wr_cp_offset <= cp .offset and wr_cp_end_offset >= cur_cp_end_offset :
333- references [cp .guid_num ] -= 1
334- reuse_offset = wr_file_offset + (cp .offset - wr_cp_offset )
335- reusable_written [cur_file .filename ][key ] = (wr_file_name , reuse_offset )
336- break
337- cur_file_cps [cp .guid_num ] = (cur_file .filename , cur_file_offset , cp .offset , cp .offset + cp .size )
338- cur_file_offset += cp .size
325+ if read_files :
326+ self .log .debug ('Analyzing manifest for re-usable chunks in saved files...' )
327+ cur_written_cps = defaultdict (list )
328+ for cur_file in fmlist :
329+ cur_file_cps = dict ()
330+ cur_file_offset = 0
331+ for cp in cur_file .chunk_parts :
332+ key = (cp .guid_num , cp .offset , cp .size )
333+ for wr_file_name , wr_file_offset , wr_cp_offset , wr_cp_end_offset in cur_written_cps [cp .guid_num ]:
334+ # check if new chunk part is wholly contained in a written chunk part
335+ cur_cp_end_offset = cp .offset + cp .size
336+ if wr_cp_offset <= cp .offset and wr_cp_end_offset >= cur_cp_end_offset :
337+ references [cp .guid_num ] -= 1
338+ reuse_offset = wr_file_offset + (cp .offset - wr_cp_offset )
339+ reusable_written [cur_file .filename ][key ] = (wr_file_name , reuse_offset )
340+ break
341+ cur_file_cps [cp .guid_num ] = (cur_file .filename , cur_file_offset , cp .offset , cp .offset + cp .size )
342+ cur_file_offset += cp .size
339343
340- for guid , value in cur_file_cps .items ():
341- cur_written_cps [guid ].append (value )
344+ for guid , value in cur_file_cps .items ():
345+ cur_written_cps [guid ].append (value )
342346
343347 last_cache_size = current_cache_size = 0
344348 # set to determine whether a file is currently cached or not
0 commit comments