From 25948db93ce2fd1f2d664cce336d230a83e0a124 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Thu, 9 Mar 2023 13:22:47 +0400 Subject: [PATCH 1/2] shredder: fix --hash-unmatched criterion The intention behind this criterion seems to handle partially-hashed (i. e. child) groups that do not fulfill requirements to continue hashing them (i. e. only contain a single file after sifting). Normally such groups are left alone (because we have hashed enough of the file to understand it is unique), however by definition of --hash-unmatched we have to hash these files to the end. The problem is that when rm_shred_group_update_status() is called, group->held_files does not contain the currently pushed file. Thus, because this condition is only executed for single-file groups (group->n_clusters == 1), group->held_files will never be non-empty. Anyway, even if it was non-empty, head->digest will always be NULL because held files have digests freed in rm_shred_group_push_file(). Therefore, this condition will never be fulfilled. The only reason why --hash-unmatched even works at all is due to the broken hardlinked files condition that matches all single-inode groups. (Another consequence is that --hash-unmatched is broken if --merge-directories is not used.) --- lib/shredder.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/shredder.c b/lib/shredder.c index e773fedd..83a33553 100644 --- a/lib/shredder.c +++ b/lib/shredder.c @@ -860,12 +860,9 @@ static void rm_shred_group_update_status(RmShredGroup *group) { /* special case of hardlinked files that still need hashing to help identify * matching directories */ group->status = RM_SHRED_GROUP_START_HASHING; - } else if(group->session->cfg->hash_unmatched && group->held_files->length > 0) { - RmFile *head = group->held_files->head->data; - if(head->digest) { - // with hash_unmatched, keep going once we start - group->status = RM_SHRED_GROUP_START_HASHING; - } + } else if(group->session->cfg->hash_unmatched && group->digest) { + // with hash_unmatched, keep going once we start + group->status = RM_SHRED_GROUP_START_HASHING; } } From 62fb5cf0f65d1d5320e4a927a950e9ea4ed83b69 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Wed, 8 Mar 2023 14:51:04 +0400 Subject: [PATCH 2/2] shredder: only hash single-inode groups that are actually hardlinked `group->n_inodes == 1` is also true for groups that simply consist of a single file. This condition will cause all single-file groups to be hashed if `--merge-directories` is also set. Additionally, the whole `group->n_inodes == 1` condition is redundant because not following on the branch means that `group->n_clusters == 1` and therefore `group->n_inodes == 1`. Thus, check that we are actually dealing with a group of hardlinks (and not just a single-file group that did not cause an early return because we are also doing `--hash-unmatched`). Fixes #614. --- lib/shredder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/shredder.c b/lib/shredder.c index 83a33553..11db288f 100644 --- a/lib/shredder.c +++ b/lib/shredder.c @@ -855,7 +855,7 @@ static void rm_shred_group_update_status(RmShredGroup *group) { } else if(group->n_clusters > 1) { // we have potential match candidates; start hashing group->status = RM_SHRED_GROUP_START_HASHING; - } else if(group->n_inodes == 1 && group->n_unhashed_clusters > 0 && + } else if(group->num_files > 1 && group->n_unhashed_clusters > 0 && group->session->cfg->merge_directories) { /* special case of hardlinked files that still need hashing to help identify * matching directories */