Skip to content

Commit d9364ce

Browse files
Implement #624: Use shorter hashes with CPM_SOURCE_CACHE (#631)
* Add ASSERT_CONTENTS_EQUAL test macro in testing.cmake Checks if the contents of a file matches the given input * Use shorter hashes with CPM_SOURCE_CACHE (#624) Uses shorter hashes with CPM_SOURCE_CACHE. Falls back to a longer hash if necessary (ie, if there's a collision with an existing hash). See: #624 * Update integration tests to support shorter hashes * trigger ci * run cmake-format * if already available, use the legacy cache hash * create temporary file in current binary dir * add test case for legacy hash --------- Co-authored-by: Lars Melchior <lars.melchior@gmail.com> Co-authored-by: Lars Melchior <TheLartians@users.noreply.github.com>
1 parent d761438 commit d9364ce

File tree

4 files changed

+182
-1
lines changed

4 files changed

+182
-1
lines changed

cmake/CPM.cmake

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,60 @@ function(cpm_package_name_from_git_uri URI RESULT)
202202
endif()
203203
endfunction()
204204

205+
# Find the shortest prefix of `origin_hash` that can be used as a cache directory name inside
# `source_cache_dir`.
#
# Prefixes of 4, 8, 12, ... up to 40 characters are tried in that order. For every candidate prefix
# the file `<source_cache_dir>/<prefix>.hash` records which full hash owns it:
#
# * the file is empty (or was just created): the prefix is free, `origin_hash` is written into the
#   file and the prefix is used
# * the file contains exactly `origin_hash`: the prefix was claimed for this hash earlier and is
#   reused
# * the file contains anything else: the prefix is owned by a different hash, so the next longer
#   prefix is tried
#
# eg, if origin_hash is cccb77ae9609d2768ed80dd42cec54f77b1f1455 the following files are checked in
# turn:
#
# * .../cccb.hash
# * .../cccb77ae.hash
# * .../cccb77ae9609.hash
# * .../cccb77ae9609d276.hash
# * etc
#
# We will be able to use a short prefix with very high probability, but in the (rare) event that
# the first couple of characters collide, longer and longer prefixes are checked.
#
# Arguments:
#
# * source_cache_dir: directory that holds the `<prefix>.hash` and `<prefix>.lock` files
# * origin_hash: the full hash to shorten
# * short_hash_output_var: name of the variable, in the caller's scope, that receives the result
function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var)
  # for compatibility with caches populated by a previous version of CPM, check if a directory using
  # the full hash already exists
  if(EXISTS "${source_cache_dir}/${origin_hash}")
    set(${short_hash_output_var}
        "${origin_hash}"
        PARENT_SCOPE
    )
    return()
  endif()

  foreach(len RANGE 4 40 4)
    string(SUBSTRING "${origin_hash}" 0 ${len} short_hash)
    # Paths are quoted everywhere below so that a cache directory containing `;` is not split into
    # several arguments.
    set(hash_lock "${source_cache_dir}/${short_hash}.lock")
    set(hash_fp "${source_cache_dir}/${short_hash}.hash")

    # Take a lock, so we don't have a race condition with another instance of cmake. The lock is
    # released explicitly below as soon as this prefix has been decided; GUARD FUNCTION additionally
    # ensures it gets released on its own on exit from the function if an error occurs first.
    file(LOCK "${hash_lock}" GUARD FUNCTION)

    # Load the contents of .../${short_hash}.hash (TOUCH creates it empty when it is missing)
    file(TOUCH "${hash_fp}")
    file(READ "${hash_fp}" hash_fp_contents)

    set(prefix_is_usable FALSE)
    if(hash_fp_contents STREQUAL "")
      # Unclaimed prefix: record the origin hash as its owner
      file(WRITE "${hash_fp}" "${origin_hash}")
      set(prefix_is_usable TRUE)
    elseif(hash_fp_contents STREQUAL origin_hash)
      # Already claimed by this very hash
      set(prefix_is_usable TRUE)
    endif()

    file(LOCK "${hash_lock}" RELEASE)

    if(prefix_is_usable)
      break()
    endif()
  endforeach()

  set(${short_hash_output_var}
      "${short_hash}"
      PARENT_SCOPE
  )
endfunction()
258+
205259
# Try to infer package name and version from a url
206260
function(cpm_package_name_and_ver_from_url url outName outVer)
207261
if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)")
@@ -806,9 +860,19 @@ function(CPMAddPackage)
806860
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
807861
elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
808862
string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
863+
cpm_get_shortest_hash(
864+
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
865+
"${origin_hash}" # Input hash
866+
origin_hash # Computed hash
867+
)
809868
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
810869
else()
811870
string(SHA1 origin_hash "${origin_parameters}")
871+
cpm_get_shortest_hash(
872+
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
873+
"${origin_hash}" # Input hash
874+
origin_hash # Computed hash
875+
)
812876
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
813877
endif()
814878
# Expand `download_directory` relative path. This is important because EXISTS doesn't work for

cmake/testing.cmake

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,16 @@ function(ASSERT_NOT_EXISTS file)
7979
message(FATAL_ERROR "assertion failed: file ${file} exists")
8080
endif()
8181
endfunction()
82+
83+
# Assert that `file` exists and that its complete contents are exactly `content`.
#
# Arguments:
#
# * file: path of the file to inspect
# * content: the exact text the file is expected to contain
#
# Prints a STATUS message when the check passes; raises a FATAL_ERROR when the file is missing or
# its contents differ.
function(ASSERT_CONTENTS_EQUAL file content)
  # The path is quoted so that a path containing `;` is passed as a single argument
  if(NOT EXISTS "${file}")
    message(FATAL_ERROR "assertion failed: file '${file}' does not exist")
  endif()

  file(READ "${file}" file_content)
  if(content STREQUAL file_content)
    message(STATUS "test passed: '${file}' exists and contains '${content}'")
  else()
    message(FATAL_ERROR "assertion failed: file '${file}' does not contain expected content.")
  endif()
endfunction()

test/integration/test_source_cache.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,17 @@ def check_package_cache(name, ver, dir_sha1)
7575
assert_equal ver, package.ver
7676
expected_parent_dir = File.join(@cache_dir, name.downcase)
7777
assert package.src_dir.start_with?(expected_parent_dir), "#{package.src_dir} must be in #{expected_parent_dir}"
78-
assert_equal dir_sha1, File.basename(package.src_dir)
78+
79+
# The hash has been shortened by cpm_get_shortest_hash. The following
80+
# should hold:
81+
# - The short hash should be a prefix of the input hash
82+
# - There should be a file ".../${short_hash}.hash" which matches the full hash
83+
short_hash = File.basename(package.src_dir)
84+
assert dir_sha1.start_with?(short_hash), "short_hash should be a prefix of dir_sha1"
85+
86+
# Check that the full hash is stored in the .hash file
87+
hash_file = "#{package.src_dir}.hash"
88+
assert File.exist?(hash_file), "Hash file #{hash_file} should exist"
89+
assert_equal dir_sha1, File.read(hash_file), "Hash file should contain the full original hash"
7990
end
8091
end

test/unit/get_shortest_hash.cmake

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Unit test for cpm_get_shortest_hash: exercises prefix selection, collision handling, the legacy
# full-hash fallback and idempotence inside a throwaway directory.

include("${CPM_PATH}/CPM.cmake")
include("${CPM_PATH}/testing.cmake")

# Random suffix
string(
  RANDOM
  LENGTH 6
  ALPHABET "0123456789abcdef" tmpdir_suffix
)

# Seconds since epoch
string(TIMESTAMP tmpdir_base "%s" UTC)

set(tmp "${CMAKE_CURRENT_BINARY_DIR}/get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}")

if(IS_DIRECTORY "${tmp}")
  message(FATAL_ERROR "Test directory ${tmp} already exists")
endif()

file(MAKE_DIRECTORY "${tmp}")

# Resolve `full_hash` inside `cache_dir`, then check that the returned short hash is
# `expected_short` and that `<cache_dir>/<expected_short>.hash` contains `full_hash`. The result is
# quoted so that an empty value is still compared as a value instead of changing the argument count.
function(expect_short_hash cache_dir full_hash expected_short)
  cpm_get_shortest_hash("${cache_dir}" "${full_hash}" actual_short)
  assert_equal("${actual_short}" "${expected_short}")
  assert_contents_equal("${cache_dir}/${expected_short}.hash" "${full_hash}")
endfunction()

# 1. Sanity check: none of these .hash files should exist yet

assert_not_exists("${tmp}/cccb.hash")
assert_not_exists("${tmp}/cccb77ae.hash")
assert_not_exists("${tmp}/cccb77ae9609.hash")
assert_not_exists("${tmp}/cccb77ae9608.hash")
assert_not_exists("${tmp}/cccb77be.hash")

# 1. The directory is empty, so the first hash should get a 4-character prefix
expect_short_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1455" "cccb")

# 1. Calling the function with a new hash that differs subtly should result in more characters
#    being used, enough to uniquely identify the hash

expect_short_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1456" "cccb77ae")
expect_short_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1457" "cccb77ae9609")
expect_short_hash("${tmp}" "cccb77ae9608d2768ed80dd42cec54f77b1f1455" "cccb77ae9608")
expect_short_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1456" "cccb77be")

# 1. Check that legacy hashes are recognized: a directory named after the full hash already exists,
#    so the full hash is returned unchanged
file(MAKE_DIRECTORY "${tmp}/cccb77be9609d2768ed80dd42cec54f77b1f1457")
cpm_get_shortest_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1457" hash)
assert_equal("${hash}" "cccb77be9609d2768ed80dd42cec54f77b1f1457")

# 1. The previously written .hash files should still exist, and have the same content
assert_contents_equal("${tmp}/cccb.hash" "cccb77ae9609d2768ed80dd42cec54f77b1f1455")
assert_contents_equal("${tmp}/cccb77ae.hash" "cccb77ae9609d2768ed80dd42cec54f77b1f1456")
assert_contents_equal("${tmp}/cccb77ae9609.hash" "cccb77ae9609d2768ed80dd42cec54f77b1f1457")
assert_contents_equal("${tmp}/cccb77ae9608.hash" "cccb77ae9608d2768ed80dd42cec54f77b1f1455")
assert_contents_equal("${tmp}/cccb77be.hash" "cccb77be9609d2768ed80dd42cec54f77b1f1456")

# 1. Confirm idempotence: repeating any of these calls should produce the same short hash as before
#    (hash lookups work correctly once the .hash files are created)

expect_short_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1455" "cccb")
expect_short_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1456" "cccb77ae")
expect_short_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1457" "cccb77ae9609")
expect_short_hash("${tmp}" "cccb77ae9608d2768ed80dd42cec54f77b1f1455" "cccb77ae9608")
expect_short_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1456" "cccb77be")

# 1. Cleanup - remove the temporary directory that we created

file(REMOVE_RECURSE "${tmp}")

0 commit comments

Comments
 (0)