Skip to content

Commit bb0abfe

Browse files
committed
feat(libstore): AWS credential provider caching
1 parent 9a5210f commit bb0abfe

File tree

4 files changed

+156
-12
lines changed

4 files changed

+156
-12
lines changed

src/libstore-tests/filetransfer-s3.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,4 +147,4 @@ TEST_F(S3FileTransferTest, s3RegionQueryParameters)
147147

148148
} // namespace nix
149149

150-
#endif // NIX_WITH_S3_SUPPORT
150+
#endif // NIX_WITH_S3_SUPPORT

src/libstore/filetransfer.cc

Lines changed: 120 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "nix/util/finally.hh"
77
#include "nix/util/callback.hh"
88
#include "nix/util/signals.hh"
9+
#include "nix/util/sync.hh"
910
#include "nix/store/store-reference.hh"
1011

1112
#include "store-config-private.hh"
@@ -48,6 +49,61 @@ struct curlFileTransfer : public FileTransfer
4849
std::random_device rd;
4950
std::mt19937 mt19937;
5051

52+
#if NIX_WITH_S3_SUPPORT
53+
public:
54+
// Public method to clear credential provider cache (called from AWS CRT shutdown)
55+
void clearCredentialCache();
56+
57+
private:
58+
struct CredentialProviderCache
59+
{
60+
// Key: profile name (empty string for default profile)
61+
std::map<std::string, std::shared_ptr<AwsCredentialProvider>> providers;
62+
};
63+
64+
Sync<CredentialProviderCache> credentialProviderCache;
65+
66+
/**
67+
* Get or create a credential provider for the given profile.
68+
* Thread-safe: holds lock for entire duration to prevent duplicate creation.
69+
*/
70+
std::shared_ptr<AwsCredentialProvider> getOrCreateCredentialProvider(const std::string & profile)
71+
{
72+
auto cache(credentialProviderCache.lock());
73+
74+
// Check if provider exists
75+
auto it = cache->providers.find(profile);
76+
if (it != cache->providers.end()) {
77+
return it->second;
78+
}
79+
80+
try {
81+
debug(
82+
"creating new AWS credential provider for profile '%s'",
83+
profile.empty() ? "(default)" : profile.c_str());
84+
auto provider = profile.empty() ? AwsCredentialProvider::createDefault()
85+
: AwsCredentialProvider::createProfile(profile);
86+
87+
auto sharedProvider = std::shared_ptr<AwsCredentialProvider>(std::move(provider));
88+
cache->providers[profile] = sharedProvider;
89+
return sharedProvider;
90+
91+
} catch (const AwsAuthError & e) {
92+
// Don't cache failed providers, just propagate the error
93+
throw;
94+
}
95+
}
96+
97+
/**
98+
* Invalidate a cached credential provider (e.g., on auth failure)
99+
*/
100+
void invalidateCredentialProvider(const std::string & profile)
101+
{
102+
auto cache(credentialProviderCache.lock());
103+
cache->providers.erase(profile);
104+
}
105+
#endif
106+
51107
struct TransferItem : public std::enable_shared_from_this<TransferItem>
52108
{
53109
curlFileTransfer & fileTransfer;
@@ -157,9 +213,9 @@ struct curlFileTransfer : public FileTransfer
157213
// Get credentials
158214
try {
159215
std::string profile = parsed.profile.value_or("");
160-
auto credProvider = profile.empty() ? AwsCredentialProvider::createDefault()
161-
: AwsCredentialProvider::createProfile(profile);
162216

217+
// Use cached credential provider
218+
auto credProvider = fileTransfer.getOrCreateCredentialProvider(profile);
163219
auto creds = credProvider->getCredentials();
164220
awsCredentials = creds.accessKeyId + ":" + creds.secretAccessKey;
165221

@@ -174,6 +230,11 @@ struct curlFileTransfer : public FileTransfer
174230
}
175231
} catch (const AwsAuthError & e) {
176232
warn("AWS authentication failed for S3 request %s: %s", request.uri, e.what());
233+
234+
// Invalidate the cached provider so next request will retry
235+
std::string profile = parsed.profile.value_or("");
236+
fileTransfer.invalidateCredentialProvider(profile);
237+
177238
// Continue without authentication - might be a public bucket
178239
}
179240
} catch (std::exception & e) {
@@ -677,6 +738,12 @@ struct curlFileTransfer : public FileTransfer
677738

678739
std::thread workerThread;
679740

741+
public:
742+
bool hasQuit()
743+
{
744+
return state_.lock()->quit;
745+
}
746+
680747
curlFileTransfer()
681748
: mt19937(rd())
682749
{
@@ -703,9 +770,18 @@ struct curlFileTransfer : public FileTransfer
703770
~curlFileTransfer()
704771
{
705772
stopWorkerThread();
706-
707773
workerThread.join();
708774

775+
#if NIX_WITH_S3_SUPPORT
776+
{
777+
auto cache(credentialProviderCache.lock());
778+
// IMPORTANT: We must clear the providers here to ensure they are destroyed
779+
// BEFORE the AWS CRT ApiHandle is destroyed during static destruction.
780+
// This prevents a deadlock where AWS CRT waits for resources we're holding.
781+
cache->providers.clear();
782+
}
783+
#endif
784+
709785
if (curlm)
710786
curl_multi_cleanup(curlm);
711787
}
@@ -930,12 +1006,25 @@ ref<curlFileTransfer> makeCurlFileTransfer()
9301006

9311007
ref<FileTransfer> getFileTransfer()
9321008
{
933-
static ref<curlFileTransfer> fileTransfer = makeCurlFileTransfer();
1009+
struct StaticFileTransfer
1010+
{
1011+
ref<curlFileTransfer> fileTransfer;
1012+
1013+
StaticFileTransfer()
1014+
: fileTransfer(makeCurlFileTransfer())
1015+
{
1016+
}
9341017

935-
if (fileTransfer->state_.lock()->quit)
936-
fileTransfer = makeCurlFileTransfer();
1018+
~StaticFileTransfer() {}
1019+
};
1020+
1021+
static StaticFileTransfer staticFT;
1022+
1023+
if (staticFT.fileTransfer->hasQuit()) {
1024+
staticFT.fileTransfer = makeCurlFileTransfer();
1025+
}
9371026

938-
return fileTransfer;
1027+
return staticFT.fileTransfer;
9391028
}
9401029

9411030
ref<FileTransfer> makeFileTransfer()
@@ -1091,4 +1180,28 @@ FileTransferError::FileTransferError(
10911180
err.msg = hf;
10921181
}
10931182

1183+
#if NIX_WITH_S3_SUPPORT
1184+
void curlFileTransfer::clearCredentialCache()
1185+
{
1186+
auto cache(credentialProviderCache.lock());
1187+
cache->providers.clear();
1188+
}
1189+
1190+
// Function called by AWS CRT shutdown to clear credential provider cache
1191+
void cleanupCredentialProviderCache()
1192+
{
1193+
// Access the curlFileTransfer singleton and clear its cache
1194+
try {
1195+
auto ft = getFileTransfer();
1196+
if (auto curlFt = ft.dynamic_pointer_cast<curlFileTransfer>()) {
1197+
curlFt->clearCredentialCache();
1198+
}
1199+
} catch (const std::exception & e) {
1200+
warn("Error clearing credential cache during AWS CRT shutdown: %s", e.what());
1201+
} catch (...) {
1202+
warn("Unknown error clearing credential cache during AWS CRT shutdown");
1203+
}
1204+
}
1205+
#endif
1206+
10941207
} // namespace nix

src/libstore/s3.cc

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# include <condition_variable>
1717
# include <mutex>
1818
# include <string_view>
19+
# include <pthread.h>
1920

2021
using namespace std::string_view_literals;
2122

@@ -25,13 +26,31 @@ namespace nix {
2526
static std::once_flag crtInitFlag;
2627
static bool crtInitialized = false;
2728

29+
// Forward declaration for cleanup function
30+
void cleanupCredentialProviderCache();
31+
2832
static void initAwsCrt()
2933
{
3034
std::call_once(crtInitFlag, []() {
3135
try {
3236
// Use a static local variable instead of global to control destruction order
33-
static Aws::Crt::ApiHandle apiHandle;
34-
apiHandle.InitializeLogging(Aws::Crt::LogLevel::Warn, (FILE *) nullptr);
37+
struct CrtWrapper
38+
{
39+
Aws::Crt::ApiHandle apiHandle;
40+
CrtWrapper()
41+
{
42+
apiHandle.InitializeLogging(Aws::Crt::LogLevel::Warn, (FILE *) nullptr);
43+
}
44+
~CrtWrapper()
45+
{
46+
// CRITICAL: Clear credential provider cache BEFORE AWS CRT shuts down
47+
// This ensures all providers (which hold references to ClientBootstrap)
48+
// are destroyed while AWS CRT is still valid
49+
cleanupCredentialProviderCache();
50+
// Now it's safe for ApiHandle destructor to run
51+
}
52+
};
53+
static CrtWrapper crt;
3554
crtInitialized = true;
3655
} catch (const std::exception & e) {
3756
debug("Failed to initialize AWS CRT: %s", e.what());
@@ -218,6 +237,9 @@ try {
218237
throw;
219238
}
220239

240+
// Function to clean up credential provider cache (defined in filetransfer.cc)
241+
void cleanupCredentialProviderCache();
242+
221243
} // namespace nix
222244

223245
#endif // NIX_WITH_S3_SUPPORT

tests/nixos/s3-binary-cache-store.nix

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ in
6161

6262
testScript =
6363
{ nodes }:
64+
# python
6465
''
6566
# fmt: off
6667
start_all()
@@ -73,7 +74,11 @@ in
7374
server.succeed("mc config host add minio http://localhost:9000 ${accessKey} ${secretKey} --api s3v4")
7475
server.succeed("mc mb minio/my-cache")
7576
76-
server.succeed("${env} nix copy --to '${storeUrl}' ${pkgA}")
77+
# Test copying from a store and credential caching works
78+
server_cp_out = server.succeed("${env} nix copy --debug --to '${storeUrl}' ${pkgA}")
79+
server_providers_created = server_cp_out.count("creating new AWS credential provider")
80+
if server_providers_created > 1:
81+
raise Exception(f"Expected only 1 credential provider to be created, but got {server_providers_created}. Credential provider caching is probably not working.")
7782
7883
client.wait_for_unit("network-addresses-eth1.service")
7984
@@ -105,7 +110,11 @@ in
105110
assert store_info.get("url"), "Store should have a URL"
106111
print(f"Store URL: {store_info.get('url')}")
107112
108-
client.succeed("${env} nix copy --no-check-sigs --from '${storeUrl}' ${pkgA}")
113+
# Test copying from a store and credential caching works
114+
client_cp_out = client.succeed("${env} nix copy --debug --no-check-sigs --from '${storeUrl}' ${pkgA} 2>&1")
115+
client_providers_created = client_cp_out.count("creating new AWS credential provider")
116+
if client_providers_created > 1:
117+
raise Exception(f"Expected only 1 credential provider to be created, but got {client_providers_created}. Credential provider caching is probably not working.")
109118
110119
client.succeed("nix path-info ${pkgA}")
111120
'';

0 commit comments

Comments
 (0)