Skip to content

Commit a5af87e

Browse files
kostasrimromange
authored andcommitted
fix: data race in save_stages_controller (#2647)
* fix: data race in save_stages_controller
1 parent 7f93a8f commit a5af87e

File tree

4 files changed

+39
-6
lines changed

4 files changed

+39
-6
lines changed

src/server/detail/save_stages_controller.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ SaveStagesController::SaveStagesController(SaveStagesInputs&& inputs)
149149
SaveStagesController::~SaveStagesController() {
150150
}
151151

152-
SaveInfo SaveStagesController::Save() {
152+
std::optional<SaveInfo> SaveStagesController::InitResourcesAndStart() {
153153
if (auto err = BuildFullPath(); err) {
154154
shared_err_ = err;
155155
return GetSaveInfo();
@@ -162,8 +162,14 @@ SaveInfo SaveStagesController::Save() {
162162
else
163163
SaveRdb();
164164

165+
return {};
166+
}
167+
168+
void SaveStagesController::WaitAllSnapshots() {
165169
RunStage(&SaveStagesController::SaveCb);
170+
}
166171

172+
SaveInfo SaveStagesController::Finalize() {
167173
RunStage(&SaveStagesController::CloseCb);
168174

169175
FinalizeFileMovement();

src/server/detail/save_stages_controller.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,21 @@ class RdbSnapshot {
7171

7272
struct SaveStagesController : public SaveStagesInputs {
7373
SaveStagesController(SaveStagesInputs&& input);
74+
// Objects of this class are used concurrently. Call this function
75+
// in a mutually exlusive context to avoid data races.
76+
// Also call this function before any call to `WaitAllSnapshots`
77+
// Returns empty optional on success and SaveInfo on failure
78+
std::optional<SaveInfo> InitResourcesAndStart();
7479

7580
~SaveStagesController();
7681

77-
SaveInfo Save();
82+
// Safe to call and no locks required
83+
void WaitAllSnapshots();
84+
85+
// Same semantics as InitResourcesAndStart. Must be used in a mutually exclusive
86+
// context. Call this function after you `WaitAllSnapshots`to finalize the chore.
87+
// Performs cleanup of the object internally.
88+
SaveInfo Finalize();
7889
size_t GetSaveBuffersSize();
7990
uint32_t GetCurrentSaveDuration();
8091

src/server/server_family.cc

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,19 +1310,29 @@ GenericError ServerFamily::DoSave(bool new_version, string_view basename, Transa
13101310
return GenericError{make_error_code(errc::operation_in_progress),
13111311
"SAVING - can not save database"};
13121312
}
1313+
13131314
save_controller_ = make_unique<SaveStagesController>(detail::SaveStagesInputs{
13141315
new_version, basename, trans, &service_, fq_threadpool_.get(), snapshot_storage_});
1316+
1317+
auto res = save_controller_->InitResourcesAndStart();
1318+
1319+
if (res) {
1320+
DCHECK_EQ(res->error, true);
1321+
last_save_info_.SetLastSaveError(*res);
1322+
save_controller_.reset();
1323+
return res->error;
1324+
}
13151325
}
13161326

1317-
detail::SaveInfo save_info = save_controller_->Save();
1327+
save_controller_->WaitAllSnapshots();
1328+
detail::SaveInfo save_info;
13181329

13191330
{
13201331
std::lock_guard lk(save_mu_);
1332+
save_info = save_controller_->Finalize();
13211333

13221334
if (save_info.error) {
1323-
last_save_info_.last_error = save_info.error;
1324-
last_save_info_.last_error_time = save_info.save_time;
1325-
last_save_info_.failed_duration_sec = save_info.duration_sec;
1335+
last_save_info_.SetLastSaveError(save_info);
13261336
} else {
13271337
last_save_info_.save_time = save_info.save_time;
13281338
last_save_info_.success_duration_sec = save_info.duration_sec;

src/server/server_family.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ struct Metrics {
104104

105105
struct LastSaveInfo {
106106
// last success save info
107+
void SetLastSaveError(const detail::SaveInfo& save_info) {
108+
last_error = save_info.error;
109+
last_error_time = save_info.save_time;
110+
failed_duration_sec = save_info.duration_sec;
111+
}
112+
107113
time_t save_time = 0; // epoch time in seconds.
108114
uint32_t success_duration_sec = 0;
109115
std::string file_name; //

0 commit comments

Comments
 (0)