Skip to content

Commit a871442

Browse files
jameseh96daverigby
authored andcommitted
MB-48816: Avoid unsafe use of cookie from background tasks
Previously, StatCheckpointTask and StatDCPTask immediately wrote responses when collecting stats while on a background thread. TSAN reported this as unsafe; no locks prevent potential racing with a frontend thread manipulating the cookie. Change both tasks to accumulate task values, but leave the frontend thread to actually write the responses when it resumes the ewouldblock'ed operation. TSAN Report: WARNING: ThreadSanitizer: data race (pid=24371) Read of size 8 at 0x7b54000a2df0 by thread T62: #0 Cookie::getHeader() const kv_engine/daemon/cookie.cc:201 (memcached+0x6508ac) #1 append_stats kv_engine/daemon/protocol/mcbp/stats_context.cc:95 (memcached+0x71fd6c) .... #19 void StatCollector::addStat<cb::stats::Key, unsigned long const&>(cb::stats::Key&&, unsigned long const&) const ../kv_engine/include/statistics/collector.h:336 (memcached+0x7e50e5) #20 EventuallyPersistentEngine::addAggregatedProducerStats(BucketStatCollector const&, ConnCounter const&) kv_engine/engines/ep/src/ep_engine.cc:4038 (memcached+0x7e50e5) #21 EventuallyPersistentEngine::doDcpStatsInner(CookieIface const*, std::function<void (std::basic_string_view<char, std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char> >, void const*)> const&, std::basic_string_view<char, std::char_traits<char> >) kv_engine/engines/ep/src/ep_engine.cc:4030 (memcached+0x81bd05) Previous write of size 8 at 0x7b54000a2df0 by thread T21 (mutexes: write M3843): #0 Cookie::setPacket(cb::mcbp::Header const&, bool) kv_engine/daemon/cookie.cc:186 (memcached+0x65080e) #1 Cookie::preserveRequest() kv_engine/daemon/cookie.h:225 (memcached+0x696aa7) #2 Connection::executeCommandPipeline() kv_engine/daemon/connection.cc:581 (memcached+0x696aa7) #3 Connection::executeCommandsCallback() kv_engine/daemon/connection.cc:793 (memcached+0x696be8) #4 Connection::rw_callback(bufferevent*, void*) kv_engine/daemon/connection.cc:942 (memcached+0x697851) #5 bufferevent_run_deferred_callbacks_unlocked /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/bufferevent.c:208 (libevent_core-2.1.so.7+0xf71d) #6 folly::EventBase::loopBody(int, bool) folly/io/async/EventBase.cpp:397 (memcached+0xfc9b52) #7 folly::EventBase::loop() folly/io/async/EventBase.cpp:315 (memcached+0xfcb06b) #8 folly::EventBase::loopForever() folly/io/async/EventBase.cpp:538 (memcached+0xfcb06b) #9 worker_libevent kv_engine/daemon/thread.cc:115 (memcached+0x6c16af) #10 CouchbaseThread::run() platform/src/cb_pthreads.cc:51 (memcached+0xf217d5) #11 platform_thread_wrap platform/src/cb_pthreads.cc:64 (memcached+0xf217d5) Change-Id: I3fbd8d51e174a7d19c5cb608a969795e445b8e86 Reviewed-on: http://review.couchbase.org/c/kv_engine/+/163709 Tested-by: Build Bot <build@couchbase.com> Reviewed-by: Dave Rigby <daver@couchbase.com>
1 parent 8ae3b4f commit a871442

File tree

5 files changed

+220
-45
lines changed

5 files changed

+220
-45
lines changed

engines/ep/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ set_source_files_properties(src/crc32.c
292292

293293
ADD_LIBRARY(ep_objs OBJECT
294294
src/access_scanner.cc
295+
src/background_stat_task.cc
295296
src/bgfetcher.cc
296297
src/blob.cc
297298
src/bloomfilter.cc
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2+
/*
3+
* Copyright 2021-Present Couchbase, Inc.
4+
*
5+
* Use of this software is governed by the Business Source License included
6+
* in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
7+
* in that file, in accordance with the Business Source License, use of this
8+
* software will be governed by the Apache License, Version 2.0, included in
9+
* the file licenses/APL2.txt.
10+
*/
11+
12+
#include "background_stat_task.h"
13+
14+
#include "bucket_logger.h"
15+
#include "ep_engine.h"
16+
#include <phosphor/phosphor.h>
17+
#include <string_view>
18+
19+
BackgroundStatTask::BackgroundStatTask(EventuallyPersistentEngine* e,
20+
const CookieIface* cookie,
21+
TaskId taskId)
22+
: GlobalTask(e, taskId, 0, false), e(e), cookie(cookie) {
23+
}
24+
25+
bool BackgroundStatTask::run() {
26+
TRACE_EVENT0("ep-engine/task", "BackgroundStatTask");
27+
try {
28+
status = collectStats();
29+
} catch (const std::exception& e) {
30+
EP_LOG_WARN(
31+
"BackgroundStatTask: callback threw exception: \"{}\" task "
32+
"desc:\"{}\"",
33+
e.what(),
34+
getDescription());
35+
}
36+
// _must_ notify success to ensure the frontend calls back into
37+
// getStats - the second call is required to avoid leaking data
38+
// allocated to store in the engine specific.
39+
// The real status is stored and shall be retrieved later.
40+
e->notifyIOComplete(cookie, cb::engine_errc::success);
41+
return false;
42+
}
43+
44+
cb::engine_errc BackgroundStatTask::maybeWriteResponse(
45+
const AddStatFn& addStat) const {
46+
if (status == cb::engine_errc::success) {
47+
for (const auto& [key, value] : stats) {
48+
addStat(key, value, cookie);
49+
}
50+
}
51+
return status;
52+
}
53+
54+
AddStatFn BackgroundStatTask::getDeferredAddStat() {
55+
return [this](std::string_view key, std::string_view value, const void*) {
56+
this->stats.emplace_back(key, value);
57+
};
58+
}

engines/ep/src/background_stat_task.h

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2+
/*
3+
* Copyright 2021-Present Couchbase, Inc.
4+
*
5+
* Use of this software is governed by the Business Source License included
6+
* in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
7+
* in that file, in accordance with the Business Source License, use of this
8+
* software will be governed by the Apache License, Version 2.0, included in
9+
* the file licenses/APL2.txt.
10+
*/
11+
#pragma once
12+
13+
#include <executor/globaltask.h>
14+
#include <memcached/cookie_iface.h>
15+
#include <memcached/engine_common.h>
16+
#include <memcached/engine_error.h>
17+
18+
#include <functional>
19+
#include <string>
20+
#include <utility>
21+
#include <vector>
22+
23+
// forward decl
24+
enum class TaskId : int;
25+
26+
/**
27+
* Base type for tasks which gather stats on a background task.
28+
*
29+
* For use when generating stats is likely to be expensive, to avoid
30+
* taking up frontend thread time.
31+
* Users should construct and schedule the task, then store it in the
32+
* cookie engine specific with
33+
* EventuallyPersistentEngine::storeStatTask(...)
34+
* Once the task has notified the frontend, the task should be retrieved with
35+
* EventuallyPersistentEngine::retrieveStatTask(...)
36+
*/
37+
class BackgroundStatTask : public GlobalTask {
38+
public:
39+
using Callback = std::function<cb::engine_errc(EventuallyPersistentEngine*,
40+
const AddStatFn&)>;
41+
BackgroundStatTask(EventuallyPersistentEngine* e,
42+
const CookieIface* cookie,
43+
TaskId taskId);
44+
45+
bool run() override;
46+
47+
/**
48+
* If the task was successful, write all gathered stats as responses.
49+
*
50+
* Should only be called from a frontend thread.
51+
* @param addStat frontend provided callback
52+
* @return errc reflecting status of the operation
53+
*/
54+
cb::engine_errc maybeWriteResponse(const AddStatFn& addStat) const;
55+
56+
protected:
57+
/**
58+
* Do potentially expensive work to collect stats in a background task.
59+
* @return status of operation
60+
*/
61+
virtual cb::engine_errc collectStats() = 0;
62+
63+
/**
64+
* Get a callback used to store stats which have been computed by the
65+
* background task.
66+
*
67+
* Stats cannot be immediately written as responses from the background
68+
* task as doing so could be racy. Instead, store them for when the
69+
* frontend thread revisits this operation.
70+
*/
71+
AddStatFn getDeferredAddStat();
72+
73+
EventuallyPersistentEngine* e;
74+
const CookieIface* cookie;
75+
76+
// stats which have been collected while running in a background task
77+
std::vector<std::pair<std::string, std::string>> stats;
78+
cb::engine_errc status = cb::engine_errc::success;
79+
};

engines/ep/src/ep_engine.cc

Lines changed: 52 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,6 +1848,31 @@ void* EventuallyPersistentEngine::getEngineSpecific(const CookieIface* cookie) {
18481848
return cookie->getEngineStorage();
18491849
}
18501850

1851+
void EventuallyPersistentEngine::storeStatTask(
1852+
const CookieIface* cookie, std::shared_ptr<BackgroundStatTask> task) {
1853+
// store a ptr to a shared_ptr to the task (must ensure the task is not
1854+
// destroyed before the connection retrieves the result).
1855+
// Once the frontend thread is notified, it must call retrieveStatTask,
1856+
// which will free the heap-allocated shared_ptr.
1857+
auto wrapper = std::make_unique<std::shared_ptr<BackgroundStatTask>>(task);
1858+
storeEngineSpecific(cookie, wrapper.release());
1859+
}
1860+
1861+
std::shared_ptr<BackgroundStatTask>
1862+
EventuallyPersistentEngine::retrieveStatTask(const CookieIface* cookie) {
1863+
void* data = getEngineSpecific(cookie);
1864+
if (data) {
1865+
// take back ownership of the task ptr
1866+
auto ptr = std::unique_ptr<std::shared_ptr<BackgroundStatTask>>(
1867+
reinterpret_cast<std::shared_ptr<BackgroundStatTask>*>(data));
1868+
// clear the engine specific - it's not valid to retrieve the
1869+
// task twice (would lead to a double free)
1870+
storeEngineSpecific(cookie, nullptr);
1871+
return *ptr;
1872+
}
1873+
return nullptr;
1874+
}
1875+
18511876
bool EventuallyPersistentEngine::isDatatypeSupported(
18521877
const CookieIface* cookie, protocol_binary_datatype_t datatype) {
18531878
return cookie->isDatatypeSupported(datatype);
@@ -3756,23 +3781,20 @@ class StatCheckpointVisitor : public VBucketVisitor {
37563781
AddStatFn add_stat;
37573782
};
37583783

3759-
3760-
class StatCheckpointTask : public GlobalTask {
3784+
class StatCheckpointTask : public BackgroundStatTask {
37613785
public:
37623786
StatCheckpointTask(EventuallyPersistentEngine* e,
37633787
const CookieIface* c,
37643788
AddStatFn a)
3765-
: GlobalTask(e, TaskId::StatCheckpointTask, 0, false),
3766-
ep(e),
3767-
cookie(c),
3768-
add_stat(std::move(a)) {
3789+
: BackgroundStatTask(e, c, TaskId::StatCheckpointTask) {
37693790
}
3770-
bool run() override {
3791+
3792+
cb::engine_errc collectStats() override {
37713793
TRACE_EVENT0("ep-engine/task", "StatsCheckpointTask");
3772-
StatCheckpointVisitor scv(ep->getKVBucket(), cookie, add_stat);
3773-
ep->getKVBucket()->visit(scv);
3774-
ep->notifyIOComplete(cookie, cb::engine_errc::success);
3775-
return false;
3794+
StatCheckpointVisitor scv(
3795+
e->getKVBucket(), cookie, getDeferredAddStat());
3796+
e->getKVBucket()->visit(scv);
3797+
return cb::engine_errc::success;
37763798
}
37773799

37783800
std::string getDescription() const override {
@@ -3785,11 +3807,6 @@ class StatCheckpointTask : public GlobalTask {
37853807
// take /too/ long, so set limit of 100ms.
37863808
return std::chrono::milliseconds(100);
37873809
}
3788-
3789-
private:
3790-
EventuallyPersistentEngine *ep;
3791-
const CookieIface* cookie;
3792-
AddStatFn add_stat;
37933810
};
37943811
/// @endcond
37953812

@@ -3799,15 +3816,14 @@ cb::engine_errc EventuallyPersistentEngine::doCheckpointStats(
37993816
const char* stat_key,
38003817
int nkey) {
38013818
if (nkey == 10) {
3802-
void* es = getEngineSpecific(cookie);
3803-
if (es == nullptr) {
3804-
ExTask task = std::make_shared<StatCheckpointTask>(
3805-
this, cookie, add_stat);
3819+
auto task = retrieveStatTask(cookie);
3820+
if (!task) {
3821+
task = std::make_shared<StatCheckpointTask>(this, cookie, add_stat);
38063822
ExecutorPool::get()->schedule(task);
3807-
storeEngineSpecific(cookie, this);
3823+
storeStatTask(cookie, task);
38083824
return cb::engine_errc::would_block;
38093825
} else {
3810-
storeEngineSpecific(cookie, nullptr);
3826+
return task->maybeWriteResponse(add_stat);
38113827
}
38123828
} else if (nkey > 11) {
38133829
std::string vbid(&stat_key[11], nkey - 11);
@@ -4034,21 +4050,19 @@ static void showConnAggStat(const std::string& connType,
40344050
}
40354051
}
40364052

4037-
class StatDCPTask : public GlobalTask {
4053+
class StatDCPTask : public BackgroundStatTask {
40384054
public:
40394055
using Callback = std::function<cb::engine_errc(
40404056
EventuallyPersistentEngine* e, const CookieIface* cookie)>;
40414057
StatDCPTask(EventuallyPersistentEngine* e,
40424058
const CookieIface* cookie,
40434059
std::string_view description,
40444060
Callback callback)
4045-
: GlobalTask(e, TaskId::StatDCPTask, 0, false),
4046-
e(e),
4047-
cookie(cookie),
4061+
: BackgroundStatTask(e, cookie, TaskId::StatDCPTask),
40484062
description(description),
40494063
callback(std::move(callback)) {
40504064
}
4051-
bool run() override {
4065+
cb::engine_errc collectStats() override {
40524066
TRACE_EVENT0("ep-engine/task", "StatDCPTask");
40534067
cb::engine_errc result = cb::engine_errc::failed;
40544068
try {
@@ -4060,8 +4074,7 @@ class StatDCPTask : public GlobalTask {
40604074
e.what(),
40614075
getDescription());
40624076
}
4063-
e->notifyIOComplete(cookie, result);
4064-
return false;
4077+
return result;
40654078
}
40664079

40674080
std::string getDescription() const override {
@@ -4075,8 +4088,6 @@ class StatDCPTask : public GlobalTask {
40754088
}
40764089

40774090
private:
4078-
EventuallyPersistentEngine* e;
4079-
const CookieIface* cookie;
40804091
const std::string description;
40814092
Callback callback;
40824093
};
@@ -4085,9 +4096,9 @@ cb::engine_errc EventuallyPersistentEngine::doConnAggStats(
40854096
const CookieIface* cookie,
40864097
const AddStatFn& add_stat,
40874098
std::string_view sep) {
4088-
void* engineSpecific = getEngineSpecific(cookie);
4089-
if (engineSpecific == nullptr) {
4090-
ExTask task = std::make_shared<StatDCPTask>(
4099+
auto task = retrieveStatTask(cookie);
4100+
if (!task) {
4101+
task = std::make_shared<StatDCPTask>(
40914102
this,
40924103
cookie,
40934104
"Aggregated DCP stats",
@@ -4100,13 +4111,11 @@ cb::engine_errc EventuallyPersistentEngine::doConnAggStats(
41004111
return cb::engine_errc::success;
41014112
});
41024113
ExecutorPool::get()->schedule(task);
4103-
storeEngineSpecific(cookie, this);
4114+
storeStatTask(cookie, task);
41044115
return cb::engine_errc::would_block;
41054116
} else {
4106-
storeEngineSpecific(cookie, nullptr);
4117+
return task->maybeWriteResponse(add_stat);
41074118
}
4108-
4109-
return cb::engine_errc::success;
41104119
}
41114120

41124121
cb::engine_errc EventuallyPersistentEngine::doConnAggStatsInner(
@@ -4137,9 +4146,9 @@ cb::engine_errc EventuallyPersistentEngine::doDcpStats(
41374146
const CookieIface* cookie,
41384147
const AddStatFn& add_stat,
41394148
std::string_view value) {
4140-
void* engineSpecific = getEngineSpecific(cookie);
4141-
if (engineSpecific == nullptr) {
4142-
ExTask task = std::make_shared<StatDCPTask>(
4149+
auto task = retrieveStatTask(cookie);
4150+
if (!task) {
4151+
task = std::make_shared<StatDCPTask>(
41434152
this,
41444153
cookie,
41454154
"Summarised bucket-wide DCP stats",
@@ -4150,13 +4159,11 @@ cb::engine_errc EventuallyPersistentEngine::doDcpStats(
41504159
return cb::engine_errc::success;
41514160
});
41524161
ExecutorPool::get()->schedule(task);
4153-
storeEngineSpecific(cookie, this);
4162+
storeStatTask(cookie, task);
41544163
return cb::engine_errc::would_block;
41554164
} else {
4156-
storeEngineSpecific(cookie, nullptr);
4165+
return task->maybeWriteResponse(add_stat);
41574166
}
4158-
4159-
return cb::engine_errc::success;
41604167
}
41614168

41624169
void EventuallyPersistentEngine::doDcpStatsInner(const CookieIface* cookie,

engines/ep/src/ep_engine.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#pragma once
1313

14+
#include "background_stat_task.h"
1415
#include "configuration.h"
1516
#include "ep_engine_public.h"
1617
#include "error_handler.h"
@@ -538,6 +539,35 @@ class EventuallyPersistentEngine : public EngineIface, public DcpIface {
538539

539540
void* getEngineSpecific(const CookieIface* cookie);
540541

542+
/**
543+
* Store a task pointer as the engine specific data.
544+
*
545+
* Allocates a wrapper holding a shared_ptr; storing the raw task ptr
546+
* would be insufficient - the task needs to be owned to ensure it has not
547+
* been destroyed when the frontend thread needs to retrieve it's result.
548+
*
549+
* Caller is responsible for scheduling the task, and later calling
550+
* retrieveStatTask to destroy the wrapper and retrieve the task.
551+
*
552+
* @param cookie cookie from frontend
553+
* @param task shared_ptr to background task
554+
*/
555+
void storeStatTask(const CookieIface* cookie,
556+
std::shared_ptr<BackgroundStatTask> task);
557+
558+
/**
559+
* Get and clear the engine specific data, then attempt to
560+
* extract a BackgroundStatTask.
561+
*
562+
* Frees the wrapper allocated by storeStatTask, if one was found in the
563+
* engine specific data.
564+
*
565+
* @param cookie cookie from frontend
566+
* @return (possibly null) shared_ptr to background task
567+
*/
568+
std::shared_ptr<BackgroundStatTask> retrieveStatTask(
569+
const CookieIface* cookie);
570+
541571
bool isDatatypeSupported(const CookieIface* cookie,
542572
protocol_binary_datatype_t datatype);
543573

0 commit comments

Comments
 (0)