Skip to content

Commit fb7e114

Browse files
committed
MB-41510: Fix TSAN failure due to lock order inversion
Fix a TSAN failure due to a lock order inversion observed on the mad-hatter TSAN cv machine. This was due to a potential deadlock while acquiring locks on this and other histogram in HdrHistogram::operator+=(). To fix this use folly::acquireLocked() to safely take write lock and read lock on this->histogram and other.histogram respectively. We also remove the nullptr for this->histogram as we don't guard any other access to the histograms pointer, in case cb_calloc() fails. TSAN failure: WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock) (pid=11208) Cycle in lock order graph: M296533874179548048 (0x000000000000) => M174936684240544688 (0x000000000000) => M296533874179548048 Mutex M174936684240544688 acquired here while holding mutex M296533874179548048 in main thread: #0 AnnotateRWLockAcquired <null> (libtsan.so.0+0x00000005b63d) #1 folly::detail::annotate_rwlock_acquired_impl(void const volatile*, folly::annotate_rwlock_level, char const*, int) /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/follytsan-prefix/src/follytsan/folly/synchronization/SanitizeThread.cpp:99 (ep-engine_ep_unit_tests+0x00000139555e) #2 annotate_rwlock_acquired tlm/deps/folly.exploded/include/folly/synchronization/SanitizeThread.h:111 (ep-engine_ep_unit_tests+0x000001362a06) #3 folly::SharedMutexImpl<false, void, std::atomic, false, true>::annotateAcquired(folly::annotate_rwlock_level) tlm/deps/folly.exploded/include/folly/SharedMutex.h:726 (ep-engine_ep_unit_tests+0x000001362a06) #4 folly::SharedMutexImpl<false, void, std::atomic, false, true>::lock_shared() tlm/deps/folly.exploded/include/folly/SharedMutex.h:400 (ep-engine_ep_unit_tests+0x000001362a06) #5 folly::detail::LockTraitsImpl<folly::SharedMutexImpl<false, void, std::atomic, false, true>, (folly::detail::MutexLevel)1, false>::lock_shared(folly::SharedMutexImpl<false, void, std::atomic, false, true>&) tlm/deps/folly.exploded/include/folly/LockTraits.h:157 (ep-engine_ep_unit_tests+0x000001362a06) #6 std::integral_constant<bool, true> folly::LockPolicyShared::lock<folly::SharedMutexImpl<false, void, std::atomic, false, true> >(folly::SharedMutexImpl<false, void, std::atomic, false, true>&) tlm/deps/folly.exploded/include/folly/LockTraits.h:499 (ep-engine_ep_unit_tests+0x000001362a06) #7 folly::LockedPtrBase<folly::Synchronized<std::unique_ptr<hdr_histogram, HdrHistogram::HdrDeleter>, folly::SharedMutexImpl<false, void, std::atomic, false, true> > const, folly::SharedMutexImpl<false, void, std::atomic, false, true>, folly::LockPolicyShared>::LockedPtrBase(folly::Synchronized<std::unique_ptr<hdr_histogram, HdrHistogram::HdrDeleter>, folly::SharedMutexImpl<false, void, std::atomic, false, true> > const*) tlm/deps/folly.exploded/include/folly/Synchronized.h:1089 (ep-engine_ep_unit_tests+0x000001362a06) #8 folly::LockedPtr<folly::Synchronized<std::unique_ptr<hdr_histogram, HdrHistogram::HdrDeleter>, folly::SharedMutexImpl<false, void, std::atomic, false, true> > const, folly::LockPolicyShared>::LockedPtr(folly::Synchronized<std::unique_ptr<hdr_histogram, HdrHistogram::HdrDeleter>, folly::SharedMutexImpl<false, void, std::atomic, false, true> > const*) tlm/deps/folly.exploded/include/folly/Synchronized.h:1394 (ep-engine_ep_unit_tests+0x000001360063) #9 folly::SynchronizedBase<folly::Synchronized<std::unique_ptr<hdr_histogram, HdrHistogram::HdrDeleter>, folly::SharedMutexImpl<false, void, std::atomic, false, true> >, (folly::detail::MutexLevel)1>::rlock() tlm/deps/folly.exploded/include/folly/Synchronized.h:140 (ep-engine_ep_unit_tests+0x000001360063) #10 HdrHistogram::operator+=(HdrHistogram const&) ../kv_engine/utilities/hdrhistogram.cc:83 (ep-engine_ep_unit_tests+0x000001360063) #11 HdrHistogramTest_aggregationTest_Test::TestBody() ../kv_engine/engines/ep/tests/module_tests/hdrhistogram_test.cc:317 (ep-engine_ep_unit_tests+0x00000106945c) #12 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) ../googletest/googletest/src/gtest.cc:2402 (ep-engine_ep_unit_tests+0x0000013276cb) #13 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) ../googletest/googletest/src/gtest.cc:2438 (ep-engine_ep_unit_tests+0x00000132e814) #14 testing::Test::Run() ../googletest/googletest/src/gtest.cc:2474 (ep-engine_ep_unit_tests+0x00000131d649) #15 testing::TestInfo::Run() ../googletest/googletest/src/gtest.cc:2656 (ep-engine_ep_unit_tests+0x00000131d91d) #16 testing::TestCase::Run() ../googletest/googletest/src/gtest.cc:2774 (ep-engine_ep_unit_tests+0x00000131dac5) #17 testing::internal::UnitTestImpl::RunAllTests() ../googletest/googletest/src/gtest.cc:4649 (ep-engine_ep_unit_tests+0x00000131fe3e) #18 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) ../googletest/googletest/src/gtest.cc:2402 (ep-engine_ep_unit_tests+0x000001327972) #19 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) ../googletest/googletest/src/gtest.cc:2438 (ep-engine_ep_unit_tests+0x00000132edc8) #20 testing::UnitTest::Run() ../googletest/googletest/src/gtest.cc:4257 (ep-engine_ep_unit_tests+0x00000131d741) #21 RUN_ALL_TESTS() ../googletest/googletest/include/gtest/gtest.h:2237 (ep-engine_ep_unit_tests+0x000000d30e6a) #22 main ../kv_engine/engines/ep/tests/module_tests/ep_unit_tests_main.cc:144 (ep-engine_ep_unit_tests+0x000000d30e6a) Change-Id: I7f7448627d20c753add8c92eaf1186fb350aaab0 Reviewed-on: http://review.couchbase.org/c/kv_engine/+/137548 Reviewed-by: Dave Rigby <daver@couchbase.com> Tested-by: Build Bot <build@couchbase.com>
1 parent a075036 commit fb7e114

File tree

2 files changed

+62
-32
lines changed

2 files changed

+62
-32
lines changed

utilities/hdrhistogram.cc

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,31 +56,43 @@ HdrHistogram::HdrHistogram(uint64_t lowestTrackableValue,
5656
}
5757

5858
HdrHistogram& HdrHistogram::operator+=(const HdrHistogram& other) {
59-
if (other.histogram.rlock()->get() != nullptr &&
60-
other.getValueCount() > 0) {
59+
/*
60+
* Note: folly::acquireLockedPair() takes an exclusive lock on
61+
* this->histogram as we pass it a non-const ref. Where as it takes a shared
62+
* lock on other.histogram as we pass it a const ref. We can do this as we
63+
* only need to write to the ptr of this->histogram. (see
64+
* folly::acquireLocked() source code comments for more information regard
65+
* how it takes locks)
66+
*/
67+
auto lockPair = folly::acquireLockedPair(this->histogram, other.histogram);
68+
// Don't use structured binding as this messes up clang-analyzer, analyzing
69+
// the destruction of folly's locks.
70+
WHistoLockedPtr& thisLock = lockPair.first;
71+
ConstRHistoLockedPtr& otherLock = lockPair.second;
72+
if (otherLock->get()->total_count > 0) {
6173
// work out if we need to resize the receiving histogram
62-
uint64_t newMin = this->getMinTrackableValue();
63-
uint64_t newMax = this->getMaxTrackableValue();
64-
int32_t newSigfig = this->getSigFigAccuracy();
74+
auto newMin = getMinTrackableValue(thisLock);
75+
auto newMax = getMaxTrackableValue(thisLock);
76+
auto newSigfig = getSigFigAccuracy(thisLock);
6577
bool resize = false;
6678

67-
if (other.getMinTrackableValue() < this->getMinTrackableValue()) {
68-
newMin = other.getMinTrackableValue();
79+
if (getMinTrackableValue(otherLock) < newMin) {
80+
newMin = getMinTrackableValue(otherLock);
6981
resize = true;
7082
}
71-
if (other.getMaxTrackableValue() > this->getMaxTrackableValue()) {
72-
newMax = other.getMaxTrackableValue();
83+
if (getMaxTrackableValue(otherLock) > newMax) {
84+
newMax = getMaxTrackableValue(otherLock);
7385
resize = true;
7486
}
7587

76-
if (other.getSigFigAccuracy() > this->getSigFigAccuracy()) {
77-
newSigfig = other.getSigFigAccuracy();
88+
if (getSigFigAccuracy(otherLock) > newSigfig) {
89+
newSigfig = getSigFigAccuracy(otherLock);
7890
resize = true;
7991
}
8092
if (resize) {
81-
this->resize(newMin, newMax, newSigfig);
93+
this->resize(thisLock, newMin, newMax, newSigfig);
8294
}
83-
hdr_add(this->histogram.rlock()->get(), other.histogram.rlock()->get());
95+
hdr_add(thisLock->get(), otherLock->get());
8496
}
8597
return *this;
8698
}
@@ -323,7 +335,8 @@ double HdrHistogram::getMean() const {
323335
return hdr_mean(histogram.rlock()->get()) - 1;
324336
}
325337

326-
void HdrHistogram::resize(uint64_t lowestTrackableValue,
338+
void HdrHistogram::resize(WHistoLockedPtr& histoLockPtr,
339+
uint64_t lowestTrackableValue,
327340
uint64_t highestTrackableValue,
328341
int significantFigures) {
329342
if (lowestTrackableValue >= highestTrackableValue ||
@@ -345,6 +358,6 @@ void HdrHistogram::resize(uint64_t lowestTrackableValue,
345358
&hist, // Pointer to initialise
346359
cb_calloc);
347360

348-
hdr_add(hist, histogram.rlock()->get());
349-
histogram.wlock()->reset(hist);
361+
hdr_add(hist, histoLockPtr->get());
362+
histoLockPtr->reset(hist);
350363
}

utilities/hdrhistogram.h

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ class HdrHistogram {
5252

5353
using SyncHdrHistogramPtr = folly::Synchronized<
5454
std::unique_ptr<struct hdr_histogram, HdrDeleter>>;
55-
using HistoLockedPtr = SyncHdrHistogramPtr::ConstLockedPtr;
55+
using ConstRHistoLockedPtr = SyncHdrHistogramPtr::ConstRLockedPtr;
56+
using WHistoLockedPtr = SyncHdrHistogramPtr::WLockedPtr;
5657

5758
public:
5859
struct Iterator : public hdr_iter {
@@ -142,7 +143,7 @@ class HdrHistogram {
142143
* it being resized or reset while we're reading from the underlying
143144
* data structure.
144145
*/
145-
HistoLockedPtr histoRLockPtr;
146+
ConstRHistoLockedPtr histoRLockPtr;
146147
};
147148

148149
/**
@@ -320,25 +321,15 @@ class HdrHistogram {
320321
* @return minimum trackable value
321322
*/
322323
uint64_t getMinTrackableValue() const {
323-
// We subtract one from the lowest value as we have added a one offset
324-
// as the underlying hdr_histogram cannot store 0 and
325-
// therefore the value must be greater than or equal to 1.
326-
return static_cast<uint64_t>(
327-
histogram.rlock()->get()->lowest_trackable_value) -
328-
1;
324+
return getMinTrackableValue(histogram.rlock());
329325
}
330326

331327
/**
332328
* Method to get the maximum trackable value of this histogram
333329
* @return maximum trackable value
334330
*/
335331
uint64_t getMaxTrackableValue() const {
336-
// We subtract one from the lowest value as we have added a one offset
337-
// as the underlying hdr_histogram cannot store 0 and
338-
// therefore the value must be greater than or equal to 1.
339-
return static_cast<uint64_t>(
340-
histogram.rlock()->get()->highest_trackable_value) -
341-
1;
332+
return getMaxTrackableValue(histogram.rlock());
342333
}
343334

344335
/**
@@ -348,7 +339,7 @@ class HdrHistogram {
348339
* figures bing used
349340
*/
350341
int getSigFigAccuracy() const {
351-
return histogram.rlock()->get()->significant_figures;
342+
return getSigFigAccuracy(histogram.rlock());
352343
}
353344

354345
/**
@@ -358,10 +349,36 @@ class HdrHistogram {
358349
double getMean() const;
359350

360351
private:
361-
void resize(uint64_t lowestTrackableValue,
352+
void resize(WHistoLockedPtr& histoLockPtr,
353+
uint64_t lowestTrackableValue,
362354
uint64_t highestTrackableValue,
363355
int significantFigures);
364356

357+
template <class LockType>
358+
static uint64_t getMinTrackableValue(const LockType& histoLockPtr) {
359+
// We subtract one from the lowest value as we have added a one offset
360+
// as the underlying hdr_histogram cannot store 0 and
361+
// therefore the value must be greater than or equal to 1.
362+
return static_cast<uint64_t>(
363+
histoLockPtr->get()->lowest_trackable_value) -
364+
1;
365+
}
366+
367+
template <class LockType>
368+
static uint64_t getMaxTrackableValue(const LockType& histoLockPtr) {
369+
// We subtract one from the lowest value as we have added a one offset
370+
// as the underlying hdr_histogram cannot store 0 and
371+
// therefore the value must be greater than or equal to 1.
372+
return static_cast<uint64_t>(
373+
histoLockPtr->get()->highest_trackable_value) -
374+
1;
375+
}
376+
377+
template <class LockType>
378+
static int getSigFigAccuracy(const LockType& histoLockPtr) {
379+
return histoLockPtr->get()->significant_figures;
380+
}
381+
365382
/**
366383
* Private method used to create an iterator for the specified mode
367384
* @param mode the mode of the iterator to be created

0 commit comments

Comments
 (0)