Skip to content

Commit f1dc64e

Browse files
jameseh96daverigby
authored andcommitted
MB-35073: Release StreamContainer lock before calling Stream::setDead
TSan found lock inversion as DcpProducer::closeAllStreams() holds `StreamContainer->wlock()` and then acquires `vb->getStateLock()` whereas `VBucket::set()` acquires them in the opposite order. Release the stream container lock before calling `Stream::setDead()` to avoid holding both in the `closeAllStreams()` path. Also, preemptively apply the same change to `setStreamDeadStatus` though TSan has not identified inversion in this case. TSan report: [ RUN ] DurabilityTest.MB34780 ================== WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock) (pid=16422) Cycle in lock order graph: M3987 (0x7b68000308f8) => M225878274331574312 (0x000000000000) => M3987 Mutex M225878274331574312 acquired here while holding mutex M3987 in thread T7: #0 pthread_rwlock_rdlock <null> (libtsan.so.0+0x00000002953b) couchbase#1 cb_rw_reader_enter(pthread_rwlock_t*) .../platform/src/cb_pthreads.cc:195 (libplatform_so.so.0.1.0+0x000000009cfa) couchbase#2 cb::RWLock::readerLock() .../platform/include/platform/rwlock.h:87 (ep.so+0x0000000f423a) couchbase#3 cb::RWLock::lock_shared() .../platform/include/platform/rwlock.h:67 (ep.so+0x00000012f578) couchbase#4 std::shared_lock<cb::RWLock>::shared_lock(cb::RWLock&) /usr/local/include/c++/7.3.0/shared_mutex:553 (ep.so+0x00000012f578) couchbase#5 StreamContainer<std::shared_ptr<Stream> >::ReadLockedHandle::ReadLockedHandle(StreamContainer<std::shared_ptr<Stream> > const&) .../kv_engine/engines/ep/src/dcp/stream_container.h:213 (ep.so+0x00000012f578) couchbase#6 StreamContainer<std::shared_ptr<Stream> >::rlock() const .../kv_engine/engines/ep/src/dcp/stream_container.h:273 (ep.so+0x000000122ea7) #7 DcpProducer::notifySeqnoAvailable(Vbid, unsigned long) .../kv_engine/engines/ep/src/dcp/producer.cc:1312 (ep.so+0x000000122ea7) #8 DcpConnMap::notifyVBConnections(Vbid, unsigned long) .../kv_engine/engines/ep/src/dcp/dcpconnmap.cc:424 (ep.so+0x0000000fa071) #9 KVBucket::notifyReplication(Vbid, long) .../kv_engine/engines/ep/src/kv_bucket.cc:2570 (ep.so+0x000000210711) #10 EPBucket::notifyNewSeqno(Vbid, VBNotifyCtx const&) .../kv_engine/engines/ep/src/ep_bucket.cc:1327 (ep.so+0x00000016232b) #11 NotifyNewSeqnoCB::callback(Vbid const&, VBNotifyCtx const&) .../kv_engine/engines/ep/src/kv_bucket.h:837 (ep.so+0x0000002267d9) #12 VBucket::notifyNewSeqno(VBNotifyCtx const&) .../kv_engine/engines/ep/src/vbucket.cc:3631 (ep.so+0x000000264871) #13 VBucket::set() .../kv_engine/engines/ep/src/vbucket.cc:1568 (ep.so+0x00000026c768) #14 KVBucket::set() .../kv_engine/engines/ep/src/kv_bucket.cc:692 (ep.so+0x000000220856) #15 EventuallyPersistentEngine::storeIfInner() .../kv_engine/engines/ep/src/ep_engine.cc:2440 (ep.so+0x000000181fef) Mutex M3987 previously acquired by the same thread here: #0 AnnotateRWLockAcquired <null> (libtsan.so.0+0x00000005b63d) couchbase#1 folly::detail::annotate_rwlock_acquired_impl(void const volatile*, folly::annotate_rwlock_level, char const*, int) .../folly/follytsan-prefix/src/follytsan/folly/synchronization/SanitizeThread.cpp:91 (memcached+0x0000006463de) couchbase#2 annotate_rwlock_acquired .../build/tlm/deps/folly.exploded/include/folly/synchronization/SanitizeThread.h:99 (ep.so+0x000000220340) couchbase#3 folly::SharedMutexImpl<false, void, std::atomic, false, true>::annotateAcquired(folly::annotate_rwlock_level) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:696 (ep.so+0x000000220340) couchbase#4 folly::SharedMutexImpl<false, void, std::atomic, false, true>::lock_shared(folly::SharedMutexToken&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:376 (ep.so+0x000000220340) couchbase#5 folly::SharedMutexImpl<false, void, std::atomic, false, true>::ReadHolder::ReadHolder(folly::SharedMutexImpl<false, void, std::atomic, false, true> const&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:1315 (ep.so+0x000000220340) couchbase#6 KVBucket::set() .../kv_engine/engines/ep/src/kv_bucket.cc:659 (ep.so+0x000000220340) #7 EventuallyPersistentEngine::storeIfInner() .../kv_engine/engines/ep/src/ep_engine.cc:2440 (ep.so+0x000000181fef) Mutex M3987 acquired here while holding mutex M225878274331574312 in thread T5: #0 AnnotateRWLockAcquired <null> (libtsan.so.0+0x00000005b63d) couchbase#1 folly::detail::annotate_rwlock_acquired_impl(void const volatile*, folly::annotate_rwlock_level, char const*, int) .../folly/follytsan-prefix/src/follytsan/folly/synchronization/SanitizeThread.cpp:91 (memcached+0x0000006463de) couchbase#2 annotate_rwlock_acquired .../build/tlm/deps/folly.exploded/include/folly/synchronization/SanitizeThread.h:99 (ep.so+0x0000000bb626) couchbase#3 folly::SharedMutexImpl<false, void, std::atomic, false, true>::annotateAcquired(folly::annotate_rwlock_level) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:696 (ep.so+0x0000000bb626) couchbase#4 folly::SharedMutexImpl<false, void, std::atomic, false, true>::lock_shared(folly::SharedMutexToken&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:376 (ep.so+0x0000000bb626) couchbase#5 folly::SharedMutexImpl<false, void, std::atomic, false, true>::ReadHolder::ReadHolder(folly::SharedMutexImpl<false, void, std::atomic, false, true> const&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:1315 (ep.so+0x0000000bb626) couchbase#6 ActiveStream::setDead(end_stream_status_t) .../kv_engine/engines/ep/src/dcp/active_stream.cc:1181 (ep.so+0x0000000bb626) #7 operator() .../kv_engine/engines/ep/src/dcp/producer.cc:1383 (ep.so+0x0000001257d1) #8 for_each<...> /usr/local/include/c++/7.3.0/bits/stl_algo.h:3884 (ep.so+0x0000001257d1) #9 DcpProducer::closeAllStreams() .../kv_engine/engines/ep/src/dcp/producer.cc:1377 (ep.so+0x000000125c00) Mutex M225878274331574312 previously acquired by the same thread here: #0 pthread_rwlock_wrlock <null> (libtsan.so.0+0x0000000297eb) couchbase#1 cb_rw_writer_enter(pthread_rwlock_t*) .../platform/src/cb_pthreads.cc:217 (libplatform_so.so.0.1.0+0x000000009e80) couchbase#2 cb::RWLock::writerLock() .../platform/include/platform/rwlock.h:103 (ep.so+0x000000125597) couchbase#3 cb::RWLock::lock() .../platform/include/platform/rwlock.h:77 (ep.so+0x000000125597) couchbase#4 std::unique_lock<cb::RWLock>::lock() /usr/local/include/c++/7.3.0/bits/std_mutex.h:267 (ep.so+0x000000125597) couchbase#5 std::unique_lock<cb::RWLock>::unique_lock(cb::RWLock&) /usr/local/include/c++/7.3.0/bits/std_mutex.h:197 (ep.so+0x000000125597) couchbase#6 StreamContainer<std::shared_ptr<Stream> >::WriteLockedHandle::WriteLockedHandle(StreamContainer<std::shared_ptr<Stream> >&) .../kv_engine/engines/ep/src/dcp/stream_container.h:237 (ep.so+0x000000125597) #7 StreamContainer<std::shared_ptr<Stream> >::wlock() .../kv_engine/engines/ep/src/dcp/stream_container.h:277 (ep.so+0x000000125597) #8 operator() .../kv_engine/engines/ep/src/dcp/producer.cc:1381 (ep.so+0x000000125597) #9 for_each<...> /usr/local/include/c++/7.3.0/bits/stl_algo.h:3884 (ep.so+0x000000125597) #10 DcpProducer::closeAllStreams() .../kv_engine/engines/ep/src/dcp/producer.cc:1377 (ep.so+0x000000125c00) Change-Id: Icc15e74e80d7f1926ce6c75bbdd8aa1c43f5ca2c Reviewed-on: http://review.couchbase.org/111989 Reviewed-by: Dave Rigby <daver@couchbase.com> Tested-by: Dave Rigby <daver@couchbase.com>
1 parent 45c199e commit f1dc64e

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

engines/ep/src/dcp/producer.cc

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,12 +1358,20 @@ bool DcpProducer::setStreamDeadStatus(Vbid vbid,
13581358
end_stream_status_t status) {
13591359
auto rv = streams.find(vbid.get());
13601360
if (rv != streams.end()) {
1361-
for (auto handle = rv->second->rlock(); !handle.end(); handle.next()) {
1362-
if (handle.get()->compareStreamId(sid)) {
1363-
handle.get()->setDead(status);
1364-
return true;
1361+
std::shared_ptr<Stream> streamPtr;
1362+
// MB-35073: holding StreamContainer rlock while calling setDead
1363+
// has been seen to cause lock inversion elsewhere.
1364+
// Collect sharedptr then setDead once lock is released (itr out of
1365+
// scope).
1366+
for (auto itr = rv->second->rlock(); !itr.end(); itr.next()) {
1367+
if (itr.get()->compareStreamId(sid)) {
1368+
streamPtr = itr.get();
1369+
break;
13651370
}
13661371
}
1372+
if (streamPtr) {
1373+
streamPtr->setDead(status);
1374+
}
13671375
return true;
13681376
}
13691377

@@ -1378,12 +1386,22 @@ void DcpProducer::closeAllStreams() {
13781386
streams.end(),
13791387
[&vbvector](StreamsMap::value_type& vt) {
13801388
vbvector.push_back((Vbid)vt.first);
1381-
auto handle = vt.second->wlock();
1382-
while (!handle.end()) {
1383-
handle.get()->setDead(END_STREAM_DISCONNECTED);
1384-
handle.next();
1389+
std::vector<std::shared_ptr<Stream>> streamPtrs;
1390+
// MB-35073: holding StreamContainer lock while
1391+
// calling setDead leads to lock inversion - so
1392+
// collect sharedptrs in one pass then setDead once
1393+
// lock is released (itr out of scope).
1394+
{
1395+
auto handle = vt.second->wlock();
1396+
for (; !handle.end(); handle.next()) {
1397+
streamPtrs.push_back(handle.get());
1398+
}
1399+
handle.clear();
1400+
}
1401+
1402+
for (auto streamPtr : streamPtrs) {
1403+
streamPtr->setDead(END_STREAM_DISCONNECTED);
13851404
}
1386-
handle.clear();
13871405
});
13881406
}
13891407
for (const auto vbid: vbvector) {

0 commit comments

Comments
 (0)