Skip to content

Commit e6b7b0b

Browse files
Fix for lock order inversion reported by TSAN. (#2634)
We have the following (summarized) reports from TSAN about lock order inversion that are potential deadlock cases: 1) First Thread: stack: PreProcessor::releaseClientPreProcessRequestSafe -> lock: lock_guard<mutex> lock(reqEntry->mutex) RequestsBatch::releaseReqsAndSendBatchedReplyIfCompleted -> lock: const lock_guard<mutex> lock(batchMutex_) Second Thread: stack: preprocessor::RequestsBatch::registerBatch -> const std::lock_guard<std::mutex> lock(batchMutex_) PreProcessor::registerRequest PreProcessor::registerAndHandleClientPreProcessReqOnNonPrimary PreProcessor::handleSingleClientRequestMessage -> lock_guard<mutex> lock(reqEntry->mutex) 2) First Thread: stack: RequestsBatch::registerBatch -> lock: const std::lock_guard<std::mutex> lock(batchMutex_) PreProcessor::registerRequestOnPrimaryReplica PreProcessor::handleSingleClientRequestMessage -> lock: lock_guard<mutex> lock(reqEntry->mutex); Second Thread: stack: PreProcessor::releaseClientPreProcessRequestSafe -> lock: lock_guard<mutex> lock(reqEntry->mutex) RequestsBatch::cancelRequestAndBatchIfCompleted -> lock: const lock_guard<mutex> lock(batchMutex_); We resolve those situations by releasing the lock on batchMutex_ before the invocations to releaseClientPreProcessRequestSafe in releaseReqsAndSendBatchedReplyIfCompleted and cancelRequestAndBatchIfCompleted.
1 parent 69e02ea commit e6b7b0b

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

bftengine/src/preprocessor/PreProcessor.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ void RequestsBatch::releaseReqsAndSendBatchedReplyIfCompleted(PreProcessReplyMsg
178178
atomic_uint32_t batchSize = 0;
179179
PreProcessBatchReplyMsgSharedPtr batchReplyMsg;
180180
{
181-
const lock_guard<mutex> lock(batchMutex_);
181+
unique_lock<mutex> lock(batchMutex_);
182182
if (!batchInProcess_) {
183183
LOG_DEBUG(preProcessor_.logger(), "The batch is not in process; ignore" << KVLOG(clientId_, batchCid_));
184184
return;
@@ -192,10 +192,12 @@ void RequestsBatch::releaseReqsAndSendBatchedReplyIfCompleted(PreProcessReplyMsg
192192
batchCid = batchCid_;
193193
batchSize = batchSize_;
194194
// The last batch request has pre-processed => send batched reply message
195+
lock.unlock();
195196
for (auto const &replyMsg : repliesList_) {
196197
replyMsgsSize += replyMsg->size();
197198
preProcessor_.releaseClientPreProcessRequestSafe(clientId_, replyMsg->reqOffsetInBatch(), COMPLETE);
198199
}
200+
lock.lock();
199201
batchReplyMsg = make_shared<PreProcessBatchReplyMsg>(
200202
clientId_, senderId, repliesList_, batchCid_, replyMsgsSize, preProcessor_.myReplica_.getCurrentView());
201203
resetBatchParams();
@@ -210,7 +212,7 @@ void RequestsBatch::releaseReqsAndSendBatchedReplyIfCompleted(PreProcessReplyMsg
210212
void RequestsBatch::cancelRequestAndBatchIfCompleted(const string &reqBatchCid,
211213
uint16_t reqOffsetInBatch,
212214
PreProcessingResult status) {
213-
const lock_guard<mutex> lock(batchMutex_);
215+
unique_lock<mutex> lock(batchMutex_);
214216
if (batchCid_ != reqBatchCid) {
215217
LOG_INFO(preProcessor_.logger(),
216218
"The batch has been cancelled/completed earlier; do nothing" << KVLOG(clientId_, reqBatchCid, batchCid_));
@@ -219,7 +221,9 @@ void RequestsBatch::cancelRequestAndBatchIfCompleted(const string &reqBatchCid,
219221
const auto &reqEntry = requestsMap_[reqOffsetInBatch];
220222
if (reqEntry) {
221223
if (status == CANCEL) preProcessor_.preProcessorMetrics_.preProcConsensusNotReached++;
224+
lock.unlock();
222225
preProcessor_.releaseClientPreProcessRequestSafe(clientId_, reqEntry, status);
226+
lock.lock();
223227
finalizeBatchIfCompleted();
224228
}
225229
}

0 commit comments

Comments
 (0)