@@ -73,6 +73,7 @@ class ThreadPoolTempl {
       allow_spinning_(allow_spinning),
       global_steal_partition_(EncodePartition(0, num_threads_)),
       blocked_(0),
+      num_tasks_(0),
       spinning_(0),
       done_(false),
       cancelled_(false),
@@ -143,6 +144,7 @@ class ThreadPoolTempl {
   void AddTaskWithHint(std::function<void()> fn, int start, int limit) {
     Task t = env_.CreateTask(std::move(fn));
     PerThread* pt = GetPerThread();
+    uint64_t num_tasks = num_tasks_.fetch_add(1, std::memory_order_relaxed) + 1;
     if (pt->pool == this) {
       // Worker thread of this pool, push onto the thread's queue.
       Queue& q = thread_data_[pt->thread_id].queue;
@@ -166,8 +168,11 @@ class ThreadPoolTempl {
       // this. We expect that such scenario is prevented by program, that is,
       // this is kept alive while any threads can potentially be in Schedule.
       if (!t.f) {
-        ec_.Notify(false);
+        if (num_tasks > num_threads_ - blocked_.load(std::memory_order_relaxed)) {
+          ec_.Notify(false);
+        }
       } else {
+        num_tasks_.fetch_sub(1, std::memory_order_relaxed);
         env_.ExecuteTask(t);  // Push failed, execute directly.
       }
     }
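Note on the hunk above: ec_.Notify(false) can wake a blocked worker, which is comparatively expensive, so the patch skips the notification whenever the pending-task count does not exceed the number of workers already awake (num_threads_ - blocked_). A minimal standalone sketch of that gating heuristic, using std::condition_variable in place of the pool's EventCount; WakeGate, OnTaskAdded, and OnTaskDone are illustrative names, not part of the actual code:

#include <atomic>
#include <condition_variable>
#include <cstdint>

// Sketch only: the real pool signals through an EventCount, not a
// condition variable, and none of these names exist in the patch.
class WakeGate {
 public:
  explicit WakeGate(unsigned num_threads) : num_threads_(num_threads) {}

  // Producer side: bookkeeping after a task is enqueued.
  void OnTaskAdded() {
    uint64_t pending = num_tasks_.fetch_add(1, std::memory_order_relaxed) + 1;
    unsigned awake = num_threads_ - blocked_.load(std::memory_order_relaxed);
    // Wake a sleeper only when the awake workers cannot absorb the backlog;
    // otherwise an already-running worker will steal the task anyway.
    if (pending > awake) {
      cv_.notify_one();
    }
  }

  // Worker side: bookkeeping after a task finishes executing.
  void OnTaskDone() { num_tasks_.fetch_sub(1, std::memory_order_relaxed); }

 private:
  const unsigned num_threads_;
  std::atomic<uint64_t> num_tasks_{0};
  std::atomic<unsigned> blocked_{0};
  std::condition_variable cv_;
};

Both accesses are relaxed because the count only steers a heuristic: a stale read costs at most one extra or one deferred wakeup, while task handoff itself is still synchronized by the queues and the event count.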
@@ -263,6 +268,7 @@ class ThreadPoolTempl {
   std::vector<std::vector<unsigned>> all_coprimes_;
   unsigned global_steal_partition_;
   std::atomic<unsigned> blocked_;
+  std::atomic<uint64_t> num_tasks_;
   std::atomic<bool> spinning_;
   std::atomic<bool> done_;
   std::atomic<bool> cancelled_;
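The new member sits next to blocked_ because the two are always read together in the producer's wake test above. One subtlety for anyone reusing the pattern: the increment happens on the submitting thread and the matching decrement on whichever worker executes the task, so the pairing cannot be expressed as a simple scope guard. A hypothetical debug wrapper, not in the patch, that would make an imbalance visible:

#include <atomic>
#include <cassert>
#include <cstdint>

// Hypothetical helper: tracks submissions and completions separately so a
// leaked pending count trips the assert at shutdown.
struct PendingCounter {
  std::atomic<uint64_t> added{0};
  std::atomic<uint64_t> done{0};
  void OnAdd() { added.fetch_add(1, std::memory_order_relaxed); }
  void OnDone() { done.fetch_add(1, std::memory_order_relaxed); }
  uint64_t Pending() const {
    // Relaxed snapshot: good enough for a wake heuristic, not a barrier.
    return added.load(std::memory_order_relaxed) -
           done.load(std::memory_order_relaxed);
  }
  ~PendingCounter() {
    assert(added.load() == done.load() && "leaked pending task");
  }
};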
@@ -305,6 +311,7 @@ class ThreadPoolTempl {
       }
       if (t.f) {
         env_.ExecuteTask(t);
+        num_tasks_.fetch_sub(1, std::memory_order_relaxed);
       }
     }
   } else {
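Placement detail in this hunk: the decrement comes after env_.ExecuteTask(t) returns, so a task still counts as pending while it runs. Producers therefore see in-flight work in num_tasks_, and the wake test stays conservative, erring toward waking threads while the pool is busy. Sketched with the patch's own fields:

if (t.f) {
  env_.ExecuteTask(t);                                 // run the task first
  num_tasks_.fetch_sub(1, std::memory_order_relaxed);  // then drop the count
}
// Decrementing before ExecuteTask would make a fully occupied pool look
// idle to AddTaskWithHint and could suppress wakeups exactly when all
// workers are busy.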
@@ -315,16 +322,14 @@ class ThreadPoolTempl {
       if (!t.f) {
         t = GlobalSteal();
         if (!t.f) {
-          // Leave one thread spinning. This reduces latency.
-          if (allow_spinning_ && !spinning_ && !spinning_.exchange(true)) {
+          if (allow_spinning_) {
             for (int i = 0; i < spin_count && !t.f; i++) {
               if (!cancelled_.load(std::memory_order_relaxed)) {
                 t = GlobalSteal();
               } else {
                 return;
               }
             }
-            spinning_ = false;
           }
           if (!t.f) {
             if (!WaitForWork(waiter, &t)) {
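The dropped condition in the hunk above, !spinning_ && !spinning_.exchange(true), allowed at most one spinner pool-wide: a thread claimed the slot with the exchange and released it with spinning_ = false after the loop. Removing it lets every idle worker run spin_count steal attempts before blocking, which burns some idle CPU but cuts wake-up latency under bursty submission. Condensed side by side, with SpinForWork as a hypothetical wrapper around the steal loop:

// Before: at most one spinner, gated by an atomic flag.
if (allow_spinning_ && !spinning_ && !spinning_.exchange(true)) {
  SpinForWork(t);      // hypothetical: the for-loop of GlobalSteal() retries
  spinning_ = false;   // release the single-spinner slot
}

// After: every idle worker spins before falling back to WaitForWork.
if (allow_spinning_) {
  SpinForWork(t);
}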
@@ -336,6 +341,7 @@ class ThreadPoolTempl {
       }
       if (t.f) {
         env_.ExecuteTask(t);
+        num_tasks_.fetch_sub(1, std::memory_order_relaxed);
       }
     }
   }