#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"

/*! Describes the storage used for a flag. */
enum flag_type {
  flag32,     /**< 32-bit flags */
  flag64,     /**< 64-bit flags */
  flag_oncore /**< special 64-bit flag for the on-core (hierarchical) barrier */
};

/*! Base flag class: wraps the memory location that another thread will modify,
    plus a tag identifying which concrete flag type it is. */
template <typename P> class kmp_flag {
  volatile P *loc; /**< Pointer to the actual flag storage */
  flag_type t;     /**< "Type" of the flag stored in loc */
public:
  typedef P flag_t;
  kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  /*! @result the pointer to the actual flag */
  volatile P *get() { return loc; }
  /*! @param new_loc in   set loc to point at new_loc */
  void set(volatile P *new_loc) { loc = new_loc; }
  /*! @result the flag_type of the flag */
  flag_type get_type() { return t; }
};
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
   calls __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up; otherwise it can deadlock. */
template <class C>
static inline void
__kmp_wait_template(kmp_info_t *this_thr, C *flag,
                    int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  // NOTE: We may not belong to a team at this point.
  volatile typename C::flag_t *spin = flag->get();
  kmp_uint32 spins;
  kmp_uint32 hibernate;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    // Flag was already released; nothing to wait for.
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(typename C::flag_t *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

#if OMPT_SUPPORT && OMPT_BLAME
  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled && ompt_state != ompt_state_undefined) {
    if (ompt_state == ompt_state_idle) {
      if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
      }
    } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
      KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
                       ompt_state == ompt_state_wait_barrier_implicit ||
                       ompt_state == ompt_state_wait_barrier_explicit);

      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      ompt_parallel_id_t pId;
      ompt_task_id_t tId;
      if (team) {
        pId = team->ompt_team_info.parallel_id;
        tId = team->ompt_task_info.task_id;
      } else {
        pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
        tId = this_thr->th.th_current_task->ompt_task_info.task_id;
      }
      ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
    }
  }
#endif
  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point;
// use the blocktime values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
      hibernate = 0; // force immediate suspend if blocktime was not set by user
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    // Add in the current time value to get the wakeup deadline.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  // Main wait spin loop
  while (flag->notdone_check()) {
    int in_pool;
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* A NULL task team pointer means either all tasks have completed,
         tasking is off for this region, or a newly created thread has not yet
         had its task team set up by __kmp_fork_barrier(). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      }
    }

    KMP_FSYNC_SPIN_PREPARE(CCAST(typename C::flag_t *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield.
    if (oversubscribed) {
      KMP_YIELD(1);
    } else {
      KMP_YIELD_SPIN(spins);
    }

    // Check if this thread was transferred from a team
    // to the thread pool (or vice-versa) while spinning.
    in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // recently transferred from team to pool
        KMP_TEST_THEN_INC32(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
      } else { // recently transferred from pool to team
        KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }
#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state; this indicates that
    // the logical "join barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
    flag->suspend(th_gtid);

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
  } // while
#if OMPT_SUPPORT && OMPT_BLAME
  if (ompt_enabled && ompt_state != ompt_state_undefined) {
    if (ompt_state == ompt_state_idle) {
      if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
        ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
      }
    } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
      KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
                       ompt_state == ompt_state_wait_barrier_implicit ||
                       ompt_state == ompt_state_wait_barrier_explicit);

      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      ompt_parallel_id_t pId;
      ompt_task_id_t tId;
      if (team) {
        pId = team->ompt_team_info.parallel_id;
        tId = team->ompt_task_info.task_id;
      } else {
        pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
        tId = this_thr->th.th_current_task->ompt_task_info.task_id;
      }
      ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

  KMP_FSYNC_SPIN_ACQUIRED(CCAST(typename C::flag_t *, spin));
}
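/* Illustrative sketch (not part of the runtime): the wait loop above follows a
   "spin, then yield, then sleep" escalation. The standalone analogue below
   shows that shape with the C++ standard library only; all names here
   (demo_flag, spin_limit, yield_limit) are invented for illustration and do
   not correspond to runtime internals. */
#if 0 // example only; not compiled as part of this header
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

struct demo_flag {
  std::atomic<unsigned> value{0};
  unsigned checker = 1; // "done" when value reaches checker
  std::mutex mtx;
  std::condition_variable cv;

  void wait() {
    const int spin_limit = 1000, yield_limit = 100;
    for (int i = 0; i < spin_limit; ++i) // 1) pure spin
      if (value.load(std::memory_order_acquire) >= checker)
        return;
    for (int i = 0; i < yield_limit; ++i) { // 2) spin with yield
      if (value.load(std::memory_order_acquire) >= checker)
        return;
      std::this_thread::yield();
    }
    // 3) block until released; the predicate is re-checked under the lock
    std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk, [&] { return value.load(std::memory_order_acquire) >= checker; });
  }

  void release() { // the releasing thread must wake any sleeper
    value.fetch_add(1, std::memory_order_release);
    std::lock_guard<std::mutex> lk(mtx);
    cv.notify_all();
  }
};
#endif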
/*! Release threads that are waiting on the flag: modify the flag to its
    released state, then wake any waiter that has gone to sleep on it. */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(CCAST(typename C::flag_t *, flag->get()));

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 *(flag->get())));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep state if infinite block time is not set:
    // is any thread waiting on this flag asleep?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up the sleeping thread waiting on this flag
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid);
        }
      }
    }
  }
}
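/* Illustrative sketch (not part of the runtime): the release path above only
   pays for an expensive wake-up when a waiter has actually gone to sleep. The
   waiter advertises this by atomically setting a sleep bit in the flag word
   (compare set_sleeping()/is_any_sleeping() below). All names in this analogue
   (SLEEP_BIT, BUMP, demo_try_sleep, demo_release) are invented; the bit/step
   values merely echo the convention used in this file. */
#if 0 // example only; not compiled as part of this header
#include <atomic>
#include <cstdint>

constexpr std::uint64_t SLEEP_BIT = 1; // lowest bit marks "a waiter is asleep"
constexpr std::uint64_t BUMP = 4;      // payload advances in multiples of 4

// Waiter side: publish the sleep bit, then re-check the payload so a release
// that raced with us is not missed before we block in an OS-level wait.
inline bool demo_try_sleep(std::atomic<std::uint64_t> &flag, std::uint64_t go) {
  std::uint64_t old = flag.fetch_or(SLEEP_BIT, std::memory_order_acq_rel);
  if ((old & ~SLEEP_BIT) >= go) { // already released: undo and keep spinning
    flag.fetch_and(~SLEEP_BIT, std::memory_order_acq_rel);
    return false;
  }
  return true; // caller may now block (futex, condition variable, ...)
}

// Releaser side: bump the payload; wake a sleeper only if the sleep bit was set.
inline bool demo_release(std::atomic<std::uint64_t> &flag) {
  std::uint64_t old = flag.fetch_add(BUMP, std::memory_order_acq_rel);
  return (old & SLEEP_BIT) != 0; // true => caller must wake the sleeping thread
}
#endif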
/*! Compile-time traits that map a flag's storage type to the matching atomic
    primitives, so the flag classes below can stay width-agnostic. */
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
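/* Illustrative sketch (not part of the runtime): flag_traits selects the 32- or
   64-bit atomic primitive at compile time from the flag's storage type, so the
   generic flag code never mentions the width. A standalone analogue built on
   std::atomic is shown below; demo_traits and demo_done are invented names. */
#if 0 // example only; not compiled as part of this header
#include <atomic>
#include <cstdint>

template <typename T> struct demo_traits {
  static T load(const std::atomic<T> &f) {
    return f.load(std::memory_order_acquire);
  }
  static T fetch_add4(std::atomic<T> &f) {
    return f.fetch_add(T(4), std::memory_order_acq_rel);
  }
  static T fetch_or(std::atomic<T> &f, T v) {
    return f.fetch_or(v, std::memory_order_acq_rel);
  }
  static T fetch_and(std::atomic<T> &f, T v) {
    return f.fetch_and(v, std::memory_order_acq_rel);
  }
};

// A width-agnostic "done?" check written once against the traits.
template <typename T> bool demo_done(const std::atomic<T> &flag, T checker) {
  return demo_traits<T>::load(flag) == checker;
}
// demo_done<std::uint32_t> and demo_done<std::uint64_t> both resolve to the
// correct-width atomic operation, which is the role flag_traits plays here.
#endif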
/*! Basic flag: a word compared against a "checker" value to decide whether the
    flag has been released, plus bookkeeping for threads sleeping on it. */
template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag to to check if flag has been
                         released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag(volatile FlagType *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(volatile FlagType *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*! @result the thread that is waiting at index i */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*! @result number of threads sleeping on the flag */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*! Insert a waiting thread at index 0. */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*! @result true if the flag object has been released. */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*! @result true if the flag's old value indicates it was released. */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released. */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*! Trigger all waiting threads to run by bumping the flag to its release
      state. */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*! Note that at least one thread is sleeping on the flag by setting the
      sleep bit(s). @result actual flag value before the bit(s) were set. */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*! Note that no threads are sleeping on the flag by clearing the sleep
      bit(s). @result actual flag value before the bit(s) were cleared. */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! Test whether the sleep bit is set in an old flag value. */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! Test whether there are threads sleeping on the flag. */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
/*! 32-bit version of the flag primitives. */
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    __kmp_wait_template(this_thr, this,
                        final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
/*! 64-bit version of the flag primitives. */
class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    __kmp_wait_template(this_thr, this,
                        final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
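/* Illustrative sketch (not part of the runtime): how a 64-bit flag is used in
   practice. One thread constructs a flag over a shared word together with the
   value it is waiting for (the "checker") and calls wait(); another thread
   bumps the word and calls release(). The standalone analogue below uses
   std::atomic and std::thread; demo_wait, demo_release, b_go and GO are
   invented names, and the spin here stands in for the full spin/yield/sleep
   escalation of __kmp_wait_template. */
#if 0 // example only; not compiled as part of this header
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

static std::atomic<std::uint64_t> b_go{0}; // shared "go" word, as in a barrier
constexpr std::uint64_t GO = 4;            // value the waiter is checking for

static void demo_wait() {                  // analogue of flag.wait(...)
  while (b_go.load(std::memory_order_acquire) != GO)
    std::this_thread::yield();
}

static void demo_release() {               // analogue of flag.release()
  b_go.fetch_add(GO, std::memory_order_release);
}

int main() {
  std::thread worker(demo_wait);
  demo_release();
  worker.join();
  std::puts("worker released");
  return 0;
}
#endif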
/*! 64-bit flag used by the hierarchical ("on-core") barrier: each byte of the
    word acts as a separate flag owned by one child thread. */
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32 offset; /**< Portion of flag of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread to redirect to a different flag location. */
#if USE_ITT_BUILD
  void *itt_sync_obj; /**< ITT object to pass to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes of the word simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    __kmp_wait_template<kmp_flag_oncore>(
        this_thr, this, final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
};
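/* Illustrative sketch (not part of the runtime): kmp_flag_oncore treats one
   64-bit word as eight independent one-byte flags, so up to eight children can
   signal a parent through a single word. The analogue below keeps only the
   atomic-OR release path; the runtime additionally uses a plain byte store
   (byteref(get(), offset) = 1) as a fast path when no waiter can be sleeping.
   demo_signal_byte and demo_byte_done are invented names, and the bit layout
   assumes a little-endian word (byte i occupies bits 8*i..8*i+7). */
#if 0 // example only; not compiled as part of this header
#include <atomic>
#include <cstdint>

// Child `offset` signals by setting its byte of the shared word to 1 with an
// atomic OR, so concurrent updates by siblings (or a sleep bit) are not lost.
inline void demo_signal_byte(std::atomic<std::uint64_t> &word, unsigned offset) {
  word.fetch_or(std::uint64_t(1) << (8 * offset), std::memory_order_release);
}

// Parent checks whether child `offset` has signalled yet.
inline bool demo_byte_done(const std::atomic<std::uint64_t> &word,
                           unsigned offset) {
  return ((word.load(std::memory_order_acquire) >> (8 * offset)) & 0xffu) == 1;
}
#endif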
/*! Uses the type tag stored in the flag's base class to call the correct
    resume routine for a sleeping thread; only the common base is read, so the
    cast to kmp_flag_64 is safe for any of the concrete flag types. */
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}
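/* Illustrative sketch (not part of the runtime): the wrapper above is a plain
   tag-dispatch pattern — the type-erased pointer is treated as the common base
   just long enough to read its tag, and the tag selects the concrete routine.
   The standalone analogue below uses invented demo_* names. */
#if 0 // example only; not compiled as part of this header
#include <cstdio>

enum demo_kind { demo_k32, demo_k64 };

struct demo_base { demo_kind kind; };
struct demo_f32 : demo_base { demo_f32() { kind = demo_k32; } };
struct demo_f64 : demo_base { demo_f64() { kind = demo_k64; } };

inline void demo_resume(void *erased) {
  if (!erased)
    return;
  switch (static_cast<demo_base *>(erased)->kind) { // read only the common tag
  case demo_k32:
    std::puts("resume 32-bit waiter");
    break;
  case demo_k64:
    std::puts("resume 64-bit waiter");
    break;
  }
}
#endif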
#endif // KMP_WAIT_RELEASE_H