#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying using non-globally-ordered stores (KNC)
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void); // Forward declaration
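
// ---------------------------- Barrier Algorithms ----------------------------

// Linear barrier, gather phase: each worker posts its arrival on its own
// b_arrived flag, and the master waits for every worker in turn, folding in
// reduction data when a reduce callback is supplied.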
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(20,
           ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all threads have arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to master thread. After this write, a worker thread may not
    // assume that the team is valid any more - it could be deallocated by the
    // master thread at any time.
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for the worker thread to arrive
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread time
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                       team->t.t_id, i));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(20,
           ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
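
// Linear barrier, release phase: the master (after optionally pushing ICVs to
// the workers' implicit tasks) releases each worker through its b_go flag;
// workers simply wait on their own b_go.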
static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

#if KMP_BARRIER_ICV_PUSH
    {
      KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
      if (propagate_icvs) {
        ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
        for (i = 1; i < nproc; ++i) {
          __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team,
                                   i, FALSE);
          ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                         &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
        ngo_sync();
      }
    }
#endif // KMP_BARRIER_ICV_PUSH

    // Now, release all of the worker threads
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go flag
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
                team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
                other_threads[i]->th.th_bar[bt].bb.b_go,
                other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
      ANNOTATE_BARRIER_BEGIN(other_threads[i]);
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                       other_threads[i]);
      flag.release();
    }
  } else { // Wait for the MASTER thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
    // The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(20,
           ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
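
// Tree barrier: threads form a tree with branching factor 1 << branch_bits.
// In the gather phase each parent waits for all of its children to arrive
// before signaling its own parent.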
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                          int tid, void (*reduce)(void *, void *)
                                       USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state;

  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any
  // required data as we go
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread. After this write, a worker thread may not
    // assume that the team is valid any more - it could be deallocated by the
    // master thread at any time.
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the master thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
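
// Tree barrier, release phase: each thread waits to be released by its parent
// on its b_go flag, then releases its own children, pushing ICVs to them when
// requested.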
static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(tid)) { // Wait to be released by parent
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      ANNOTATE_BARRIER_BEGIN(child_thr);
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
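
// Hypercube-embedded tree ("hyper") barrier, gather phase: at each level a
// thread either signals the peer that owns its sub-cube and stops, or collects
// arrivals from the peers it owns; the level stride grows by branch_bits per
// iteration.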
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                           int tid, void (*reduce)(void *, void *)
                                        USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(20,
           ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go.  */
  kmp_flag_64 p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread. After this write (in the last iteration
      // of the enclosing for loop), a worker thread may not assume that the
      // team is valid any more - it could be deallocated at any time.
      ANNOTATE_BARRIER_BEGIN(this_thr);
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the master thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
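
// Hyper barrier, release phase. With KMP_REVERSE_HYPER_BAR defined (the
// default below), children are released in the reverse of the order used by
// the gather, which has historically performed better in this runtime.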
#define KMP_REVERSE_HYPER_BAR
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads;
  kmp_uint32 num_threads;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint32 offset;
  kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that have
     been gathered. If KMP_REVERSE_HYPER_BAR is defined (default), threads are
     released in the reverse order of the corresponding gather; otherwise
     threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // master
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // master already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else { // Wait to be released by parent
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // Now go down from there
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Go down the tree, level by level
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level == 0) ? level : level - 1);
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
        child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this; this is the level parent is notified at
      break;
    // Iterate through children on this level of the tree
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      else {
        kmp_info_t *child_thr = other_threads[child_tid];
        kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch next thread's go count
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif // KMP_REVERSE_HYPER_BAR
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push my fixed ICVs to my child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

        KA_TRACE(20,
                 ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                  "go(%p): %u => %u\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release child from barrier
        ANNOTATE_BARRIER_BEGIN(child_thr);
        kmp_flag_64 flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) { // copy ICVs locally to final destination
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(20,
           ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
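
// Hierarchical barrier: a machine-topology-aware barrier in which each thread
// learns its level, parent, and number of leaf children from the hierarchy
// computed by __kmp_get_hierarchy(). leaf_state packs one byte per leaf child
// into a 64-bit word so that all leaf arrivals can be observed with one flag.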
// Initializes (or re-initializes) the hierarchical barrier data stored on a
// thread, doing only the work required by how the team has changed.
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
  // Checks to determine if (re-)initialization is needed
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default for master
    thr_bar->parent_tid = -1; // default for master
    if (!KMP_MASTER_TID(tid)) {
      // If not master, find parent thread in hierarchy
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) {
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) { // reached level right below the master
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
          // thread is not a subtree root at next level, so this is its max level
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0;
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      thr_bar->leaf_kids = nproc - tid - 1;
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
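
// Hierarchical barrier, gather phase: with infinite blocktime and the on-core
// barrier enabled, leaf children check in on bytes of their parent's b_arrived
// word; otherwise (and for non-leaf children) the usual per-thread b_arrived
// flags are used.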
static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state;

  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (other_threads[0]
          ->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
                                               team);

  if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
    kmp_int32 child_tid;
    new_state =
        (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (thr_bar->leaf_kids) {
        // First, wait for leaf children to check-in on my b_arrived flag
        kmp_uint64 leaf_state =
            KMP_MASTER_TID(tid)
                ? thr_bar->b_arrived | thr_bar->leaf_state
                : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                      "for leaf kids\n",
                      gtid, team->t.t_id, tid));
        kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        if (reduce) {
          ANNOTATE_REDUCE_AFTER(reduce);
          for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
               ++child_tid) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_BARRIER_END(other_threads[child_tid]);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      other_threads[child_tid]->th.th_local.reduce_data);
          }
          ANNOTATE_REDUCE_BEFORE(reduce);
          ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
        }
        // clear leaf_state bits
        KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
      }
      // Next, wait for higher level children on each child's b_arrived flag
      for (kmp_uint32 d = 1; d < thr_bar->my_level;
           ++d) { // gather lowest level threads first, but skip 0
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    } else { // Blocktime is not infinite
      for (kmp_uint32 d = 0; d < thr_bar->my_level;
           ++d) { // Gather lowest level threads first
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    }
  }
  // All subordinates are gathered; now release parent if not master thread
  if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
                  " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
                  gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
                  thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
                  thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    /* Mark arrival to parent: after this write, a worker thread may not assume
       that the team is valid any more - it could be deallocated by the master
       thread at any time. */
    if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
        !thr_bar->use_oncore_barrier) {
      // Parent is waiting on my b_arrived flag; release it
      ANNOTATE_BARRIER_BEGIN(this_thr);
      kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
      flag.release();
    } else {
      // Leaf does special release on "offset" bits of parent's b_arrived flag
      thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
      flag.set_waiter(other_threads[thr_bar->parent_tid]);
      flag.release();
    }
  } else { // Master thread needs to update the team's b_arrived value
    team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
static void __kmp_hierarchical_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  bool team_change = false; // indicates on-core barrier shouldn't be used

  if (KMP_MASTER_TID(tid)) {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master "
                  "entered barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  } else { // Worker threads
    // Wait for parent thread to release me
    if (!thr_bar->use_oncore_barrier ||
        __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
        thr_bar->team == NULL) {
      // Use traditional method of waiting on my own b_go flag
      thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
      kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(this_thr);
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
    } else { // Thread barrier data is initialized, this is a leaf, blocktime is
      // infinite, not nested: wait on my "offset" bits on parent's b_go flag
      thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
                           thr_bar->offset, bt,
                           this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
      flag.wait(this_thr, TRUE);
      if (thr_bar->wait_flag ==
          KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
        TCW_8(thr_bar->b_go,
              KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      } else { // Reset my bits on parent's b_go flag
        (RCAST(volatile char *,
               &(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0;
      }
    }
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    // Early exit for reaping threads releasing forkjoin barrier
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    KA_TRACE(20,
             ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }

  nproc = this_thr->th.th_team_nproc;
  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (team->t.t_threads[0]
          ->th.th_teams_microtask) { // are we inside the teams construct?
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        this_thr->th.th_teams_level == level)
      ++level; // level was not increased in teams construct for team_of_workers
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  }
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

  // If the team size has increased, we still communicate with old leaves via
  // oncore barrier.
  unsigned short int old_leaf_kids = thr_bar->leaf_kids;
  kmp_uint64 old_leaf_state = thr_bar->leaf_state;
  team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
                                                       tid, team);
  // But if the entire team changes, we won't use oncore barrier at all
  if (team_change)
    old_leaf_kids = 0;

#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs) {
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    if (KMP_MASTER_TID(tid)) { // master already has copy in final destination
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
               thr_bar->use_oncore_barrier) { // optimization for inf blocktime
      if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
        // Leaves (on-core children) pull parent's fixed ICVs directly to local
        // ICV store
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
      // Non-leaves will get ICVs piggybacked with b_go via NGO store
    } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
      if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs for
        // children to access
        copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
      else // leaves copy parent's fixed ICVs directly to local ICV store
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PUSH

  // Now, release my children
  if (thr_bar->my_level) { // not a leaf
    kmp_int32 child_tid;
    kmp_uint32 last;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (KMP_MASTER_TID(tid)) { // do a flat release
        // Set local b_go to bump children via NGO store of the cache line
        // containing ICVs and b_go.
        thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
        // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
        // the cache line
        ngo_load(&thr_bar->th_fixed_icvs);
        // This loops over all the threads, skipping only the leaf nodes in the
        // hierarchy
        for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
             child_tid += thr_bar->skip_per_level[1]) {
          kmp_bstate_t *child_bar =
              &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d)"
                        " go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Use ngo store (if available) to both store ICVs and release child
          // via child's b_go
          ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
        }
        ngo_sync();
      }
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      // Now, release leaf children
      if (thr_bar->leaf_kids) { // if there are any
        if (team_change ||
            old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
          if (old_leaf_kids) { // release old leaf kids
            thr_bar->b_go |= old_leaf_state;
          }
          // Release new leaf kids
          last = tid + thr_bar->skip_per_level[1];
          if (last > nproc)
            last = nproc;
          for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
               ++child_tid) { // skip_per_level[0]=1
            kmp_info_t *child_thr = team->t.t_threads[child_tid];
            kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
            KA_TRACE(20,
                     ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
                      " T#%d(%d:%d) go(%p): %u => %u\n",
                      gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                      team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                      child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child using child's b_go flag
            ANNOTATE_BARRIER_BEGIN(child_thr);
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
          }
        } else { // Release all children at once with leaf_state bits on my own
          // b_go flag
          thr_bar->b_go |= thr_bar->leaf_state;
        }
      }
    } else { // Blocktime is not infinite; do a simple hierarchical release
      for (int d = thr_bar->my_level - 1; d >= 0;
           --d) { // Release highest level threads first
        last = tid + thr_bar->skip_per_level[d + 1];
        kmp_uint32 skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = team->t.t_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d) go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Release child using child's b_go flag
          ANNOTATE_BARRIER_BEGIN(child_thr);
          kmp_flag_64 flag(&child_bar->b_go, child_thr);
          flag.release();
        }
      }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid))
      // non-leaves copy ICVs from fixed ICVs to local dest
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
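
// ------------------------- End of Barrier Algorithms -------------------------

// Internal function to do a barrier.
/* If 'is_split' is true, do a split barrier, otherwise, do a plain barrier.
   If 'reduce' is non-NULL, do a split reduction barrier.
   Returns 0 if master thread, 1 if worker thread. */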
int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
                  size_t reduce_size, void *reduce_data,
                  void (*reduce)(void *, void *)) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  int status = 0;
  ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
  ompt_task_id_t my_task_id;
  ompt_parallel_id_t my_parallel_id;
#endif

  KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
                __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  if (ompt_enabled) {
    my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
    my_parallel_id = team->t.ompt_team_info.parallel_id;

    if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
      if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
        ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
            my_parallel_id, my_task_id);
      }
    }
    if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
      ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id,
                                                             my_task_id);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
  }
#endif

  if (!team->t.t_serialized) {
#if USE_ITT_BUILD
    // This value will be used in itt notify events below.
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
#endif
#endif /* USE_ITT_BUILD */
    if (__kmp_tasking_mode == tskm_extra_barrier) {
      __kmp_tasking_barrier(team, this_thr, gtid);
      KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
                    __kmp_team_from_gtid(gtid)->t.t_id,
                    __kmp_tid_from_gtid(gtid)));
    }

    /* Copy the blocktime info to the thread, where __kmp_wait_template() can
       access it when the team struct is not guaranteed to exist. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
#endif
    }

#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_DEBUGGER
    // Let the debugger know: the thread arrived to the barrier and is waiting.
    if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
      team->t.t_bar[bt].b_master_arrived += 1;
    } else {
      this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
    }
#endif /* USE_DEBUGGER */
    if (reduce != NULL) {
      this_thr->th.th_local.reduce_data = reduce_data;
    }

    if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
      // use 0 to only setup the current team if nthreads > 1
      __kmp_task_team_setup(this_thr, team, 0);

    switch (__kmp_barrier_gather_pattern[bt]) {
    case bp_hyper_bar: {
      // don't set branch bits to 0; use linear
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
      __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
                                 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_hierarchical_bar: {
      __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid,
                                        reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_tree_bar: {
      // don't set branch bits to 0; use linear
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
      __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
                                reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    default: {
      __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
                                  reduce USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    }

    if (KMP_MASTER_TID(tid)) {
      status = 0;
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
      }
#if USE_DEBUGGER
      // Let the debugger know: all threads have arrived and are leaving.
      team->t.t_bar[bt].b_team_arrived += 1;
#endif

#if OMP_40_ENABLED
      // Reset cancellation flag for worksharing constructs
      if (team->t.t_cancel_request == cancel_loop ||
          team->t.t_cancel_request == cancel_sections) {
        team->t.t_cancel_request = cancel_noreq;
      }
#endif
#if USE_ITT_BUILD
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier - report frame end (only if active_level == 1)
      if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
          this_thr->th.th_teams_microtask == NULL &&
#endif
          team->t.t_active_level == 1) {
        kmp_uint64 cur_time = __itt_get_timestamp();
        kmp_info_t **other_threads = team->t.t_threads;
        int nproc = this_thr->th.th_team_nproc;
        int i;
        switch (__kmp_forkjoin_frames_mode) {
        case 1:
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                                 loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        case 2:
          __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
                                 1, loc, nproc);
          break;
        case 3:
          if (__itt_metadata_add_ptr) {
            // Initialize with master's wait time
            kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
            // Set arrive time to zero to be able to check it in
            // __kmp_invoke_task(); the same is done inside the loop below
            this_thr->th.th_bar_arrive_time = 0;
            for (i = 1; i < nproc; ++i) {
              delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
              other_threads[i]->th.th_bar_arrive_time = 0;
            }
            __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                         cur_time, delta,
                                         (kmp_uint64)(reduce != NULL));
          }
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                                 loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        }
      }
#endif /* USE_ITT_BUILD */
    } else {
      status = 1;
#if USE_ITT_BUILD
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
    }
    if (status == 1 || !is_split) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(
            bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      }
      }
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
      }
    }

#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
  } else { // Team is serialized.
    status = 0;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
#if OMP_45_ENABLED
      if (this_thr->th.th_task_team != NULL) {
        void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
          __kmp_itt_barrier_starting(gtid, itt_sync_obj);
        }
#endif
        // Proxy tasks may still be outstanding for a serialized team.
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks ==
                         TRUE);
        __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
        __kmp_task_team_setup(this_thr, team, 0);

#if USE_ITT_BUILD
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
      }
#else
      // The task team should be NULL for serialized code (tasks will be
      // executed immediately)
      KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
      KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
#endif
    }
  }
  KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
                gtid, __kmp_team_from_gtid(gtid)->t.t_id,
                __kmp_tid_from_gtid(gtid), status));

#if OMPT_SUPPORT
  if (ompt_enabled) {
    if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
      ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id,
                                                           my_task_id);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
  }
#endif
  ANNOTATE_BARRIER_END(&team->t.t_bar);

  return status;
}
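
// Completes the release phase of a split barrier that __kmp_barrier() left
// open (is_split was true), releasing the worker threads from the master.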
void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
  if (!team->t.t_serialized) {
    if (KMP_MASTER_GTID(gtid)) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
                                           FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(NULL));
      }
      }
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
      }
    }
  }
  ANNOTATE_BARRIER_END(&team->t.t_bar);
}
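
// Join barrier: executed by every thread at the end of a parallel region.
// Workers signal arrival and the master waits for the whole team (plus any
// outstanding tasks) before the team can be dismantled.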
void __kmp_join_barrier(int gtid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team;
  kmp_uint nproc;
  kmp_info_t *master_thread;
  int tid;
#ifdef KMP_DEBUG
  int team_id;
#endif /* KMP_DEBUG */
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
    // Get object created at fork_barrier
    itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#endif
#endif /* USE_ITT_BUILD */
  KMP_MB();

  // Get current info
  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
  tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
  team_id = team->t.t_id;
#endif /* KMP_DEBUG */
  master_thread = this_thr->th.th_team_master;
#ifdef KMP_DEBUG
  if (master_thread != team->t.t_threads[0]) {
    __kmp_print_structure();
  }
#endif /* KMP_DEBUG */
  KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
  KMP_MB();

  // Verify state
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
  KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
  KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
                gtid, team_id, tid));

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
    ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
  }
  this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
#endif

  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
                  gtid, team_id, tid));
  }
#ifdef KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
                  "%p, th_task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state],
                  this_thr->th.th_task_team));
    KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
                     team->t.t_task_team[this_thr->th.th_task_state]);
  }
#endif /* KMP_DEBUG */

  /* Copy the blocktime info to the thread, where __kmp_wait_template() can
     access it when the team struct is not guaranteed to exist. The copy is
     skipped when blocktime is infinite, since the values are not used then. */
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
    this_thr->th.th_team_bt_intervals =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
    this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
#endif
  }

#if USE_ITT_BUILD
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                      NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                NULL USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

  /* From this point on, the team data structure may be deallocated at any time
     by the master thread - it is unsafe to reference it in any of the worker
     threads. */
  if (KMP_MASTER_TID(tid)) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
    }
#if KMP_STATS_ENABLED
    // Have master thread flag the workers to indicate they are now waiting for
    // the next parallel region, and wake them so they switch their timers to idle.
    for (int i = 0; i < team->t.t_nproc; ++i) {
      kmp_info_t *team_thread = team->t.t_threads[i];
      if (team_thread == this_thr)
        continue;
      team_thread->th.th_stats->setIdleFlag();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
          team_thread->th.th_sleep_loc != NULL)
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
                                  team_thread->th.th_sleep_loc);
    }
#endif
#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Join barrier - report frame end
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
        this_thr->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1) {
      kmp_uint64 cur_time = __itt_get_timestamp();
      ident_t *loc = team->t.t_ident;
      kmp_info_t **other_threads = team->t.t_threads;
      int nproc = this_thr->th.th_team_nproc;
      int i;
      switch (__kmp_forkjoin_frames_mode) {
      case 1:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        break;
      case 2:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
                               loc, nproc);
        break;
      case 3:
        if (__itt_metadata_add_ptr) {
          // Initialize with master's wait time
          kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
          // Set arrive time to zero to be able to check it in
          // __kmp_invoke_task(); the same is done inside the loop below
          this_thr->th.th_bar_arrive_time = 0;
          for (i = 1; i < nproc; ++i) {
            delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
            other_threads[i]->th.th_bar_arrive_time = 0;
          }
          __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                       cur_time, delta, 0);
        }
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        this_thr->th.th_frame_time = cur_time;
        break;
      }
    }
#endif /* USE_ITT_BUILD */
  }
#if USE_ITT_BUILD
  else {
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
  }
#endif /* USE_ITT_BUILD */

  if (KMP_MASTER_TID(tid)) {
    KA_TRACE(15,
             ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
              gtid, team_id, tid, nproc));
  }

  KMP_MB(); // Flush all pending memory write invalidates.
  KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));

#if OMPT_SUPPORT
  if (ompt_enabled) {
    if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
      ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
          team->t.ompt_team_info.parallel_id,
          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
    // return to default state
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif
  ANNOTATE_BARRIER_END(&team->t.t_bar);
}
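
// Fork barrier: workers wait here for the master to set up the next parallel
// region and release them; ICVs and (optionally) affinity are applied on the
// way out.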
void __kmp_fork_barrier(int gtid, int tid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#endif /* USE_ITT_BUILD */
  if (team)
    ANNOTATE_BARRIER_END(&team->t.t_bar);

  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
                (team != NULL) ? team->t.t_id : -1, tid));

  // th_team pointer only valid for master thread here
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      // Create itt barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#ifdef KMP_DEBUG
    kmp_info_t **other_threads = team->t.t_threads;
    int i;

    // Verify state
    KMP_MB();
    for (i = 1; i < team->t.t_nproc; ++i) {
      KA_TRACE(500,
               ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
                "== %u.\n",
                gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
                team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
                other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
      KMP_DEBUG_ASSERT(
          (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
           ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    }
#endif

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // 0 indicates setup current task team if nthreads > 1
      __kmp_task_team_setup(this_thr, team, 0);
    }

    /* The master thread may have changed its blocktime between the join and
       fork barriers. Copy the blocktime info to the thread, where
       __kmp_wait_template() can access it when the team struct is not
       guaranteed to exist. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
#endif
    }
  } // master

  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                       TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

  // Early exit for reaping threads releasing forkjoin barrier
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        if (itt_sync_obj)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
      }
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
    return;
  }

  /* We can now assume that a valid team structure has been allocated by the
     master and propagated to all worker threads. The current thread, however,
     may not be part of the team, so we can't blindly assume that the team
     pointer is non-null. */
  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
  KMP_DEBUG_ASSERT(team != NULL);
  tid = __kmp_tid_from_gtid(gtid);

#if KMP_BARRIER_ICV_PULL
  {
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
    if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
      // Copy the initial ICVs from the master's thread struct to the implicit
      // task for this tid.
      KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &team->t.t_threads[0]
                     ->th.th_bar[bs_forkjoin_barrier]
                     .bb.th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PULL

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
  }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
#endif
#if KMP_AFFINITY_SUPPORTED
    // Call dynamic affinity settings
    if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(tid, team->t.t_nproc);
    }
#endif // KMP_AFFINITY_SUPPORTED
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  } else if (proc_bind != proc_bind_false) {
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr),
                     this_thr->th.th_current_place));
    } else {
      __kmp_affinity_set_place(gtid);
    }
  }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      // Get correct barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
    } // (prepare called inside barrier_release)
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
  ANNOTATE_BARRIER_END(&team->t.t_bar);
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
                team->t.t_id, tid));
}
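
// Prepares propagation of updated ICVs to the worker threads' implicit tasks,
// using whichever mechanism was selected at compile time: PULL, PUSH, or a
// linear copy.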
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);

  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

#if KMP_BARRIER_ICV_PULL
  /* Copy ICVs into the master thread's fixed ICVs, where all of the worker
     threads can access them and make their own copies after the barrier. */
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  copy_icvs(
      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
      new_icvs);
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  // The ICVs will be propagated in the fork barrier, so nothing needs to be
  // done here.
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#else
  // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  ngo_load(new_icvs);
  for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
  }
  ngo_sync();
#endif // KMP_BARRIER_ICV_PULL
}