#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
/* Calculate the identifier of the current thread. Returns the gtid, or
   KMP_GTID_DNE if this thread has not been assigned one yet. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* Scan the registered threads and find the one whose recorded stack
     contains the address of our local variable. */
  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* The internal algorithm failed; fall back to thread-local storage. */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();
  if (i < 0)
    return i;

  /* Dynamically refine the stack window recorded for this (uber) thread. */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint the stack bounds for this thread since they have been refined. */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
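// Sketch (usage illustration, simplified): __kmp_get_global_thread_id_reg is
// the "register if unknown" variant -- an entry point that may be the first
// OpenMP call on a foreign thread funnels through a pattern like the
// following (hypothetical helper, not runtime code):
#if 0
static int ensure_gtid(void) {
  int gtid = __kmp_get_global_thread_id_reg(); // registers a new root if needed
  KMP_DEBUG_ASSERT(gtid >= 0);
  return gtid;
}
#endif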
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking uber threads since they use refinement and cannot
     overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_msg(kmp_ms_fatal, KMP_MSG(StackOverlap),
                    KMP_HNT(ChangeStackLimit), __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = (char *)p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok; abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
    // Simulate abort() on Windows* OS to avoid the pop-up error box.
    raise(SIGABRT);
    _exit(3); // Just in case the signal is ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;
  // Check that no other live threads are still registered with the library.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }
  // Assume we are alone; it should now be safe to reset the bootstrap locks.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating (rather than
      // FreeLibrary); the OS may already have killed other threads, so reset
      // the bootstrap locks they might still hold.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }
    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
/* Change the library type to "status" and return the old type. */
int __kmp_change_library(int status) {
  int old_status;

  // The low bit of __kmp_yield_init records whether KMP_LIBRARY=throughput.
  old_status = __kmp_yield_init & 1;

  if (status) {
    __kmp_yield_init |= 1;
  } else {
    __kmp_yield_init &= ~1;
  }

  return old_status; // return previous setting
}
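// Sketch (assumption): __kmp_change_library treats the low bit of
// __kmp_yield_init as the "throughput vs. turnaround" flag -- set the bit
// when switching to throughput mode, clear it otherwise, and report what the
// bit was before. A minimal stand-alone model of that bit manipulation:
#if 0
static int toggle_low_bit(int *word, int enable) {
  int old = *word & 1; // previous mode
  if (enable)
    *word |= 1;
  else
    *word &= ~1;
  return old;
}
#endif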
/* __kmp_parallel_deo -- wait until it's our turn in the "ordered" sequence. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* __kmp_parallel_dxo -- signal the next thread in the "ordered" sequence. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
      /* accept blame for "ordered" waiting */
      kmp_info_t *this_thread = __kmp_threads[gtid];
      ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
          this_thread->th.ompt_thread_info.wait_id);
    }
#endif

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* The BARRIER for a SINGLE process section is always explicit. */
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
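// Sketch (assumption about usage, simplified): __kmp_enter_single implements
// the winner selection behind "#pragma omp single" -- each thread bumps its
// private this_construct counter, and the one whose compare-and-swap advances
// the team-wide t_construct counter executes the block. In compiler terms the
// construct lowers roughly to the following (hypothetical wrapper):
#if 0
void user_code_with_single(int gtid, ident_t *loc) {
  if (__kmp_enter_single(gtid, loc, /*push_ws=*/TRUE)) {
    // ... body of the single region, executed by exactly one thread ...
    __kmp_exit_single(gtid);
  }
  // (an implicit barrier would normally follow unless nowait was specified)
}
#endif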
/* Determine how many threads can be reserved for this parallel region.
   Returns 1 if the region should be serialized, otherwise the number of
   threads to use. The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // honor the requested value unchanged
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_ALL_THREADS / OMP_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}
/* Allocate and initialize the worker threads for a newly forked team. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if max level is 0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Propagate any changes to the floating point control registers out to the
   team, avoiding unnecessary writes when the values are unchanged. */
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector).
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Avoid writing to this cache line of the team structure unless we must.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

/* Do the opposite: restore the hardware registers from the values saved in
   the team, but only if they actually differ. */
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
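// Sketch (assumption): the propagateFPControl/updateHWFPControl pair above
// follows a "restore only if changed" idiom so that well-behaved regions pay
// nothing for FP-control inheritance. A simplified model for the MXCSR half,
// reusing the store/load helpers already used above:
#if 0
static void restore_mxcsr_if_changed(kmp_uint32 saved_mxcsr) {
  kmp_uint32 live;
  __kmp_store_mxcsr(&live);
  live &= KMP_X86_MXCSR_MASK;       // compare only the maskable control bits
  if (live != saved_mxcsr)
    __kmp_load_mxcsr(&saved_mxcsr); // reload the team's saved value
}
#endif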
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

#if OMP_40_ENABLED
  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region.
  this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
      ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                     ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                     proc_bind,
#endif
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level.
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

#if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
    ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
    __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

    KMP_MB();

  } else {
    /* this serialized team is already being used: just add another nested
       level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
#endif

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
}
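// Sketch (conceptual, not runtime code): nested serialized parallels reuse a
// single serial team and just bump t_serialized / t_level, pushing one extra
// dispatch buffer per nesting depth (see the reuse branch above). The
// per-entry bookkeeping is essentially:
#if 0
static void note_nested_serial_entry(kmp_team_t *serial_team) {
  ++serial_team->t.t_serialized; // one more enclosing serialized region
  ++serial_team->t.t_level;      // nesting level visible to omp_get_level()
}
#endif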
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc,
#if OMPT_SUPPORT
                    void *unwrapped_task,
#endif
                    microtask_t microtask, launch_t invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
#if OMP_40_ENABLED
  int active_level;
  int teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with
       some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_parallel_id_t ompt_parallel_id;
  ompt_task_id_t ompt_task_id;
  ompt_frame_t *ompt_frame;
  ompt_task_id_t my_task_id;
  ompt_parallel_id_t my_parallel_id;

  if (ompt_enabled) {
    ompt_parallel_id = __ompt_parallel_id_new(gtid);
    ompt_task_id = __ompt_get_task_id_internal(0);
    ompt_frame = __ompt_get_task_frame_internal(0);
  }
#endif

  // Nested level will be an index in the nested nthreads array.
  level = parent_team->t.t_level;
  // Used to launch non-serial teams even if nested is not allowed.
  active_level = parent_team->t.t_active_level;
#if OMP_40_ENABLED
  // Needed to check nesting inside the teams construct.
  teams_level = master_th->th.th_teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // It is either actual or not needed (when active_level > 0).
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled &&
      ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
    int team_size = master_set_numthreads;

    ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
        ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
        OMPT_INVOKER(call_context));
  }
#endif

  master_th->th.th_ident = loc;
#if OMP_40_ENABLED
  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // This is the start of a parallel that is nested inside a teams construct.
    // The team is actual (hot); all workers are ready at the fork barrier.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif
    // Increment our nested depth levels, but not increase the serialization.
    if (parent_team == master_th->th.th_serial_team) {
      // We are in a serialized parallel.
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // Needed so enquiry functions work correctly; restored at join time.
      parent_team->t.t_serialized--;
#if OMPT_SUPPORT
      void *dummy;
      void **exit_runtime_p;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
                                ompt_parallel_id);
        lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
        exit_runtime_p =
            &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
        /* OMPT implicit task begin */
        my_task_id = lw_taskteam.ompt_task_info.task_id;
        my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
          ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
              my_parallel_id, my_task_id);
        }
#endif
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
      }

#if OMPT_SUPPORT
      *exit_runtime_p = NULL;
      if (ompt_enabled) {
#if OMPT_TRACE
        lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
          ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
              ompt_parallel_id, ompt_task_id);
        }

        __ompt_lw_taskteam_unlink(master_th);
        // Clear the task id only after unlinking the task.
        lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif
        if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
          ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
              ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
#if OMPT_SUPPORT
    parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
    parent_team->t.t_invoke = invoker;
    KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // Can only reduce the number of threads dynamically, not increase it.
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels.
      }
      master_th->th.th_set_nproc = 0;
    }

#if USE_DEBUGGER
    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means the debugger doesn't want a change
        master_set_numthreads = nth;
      }
    }
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      if (!parent_team->t.t_invoke(gtid)) {
        KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
      }
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));
    KMP_MB(); /* Flush all pending memory write invalidates. */

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
#endif /* OMP_40_ENABLED */
#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  /* Determine how many threads to use for this parallel region. */
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
#if OMP_40_ENABLED
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
#endif
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);
    // Check if we need to take the forkjoin lock (no need for a serialized
    // parallel out of the teams construct).
    if (nthreads > 1) {
      if ((!get__nested(master_th) && (root->r.r_in_parallel
#if OMP_40_ENABLED
                                       && !enter_teams
#endif
                                       )) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
#if OMP_40_ENABLED
                                       ,
                                       enter_teams
#endif
                                       );
      if (nthreads == 1) {
        // Free the lock for single-thread execution here; for multi-thread
        // execution it will be freed later, after the team is created.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads, restore it now.
  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      master_th->th.th_serial_team->t.t_ident = loc;
#if OMP_40_ENABLED
      if (!ap) {
        // Revert the change made in __kmpc_serialized_parallel().
        master_th->th.th_serial_team->t.t_level--;
#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  unwrapped_task, ompt_parallel_id);
          lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
          exit_runtime_p =
              &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
          my_task_id = lw_taskteam.ompt_task_info.task_id;
          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                ompt_parallel_id, my_task_id);
          }
#endif
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        *exit_runtime_p = NULL;
        if (ompt_enabled) {
#if OMPT_TRACE
          lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                ompt_parallel_id, ompt_task_id);
          }
#endif
          __ompt_lw_taskteam_unlink(master_th);
          // Clear the task id only after unlinking the task.
          lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

          if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
            ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct.
            argv[i] = parent_team->t.t_argv[i];
        }
        // Call special invoker for outer "parallel" of teams construct.
        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          invoker(gtid);
        }
      } else {
#endif /* OMP_40_ENABLED */
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif
        KMP_MB();

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  unwrapped_task, ompt_parallel_id);
          lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
          exit_runtime_p =
              &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
          /* OMPT implicit task begin */
          my_task_id = lw_taskteam.ompt_task_info.task_id;
          my_parallel_id = ompt_parallel_id;
          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                my_parallel_id, my_task_id);
          }
#endif
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        *exit_runtime_p = NULL;
        if (ompt_enabled) {
#if OMPT_TRACE
          lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                my_parallel_id, my_task_id);
          }
#endif
          __ompt_lw_taskteam_unlink(master_th);
          // Clear the task id only after unlinking the task.
          lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

          if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
            ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
#if OMP_40_ENABLED
      }
#endif /* OMP_40_ENABLED */
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      ompt_lw_taskteam_t *lwt =
          (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
      __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
                              ompt_parallel_id);

      lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
      lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
      __ompt_lw_taskteam_link(lwt, master_th);
#endif

      // We were called from GNU native code.
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  }
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  // Only modify the executing flag in the non-serialized case; the serialized
  // case is handled in __kmpc_serialized_parallel.
  master_th->th.th_current_task->td_flags.executing = 0;

#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask || level > teams_level)
#endif /* OMP_40_ENABLED */
  {
    /* Increment our nested depth level */
    KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

#if OMP_40_ENABLED
  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region.
  master_th->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

  if ((nthreads_icv > 0)
#if OMP_40_ENABLED
      || (proc_bind_icv != proc_bind_default)
#endif /* OMP_40_ENABLED */
          ) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }

#if OMP_40_ENABLED
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }
#endif /* OMP_40_ENABLED */

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                               proc_bind,
#endif
                               &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                               proc_bind,
#endif
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask || level > teams_level) {
#endif /* OMP_40_ENABLED */
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
#if OMP_40_ENABLED
  } else {
    // Do not increase the parallel level at the start of the teams construct.
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
#endif /* OMP_40_ENABLED */
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
      team->t.t_sched.chunk != new_sched.chunk)
    team->t.t_sched = new_sched; // set master's schedule as new run-time
  // schedule

#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
#endif

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team. Unless this is a hot team,
    // it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state.
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero new space
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on the stack.
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team.
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
#if OMP_40_ENABLED
  if (ap) {
#endif /* OMP_40_ENABLED */
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
#if OMP_40_ENABLED
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct.
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }
#endif /* OMP_40_ENABLED */

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
#if OMP_40_ENABLED
      && !master_th->th.th_teams_microtask // not in teams construct
#endif /* OMP_40_ENABLED */
      ) {
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin.
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else
#endif /* USE_ITT_NOTIFY */
        if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
            __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark the start of the "parallel" region for VTune.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }
#endif /* USE_ITT_BUILD */

  // Skip __kmp_internal_fork at teams construct; let only masters execute.
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    if (!team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
  }
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // Restore the state outside the region.
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
                                   ompt_parallel_id_t parallel_id,
                                   fork_context_e fork_context) {
  ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
  if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
        parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
  }

  task_info->frame.reenter_runtime_frame = NULL;
  __kmp_join_restore_state(thread, team);
}
#endif /* OMPT_SUPPORT */
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
#if OMP_40_ENABLED
                     ,
                     int exit_teams
#endif /* OMP_40_ENABLED */
                     ) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;
  int i;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  if (ompt_enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  if (team->t.t_serialized) {
#if OMP_40_ENABLED
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct.
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // We haven't incremented it earlier at the start of the teams
        // construct, so do it here - at the end of the teams construct.
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // We are exiting a parallel inside teams; increment serialization so
        // it can be restored in __kmpc_end_serialized_parallel.
        team->t.t_serialized++;
      }
    }
#endif /* OMP_40_ENABLED */
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

#if OMP_40_ENABLED
  if (!exit_teams)
#endif /* OMP_40_ENABLED */
  {
    // No barrier for internal teams at exit from the teams construct, but
    // there is a barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
  }
#if OMP_40_ENABLED
  else {
    master_th->th.th_task_state = 0; // no tasking in teams
  }
#endif /* OMP_40_ENABLED */

  KMP_MB();

#if OMPT_SUPPORT
  ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_caller_destroy(
        (__itt_caller)team->t
            .t_stack_id); // destroy the stack stitching id after join barrier
  }

  // Mark the end of the "parallel" region for VTune.
  if (team->t.t_active_level == 1
#if OMP_40_ENABLED
      && !master_th->th.th_teams_microtask /* not in teams construct */
#endif /* OMP_40_ENABLED */
      ) {
    master_th->th.th_ident = loc;
    // Only one notification scheme (either "submit" or "forking/joined").
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

#if OMP_40_ENABLED
  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // We need to leave the team structure intact at the end of a parallel
    // inside the teams construct, so that the same (hot) team works at the
    // next parallel; only adjust nesting levels.

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);

    /* Restore the number of threads in the team if needed */
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of the non-used threads of the team.
      for (i = old_num; i < new_num; ++i) {
        // Re-initialize the thread's barrier data.
        int b;
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize the thread's task state.
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled) {
      __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
    }
#endif

    return;
  }
#endif /* OMP_40_ENABLED */

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* The following lock separates the parallel user code in this region from
     the serial user code that follows. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level)
#endif /* OMP_40_ENABLED */
  {
    /* Decrement our nested depth level */
    KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
      ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
          parallel_id, task_info->task_id);
    }
    task_info->frame.exit_runtime_frame = NULL;
    task_info->task_id = 0;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  // Restore master thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif /* OMP_40_ENABLED */

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* This race was fun to find. Make sure the following is in the critical
     region; otherwise assertions may fail occasionally since the old team may
     be reallocated and the hierarchy appears inconsistent. */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team.
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore the state at this level.
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread.
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  if (ompt_enabled) {
    __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
/* Check whether we should push an internal control record onto the
   serial team's stack. If so, do it. */
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
/* Changes the nproc ICV for the calling thread */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
          ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() was called.
    hot_team->t.t_size_changed = -1;
  }
}
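/* Hedged usage sketch (standard OpenMP user code, not part of the runtime):
   omp_set_num_threads() reaches __kmp_set_num_threads() above, which updates
   the nproc ICV and may immediately trim an idle hot team.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       omp_set_num_threads(2); // internally: set__nproc() + hot team trim
     #pragma omp parallel
       {
         printf("thread %d of %d\n", omp_get_thread_num(),
                omp_get_num_threads());
       }
       return 0;
     }
*/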
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // Negative values are ignored; the last valid setting stays in effect and a
    // warning is issued (if warnings are enabled via KMP_WARNINGS).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // Value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT];
    // zero is allowed (implementation-defined behavior).
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // Clip the input to the upper limit (implementation-defined behavior).
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid; valid values fit in one of two
  // intervals - standard or extended.
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: an invalid chunk indicates the
      // unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

  switch (th_type) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    *chunk = 0; // chunk was not set, show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
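/* Hedged usage sketch (standard OpenMP user code, not runtime code): the
   kind/chunk pair maintained by __kmp_set_schedule()/__kmp_get_schedule()
   above backs the omp_set_schedule()/omp_get_schedule() entry points.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       omp_sched_t kind;
       int chunk;
       omp_set_schedule(omp_sched_dynamic, 4); // runtime schedule = (dynamic,4)
       omp_get_schedule(&kind, &chunk);
       printf("kind=%d chunk=%d\n", (int)kind, chunk);
       return 0;
     }
*/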
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // AC: we are in a teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }
#endif

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
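/* Hedged usage sketch (standard OpenMP user code, not runtime code):
   omp_get_ancestor_thread_num() maps onto the routine above; level 0 is the
   implicit outer level, and asking for the current level returns the caller's
   own thread number.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       omp_set_nested(1);
     #pragma omp parallel num_threads(2)
     #pragma omp parallel num_threads(2)
       printf("level-1 ancestor: %d, my tid: %d\n",
              omp_get_ancestor_thread_num(1), omp_get_thread_num());
       return 0;
     }
*/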
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // AC: we are in a teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }
#endif

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // Create the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided; __kmp_sched keeps its original value so the
  // user can set KMP_SCHEDULE multiple times.
  kmp_r_sched_t r_sched;

  if (__kmp_sched == kmp_sch_static) {
    // replace STATIC with the more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (__kmp_sched == kmp_sch_guided_chunked) {
    // replace GUIDED with the more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else {
    // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was never set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
/* Allocate a new argv array for the team; copy and discard the old one. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if heap space was previously allocated for the args, free it */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for the arguments on the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
  }
}

static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
#endif /* OMP_40_ENABLED */

  kmp_internal_control_t g_icvs = {
    0, // int serial_nesting_level; corresponds to value of th_team_serialized
    (kmp_int8)__kmp_dflt_nested, // int nested; internal control for nested
    // parallelism (per thread)
    (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
    // adjustment of threads (per thread)
    (kmp_int8)__kmp_env_blocktime, // int bt_set; whether blocktime is
    // explicitly set
    __kmp_dflt_blocktime, // int blocktime; internal control for blocktime
#if KMP_USE_MONITOR
    __kmp_bt_intervals, // int bt_intervals; internal control for blocktime
    // intervals
#endif
    __kmp_dflt_team_nth, // int nproc; internal control for # of threads for
    // the next parallel region (per thread)
    __kmp_dflt_max_active_levels, // int max_active_levels
    r_sched, // kmp_r_sched_t sched; internal control for runtime schedule
    // {sched,chunk} pair
#if OMP_40_ENABLED
    __kmp_nested_proc_bind.bind_types[0],
    __kmp_default_device,
#endif /* OMP_40_ENABLED */
    NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serial like in save_inter_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
  root->r.r_nested = __kmp_dflt_nested;

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          0, // root parallel id
#endif
#if OMP_40_ENABLED
                          __kmp_nested_proc_bind.bind_types[0],
#endif
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
  root_team->t.t_sched.chunk = r_sched.chunk;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          0, // root parallel id
#endif
#if OMP_40_ENABLED
                          __kmp_nested_proc_bind.bind_types[0],
#endif
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
  hot_team->t.t_sched.chunk = r_sched.chunk;
  hot_team->t.t_size_changed = 0;
}
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
    ) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  // Team id is *not* a key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      __kmp_printf("%2d", gtid);
      if (__kmp_threads != NULL) {
        __kmp_printf(" %p", __kmp_threads[gtid]);
      }
      if (__kmp_root != NULL) {
        __kmp_printf(" %p", __kmp_root[gtid]);
      }
      __kmp_printf("\n");
    }
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n"
               "------------------------------\n");
  if (__kmp_threads != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:       %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:     ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:  ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:      %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Master:       ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:  %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:    %2d\n", thread->th.th_set_nproc);
#if OMP_40_ENABLED
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
#endif
        __kmp_print_structure_thread("    Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n"
               "------------------------------\n");
  if (__kmp_root != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    Nested?:      %2d\n", root->r.r_nested);
        __kmp_printf("    In Parallel:  %2d\n", root->r.r_in_parallel);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  __kmp_printf("\n------------------------------\nTeams\n"
               "------------------------------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    int i;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
    __kmp_printf("    Master TID:       %2d\n", team->t.t_master_tid);
    __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
    __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf("    Thread %2d:      ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out __kmp_thread_pool and __kmp_team_pool.
  __kmp_printf("\n------------------------------\nPools\n"
               "------------------------------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}
//---------------------------------------------------------------------------
//  Stuff for per-thread fast random number generator
//  Table of primes
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
//---------------------------------------------------------------------------
//  __kmp_get_random: Get a random number using a linear congruential method.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
/* This function attempts to create free entries in __kmp_threads and
   __kmp_root, and returns the number of free entries generated.

   For Windows* OS static library, the first mechanism used is to reclaim array
   entries for root threads that are already dead.

   On all platforms, expansion is attempted on the arrays __kmp_threads and
   __kmp_root, with appropriate update of __kmp_threads_capacity. Capacity is
   increased by doubling, clipped to __kmp_tp_capacity if a threadprivate cache
   array has been created; synchronization with __kmpc_threadprivate_cached is
   done using __kmp_tp_cached_lock.

   After any dead-root reclamation, the function first tries to generate nWish
   free slots; if that is not possible it falls back to nNeed. If nNeed is
   zero, a best-effort attempt is made to satisfy nWish as far as possible. */
static int __kmp_expand_threads(int nWish, int nNeed) {
  int added = 0;
  int old_tp_cached;
  int __kmp_actual_max_nth;

  if (nNeed > nWish) /* normalize the arguments */
    nWish = nNeed;
#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim dead roots */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
  if (nWish) {
    nWish -= added;
    if (nWish < 0)
      nWish = 0;
  }
#endif
  if (nWish <= 0)
    return added;

  while (1) {
    int nTarget;
    int minimumRequiredCapacity;
    int newCapacity;
    kmp_info_t **newThreads;
    kmp_root_t **newRoot;

    old_tp_cached = __kmp_tp_cached;
    __kmp_actual_max_nth =
        old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
    KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);

    /* compute expansion headroom to check if we can expand and whether to aim
       for nWish or nNeed */
    nTarget = nWish;
    if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
      /* can't fulfil nWish, so try nNeed */
      if (nNeed) {
        nTarget = nNeed;
        if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
          /* possible expansion too small -- give up */
          break;
        }
      } else {
        /* best-effort */
        nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
        if (!nTarget) {
          /* can't expand at all -- give up */
          break;
        }
      }
    }
    minimumRequiredCapacity = __kmp_threads_capacity + nTarget;

    newCapacity = __kmp_threads_capacity;
    do {
      newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1)
                        ? (newCapacity << 1)
                        : __kmp_actual_max_nth;
    } while (newCapacity < minimumRequiredCapacity);
    newThreads = (kmp_info_t **)__kmp_allocate(
        (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity +
        CACHE_LINE);
    newRoot = (kmp_root_t **)((char *)newThreads +
                              sizeof(kmp_info_t *) * newCapacity);
    KMP_MEMCPY(newThreads, __kmp_threads,
               __kmp_threads_capacity * sizeof(kmp_info_t *));
    KMP_MEMCPY(newRoot, __kmp_root,
               __kmp_threads_capacity * sizeof(kmp_root_t *));
    memset(newThreads + __kmp_threads_capacity, 0,
           (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *));
    memset(newRoot + __kmp_threads_capacity, 0,
           (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *));

    if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      /* A threadprivate cache was allocated while we were allocating the
         expanded array, and the new capacity exceeds the cache capacity, so
         deallocate and retry. First check of a double-check pair. */
      __kmp_free(newThreads);
      continue; /* start over and try again */
    }
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      /* Same check as above, but with the lock held so the result is final. */
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
      __kmp_free(newThreads);
      continue; /* start over and try again */
    } else {
      /* success */
      *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
      *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
      added += newCapacity - __kmp_threads_capacity;
      *(volatile int *)&__kmp_threads_capacity = newCapacity;
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
      break; /* succeeded, so we can exit the loop */
    }
  }
  return added;
}
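/* Hedged sketch (illustrative helper, not runtime code): the capacity growth
   above doubles the array size until the required target fits, clipping at the
   permitted maximum.

     static int grow_capacity(int capacity, int required, int max_allowed) {
       do {
         capacity =
             capacity <= (max_allowed >> 1) ? (capacity << 1) : max_allowed;
       } while (capacity < required);
       return capacity;
     }
*/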
/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if this is
   the thread that calls from __kmp_do_serial_initialize(). */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* Slot #0 is reserved for the initial thread, so when this thread is not the
     initial one, exclude that slot from the capacity check. */
  capacity = __kmp_threads_capacity;
  if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }

  /* see if there are too many threads */
  if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) {
    if (__kmp_tp_cached) {
      __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
                KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
    } else {
      __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
                KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
    }
  }

  /* find an available thread slot; don't reassign the zero slot since it is
     reserved for the initial thread */
  for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
       gtid++)
    ;
  KA_TRACE(1,
           ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
  KMP_ASSERT(gtid < __kmp_threads_capacity);

  /* update global accounting */
  __kmp_all_nth++;
  TCW_4(__kmp_nth, __kmp_nth + 1);

  // If __kmp_adjust_gtid_mode is set, use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed TLS) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary; middle initialization might not
     have occurred yet. */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* setup this new hierarchy */
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
    KMP_DEBUG_ASSERT(!root->r.r_root_team);
  }

#if KMP_STATS_ENABLED
  // Initialize stats as soon as possible (right after gtid assignment).
  __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
  KMP_SET_THREAD_STATE(SERIAL_REGION);
#endif
  __kmp_initialize_root(root);

  /* setup new root thread structure */
  if (root->r.r_uber_thread) {
    root_thread = root->r.r_uber_thread;
  } else {
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
    if (__kmp_storage_map) {
      __kmp_print_thread_storage_map(root_thread, gtid);
    }
    root_thread->th.th_info.ds.ds_gtid = gtid;
    root_thread->th.th_root = root;
    if (__kmp_env_consistency_check) {
      root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
    }
#if USE_FAST_MEMORY
    __kmp_initialize_fast_memory(root_thread);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
    KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
    __kmp_initialize_bget(root_thread);
#endif
    __kmp_init_random(root_thread); // Initialize random number generator
  }

  /* setup the serial team held in reserve by the root thread */
  if (!root_thread->th.th_serial_team) {
    kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team =
        __kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                            0, // root parallel id
#endif
#if OMP_40_ENABLED
                            proc_bind_default,
#endif
                            &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(root_thread->th.th_serial_team);
  KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                root_thread->th.th_serial_team));

  /* drop root_thread into place */
  TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);

  root->r.r_root_team->t.t_threads[0] = root_thread;
  root->r.r_hot_team->t.t_threads[0] = root_thread;
  root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
  // AC: the team is created in reserve, not for execution (it is unused so far)
  root_thread->th.th_serial_team->t.t_serialized = 0;
  root->r.r_uber_thread = root_thread;

  /* initialize the thread, get it ready to go */
  __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
  TCW_4(__kmp_init_gtid, TRUE);

  /* prepare the master thread for get_gtid() */
  __kmp_gtid_set_specific(gtid);

#if USE_ITT_BUILD
  __kmp_itt_thread_name(gtid);
#endif /* USE_ITT_BUILD */

#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_create_worker(gtid, root_thread, __kmp_stksize);
  KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);

  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
#endif
    }
  }
  KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
                   KMP_INIT_BARRIER_STATE);

#if KMP_AFFINITY_SUPPORTED
#if OMP_40_ENABLED
  root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif

  if (TCR_4(__kmp_init_middle)) {
    __kmp_affinity_set_init_mask(gtid, TRUE);
  }
#endif /* KMP_AFFINITY_SUPPORTED */

  __kmp_root_counter++;

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}
#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
// Resets a root thread and clears its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so clear r_hot_team before
  // calling __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close the handle of the root duplicated in __kmp_create_worker. */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPT_SUPPORT
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
    int gtid = __kmp_get_gtid();
    __ompt_thread_end(ompt_thread_initial, gtid);
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put the root thread into __kmp_thread_pool, so reap it instead
  // of freeing it.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be OK, since unregister_root_current_thread is never
     called during an abort, only during a normal close. Furthermore, if you
     have the forkjoin lock, you should never try to get the initz lock. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

#if OMP_45_ENABLED
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }
#endif

  __kmp_reset_root(gtid, root);

  /* free up this thread slot */
  __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
#endif

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held.
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
               "ptask=%p\n",
               gtid, tid, this_thr, team, this_thr->th.th_current_task,
               team->t.t_implicit_task_taskdata[tid].td_parent);
}
/* TODO optimize with one big memclr, take out what isn't needed, split
   responsibility to workers as much as possible, and delay initialization of
   features as much as possible. */
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* this_thr->th.th_info.ds.ds_gtid is setup in
     __kmp_allocate_thread/create_worker.
     this_thr->th.th_serial_team is setup in __kmp_allocate_thread. */
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  KMP_MB();

  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this will be set when tasking code is exited in wait
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
#if OMP_40_ENABLED
  this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
#endif
  this_thr->th.th_root = master->th.th_root;

  /* setup the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;
  TCW_PTR(this_thr->th.th_sleep_loc, NULL);

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  /* TODO no worksharing in speculative threads */
  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

#ifdef BUILD_TV
  this_thr->th.th_local.tv_data = 0;
#endif

  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc since this will never change for the team.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
#if OMP_45_ENABLED
    dispatch->th_doacross_buf_idx = 0;
#endif
    if (!dispatch->th_disp_buffer) {
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size, "th_%d.th_dispatch.th_disp_buffer "
                       "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED     */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  if (!this_thr->th.th_task_state_memo_stack) {
    size_t i;
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
         ++i) // zero init the stack
      this_thr->th.th_task_state_memo_stack[i] = 0;
  }

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_MB();
}
/* Allocate a new thread for the requesting team. This is only called from
   within a forkjoin critical section. First try to get an available thread
   from the thread pool; if none is available, fork a new one. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool */
  if (__kmp_thread_pool) {

    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    // Don't touch th_active_in_pool or th_active; the worker thread adjusts
    // those flags as it sleeps/awakens.
    __kmp_thread_pool_nth--;

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
    KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If the thread entered the pool via __kmp_free_thread, wait_flag should
    // not be KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  KMP_ASSERT(__kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

#if KMP_USE_MONITOR
  // If this is the first worker thread the RTL is creating, also create the
  // monitor thread. Check TCR_4(__kmp_init_monitor) before acquiring the lock,
  // since monitor creation & initialization is done unlocked.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
#if KMP_OS_WINDOWS
      // AC: wait until the monitor has started; otherwise it may not get a
      // chance to start before library shutdown in load/unload loops.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
#endif
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }
#endif

  KMP_MB();
  for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
    KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's master thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                          0, // root parallel id
#endif
#if OMP_40_ENABLED
                                          proc_bind_default,
#endif
                                          &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not
  // for execution (it is unused for now)
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

#if USE_FAST_MEMORY
  __kmp_initialize_fast_memory(new_thr);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);
#endif

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20,
           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // If __kmp_adjust_gtid_mode is set, use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed TLS) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}
/* Reinitialize team for reuse.
   The hot team code calls this at every fork barrier, so EPCC barrier tests
   are extremely sensitive to changes here, especially writes to the team
   struct, which cause a cache invalidation in all threads. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure.
   This assumes t_threads and t_max_nproc are already set.
   The arguments are not touched. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched = new_icvs->sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;
  __kmp_init_lock(&team->t.t_single_lock);

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
/* Set the full mask for the thread, returning the old mask in old_mask. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(ChangeThreadAffMaskError),
                  KMP_ERR(error), __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED

// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + master thread's partition based upon the parent
// thread's partition, and binds each worker to a thread in its partition.
// The master thread's partition should already include its current binding.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Copy the master thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // serial teams might have the proc_bind policy set to proc_bind_default;
    // it doesn't matter, as we don't rebind the master thread for any policy
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_master: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      th->th.th_first_place = first_place;
      th->th.th_last_place = last_place;
      th->th.th_new_place = masters_place;

      KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;

  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place full; don't add extra
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;

  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      int S = n_places / n_th;
      int s_count, rem, gap, gap_ct;
      rem = n_places - n_th * S;
      gap = rem ? n_th / rem : 1;
      gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = place;
        th->th.th_new_place = place;
        s_count = 1;
        while (s_count < S) {
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count++;
        }
        if (rem && (gap_ct == gap)) {
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          rem--;
          gap_ct = 0;
        }
        th->th.th_last_place = place;
        gap_ct++;

        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = place;
        th->th.th_last_place = place;
        th->th.th_new_place = place;
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}

#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
4720 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4722 ompt_parallel_id_t ompt_parallel_id,
4725 kmp_proc_bind_t new_proc_bind,
4727 kmp_internal_control_t *new_icvs,
4728 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4729 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4732 int use_hot_team = !root->r.r_active;
4735 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4736 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4737 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4740 #if KMP_NESTED_HOT_TEAMS 4741 kmp_hot_team_ptr_t *hot_teams;
4743 team = master->th.th_team;
4744 level = team->t.t_active_level;
4745 if (master->th.th_teams_microtask) {
4746 if (master->th.th_teams_size.nteams > 1 &&
4749 (microtask_t)__kmp_teams_master ||
4750 master->th.th_teams_level <
4756 hot_teams = master->th.th_hot_teams;
4757 if (level < __kmp_hot_teams_max_level && hot_teams &&
4767 if (use_hot_team && new_nproc > 1) {
4768 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
4769 #if KMP_NESTED_HOT_TEAMS 4770 team = hot_teams[level].hot_team;
4772 team = root->r.r_hot_team;
4775 if (__kmp_tasking_mode != tskm_immediate_exec) {
4776 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 4777 "task_team[1] = %p before reinit\n",
4778 team->t.t_task_team[0], team->t.t_task_team[1]));
4785 if (team->t.t_nproc == new_nproc) {
4786 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4789 if (team->t.t_size_changed == -1) {
4790 team->t.t_size_changed = 1;
4792 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4796 kmp_r_sched_t new_sched = new_icvs->sched;
4797 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4798 team->t.t_sched.chunk != new_sched.chunk)
4802 __kmp_reinitialize_team(team, new_icvs,
4803 root->r.r_uber_thread->th.th_ident);
4805 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4806 team->t.t_threads[0], team));
4807 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4810 #if KMP_AFFINITY_SUPPORTED 4811 if ((team->t.t_size_changed == 0) &&
4812 (team->t.t_proc_bind == new_proc_bind)) {
4813 if (new_proc_bind == proc_bind_spread) {
4814 __kmp_partition_places(
4817 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: " 4818 "proc_bind = %d, partition = [%d,%d]\n",
4819 team->t.t_id, new_proc_bind, team->t.t_first_place,
4820 team->t.t_last_place));
4822 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4823 __kmp_partition_places(team);
4826 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4829 }
else if (team->t.t_nproc > new_nproc) {
4831 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
4834 team->t.t_size_changed = 1;
4835 #if KMP_NESTED_HOT_TEAMS 4836 if (__kmp_hot_teams_mode == 0) {
4839 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4840 hot_teams[level].hot_team_nth = new_nproc;
4841 #endif // KMP_NESTED_HOT_TEAMS 4843 for (f = new_nproc; f < team->t.t_nproc; f++) {
4844 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4845 if (__kmp_tasking_mode != tskm_immediate_exec) {
4848 team->t.t_threads[f]->th.th_task_team = NULL;
4850 __kmp_free_thread(team->t.t_threads[f]);
4851 team->t.t_threads[f] = NULL;
4853 #if KMP_NESTED_HOT_TEAMS 4858 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4859 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4860 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4861 for (
int b = 0; b < bs_last_barrier; ++b) {
4862 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4863 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4865 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4869 #endif // KMP_NESTED_HOT_TEAMS 4870 team->t.t_nproc = new_nproc;
4872 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4873 team->t.t_sched.chunk != new_icvs->sched.chunk)
4874 team->t.t_sched = new_icvs->sched;
4875 __kmp_reinitialize_team(team, new_icvs,
4876 root->r.r_uber_thread->th.th_ident);
4879 for (f = 0; f < new_nproc; ++f) {
4880 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4884 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4885 team->t.t_threads[0], team));
4887 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4890 for (f = 0; f < team->t.t_nproc; f++) {
4891 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4892 team->t.t_threads[f]->th.th_team_nproc ==
4898 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4899 #if KMP_AFFINITY_SUPPORTED 4900 __kmp_partition_places(team);
4904 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4905 kmp_affin_mask_t *old_mask;
4906 if (KMP_AFFINITY_CAPABLE()) {
4907 KMP_CPU_ALLOC(old_mask);
4912 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
4915 team->t.t_size_changed = 1;
4917 #if KMP_NESTED_HOT_TEAMS 4918 int avail_threads = hot_teams[level].hot_team_nth;
4919 if (new_nproc < avail_threads)
4920 avail_threads = new_nproc;
4921 kmp_info_t **other_threads = team->t.t_threads;
4922 for (f = team->t.t_nproc; f < avail_threads; ++f) {
4926 kmp_balign_t *balign = other_threads[f]->th.th_bar;
4927 for (b = 0; b < bs_last_barrier; ++b) {
4928 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4929 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4931 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4935 if (hot_teams[level].hot_team_nth >= new_nproc) {
4938 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4939 team->t.t_nproc = new_nproc;
4945 hot_teams[level].hot_team_nth = new_nproc;
4946 #endif // KMP_NESTED_HOT_TEAMS 4947 if (team->t.t_max_nproc < new_nproc) {
4949 __kmp_reallocate_team_arrays(team, new_nproc);
4950 __kmp_reinitialize_team(team, new_icvs, NULL);
4953 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4958 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
4962 for (f = team->t.t_nproc; f < new_nproc; f++) {
4963 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
4964 KMP_DEBUG_ASSERT(new_worker);
4965 team->t.t_threads[f] = new_worker;
4968 (
"__kmp_allocate_team: team %d init T#%d arrived: " 4969 "join=%llu, plain=%llu\n",
4970 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
4971 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4972 team->t.t_bar[bs_plain_barrier].b_arrived));
4976 kmp_balign_t *balign = new_worker->th.th_bar;
4977 for (b = 0; b < bs_last_barrier; ++b) {
4978 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4979 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
4980 KMP_BARRIER_PARENT_FLAG);
4982 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4988 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4989 if (KMP_AFFINITY_CAPABLE()) {
4991 __kmp_set_system_affinity(old_mask, TRUE);
4992 KMP_CPU_FREE(old_mask);
4995 #if KMP_NESTED_HOT_TEAMS 4997 #endif // KMP_NESTED_HOT_TEAMS 4999 int old_nproc = team->t.t_nproc;
5001 __kmp_initialize_team(team, new_nproc, new_icvs,
5002 root->r.r_uber_thread->th.th_ident);
5005 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5006 for (f = 0; f < team->t.t_nproc; ++f)
5007 __kmp_initialize_info(team->t.t_threads[f], team, f,
5008 __kmp_gtid_from_tid(f, team));
5015 for (f = old_nproc; f < team->t.t_nproc; ++f)
5016 team->t.t_threads[f]->th.th_task_state =
5017 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5020 team->t.t_threads[0]->th.th_task_state;
5021 for (f = old_nproc; f < team->t.t_nproc; ++f)
5022 team->t.t_threads[f]->th.th_task_state = old_state;
5026 for (f = 0; f < team->t.t_nproc; ++f) {
5027 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5028 team->t.t_threads[f]->th.th_team_nproc ==
5034 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5035 #if KMP_AFFINITY_SUPPORTED 5036 __kmp_partition_places(team);
5042 kmp_info_t *master = team->t.t_threads[0];
5043 if (master->th.th_teams_microtask) {
5044 for (f = 1; f < new_nproc; ++f) {
5046 kmp_info_t *thr = team->t.t_threads[f];
5047 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5048 thr->th.th_teams_level = master->th.th_teams_level;
5049 thr->th.th_teams_size = master->th.th_teams_size;
5053 #if KMP_NESTED_HOT_TEAMS 5057 for (f = 1; f < new_nproc; ++f) {
5058 kmp_info_t *thr = team->t.t_threads[f];
5060 kmp_balign_t *balign = thr->th.th_bar;
5061 for (b = 0; b < bs_last_barrier; ++b) {
5062 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5063 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5065 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5070 #endif // KMP_NESTED_HOT_TEAMS 5073 __kmp_alloc_argv_entries(argc, team, TRUE);
5074 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5078 KF_TRACE(10, (
" hot_team = %p\n", team));
5081 if (__kmp_tasking_mode != tskm_immediate_exec) {
5082 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 5083 "task_team[1] = %p after reinit\n",
5084 team->t.t_task_team[0], team->t.t_task_team[1]));
5089 __ompt_team_assign_id(team, ompt_parallel_id);
5099 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5102 if (team->t.t_max_nproc >= max_nproc) {
5104 __kmp_team_pool = team->t.t_next_pool;
5107 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5109 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and " 5110 "task_team[1] %p to NULL\n",
5111 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5112 team->t.t_task_team[0] = NULL;
5113 team->t.t_task_team[1] = NULL;
5116 __kmp_alloc_argv_entries(argc, team, TRUE);
5117 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5120 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5121 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5124 for (b = 0; b < bs_last_barrier; ++b) {
5125 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5127 team->t.t_bar[b].b_master_arrived = 0;
5128 team->t.t_bar[b].b_team_arrived = 0;
5134 team->t.t_proc_bind = new_proc_bind;
5137 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5141 __ompt_team_assign_id(team, ompt_parallel_id);
5152 team = __kmp_reap_team(team);
5153 __kmp_team_pool = team;
5158 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5161 team->t.t_max_nproc = max_nproc;
5164 __kmp_allocate_team_arrays(team, max_nproc);
5166 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5167 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5169 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 5171 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5172 team->t.t_task_team[0] = NULL;
5174 team->t.t_task_team[1] = NULL;
5177 if (__kmp_storage_map) {
5178 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5182 __kmp_alloc_argv_entries(argc, team, FALSE);
5183 team->t.t_argc = argc;
5186 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5187 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5190 for (b = 0; b < bs_last_barrier; ++b) {
5191 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5193 team->t.t_bar[b].b_master_arrived = 0;
5194 team->t.t_bar[b].b_team_arrived = 0;
5200 team->t.t_proc_bind = new_proc_bind;
5204 __ompt_team_assign_id(team, ompt_parallel_id);
5205 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5228 KMP_DEBUG_ASSERT(root);
5229 KMP_DEBUG_ASSERT(team);
5230 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5231 KMP_DEBUG_ASSERT(team->t.t_threads);
5233 int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
5238 level = team->t.t_active_level - 1;
5239 if (master->th.th_teams_microtask) {
5240 if (master->th.th_teams_size.nteams > 1) {
5244 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5245 master->th.th_teams_level == team->t.t_level) {
5250 hot_teams = master->th.th_hot_teams;
5251 if (level < __kmp_hot_teams_max_level) {
5252 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
#endif // KMP_NESTED_HOT_TEAMS

  TCW_SYNC_PTR(team->t.t_pkfn, NULL);
  team->t.t_copyin_counter = 0;
5265 if (!use_hot_team) {
5266 if (__kmp_tasking_mode != tskm_immediate_exec) {
5268 for (f = 1; f < team->t.t_nproc; ++f) {
5269 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5270 kmp_info_t *th = team->t.t_threads[f];
5271 volatile kmp_uint32 *state = &th->th.th_reap_state;
5272 while (*state != KMP_SAFE_TO_REAP) {
5276 if (!__kmp_is_thread_alive(th, &ecode)) {
5277 *state = KMP_SAFE_TO_REAP;
5282 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5283 if (fl.is_sleeping())
5284 fl.resume(__kmp_gtid_from_thread(th));
5291 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5292 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5293 if (task_team != NULL) {
5294 for (f = 0; f < team->t.t_nproc;
5296 team->t.t_threads[f]->th.th_task_team = NULL;
        KA_TRACE(
            20,
            ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
             __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
        __kmp_free_task_team(master, task_team);
5305 team->t.t_task_team[tt_idx] = NULL;
5311 team->t.t_parent = NULL;
5312 team->t.t_level = 0;
5313 team->t.t_active_level = 0;
5316 for (f = 1; f < team->t.t_nproc; ++f) {
5317 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5318 __kmp_free_thread(team->t.t_threads[f]);
5319 team->t.t_threads[f] = NULL;
5324 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
  __kmp_team_pool = (volatile kmp_team_t *)team;
5332 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5333 kmp_team_t *next_pool = team->t.t_next_pool;
5335 KMP_DEBUG_ASSERT(team);
5336 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5337 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5338 KMP_DEBUG_ASSERT(team->t.t_threads);
5339 KMP_DEBUG_ASSERT(team->t.t_argv);
5344 __kmp_free_team_arrays(team);
5345 if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
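/* __kmp_free_thread (below) returns a worker to the common thread pool. The
   pool is kept sorted by gtid, and __kmp_thread_pool_insert_pt caches the
   last insertion point so the linear scan can usually resume from there
   rather than from the head of the list. Barrier state is scrubbed first so
   a reused thread does not carry a stale parent flag or team pointer. */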
void __kmp_free_thread(kmp_info_t *this_th) {
  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5385 KMP_DEBUG_ASSERT(this_th);
5390 kmp_balign_t *balign = this_th->th.th_bar;
5391 for (b = 0; b < bs_last_barrier; ++b) {
5392 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5393 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5394 balign[b].bb.team = NULL;
5395 balign[b].bb.leaf_kids = 0;
5397 this_th->th.th_task_state = 0;
5400 TCW_PTR(this_th->th.th_team, NULL);
5401 TCW_PTR(this_th->th.th_root, NULL);
5402 TCW_PTR(this_th->th.th_dispatch, NULL);
5406 gtid = this_th->th.th_info.ds.ds_gtid;
5407 if (__kmp_thread_pool_insert_pt != NULL) {
5408 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5409 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5410 __kmp_thread_pool_insert_pt = NULL;
5419 if (__kmp_thread_pool_insert_pt != NULL) {
5420 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5422 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5424 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5425 scan = &((*scan)->th.th_next_pool))
5430 TCW_PTR(this_th->th.th_next_pool, *scan);
5431 __kmp_thread_pool_insert_pt = *scan = this_th;
5432 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5433 (this_th->th.th_info.ds.ds_gtid <
5434 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5435 TCW_4(this_th->th.th_in_pool, TRUE);
5436 __kmp_thread_pool_nth++;
5438 TCW_4(__kmp_nth, __kmp_nth - 1);
#ifdef KMP_ADJUST_BLOCKTIME
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5444 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5445 if (__kmp_nth <= __kmp_avail_proc) {
5446 __kmp_zero_bt = FALSE;
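/* __kmp_launch_thread (below) is the main loop executed by every worker:
   wait on the fork barrier for work, invoke the team's microtask through
   t_invoke, then wait on the join barrier, repeating until
   __kmp_global.g.g_done is set. When OMPT is enabled, the thread's state is
   moved between idle / overhead / work_parallel around each phase. */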
void *__kmp_launch_thread(kmp_info_t *this_thr) {
  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t *(*volatile pteam);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5464 if (__kmp_env_consistency_check) {
5465 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5470 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5471 this_thr->th.ompt_thread_info.wait_id = 0;
5472 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5473 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5474 __ompt_thread_begin(ompt_thread_worker, gtid);
5480 while (!TCR_4(__kmp_global.g.g_done)) {
5481 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5485 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5489 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5494 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5498 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5502 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5505 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5507 ompt_task_info_t *task_info;
5508 ompt_parallel_id_t my_parallel_id;
5510 task_info = __ompt_get_taskinfo(0);
5511 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
5515 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(
            20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                 (*pteam)->t.t_pkfn));
5522 updateHWFPControl(*pteam);
5526 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5528 int tid = __kmp_tid_from_gtid(gtid);
5529 task_info->task_id = __ompt_task_id_new(tid);
5534 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5535 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5536 rc = (*pteam)->t.t_invoke(gtid);
5543 task_info->frame.exit_runtime_frame = NULL;
5545 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                    gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                    (*pteam)->t.t_pkfn));

      __kmp_join_barrier(gtid);
#if OMPT_SUPPORT && OMPT_TRACE
      if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
5560 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5561 my_parallel_id, task_info->task_id);
5563 task_info->frame.exit_runtime_frame = NULL;
5564 task_info->task_id = 0;
5569 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5572 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5573 __ompt_thread_end(ompt_thread_worker, gtid);
5577 this_thr->th.th_task_team = NULL;
5579 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
// significant bits
#endif
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5613 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5614 __kmp_gtid_set_specific(gtid);
5615 #ifdef KMP_TDATA_GTID 5618 __kmp_internal_end_thread(gtid);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }

#endif

void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5662 __kmp_internal_end_library(-1);
5664 __kmp_close_console();
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
5686 ANNOTATE_HAPPENS_BEFORE(thread);
5687 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5688 __kmp_release_64(&flag);
5692 __kmp_reap_worker(thread);
5704 if (thread->th.th_active_in_pool) {
5705 thread->th.th_active_in_pool = FALSE;
5706 KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
5707 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
5711 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5712 --__kmp_thread_pool_nth;
5715 __kmp_free_implicit_task(thread);
5719 __kmp_free_fast_memory(thread);
5722 __kmp_suspend_uninitialize_thread(thread);
5724 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5725 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
#ifdef KMP_ADJUST_BLOCKTIME
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5734 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5735 if (__kmp_nth <= __kmp_avail_proc) {
5736 __kmp_zero_bt = FALSE;
5742 if (__kmp_env_consistency_check) {
5743 if (thread->th.th_cons) {
5744 __kmp_free_cons_stack(thread->th.th_cons);
5745 thread->th.th_cons = NULL;
5749 if (thread->th.th_pri_common != NULL) {
5750 __kmp_free(thread->th.th_pri_common);
5751 thread->th.th_pri_common = NULL;
5754 if (thread->th.th_task_state_memo_stack != NULL) {
5755 __kmp_free(thread->th.th_task_state_memo_stack);
5756 thread->th.th_task_state_memo_stack = NULL;
5760 if (thread->th.th_local.bget_data != NULL) {
5761 __kmp_finalize_bget(thread);
#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
5767 KMP_CPU_FREE(thread->th.th_affin_mask);
5768 thread->th.th_affin_mask = NULL;
5772 __kmp_reap_team(thread->th.th_serial_team);
5773 thread->th.th_serial_team = NULL;
static void __kmp_internal_end(void) {
5784 __kmp_unregister_library();
5791 __kmp_reclaim_dead_roots();
5795 for (i = 0; i < __kmp_threads_capacity; i++)
5797 if (__kmp_root[i]->r.r_active)
5800 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5802 if (i < __kmp_threads_capacity) {
5814 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5815 if (TCR_4(__kmp_init_monitor)) {
5816 __kmp_reap_monitor(&__kmp_monitor);
5817 TCW_4(__kmp_init_monitor, 0);
5819 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR

    for (i = 0; i < __kmp_threads_capacity; i++) {
5827 if (__kmp_root[i]) {
5830 KMP_ASSERT(!__kmp_root[i]->r.r_active);
5839 while (__kmp_thread_pool != NULL) {
5841 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
5842 __kmp_thread_pool = thread->th.th_next_pool;
5844 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5845 thread->th.th_next_pool = NULL;
5846 thread->th.th_in_pool = FALSE;
5847 __kmp_reap_thread(thread, 0);
5849 __kmp_thread_pool_insert_pt = NULL;
5852 while (__kmp_team_pool != NULL) {
5854 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
5855 __kmp_team_pool = team->t.t_next_pool;
5857 team->t.t_next_pool = NULL;
5858 __kmp_reap_team(team);
5861 __kmp_reap_task_teams();
5863 for (i = 0; i < __kmp_threads_capacity; ++i) {
5870 TCW_SYNC_4(__kmp_init_common, FALSE);
5872 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
5880 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5881 if (TCR_4(__kmp_init_monitor)) {
5882 __kmp_reap_monitor(&__kmp_monitor);
5883 TCW_4(__kmp_init_monitor, 0);
5885 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5886 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
5889 TCW_4(__kmp_init_gtid, FALSE);
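/* The two entry points below differ in scope: __kmp_internal_end_library is
   the whole-process shutdown path (reached from the destructor/atexit hooks
   above), while __kmp_internal_end_thread handles a single exiting thread.
   Both follow the same pattern: bail out early if a shutdown or abort is
   already in progress, unregister an uber (root) thread if that is who is
   exiting, and only then take __kmp_initz_lock and __kmp_forkjoin_lock and
   run the common __kmp_internal_end() teardown. */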
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, do not try again; it would not be pretty */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(
      10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                  "shutdown\n"));
  } else if (KMP_UBER_GTID(gtid)) {
    /* unregister ourselves as an uber thread; gtid is no longer valid */
    if (__kmp_root[gtid]->r.r_active) {
      __kmp_global.g.g_abort = -1;
      TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      KA_TRACE(10,
               ("__kmp_internal_end_library: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      KA_TRACE(
          10,
          ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
      __kmp_unregister_root_current_thread(gtid);
5954 #ifdef DUMP_DEBUG_ON_EXIT 5955 if (__kmp_debug_buf)
5956 __kmp_dump_debug_buffer();
5962 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
5965 if (__kmp_global.g.g_abort) {
5966 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
5968 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
5971 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
5972 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
5981 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
5984 __kmp_internal_end();
5986 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
5987 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
5989 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
5991 #ifdef DUMP_DEBUG_ON_EXIT 5992 if (__kmp_debug_buf)
5993 __kmp_dump_debug_buffer();
5997 __kmp_close_console();
6000 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
  /* if we have already cleaned up, do not try again; it would not be pretty */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(10,
           ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                  "shutdown\n"));
    return;
  } else if (KMP_UBER_GTID(gtid)) {
    /* unregister ourselves as an uber thread; gtid is no longer valid */
    if (__kmp_root[gtid]->r.r_active) {
      __kmp_global.g.g_abort = -1;
      TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      KA_TRACE(10,
               ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                    gtid));
      __kmp_unregister_root_current_thread(gtid);
6059 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6062 __kmp_threads[gtid]->th.th_task_team = NULL;
6066 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6071 #if defined KMP_DYNAMIC_LIB 6080 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6084 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6087 if (__kmp_global.g.g_abort) {
6088 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6090 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6093 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6094 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6105 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6107 for (i = 0; i < __kmp_threads_capacity; ++i) {
6108 if (KMP_UBER_GTID(i)) {
6111 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6112 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6113 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6120 __kmp_internal_end();
6122 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6123 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6125 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6127 #ifdef DUMP_DEBUG_ON_EXIT 6128 if (__kmp_debug_buf)
6129 __kmp_dump_debug_buffer();
6136 static long __kmp_registration_flag = 0;
6138 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
}

void __kmp_register_library_startup(void) {
6151 char *name = __kmp_reg_status_name();
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6174 __kmp_env_set(name, __kmp_registration_str, 0);
6176 value = __kmp_env_get(name);
6177 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6187 char *flag_addr_str = NULL;
6188 char *flag_val_str = NULL;
6189 char const *file_name = NULL;
6190 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6191 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6194 long *flag_addr = 0;
6196 KMP_SSCANF(flag_addr_str,
"%p", &flag_addr);
6197 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6198 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6202 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6216 file_name =
"unknown library";
6220 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6221 if (!__kmp_str_match_true(duplicate_ok)) {
6223 __kmp_msg(kmp_ms_fatal,
6224 KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6225 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6227 KMP_INTERNAL_FREE(duplicate_ok);
6228 __kmp_duplicate_library_ok = 1;
6233 __kmp_env_unset(name);
6235 default: { KMP_DEBUG_ASSERT(0); }
break;
  KMP_INTERNAL_FREE((void *)value);
  KMP_INTERNAL_FREE((void *)name);
}

void __kmp_unregister_library(void) {
  char *name = __kmp_reg_status_name();
6249 char *value = __kmp_env_get(name);
6251 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6252 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6253 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6255 __kmp_env_unset(name);
6258 KMP_INTERNAL_FREE(__kmp_registration_str);
6259 KMP_INTERNAL_FREE(value);
6260 KMP_INTERNAL_FREE(name);
6262 __kmp_registration_flag = 0;
6263 __kmp_registration_str = NULL;
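/* Illustrative sketch (not part of the runtime): the registration value set
   by __kmp_register_library_startup has the shape
   "<flag address>-<flag value in hex>-<library file>". A later-loading copy
   of the runtime re-parses that value and, if the recorded address is still
   mapped and holds the recorded value, concludes another instance is live
   and honors KMP_DUPLICATE_LIB_OK. The standalone example below mimics the
   encode/decode round trip with plain snprintf/sscanf; the runtime itself
   uses __kmp_str_format, __kmp_str_split and KMP_SSCANF. Kept under "#if 0"
   so it is never compiled into the library. */
#if 0
#include <cstdio>

static long example_flag = 0xCAFE0042L;

// Returns nonzero when the re-parsed value points back at a live flag.
static int example_registration_roundtrip() {
  char value[256];
  // Encode: address of the flag, its current value in hex, then a file name.
  std::snprintf(value, sizeof(value), "%p-%lx-%s", (void *)&example_flag,
                (unsigned long)example_flag, "libomp.so");

  // Decode: %p / %lx / %s mirror the three '-'-separated fields.
  void *flag_addr = nullptr;
  unsigned long flag_val = 0;
  char file_name[128] = "";
  if (std::sscanf(value, "%p-%lx-%127s", &flag_addr, &flag_val, file_name) != 3)
    return 0; // malformed value: treat as "no live duplicate"

  // A real check would also verify that the address is mapped in this process.
  return flag_addr == (void *)&example_flag &&
         flag_val == (unsigned long)example_flag;
}
#endif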
#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
6273 kmp_cpuid_t cpuid_state = {0};
6274 kmp_cpuid_t *cs_p = &cpuid_state;
6275 __kmp_x86_cpuid(1, 0, cs_p);
6277 if ((cs_p->eax & 0xff0) == 0xB10) {
6278 __kmp_mic_type = mic2;
6279 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6280 __kmp_mic_type = mic3;
6282 __kmp_mic_type = non_mic;
static void __kmp_do_serial_initialize(void) {
  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6304 __kmp_validate_locks();
6307 __kmp_init_allocator();
6312 __kmp_register_library_startup();
6315 if (TCR_4(__kmp_global.g.g_done)) {
6316 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6319 __kmp_global.g.g_abort = 0;
6320 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6323 #if KMP_USE_ADAPTIVE_LOCKS 6324 #if KMP_DEBUG_ADAPTIVE_LOCKS 6325 __kmp_init_speculative_stats();
6328 #if KMP_STATS_ENABLED 6331 __kmp_init_lock(&__kmp_global_lock);
6332 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6333 __kmp_init_lock(&__kmp_debug_lock);
6334 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6335 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6336 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6337 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6338 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6339 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6340 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6341 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6342 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6343 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6344 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6345 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6346 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6347 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6348 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6350 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6352 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6356 __kmp_runtime_initialize();
6358 #if KMP_MIC_SUPPORTED 6359 __kmp_check_mic_type();
6366 __kmp_abort_delay = 0;
6370 __kmp_dflt_team_nth_ub = __kmp_xproc;
6371 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6372 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6374 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6375 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6377 __kmp_max_nth = __kmp_sys_max_nth;
6381 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6383 __kmp_monitor_wakeups =
6384 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6385 __kmp_bt_intervals =
6386 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6389 __kmp_library = library_throughput;
6391 __kmp_static = kmp_sch_static_balanced;
6398 #if KMP_FAST_REDUCTION_BARRIER 6399 #define kmp_reduction_barrier_gather_bb ((int)1) 6400 #define kmp_reduction_barrier_release_bb ((int)1) 6401 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6402 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6403 #endif // KMP_FAST_REDUCTION_BARRIER 6404 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6405 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6406 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6407 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6408 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6409 #if KMP_FAST_REDUCTION_BARRIER 6410 if (i == bs_reduction_barrier) {
6412 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6413 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6414 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6415 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6417 #endif // KMP_FAST_REDUCTION_BARRIER 6419 #if KMP_FAST_REDUCTION_BARRIER 6420 #undef kmp_reduction_barrier_release_pat 6421 #undef kmp_reduction_barrier_gather_pat 6422 #undef kmp_reduction_barrier_release_bb 6423 #undef kmp_reduction_barrier_gather_bb 6424 #endif // KMP_FAST_REDUCTION_BARRIER 6425 #if KMP_MIC_SUPPORTED 6426 if (__kmp_mic_type == mic2) {
6428 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6429 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6431 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6432 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6434 #if KMP_FAST_REDUCTION_BARRIER 6435 if (__kmp_mic_type == mic2) {
6436 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6437 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6439 #endif // KMP_FAST_REDUCTION_BARRIER 6440 #endif // KMP_MIC_SUPPORTED 6444 __kmp_env_checks = TRUE;
6446 __kmp_env_checks = FALSE;
6450 __kmp_foreign_tp = TRUE;
6452 __kmp_global.g.g_dynamic = FALSE;
6453 __kmp_global.g.g_dynamic_mode = dynamic_default;
6455 __kmp_env_initialize(NULL);
6459 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
6460 if (__kmp_str_match_true(val)) {
6461 kmp_str_buf_t buffer;
6462 __kmp_str_buf_init(&buffer);
6463 __kmp_i18n_dump_catalog(&buffer);
6464 __kmp_printf(
"%s", buffer.str);
6465 __kmp_str_buf_free(&buffer);
6467 __kmp_env_free(&val);
6470 __kmp_threads_capacity =
6471 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6473 __kmp_tp_capacity = __kmp_default_tp_capacity(
6474 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6479 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6480 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6481 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6482 __kmp_thread_pool = NULL;
6483 __kmp_thread_pool_insert_pt = NULL;
6484 __kmp_team_pool = NULL;
6491 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6493 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6494 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
6495 sizeof(kmp_info_t *) * __kmp_threads_capacity);
6498 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6500 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6505 gtid = __kmp_register_root(TRUE);
6506 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
6507 KMP_ASSERT(KMP_UBER_GTID(gtid));
6508 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6512 __kmp_common_initialize();
6516 __kmp_register_atfork();
#if !defined KMP_DYNAMIC_LIB
  int rc = atexit(__kmp_internal_end_atexit);
  if (rc != 0) {
    __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
              __kmp_msg_null);
  }
#endif

#if KMP_HANDLE_SIGNALS
  __kmp_install_signals(FALSE);
6540 __kmp_install_signals(TRUE);
6545 __kmp_init_counter++;
6547 __kmp_init_serial = TRUE;
6549 if (__kmp_settings) {
6554 if (__kmp_display_env || __kmp_display_env_verbose) {
6555 __kmp_env_print_2();
6557 #endif // OMP_40_ENABLED 6565 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
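/* __kmp_serial_initialize, __kmp_middle_initialize and
   __kmp_parallel_initialize all use the same double-checked pattern: test
   the init flag without the lock, acquire __kmp_initz_lock, re-test the flag
   (another thread may have completed the job while we waited), and only then
   call the corresponding __kmp_do_*_initialize worker. */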
static void __kmp_do_middle_initialize(void) {
6583 int prev_dflt_team_nth;
6585 if (!__kmp_init_serial) {
6586 __kmp_do_serial_initialize();
6589 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
6593 prev_dflt_team_nth = __kmp_dflt_team_nth;
6595 #if KMP_AFFINITY_SUPPORTED 6598 __kmp_affinity_initialize();
6602 for (i = 0; i < __kmp_threads_capacity; i++) {
6603 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6604 __kmp_affinity_set_init_mask(i, TRUE);
6609 KMP_ASSERT(__kmp_xproc > 0);
6610 if (__kmp_avail_proc == 0) {
6611 __kmp_avail_proc = __kmp_xproc;
6617 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6618 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6623 if (__kmp_dflt_team_nth == 0) {
6624 #ifdef KMP_DFLT_NTH_CORES 6626 __kmp_dflt_team_nth = __kmp_ncores;
6627 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6628 "__kmp_ncores (%d)\n",
6629 __kmp_dflt_team_nth));
6632 __kmp_dflt_team_nth = __kmp_avail_proc;
6633 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6634 "__kmp_avail_proc(%d)\n",
6635 __kmp_dflt_team_nth));
6639 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6640 __kmp_dflt_team_nth = KMP_MIN_NTH;
6642 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6643 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6648 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6650 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6655 for (i = 0; i < __kmp_threads_capacity; i++) {
6656 kmp_info_t *thread = __kmp_threads[i];
6659 if (thread->th.th_current_task->td_icvs.nproc != 0)
6662 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
6667 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6668 __kmp_dflt_team_nth));
6670 #ifdef KMP_ADJUST_BLOCKTIME 6672 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6673 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6674 if (__kmp_nth > __kmp_avail_proc) {
6675 __kmp_zero_bt = TRUE;
6681 TCW_SYNC_4(__kmp_init_middle, TRUE);
6683 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
6700 int gtid = __kmp_entry_gtid();
6703 if (TCR_4(__kmp_init_parallel))
6705 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6706 if (TCR_4(__kmp_init_parallel)) {
6707 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6712 if (TCR_4(__kmp_global.g.g_done)) {
6715 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
6716 __kmp_infinite_loop();
6722 if (!__kmp_init_middle) {
6723 __kmp_do_middle_initialize();
6727 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
6728 KMP_ASSERT(KMP_UBER_GTID(gtid));
6730 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6733 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6734 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6735 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6739 #if KMP_HANDLE_SIGNALS 6741 __kmp_install_signals(TRUE);
6745 __kmp_suspend_initialize();
6747 #if defined(USE_LOAD_BALANCE) 6748 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6749 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6752 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6753 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6757 if (__kmp_version) {
6758 __kmp_print_version_2();
6762 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6765 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
6767 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6772 void __kmp_run_before_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
6774 kmp_disp_t *dispatch;
6779 this_thr->th.th_local.this_construct = 0;
6780 #if KMP_CACHE_MANAGE 6781 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
6783 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6784 KMP_DEBUG_ASSERT(dispatch);
6785 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6789 dispatch->th_disp_index = 0;
6791 dispatch->th_doacross_buf_idx =
6794 if (__kmp_env_consistency_check)
6795 __kmp_push_parallel(gtid, team->t.t_ident);
6800 void __kmp_run_after_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
6802 if (__kmp_env_consistency_check)
6803 __kmp_pop_parallel(gtid, team->t.t_ident);
6805 __kmp_finish_implicit_task(this_thr);
6808 int __kmp_invoke_task_func(
int gtid) {
6810 int tid = __kmp_tid_from_gtid(gtid);
6811 kmp_info_t *this_thr = __kmp_threads[gtid];
6812 kmp_team_t *team = this_thr->th.th_team;
6814 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
6816 if (__itt_stack_caller_create_ptr) {
6817 __kmp_itt_stack_callee_enter(
6819 team->t.t_stack_id);
6822 #if INCLUDE_SSC_MARKS 6823 SSC_MARK_INVOKING();
6828 void **exit_runtime_p;
6829 ompt_task_id_t my_task_id;
6830 ompt_parallel_id_t my_parallel_id;
6833 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
6834 .ompt_task_info.frame.exit_runtime_frame);
6836 exit_runtime_p = &dummy;
6840 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6841 my_parallel_id = team->t.ompt_team_info.parallel_id;
6843 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6844 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
6851 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6852 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6854 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6855 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
6862 *exit_runtime_p = NULL;
6867 if (__itt_stack_caller_create_ptr) {
6868 __kmp_itt_stack_callee_leave(
6870 team->t.t_stack_id);
6873 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
6879 void __kmp_teams_master(
int gtid) {
6881 kmp_info_t *thr = __kmp_threads[gtid];
6882 kmp_team_t *team = thr->th.th_team;
6883 ident_t *loc = team->t.t_ident;
6884 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6885 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6886 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6887 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6888 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6891 #if INCLUDE_SSC_MARKS 6894 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
6896 (
void *)thr->th.th_teams_microtask,
6898 (microtask_t)thr->th.th_teams_microtask,
6899 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
6900 #if INCLUDE_SSC_MARKS 6906 __kmp_join_call(loc, gtid
6915 int __kmp_invoke_teams_master(
int gtid) {
6916 kmp_info_t *this_thr = __kmp_threads[gtid];
6917 kmp_team_t *team = this_thr->th.th_team;
6919 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
6920 KMP_DEBUG_ASSERT((
void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
6921 (
void *)__kmp_teams_master);
6923 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
6924 __kmp_teams_master(gtid);
6925 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);
6954 if (num_teams > __kmp_max_nth) {
6955 if (!__kmp_reserve_warn) {
6956 __kmp_reserve_warn = 1;
6957 __kmp_msg(kmp_ms_warning,
6958 KMP_MSG(CantFormThrTeam, num_teams, __kmp_max_nth),
6959 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
6961 num_teams = __kmp_max_nth;
6965 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6968 if (num_threads == 0) {
6969 if (!TCR_4(__kmp_init_middle))
6970 __kmp_middle_initialize();
6971 num_threads = __kmp_avail_proc / num_teams;
6972 if (num_teams * num_threads > __kmp_max_nth) {
6974 num_threads = __kmp_max_nth / num_teams;
6977 if (num_teams * num_threads > __kmp_max_nth) {
6978 int new_threads = __kmp_max_nth / num_teams;
6979 if (!__kmp_reserve_warn) {
6980 __kmp_reserve_warn = 1;
6981 __kmp_msg(kmp_ms_warning,
6982 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
6983 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
6985 num_threads = new_threads;
6988 thr->th.th_teams_size.nth = num_threads;
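/* Worked example for the clamping above (numbers are illustrative): with
   __kmp_avail_proc == 64, __kmp_max_nth == 64 and num_teams == 8, an
   unspecified num_threads defaults to 64 / 8 = 8 threads per team. If the
   caller instead asked for num_threads == 16, then 8 * 16 = 128 exceeds
   __kmp_max_nth, so the request is reduced to __kmp_max_nth / num_teams = 8
   and a CantFormThrTeam warning is issued once. */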
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7002 kmp_info_t *this_thr = __kmp_threads[gtid];
7008 KMP_DEBUG_ASSERT(team);
7009 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7010 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7013 team->t.t_construct = 0;
7014 team->t.t_ordered.dt.t_value =
7018 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7019 if (team->t.t_max_nproc > 1) {
7021 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7022 team->t.t_disp_buffer[i].buffer_index = i;
7024 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7028 team->t.t_disp_buffer[0].buffer_index = 0;
7030 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7035 KMP_ASSERT(this_thr->th.th_team == team);
7038 for (f = 0; f < team->t.t_nproc; f++) {
7039 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7040 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7045 __kmp_fork_barrier(gtid, 0);
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7049 kmp_info_t *this_thr = __kmp_threads[gtid];
7051 KMP_DEBUG_ASSERT(team);
7052 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7053 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7059 if (__kmp_threads[gtid] &&
7060 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7061 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7062 __kmp_threads[gtid]);
7063 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, " 7064 "team->t.t_nproc=%d\n",
7065 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7067 __kmp_print_structure();
7069 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7070 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7073 __kmp_join_barrier(gtid);
7076 KMP_ASSERT(this_thr->th.th_team == team);
7081 #ifdef USE_LOAD_BALANCE 7085 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7088 kmp_team_t *hot_team;
7090 if (root->r.r_active) {
7093 hot_team = root->r.r_hot_team;
7094 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7095 return hot_team->t.t_nproc - 1;
7100 for (i = 1; i < hot_team->t.t_nproc; i++) {
7101 if (hot_team->t.t_threads[i]->th.th_active) {
7110 static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc) {
7113 int hot_team_active;
7114 int team_curr_active;
7117 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7119 KMP_DEBUG_ASSERT(root);
7120 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7121 ->th.th_current_task->td_icvs.dynamic == TRUE);
7122 KMP_DEBUG_ASSERT(set_nproc > 1);
7124 if (set_nproc == 1) {
7125 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
7134 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7135 hot_team_active = __kmp_active_hot_team_nproc(root);
7136 team_curr_active = pool_active + hot_team_active + 1;
7139 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7140 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d " 7141 "hot team active = %d\n",
7142 system_active, pool_active, hot_team_active));
7144 if (system_active < 0) {
7148 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7149 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
7152 retval = __kmp_avail_proc - __kmp_nth +
7153 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7154 if (retval > set_nproc) {
7157 if (retval < KMP_MIN_NTH) {
7158 retval = KMP_MIN_NTH;
7161 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7169 if (system_active < team_curr_active) {
7170 system_active = team_curr_active;
7172 retval = __kmp_avail_proc - system_active + team_curr_active;
7173 if (retval > set_nproc) {
7176 if (retval < KMP_MIN_NTH) {
7177 retval = KMP_MIN_NTH;
7180 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));
7194 if (TCR_4(__kmp_init_parallel)) {
7195 #if KMP_HANDLE_SIGNALS 7196 __kmp_remove_signals();
7198 TCW_4(__kmp_init_parallel, FALSE);
7201 if (TCR_4(__kmp_init_middle)) {
7202 #if KMP_AFFINITY_SUPPORTED 7203 __kmp_affinity_uninitialize();
7205 __kmp_cleanup_hierarchy();
7206 TCW_4(__kmp_init_middle, FALSE);
7209 KA_TRACE(10, (
"__kmp_cleanup: go serial cleanup\n"));
7211 if (__kmp_init_serial) {
7212 __kmp_runtime_destroy();
7213 __kmp_init_serial = FALSE;
7216 for (f = 0; f < __kmp_threads_capacity; f++) {
7217 if (__kmp_root[f] != NULL) {
7218 __kmp_free(__kmp_root[f]);
7219 __kmp_root[f] = NULL;
7222 __kmp_free(__kmp_threads);
7225 __kmp_threads = NULL;
7227 __kmp_threads_capacity = 0;
7229 #if KMP_USE_DYNAMIC_LOCK 7230 __kmp_cleanup_indirect_user_locks();
7232 __kmp_cleanup_user_locks();
7235 #if KMP_AFFINITY_SUPPORTED 7236 KMP_INTERNAL_FREE(CCAST(
char *, __kmp_cpuinfo_file));
7237 __kmp_cpuinfo_file = NULL;
7240 #if KMP_USE_ADAPTIVE_LOCKS 7241 #if KMP_DEBUG_ADAPTIVE_LOCKS 7242 __kmp_print_speculative_stats();
7245 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7246 __kmp_nested_nth.nth = NULL;
7247 __kmp_nested_nth.size = 0;
7248 __kmp_nested_nth.used = 0;
7249 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7250 __kmp_nested_proc_bind.bind_types = NULL;
7251 __kmp_nested_proc_bind.size = 0;
7252 __kmp_nested_proc_bind.used = 0;
7254 __kmp_i18n_catclose();
7256 #if KMP_STATS_ENABLED 7260 KA_TRACE(10, (
"__kmp_cleanup: exit\n"));
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE;
}

void __kmp_internal_begin(void) {
7293 gtid = __kmp_entry_gtid();
7294 root = __kmp_threads[gtid]->th.th_root;
7295 KMP_ASSERT(KMP_UBER_GTID(gtid));
7297 if (root->r.r_begin)
7299 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7300 if (root->r.r_begin) {
7301 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7305 root->r.r_begin = TRUE;
7307 __kmp_release_lock(&root->r.r_begin_lock, gtid);
void __kmp_user_set_library(enum library_type arg) {
7319 gtid = __kmp_entry_gtid();
7320 thread = __kmp_threads[gtid];
7322 root = thread->th.th_root;
7324 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7326 if (root->r.r_in_parallel) {
7328 KMP_WARNING(SetLibraryIncorrectCall);
7333 case library_serial:
7334 thread->th.th_set_nproc = 0;
7335 set__nproc(thread, 1);
7337 case library_turnaround:
7338 thread->th.th_set_nproc = 0;
7339 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7340 : __kmp_dflt_team_nth_ub);
7342 case library_throughput:
7343 thread->th.th_set_nproc = 0;
7344 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7345 : __kmp_dflt_team_nth_ub);
7348 KMP_FATAL(UnknownLibraryType, arg);
7351 __kmp_aux_set_library(arg);
void __kmp_aux_set_stacksize(size_t arg) {
7355 if (!__kmp_init_serial)
7356 __kmp_serial_initialize();
7359 if (arg & (0x1000 - 1)) {
7360 arg &= ~(0x1000 - 1);
7365 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7368 if (!TCR_4(__kmp_init_parallel)) {
7371 if (value < __kmp_sys_min_stksize)
7372 value = __kmp_sys_min_stksize;
7373 else if (value > KMP_MAX_STKSIZE)
7374 value = KMP_MAX_STKSIZE;
7376 __kmp_stksize = value;
7378 __kmp_env_stksize = TRUE;
7381 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_aux_set_library(enum library_type arg) {
7387 __kmp_library = arg;
7389 switch (__kmp_library) {
7390 case library_serial: {
7391 KMP_INFORM(LibraryIsSerial);
7392 (void)__kmp_change_library(TRUE);
7394 case library_turnaround:
7395 (void)__kmp_change_library(TRUE);
7397 case library_throughput:
7398 (void)__kmp_change_library(FALSE);
7401 KMP_FATAL(UnknownLibraryType, arg);
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
7408 int blocktime = arg;
7414 __kmp_save_internal_controls(thread);
7417 if (blocktime < KMP_MIN_BLOCKTIME)
7418 blocktime = KMP_MIN_BLOCKTIME;
7419 else if (blocktime > KMP_MAX_BLOCKTIME)
7420 blocktime = KMP_MAX_BLOCKTIME;
7422 set__blocktime_team(thread->th.th_team, tid, blocktime);
7423 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
7427 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7429 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
7430 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
7436 set__bt_set_team(thread->th.th_team, tid, bt_set);
7437 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
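/* __kmp_aux_set_blocktime above backs the user-visible kmp_set_blocktime()
   entry point (note the "kmp_set_blocktime" trace tags). The request is
   clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME], recorded for both the
   current team and the serial team via set__blocktime_team / set__bt_set_team
   and, when the monitor thread is in use, converted to wakeup intervals with
   KMP_INTERVALS_FROM_BLOCKTIME. */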
void __kmp_aux_set_defaults(char const *str, int len) {
7452 if (!__kmp_init_serial) {
7453 __kmp_serial_initialize();
7455 __kmp_env_initialize(str);
7459 || __kmp_display_env || __kmp_display_env_verbose
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {
7484 PACKED_REDUCTION_METHOD_T retval;
7488 KMP_DEBUG_ASSERT(loc);
7489 KMP_DEBUG_ASSERT(lck);
7491 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \ 7492 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)) 7493 #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func)) 7495 retval = critical_reduce_block;
7498 team_size = __kmp_get_team_num_threads(global_tid);
7499 if (team_size == 1) {
7501 retval = empty_reduce_block;
7505 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7506 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7508 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 7510 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \ 7513 int teamsize_cutoff = 4;
7515 #if KMP_MIC_SUPPORTED 7516 if (__kmp_mic_type != non_mic) {
7517 teamsize_cutoff = 8;
7520 if (tree_available) {
7521 if (team_size <= teamsize_cutoff) {
7522 if (atomic_available) {
7523 retval = atomic_reduce_block;
7526 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7528 }
else if (atomic_available) {
7529 retval = atomic_reduce_block;
7532 #error "Unknown or unsupported OS" 7533 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || 7536 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS 7538 #if KMP_OS_LINUX || KMP_OS_WINDOWS 7542 if (atomic_available) {
7543 if (num_vars <= 2) {
7544 retval = atomic_reduce_block;
7550 if (atomic_available && (num_vars <= 3)) {
7551 retval = atomic_reduce_block;
7552 }
else if (tree_available) {
7553 if ((reduce_size > (9 *
sizeof(kmp_real64))) &&
7554 (reduce_size < (2000 *
sizeof(kmp_real64)))) {
7555 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7560 #error "Unknown or unsupported OS" 7564 #error "Unknown or unsupported architecture" 7572 if (__kmp_force_reduction_method != reduction_method_not_defined &&
7575 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7577 int atomic_available, tree_available;
7579 switch ((forced_retval = __kmp_force_reduction_method)) {
7580 case critical_reduce_block:
7584 case atomic_reduce_block:
7585 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7586 if (!atomic_available) {
7587 KMP_WARNING(RedMethodNotSupported,
"atomic");
7588 forced_retval = critical_reduce_block;
7592 case tree_reduce_block:
7593 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7594 if (!tree_available) {
7595 KMP_WARNING(RedMethodNotSupported,
"tree");
7596 forced_retval = critical_reduce_block;
7598 #if KMP_FAST_REDUCTION_BARRIER 7599 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7608 retval = forced_retval;
7611 KA_TRACE(10, (
"reduction method selected=%08x\n", retval));
7613 #undef FAST_REDUCTION_TREE_METHOD_GENERATED 7614 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 7620 kmp_int32 __kmp_get_reduce_method(
void) {
7621 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
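/* Note on the ">> 8" above: PACKED_REDUCTION_METHOD_T packs the selected
   reduction method in the upper bits and (for the tree variants) the barrier
   type in the low byte, which is consistent with names such as
   TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER in the selection logic above.
   Shifting right by 8 therefore exposes just the method number to the
   caller. */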