#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"
#include "ompt-specific.h"
#include "ompd-specific.h"
#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
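/* Calculate the identifier of the current thread.  The fast paths use the
   thread-private data or keyed TLS value when __kmp_gtid_mode allows it;
   otherwise the registered threads' stack ranges are scanned for the one
   that contains the address of a local variable. */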
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
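/* As above, but also register a new gtid if this is the first time the
   runtime has seen this (root) thread. */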
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
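/* Check that the registered stack of the given thread does not overlap the
   stack of any other known thread; abort with a fatal StackOverlap message if
   it does.  When storage maps are enabled, the stack bounds are also
   printed. */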
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
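/* Print a line of the storage map: the address range [p1, p2] of size bytes
   belonging to the object described by format/..., prefixed with the gtid of
   the owning thread.  Output goes to kmp_err under the stdio bootstrap
   lock. */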
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0) /* doesn't work, so don't try this next time */
        __kmp_storage_map_verbose = FALSE;
      else {
        char *last;
        int lastNode;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                               localProc >> 1);
        else
          __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
        do {
          last = (char *)p1;
          lastNode = node;
          /* This loop collates adjacent pages with the same host node. */
          do {
            (char *)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);
        } while (p1 <= p2);
#else
        __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                             (char *)p1 + (page_size - 1),
                             __kmp_get_host_node(p1));
        if (p1 < p2) {
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
        }
#endif
      }
    }
  } else
    __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
  va_end(ap);
}
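/* Emit a runtime warning ("OMP warning: ...") to kmp_err, unless warning
   generation has been turned off (__kmp_generate_warnings == kmp_warnings_off). */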
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
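/* Terminate the process after an unrecoverable error: dump the debug buffer
   if one is active, record the abort so other threads can see it, and
   abort the process. */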
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;
  } else {
    __kmp_unregister_library();
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() { __kmp_init_target_task(); }
#endif
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // lpReserved == NULL when the DLL is unloaded via FreeLibrary(),
    // lpReserved != NULL when the process is terminating.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
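/* __kmp_parallel_deo waits until it is this thread's turn to enter the
   ordered region (spinning on the team's t_ordered value); __kmp_parallel_dxo
   passes the turn to the next thread in the team. */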
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
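/* __kmp_enter_single / __kmp_exit_single implement the single construct: the
   first thread of the team to advance t_construct past its local construct
   counter executes the single block; the others skip it. */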
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
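/* Decide how many threads the forking thread may actually reserve for a new
   team: start from the requested number, apply the dynamic adjustment mode
   (load balance, thread limit, random), then clip against
   KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT and the capacity of the
   __kmp_threads array.  Returns the number of threads to use (>= 1). */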
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention group's thread limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is
  // included in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
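/* Allocate/initialize the threads of a newly allocated (non hot) team: install
   the primary thread, fork or reuse worker threads, align their barrier
   arrived counters with the team, and partition affinity places. */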
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
                   // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
                 // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
                 // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually been forked to do real work yet.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We try to avoid unnecessary writes to the relevant cache line in the
// team structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // By checking whether the value needs updating we avoid unnecessary writes
    // that would put the cache line into a written state, causing all threads
    // in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime wants to know whether it should restore these
    // values, so we must ensure the flag is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
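/* __kmp_serialized_parallel: set up (or reuse) the serial team of this thread
   so that a parallel region with one thread can execute inline.  Mirrors the
   bookkeeping done in __kmp_fork_call for the multi-threaded case. */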
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  this_thr->th.th_set_nproc = 0;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; we need to allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // Non-NULL, for debugging only
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used,
       that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking. content was swaped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
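/* Helper predicates for __kmp_fork_call: detect a parallel that is closely
   nested inside a teams construct, and detect the fork that creates the
   league of teams itself. */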
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
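/* Fork a team for a parallel region closely nested inside a teams construct.
   The parent (teams) team is reused: its size may be adjusted to the
   num_threads clause, and the workers are released through the fork barrier
   rather than through a fresh team allocation. */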
static int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
                    ompt_data_t ompt_parallel_data, void *return_address,
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // AC: we are in serialized parallel
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly, will restore at join time
      parent_team->t.t_serialized--;
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;

    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // Don't use lw_taskteam after linking. Content was swapped.

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }

    // AC: need to decrement t_serialized for enquiry functions to work
    // correctly, will restore at join time
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                             exit_frame_p);
    }

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // th_teams_size.nth is specific to this team nested in a teams construct;
  // the team is fully created and we're about to do the actual fork.  Set it
  // here so that the subsequent uses below and in the join see the correct
  // value.
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }

  /* Change number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // AC: only can reduce number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using distributed barrier, we need to run this code block
      // even when the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

#if USE_DEBUGGER
  if (__kmp_debugging) { // Let debugger override number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means debugger doesn't want to change num threads
      master_set_numthreads = nth;
    }
  }
#endif

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    /* else: The proc_bind policy was specified explicitly on the parallel
       clause. This overrides proc-bind-var for this parallel region, but does
       not change proc-bind-var. */
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to correct value for each implicit task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create new stack stitching id before entering fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
/* Serial fork: run the parallel region on a team of one.  Handles the Intel,
   teams-master and GNU entry paths. */
static int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
                       kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  void *args[argc];
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif /* KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM ||
          KMP_ARCH_AARCH64) */

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. content was swaped

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                               exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      if (ap) {
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
      } else {
        for (i = 0; i < argc; ++i)
          // Get args from parent team for teams construct
          argv[i] = parent_team->t.t_argv[i];
      }
      // AC: revert change made in __kmpc_serialized_parallel()
      //     because initial code in teams should have level=0
      team->t.t_level--;
      // AC: call special invoker for outer "parallel" of teams construct
      invoker(gtid);
      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. content was swaped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args, exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    }
  } else if (call_context == fork_context_gnu) {
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
    // don't use lw_taskteam after linking. content was swaped

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
/* Top-level routine used by the compilers to fork a parallel region.  Returns
   TRUE if the primary thread invoked the microtask for a real team, FALSE if
   the region was handled on the serial/GNU path. */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with */
    /* some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  // AC: potentially unsafe, not in sync with library shutdown
  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, 0);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  // Assign affinity to root thread if it hasn't happened yet
  __kmp_assign_root_init_mask();

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  master_th->th.th_ident = loc;

  // Parallel closely nested in teams construct:
  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
                               ompt_parallel_data, return_address, ap);
  } // End parallel closely nested in teams construct

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  // Determine the number of threads
  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   // TODO: get nproc directly from current task
                   : get__nproc_2(parent_team, master_tid);
    // Check if we need to take the forkjoin lock (no need for serialized
    // parallel out of teams construct).
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      /* AC: If we execute teams from parallel region (on host), then teams
         should be created but each can only have 1 thread if nesting is
         disabled. If teams is called from serial region, then teams and their
         threads should be created regardless of the nesting setting. */
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed later after team of threads created
        // and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
                                  &ompt_parallel_data, &return_address,
                                  &parent_task_data, ap);
  } // if (nthreads == 1)

  // GEH: only modify the executing flag in the case when not serialized
  //      serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind_policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
    proc_bind_icv = proc_bind_default;
  } else {
    // No proc_bind clause specified; use current proc-bind-var for this
    // parallel region
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    /* else: The proc_bind policy was specified explicitly on the parallel
       clause. This overrides proc-bind-var for this parallel region, but does
       not change proc-bind-var. */
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      // Do not modify the proc bind icv for the two teams construct forks.
      // These two forks (inside the teams construct) are special cases of a
      // parallel region.
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
  // TODO: parent_team->t.t_level == INT_MAX ???
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set primary thread's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set primary thread's task team to team's task team. Unless this is hot
    // team, it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of primary thread's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store primary thread's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore primary thread's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else {
      // only one notification scheme (either "submit" or "forking/joined",
      // not both)
      if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
        __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
      }
    }
#endif /* USE_ITT_NOTIFY */
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // create new stack stitching id before entering fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep stack stitching id in the serialized parent_team;
      // current team will be used for parallel inside the teams;
      // if parent_team is active, then it already keeps stack stitching id
      // for the league of teams
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If was beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  return TRUE;
}
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // Restore the thread's state to what it was outside the region.
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
2397void __kmp_join_call(
ident_t *loc,
int gtid
2400 enum fork_context_e fork_context
2404 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2406 kmp_team_t *parent_team;
2407 kmp_info_t *master_th;
2411 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2414 master_th = __kmp_threads[gtid];
2415 root = master_th->th.th_root;
2416 team = master_th->th.th_team;
2417 parent_team = team->t.t_parent;
2419 master_th->th.th_ident = loc;
2422 void *team_microtask = (
void *)team->t.t_pkfn;
2426 if (ompt_enabled.enabled &&
2427 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2428 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2433 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2434 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2435 "th_task_team = %p\n",
2436 __kmp_gtid_from_thread(master_th), team,
2437 team->t.t_task_team[master_th->th.th_task_state],
2438 master_th->th.th_task_team));
2439 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2440 team->t.t_task_team[master_th->th.th_task_state]);
2444 if (team->t.t_serialized) {
2445 if (master_th->th.th_teams_microtask) {
2447 int level = team->t.t_level;
2448 int tlevel = master_th->th.th_teams_level;
2449 if (level == tlevel) {
2453 }
else if (level == tlevel + 1) {
2457 team->t.t_serialized++;
2463 if (ompt_enabled.enabled) {
2464 if (fork_context == fork_context_gnu) {
2465 __ompt_lw_taskteam_unlink(master_th);
2467 __kmp_join_restore_state(master_th, parent_team);
2474 master_active = team->t.t_master_active;
2479 __kmp_internal_join(loc, gtid, team);
2481 if (__itt_stack_caller_create_ptr) {
2482 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2484 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2485 team->t.t_stack_id = NULL;
2489 master_th->th.th_task_state =
2492 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2493 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2497 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2498 parent_team->t.t_stack_id = NULL;
2506 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2507 void *codeptr = team->t.ompt_team_info.master_return_address;
2512 if (team->t.t_active_level == 1 &&
2513 (!master_th->th.th_teams_microtask ||
2514 master_th->th.th_teams_size.nteams == 1)) {
2515 master_th->th.th_ident = loc;
2518 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2519 __kmp_forkjoin_frames_mode == 3)
2520 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2521 master_th->th.th_frame_time, 0, loc,
2522 master_th->th.th_team_nproc, 1);
2523 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2524 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2525 __kmp_itt_region_joined(gtid);
2529#if KMP_AFFINITY_SUPPORTED
2532 master_th->th.th_first_place = team->t.t_first_place;
2533 master_th->th.th_last_place = team->t.t_last_place;
2537 if (master_th->th.th_teams_microtask && !exit_teams &&
2538 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2539 team->t.t_level == master_th->th.th_teams_level + 1) {
2544 ompt_data_t ompt_parallel_data = ompt_data_none;
2545 if (ompt_enabled.enabled) {
2546 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2547 if (ompt_enabled.ompt_callback_implicit_task) {
2548 int ompt_team_size = team->t.t_nproc;
2549 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2550 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2551 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2553 task_info->frame.exit_frame = ompt_data_none;
2554 task_info->task_data = ompt_data_none;
2555 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2556 __ompt_lw_taskteam_unlink(master_th);
2561 team->t.t_active_level--;
2562 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2568 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2569 int old_num = master_th->th.th_team_nproc;
2570 int new_num = master_th->th.th_teams_size.nth;
2571 kmp_info_t **other_threads = team->t.t_threads;
2572 team->t.t_nproc = new_num;
2573 for (
int i = 0; i < old_num; ++i) {
2574 other_threads[i]->th.th_team_nproc = new_num;
2577 for (
int i = old_num; i < new_num; ++i) {
2579 KMP_DEBUG_ASSERT(other_threads[i]);
2580 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2581 for (
int b = 0; b < bs_last_barrier; ++b) {
2582 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2583 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2585 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2588 if (__kmp_tasking_mode != tskm_immediate_exec) {
2590 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2596 if (ompt_enabled.enabled) {
2597 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2598 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2606 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2607 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2609 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2614 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2616 if (!master_th->th.th_teams_microtask ||
2617 team->t.t_level > master_th->th.th_teams_level) {
2619 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2621 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2624 if (ompt_enabled.enabled) {
2625 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2626 if (ompt_enabled.ompt_callback_implicit_task) {
2627 int flags = (team_microtask == (
void *)__kmp_teams_master)
2629 : ompt_task_implicit;
2630 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2631 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2632 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2633 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2635 task_info->frame.exit_frame = ompt_data_none;
2636 task_info->task_data = ompt_data_none;
2640 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2642 __kmp_pop_current_task_from_thread(master_th);
2644 master_th->th.th_def_allocator = team->t.t_def_allocator;
2647 if (ompd_state & OMPD_ENABLE_BP)
2648 ompd_bp_parallel_end();
2650 updateHWFPControl(team);
2652 if (root->r.r_active != master_active)
2653 root->r.r_active = master_active;
2655 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2663 master_th->th.th_team = parent_team;
2664 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2665 master_th->th.th_team_master = parent_team->t.t_threads[0];
2666 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2669 if (parent_team->t.t_serialized &&
2670 parent_team != master_th->th.th_serial_team &&
2671 parent_team != root->r.r_root_team) {
2672 __kmp_free_team(root,
2673 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2674 master_th->th.th_serial_team = parent_team;
2677 if (__kmp_tasking_mode != tskm_immediate_exec) {
2678 if (master_th->th.th_task_state_top >
2680 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2682 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2683 master_th->th.th_task_state;
2684 --master_th->th.th_task_state_top;
2686 master_th->th.th_task_state =
2688 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2689 }
else if (team != root->r.r_hot_team) {
2694 master_th->th.th_task_state = 0;
2697 master_th->th.th_task_team =
2698 parent_team->t.t_task_team[master_th->th.th_task_state];
2700 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2701 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2708 master_th->th.th_current_task->td_flags.executing = 1;
2710 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2712#if KMP_AFFINITY_SUPPORTED
2713 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2714 __kmp_reset_root_init_mask(gtid);
2719 OMPT_INVOKER(fork_context) |
2720 ((team_microtask == (
void *)__kmp_teams_master) ? ompt_parallel_league
2721 : ompt_parallel_team);
2722 if (ompt_enabled.enabled) {
2723 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2729 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else if (thread->th.th_team->t.t_control_stack_top
                   ->serial_nesting_level != thread->th.th_team->t.t_serialized) {
      push = 1;
    }
    if (push) { // push a record on the serial team's control stack
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
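
// Illustrative sketch (not part of the runtime): a simplified model of the
// control-stack push above. IcvRecord and saveControls are hypothetical
// stand-ins for kmp_internal_control_t and __kmp_save_internal_controls;
// the push-only-when-the-nesting-level-changed pattern is the same.
struct IcvRecord {
  int serial_nesting_level; // which serialized level these ICVs belong to
  int nproc;                // one example ICV
  IcvRecord *next;          // singly linked stack
};

static void saveControls(IcvRecord **stack_top, int nesting_level, int nproc) {
  // Push at most one record per serialized nesting level.
  if (*stack_top == nullptr ||
      (*stack_top)->serial_nesting_level != nesting_level) {
    *stack_top = new IcvRecord{nesting_level, nproc, *stack_top};
  }
}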
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this call reduces the hot team size, shrink the hot team now rather
  // than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we no longer need.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // Threads leaving the team should unref their task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case of an omp_set_num_threads() call.
    hot_team->t.t_size_changed = -1;
  }
}
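
// Illustrative sketch (not part of the runtime): a simplified model of the
// hot-team shrink above -- release workers beyond the new size, then tell
// the survivors about the new team size. Worker, shrinkTeam and
// releaseWorker are hypothetical stand-ins for kmp_info_t, the shrink loop
// and __kmp_free_thread; it assumes new_nth is not larger than the team.
#include <vector>

struct Worker { int team_nproc; };

static void shrinkTeam(std::vector<Worker *> &team, int new_nth,
                       void (*releaseWorker)(Worker *)) {
  for (int f = new_nth; f < (int)team.size(); ++f) {
    releaseWorker(team[f]); // return the extra worker to the pool
    team[f] = nullptr;
  }
  team.resize(new_nth);
  for (Worker *w : team) // survivors see the reduced size
    w->team_nproc = new_nth;
}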
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Validate max_active_levels.
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // Negative values are ignored; the last valid setting stays in effect and
    // a warning is issued (subject to KMP_WARNINGS).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // Value is within the valid range [0, KMP_MAX_ACTIVE_LEVELS_LIMIT];
    // zero is allowed (implementation defined behavior).
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
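
// Illustrative sketch (not part of the runtime): KMP_BUILD_ASSERT above is a
// compile-time size check; in plain C++ the same guarantee can be written
// with static_assert. sched_kind is a hypothetical stand-in enum.
enum class sched_kind : int { static_, dynamic_, guided_, auto_ };
static_assert(sizeof(sched_kind) == sizeof(int),
              "schedule kinds must be interchangeable with int");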
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check that the kind is valid; correct it if needed. Valid kinds fall in
  // one of two intervals, standard or extended.
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // Differ static chunked vs. unchunked: an invalid chunk indicates the
      // unchunked schedule (the default).
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // Ignore the chunk parameter for schedule auto.
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
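
// Illustrative sketch (not part of the runtime): a simplified model of the
// user-kind -> internal-kind mapping performed above. user_sched, icv_sched
// and mapSchedule are hypothetical names; the real code also handles
// schedule modifiers and an extended kind range.
#include <utility>

enum class user_sched { static_, dynamic_, guided_, auto_ };
enum class icv_sched { static_unchunked, static_chunked, dynamic_chunked,
                       guided_chunked, auto_selected };

static std::pair<icv_sched, int> mapSchedule(user_sched kind, int chunk) {
  const int default_chunk = 1; // stand-in for KMP_DEFAULT_CHUNK
  icv_sched internal;
  switch (kind) {
  case user_sched::static_:
    // Unchunked static is the default; a real chunk selects the chunked form.
    internal = (chunk < default_chunk) ? icv_sched::static_unchunked
                                       : icv_sched::static_chunked;
    break;
  case user_sched::dynamic_: internal = icv_sched::dynamic_chunked; break;
  case user_sched::guided_:  internal = icv_sched::guided_chunked;  break;
  default:                   internal = icv_sched::auto_selected;   break;
  }
  // Auto ignores the chunk; otherwise fall back to the default when invalid.
  int icv_chunk =
      (kind == user_sched::auto_ || chunk < 1) ? default_chunk : chunk;
  return {internal, icv_chunk};
}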
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set; report that via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;

  if (thr->th.th_teams_microtask) {
    // In a teams region several nested teams share the same level, so the
    // effective level must be adjusted before walking up the team tree.
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
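
// Illustrative sketch (not part of the runtime): walking a parent chain of
// teams to find the ancestor's thread number at a requested nesting level,
// ignoring the serialized-team bookkeeping the real code has to do.
// TeamNode and ancestorTid are hypothetical names.
struct TeamNode {
  int level;      // nesting level of this team
  int master_tid; // tid of this team's primary thread within its parent
  TeamNode *parent;
};

static int ancestorTid(const TeamNode *team, int my_tid, int level) {
  if (team == nullptr || level < 0 || level > team->level)
    return -1;           // no such ancestor
  if (level == team->level)
    return my_tid;       // asking about the current team
  while (team->level > level + 1)
    team = team->parent; // climb to the child of the requested level
  return team->master_tid; // the ancestor's tid in the level-`level` team
}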
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;

  if (thr->th.th_teams_microtask) {
    // Adjust for teams regions where nested teams share the same level.
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided, which may be changed independently.
  kmp_r_sched_t r_sched;

  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // balanced or greedy
  } else if (s == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // iterative or analytical
  } else { // static_chunked, dynamic_chunked, or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was never set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
/* Allocate (or reuse) the argv array for a team. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    // If heap space was previously allocated for the args, free it.
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      // Use the unused space in the team structure for the arguments.
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      // Allocate space for the arguments on the heap.
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // current state of the scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // serial_nesting_level (corresponds to th_team_serialized)
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // whether blocktime was explicitly set
      __kmp_dflt_blocktime, // blocktime
      __kmp_dflt_team_nth, // nproc for the next parallel region
      __kmp_dflt_max_active_levels, // max_active_levels
      r_sched, // runtime schedule {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0], // proc_bind
      __kmp_default_device, // default_device
      NULL // struct kmp_internal_control *next
  };

  return g_icvs;
}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
3317static void __kmp_initialize_root(kmp_root_t *root) {
3319 kmp_team_t *root_team;
3320 kmp_team_t *hot_team;
3321 int hot_team_max_nth;
3322 kmp_r_sched_t r_sched =
3323 __kmp_get_schedule_global();
3324 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3325 KMP_DEBUG_ASSERT(root);
3326 KMP_ASSERT(!root->r.r_begin);
3329 __kmp_init_lock(&root->r.r_begin_lock);
3330 root->r.r_begin = FALSE;
3331 root->r.r_active = FALSE;
3332 root->r.r_in_parallel = 0;
3333 root->r.r_blocktime = __kmp_dflt_blocktime;
3334#if KMP_AFFINITY_SUPPORTED
3335 root->r.r_affinity_assigned = FALSE;
3340 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3343 __kmp_allocate_team(root,
3349 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3351 USE_NESTED_HOT_ARG(NULL)
3356 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3359 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3361 root->r.r_root_team = root_team;
3362 root_team->t.t_control_stack_top = NULL;
3365 root_team->t.t_threads[0] = NULL;
3366 root_team->t.t_nproc = 1;
3367 root_team->t.t_serialized = 1;
3369 root_team->t.t_sched.sched = r_sched.sched;
3372 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3373 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3377 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3380 __kmp_allocate_team(root,
3382 __kmp_dflt_team_nth_ub * 2,
3386 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3388 USE_NESTED_HOT_ARG(NULL)
3390 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3392 root->r.r_hot_team = hot_team;
3393 root_team->t.t_control_stack_top = NULL;
3396 hot_team->t.t_parent = root_team;
3399 hot_team_max_nth = hot_team->t.t_max_nproc;
3400 for (f = 0; f < hot_team_max_nth; ++f) {
3401 hot_team->t.t_threads[f] = NULL;
3403 hot_team->t.t_nproc = 1;
3405 hot_team->t.t_sched.sched = r_sched.sched;
3406 hot_team->t.t_size_changed = 0;
3411typedef struct kmp_team_list_item {
3412 kmp_team_p
const *entry;
3413 struct kmp_team_list_item *next;
3414} kmp_team_list_item_t;
3415typedef kmp_team_list_item_t *kmp_team_list_t;
3417static void __kmp_print_structure_team_accum(
3418 kmp_team_list_t list,
3419 kmp_team_p
const *team
3429 KMP_DEBUG_ASSERT(list != NULL);
3434 __kmp_print_structure_team_accum(list, team->t.t_parent);
3435 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3439 while (l->next != NULL && l->entry != team) {
3442 if (l->next != NULL) {
3448 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3454 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3455 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3483void __kmp_print_structure(
void) {
3485 kmp_team_list_t list;
3489 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3493 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3494 "Table\n------------------------------\n");
3497 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3498 __kmp_printf(
"%2d", gtid);
3499 if (__kmp_threads != NULL) {
3500 __kmp_printf(
" %p", __kmp_threads[gtid]);
3502 if (__kmp_root != NULL) {
3503 __kmp_printf(
" %p", __kmp_root[gtid]);
3510 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3512 if (__kmp_threads != NULL) {
3514 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3515 kmp_info_t
const *thread = __kmp_threads[gtid];
3516 if (thread != NULL) {
3517 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3518 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3519 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3520 __kmp_print_structure_team(
" Serial Team: ",
3521 thread->th.th_serial_team);
3522 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3523 __kmp_print_structure_thread(
" Primary: ",
3524 thread->th.th_team_master);
3525 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3526 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3527 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3528 __kmp_print_structure_thread(
" Next in pool: ",
3529 thread->th.th_next_pool);
3531 __kmp_print_structure_team_accum(list, thread->th.th_team);
3532 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3536 __kmp_printf(
"Threads array is not allocated.\n");
3540 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3542 if (__kmp_root != NULL) {
3544 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3545 kmp_root_t
const *root = __kmp_root[gtid];
3547 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3548 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3549 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3550 __kmp_print_structure_thread(
" Uber Thread: ",
3551 root->r.r_uber_thread);
3552 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3553 __kmp_printf(
" In Parallel: %2d\n",
3554 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3556 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3557 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3561 __kmp_printf(
"Ubers array is not allocated.\n");
3564 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3566 while (list->next != NULL) {
3567 kmp_team_p
const *team = list->entry;
3569 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3570 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3571 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3572 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3573 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3574 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3575 for (i = 0; i < team->t.t_nproc; ++i) {
3576 __kmp_printf(
" Thread %2d: ", i);
3577 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3579 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3585 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3587 __kmp_print_structure_thread(
"Thread pool: ",
3588 CCAST(kmp_info_t *, __kmp_thread_pool));
3589 __kmp_print_structure_team(
"Team pool: ",
3590 CCAST(kmp_team_t *, __kmp_team_pool));
3594 while (list != NULL) {
3595 kmp_team_list_item_t *item = list;
3597 KMP_INTERNAL_FREE(item);
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
/* __kmp_get_random: get a random number using a linear congruential method. */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

/* __kmp_init_random: initialize a thread's random number generator. */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died inactive
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  added = __kmp_reclaim_dead_roots();
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Keep the old __kmp_threads array on a list so ongoing references to it
  // remain valid; the list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to the new value
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
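
// Illustrative sketch (not part of the runtime): the capacity-doubling policy
// used above -- double until the requirement is met, never exceeding a hard
// system maximum. growCapacity is a hypothetical helper; like the real code,
// it assumes the caller has already verified that needed <= sys_max - current.
static int growCapacity(int current, int needed, int sys_max) {
  int required = current + needed;
  int capacity = current;
  do {
    // Double while we can; otherwise clamp to the system-wide maximum.
    capacity = (capacity <= (sys_max >> 1)) ? (capacity << 1) : sys_max;
  } while (capacity < required);
  return capacity;
}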
3774int __kmp_register_root(
int initial_thread) {
3775 kmp_info_t *root_thread;
3779 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3780 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3797 capacity = __kmp_threads_capacity;
3798 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3805 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3806 capacity -= __kmp_hidden_helper_threads_num;
3810 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3811 if (__kmp_tp_cached) {
3812 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3813 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3814 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3816 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3826 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3829 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3830 gtid <= __kmp_hidden_helper_threads_num;
3833 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3834 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3835 "hidden helper thread: T#%d\n",
3841 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3844 for (gtid = __kmp_hidden_helper_threads_num + 1;
3845 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3849 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3850 KMP_ASSERT(gtid < __kmp_threads_capacity);
3855 TCW_4(__kmp_nth, __kmp_nth + 1);
3859 if (__kmp_adjust_gtid_mode) {
3860 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3861 if (TCR_4(__kmp_gtid_mode) != 2) {
3862 TCW_4(__kmp_gtid_mode, 2);
3865 if (TCR_4(__kmp_gtid_mode) != 1) {
3866 TCW_4(__kmp_gtid_mode, 1);
3871#ifdef KMP_ADJUST_BLOCKTIME
3874 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3875 if (__kmp_nth > __kmp_avail_proc) {
3876 __kmp_zero_bt = TRUE;
3882 if (!(root = __kmp_root[gtid])) {
3883 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3884 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3887#if KMP_STATS_ENABLED
3889 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3890 __kmp_stats_thread_ptr->startLife();
3891 KMP_SET_THREAD_STATE(SERIAL_REGION);
3894 __kmp_initialize_root(root);
3897 if (root->r.r_uber_thread) {
3898 root_thread = root->r.r_uber_thread;
3900 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3901 if (__kmp_storage_map) {
3902 __kmp_print_thread_storage_map(root_thread, gtid);
3904 root_thread->th.th_info.ds.ds_gtid = gtid;
3906 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3908 root_thread->th.th_root = root;
3909 if (__kmp_env_consistency_check) {
3910 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3913 __kmp_initialize_fast_memory(root_thread);
3917 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3918 __kmp_initialize_bget(root_thread);
3920 __kmp_init_random(root_thread);
3924 if (!root_thread->th.th_serial_team) {
3925 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3926 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3927 root_thread->th.th_serial_team = __kmp_allocate_team(
3932 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3934 KMP_ASSERT(root_thread->th.th_serial_team);
3935 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3936 root_thread->th.th_serial_team));
3939 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3941 root->r.r_root_team->t.t_threads[0] = root_thread;
3942 root->r.r_hot_team->t.t_threads[0] = root_thread;
3943 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3945 root_thread->th.th_serial_team->t.t_serialized = 0;
3946 root->r.r_uber_thread = root_thread;
3949 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3950 TCW_4(__kmp_init_gtid, TRUE);
3953 __kmp_gtid_set_specific(gtid);
3956 __kmp_itt_thread_name(gtid);
3959#ifdef KMP_TDATA_GTID
3962 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3963 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3965 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3967 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3968 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3969 KMP_INIT_BARRIER_STATE));
3972 for (b = 0; b < bs_last_barrier; ++b) {
3973 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3975 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3979 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3980 KMP_INIT_BARRIER_STATE);
3982#if KMP_AFFINITY_SUPPORTED
3983 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3984 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3985 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3986 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3988 root_thread->th.th_def_allocator = __kmp_def_allocator;
3989 root_thread->th.th_prev_level = 0;
3990 root_thread->th.th_prev_num_threads = 1;
3992 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3993 tmp->cg_root = root_thread;
3994 tmp->cg_thread_limit = __kmp_cg_max_nth;
3995 tmp->cg_nthreads = 1;
3996 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3997 " cg_nthreads init to 1\n",
4000 root_thread->th.th_cg_roots = tmp;
4002 __kmp_root_counter++;
4005 if (!initial_thread && ompt_enabled.enabled) {
4007 kmp_info_t *root_thread = ompt_get_thread();
4009 ompt_set_thread_state(root_thread, ompt_state_overhead);
4011 if (ompt_enabled.ompt_callback_thread_begin) {
4012 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4013 ompt_thread_initial, __ompt_get_thread_data_internal());
4015 ompt_data_t *task_data;
4016 ompt_data_t *parallel_data;
4017 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4019 if (ompt_enabled.ompt_callback_implicit_task) {
4020 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4021 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4024 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4028 if (ompd_state & OMPD_ENABLE_BP)
4029 ompd_bp_thread_begin();
4033 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4038#if KMP_NESTED_HOT_TEAMS
4039static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
4040 const int max_level) {
4042 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4043 if (!hot_teams || !hot_teams[level].hot_team) {
4046 KMP_DEBUG_ASSERT(level < max_level);
4047 kmp_team_t *team = hot_teams[level].hot_team;
4048 nth = hot_teams[level].hot_team_nth;
4050 if (level < max_level - 1) {
4051 for (i = 0; i < nth; ++i) {
4052 kmp_info_t *th = team->t.t_threads[i];
4053 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4054 if (i > 0 && th->th.th_hot_teams) {
4055 __kmp_free(th->th.th_hot_teams);
4056 th->th.th_hot_teams = NULL;
4060 __kmp_free_team(root, team, NULL);
4067static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4068 kmp_team_t *root_team = root->r.r_root_team;
4069 kmp_team_t *hot_team = root->r.r_hot_team;
4070 int n = hot_team->t.t_nproc;
4073 KMP_DEBUG_ASSERT(!root->r.r_active);
4075 root->r.r_root_team = NULL;
4076 root->r.r_hot_team = NULL;
4079 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4080#if KMP_NESTED_HOT_TEAMS
4081 if (__kmp_hot_teams_max_level >
4083 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4084 kmp_info_t *th = hot_team->t.t_threads[i];
4085 if (__kmp_hot_teams_max_level > 1) {
4086 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4088 if (th->th.th_hot_teams) {
4089 __kmp_free(th->th.th_hot_teams);
4090 th->th.th_hot_teams = NULL;
4095 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4100 if (__kmp_tasking_mode != tskm_immediate_exec) {
4101 __kmp_wait_to_unref_task_teams();
4107 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4109 (LPVOID) & (root->r.r_uber_thread->th),
4110 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4111 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4115 if (ompd_state & OMPD_ENABLE_BP)
4116 ompd_bp_thread_end();
4120 ompt_data_t *task_data;
4121 ompt_data_t *parallel_data;
4122 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4124 if (ompt_enabled.ompt_callback_implicit_task) {
4125 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4126 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4128 if (ompt_enabled.ompt_callback_thread_end) {
4129 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4130 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4136 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4137 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4139 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4140 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4143 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4144 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4145 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4146 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4147 root->r.r_uber_thread->th.th_cg_roots = NULL;
4149 __kmp_reap_thread(root->r.r_uber_thread, 1);
4153 root->r.r_uber_thread = NULL;
4155 root->r.r_begin = FALSE;
4160void __kmp_unregister_root_current_thread(
int gtid) {
4161 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4165 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4166 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4167 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4170 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4173 kmp_root_t *root = __kmp_root[gtid];
4175 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4176 KMP_ASSERT(KMP_UBER_GTID(gtid));
4177 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4178 KMP_ASSERT(root->r.r_active == FALSE);
4182 kmp_info_t *thread = __kmp_threads[gtid];
4183 kmp_team_t *team = thread->th.th_team;
4184 kmp_task_team_t *task_team = thread->th.th_task_team;
4187 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4188 task_team->tt.tt_hidden_helper_task_encountered)) {
4191 thread->th.ompt_thread_info.state = ompt_state_undefined;
4193 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4196 __kmp_reset_root(gtid, root);
4200 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4202 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4209static int __kmp_unregister_root_other_thread(
int gtid) {
4210 kmp_root_t *root = __kmp_root[gtid];
4213 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4214 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4215 KMP_ASSERT(KMP_UBER_GTID(gtid));
4216 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4217 KMP_ASSERT(root->r.r_active == FALSE);
4219 r = __kmp_reset_root(gtid, root);
4221 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4227void __kmp_task_info() {
4229 kmp_int32 gtid = __kmp_entry_gtid();
4230 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4231 kmp_info_t *this_thr = __kmp_threads[gtid];
4232 kmp_team_t *steam = this_thr->th.th_serial_team;
4233 kmp_team_t *team = this_thr->th.th_team;
4236 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4238 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4239 team->t.t_implicit_task_taskdata[tid].td_parent);
4246static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4247 int tid,
int gtid) {
4251 KMP_DEBUG_ASSERT(this_thr != NULL);
4252 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4253 KMP_DEBUG_ASSERT(team);
4254 KMP_DEBUG_ASSERT(team->t.t_threads);
4255 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4256 kmp_info_t *master = team->t.t_threads[0];
4257 KMP_DEBUG_ASSERT(master);
4258 KMP_DEBUG_ASSERT(master->th.th_root);
4262 TCW_SYNC_PTR(this_thr->th.th_team, team);
4264 this_thr->th.th_info.ds.ds_tid = tid;
4265 this_thr->th.th_set_nproc = 0;
4266 if (__kmp_tasking_mode != tskm_immediate_exec)
4269 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4271 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4272 this_thr->th.th_set_proc_bind = proc_bind_default;
4273#if KMP_AFFINITY_SUPPORTED
4274 this_thr->th.th_new_place = this_thr->th.th_current_place;
4276 this_thr->th.th_root = master->th.th_root;
4279 this_thr->th.th_team_nproc = team->t.t_nproc;
4280 this_thr->th.th_team_master = master;
4281 this_thr->th.th_team_serialized = team->t.t_serialized;
4283 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4285 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4286 tid, gtid, this_thr, this_thr->th.th_current_task));
4288 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4291 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4292 tid, gtid, this_thr, this_thr->th.th_current_task));
4297 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4299 this_thr->th.th_local.this_construct = 0;
4301 if (!this_thr->th.th_pri_common) {
4302 this_thr->th.th_pri_common =
4303 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4304 if (__kmp_storage_map) {
4305 __kmp_print_storage_map_gtid(
4306 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4307 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4309 this_thr->th.th_pri_head = NULL;
4312 if (this_thr != master &&
4313 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4315 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4316 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4319 int i = tmp->cg_nthreads--;
4320 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4321 " on node %p of thread %p to %d\n",
4322 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4327 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4329 this_thr->th.th_cg_roots->cg_nthreads++;
4330 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4331 " node %p of thread %p to %d\n",
4332 this_thr, this_thr->th.th_cg_roots,
4333 this_thr->th.th_cg_roots->cg_root,
4334 this_thr->th.th_cg_roots->cg_nthreads));
4335 this_thr->th.th_current_task->td_icvs.thread_limit =
4336 this_thr->th.th_cg_roots->cg_thread_limit;
4341 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4344 sizeof(dispatch_private_info_t) *
4345 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4346 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4347 team->t.t_max_nproc));
4348 KMP_ASSERT(dispatch);
4349 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4350 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4352 dispatch->th_disp_index = 0;
4353 dispatch->th_doacross_buf_idx = 0;
4354 if (!dispatch->th_disp_buffer) {
4355 dispatch->th_disp_buffer =
4356 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4358 if (__kmp_storage_map) {
4359 __kmp_print_storage_map_gtid(
4360 gtid, &dispatch->th_disp_buffer[0],
4361 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4363 : __kmp_dispatch_num_buffers],
4365 "th_%d.th_dispatch.th_disp_buffer "
4366 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4367 gtid, team->t.t_id, gtid);
4370 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4373 dispatch->th_dispatch_pr_current = 0;
4374 dispatch->th_dispatch_sh_current = 0;
4376 dispatch->th_deo_fcn = 0;
4377 dispatch->th_dxo_fcn = 0;
4380 this_thr->th.th_next_pool = NULL;
4382 if (!this_thr->th.th_task_state_memo_stack) {
4384 this_thr->th.th_task_state_memo_stack =
4385 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4386 this_thr->th.th_task_state_top = 0;
4387 this_thr->th.th_task_state_stack_sz = 4;
4388 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4390 this_thr->th.th_task_state_memo_stack[i] = 0;
4393 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4394 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4404kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4406 kmp_team_t *serial_team;
4407 kmp_info_t *new_thr;
4410 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4411 KMP_DEBUG_ASSERT(root && team);
4412#if !KMP_NESTED_HOT_TEAMS
4413 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4418 if (__kmp_thread_pool) {
4419 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4420 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4421 if (new_thr == __kmp_thread_pool_insert_pt) {
4422 __kmp_thread_pool_insert_pt = NULL;
4424 TCW_4(new_thr->th.th_in_pool, FALSE);
4425 __kmp_suspend_initialize_thread(new_thr);
4426 __kmp_lock_suspend_mx(new_thr);
4427 if (new_thr->th.th_active_in_pool == TRUE) {
4428 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4429 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4430 new_thr->th.th_active_in_pool = FALSE;
4432 __kmp_unlock_suspend_mx(new_thr);
4434 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4435 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4436 KMP_ASSERT(!new_thr->th.th_team);
4437 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4440 __kmp_initialize_info(new_thr, team, new_tid,
4441 new_thr->th.th_info.ds.ds_gtid);
4442 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4444 TCW_4(__kmp_nth, __kmp_nth + 1);
4446 new_thr->th.th_task_state = 0;
4447 new_thr->th.th_task_state_top = 0;
4448 new_thr->th.th_task_state_stack_sz = 4;
4450 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4452 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4456#ifdef KMP_ADJUST_BLOCKTIME
4459 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4460 if (__kmp_nth > __kmp_avail_proc) {
4461 __kmp_zero_bt = TRUE;
4470 kmp_balign_t *balign = new_thr->th.th_bar;
4471 for (b = 0; b < bs_last_barrier; ++b)
4472 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4475 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4476 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4483 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4484 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4489 if (!TCR_4(__kmp_init_monitor)) {
4490 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4491 if (!TCR_4(__kmp_init_monitor)) {
4492 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4493 TCW_4(__kmp_init_monitor, 1);
4494 __kmp_create_monitor(&__kmp_monitor);
4495 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4506 while (TCR_4(__kmp_init_monitor) < 2) {
4509 KF_TRACE(10, (
"after monitor thread has started\n"));
4512 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4519 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4521 : __kmp_hidden_helper_threads_num + 1;
4523 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4525 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4528 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4529 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4534 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4536 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4538#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4541 __itt_suppress_mark_range(
4542 __itt_suppress_range, __itt_suppress_threading_errors,
4543 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4544 __itt_suppress_mark_range(
4545 __itt_suppress_range, __itt_suppress_threading_errors,
4546 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4548 __itt_suppress_mark_range(
4549 __itt_suppress_range, __itt_suppress_threading_errors,
4550 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4552 __itt_suppress_mark_range(__itt_suppress_range,
4553 __itt_suppress_threading_errors,
4554 &new_thr->th.th_suspend_init_count,
4555 sizeof(new_thr->th.th_suspend_init_count));
4558 __itt_suppress_mark_range(__itt_suppress_range,
4559 __itt_suppress_threading_errors,
4560 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4561 sizeof(new_thr->th.th_bar[0].bb.b_go));
4562 __itt_suppress_mark_range(__itt_suppress_range,
4563 __itt_suppress_threading_errors,
4564 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4565 sizeof(new_thr->th.th_bar[1].bb.b_go));
4566 __itt_suppress_mark_range(__itt_suppress_range,
4567 __itt_suppress_threading_errors,
4568 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4569 sizeof(new_thr->th.th_bar[2].bb.b_go));
4571 if (__kmp_storage_map) {
4572 __kmp_print_thread_storage_map(new_thr, new_gtid);
4577 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4578 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4579 new_thr->th.th_serial_team = serial_team =
4580 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4584 proc_bind_default, &r_icvs,
4585 0 USE_NESTED_HOT_ARG(NULL));
4587 KMP_ASSERT(serial_team);
4588 serial_team->t.t_serialized = 0;
4590 serial_team->t.t_threads[0] = new_thr;
4592 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4596 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4599 __kmp_initialize_fast_memory(new_thr);
4603 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4604 __kmp_initialize_bget(new_thr);
4607 __kmp_init_random(new_thr);
4611 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4612 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4615 kmp_balign_t *balign = new_thr->th.th_bar;
4616 for (b = 0; b < bs_last_barrier; ++b) {
4617 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4618 balign[b].bb.team = NULL;
4619 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4620 balign[b].bb.use_oncore_barrier = 0;
4623 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4624 new_thr->th.th_sleep_loc_type = flag_unset;
4626 new_thr->th.th_spin_here = FALSE;
4627 new_thr->th.th_next_waiting = 0;
4629 new_thr->th.th_blocking =
false;
4632#if KMP_AFFINITY_SUPPORTED
4633 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4634 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4635 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4636 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4638 new_thr->th.th_def_allocator = __kmp_def_allocator;
4639 new_thr->th.th_prev_level = 0;
4640 new_thr->th.th_prev_num_threads = 1;
4642 TCW_4(new_thr->th.th_in_pool, FALSE);
4643 new_thr->th.th_active_in_pool = FALSE;
4644 TCW_4(new_thr->th.th_active, TRUE);
4652 if (__kmp_adjust_gtid_mode) {
4653 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4654 if (TCR_4(__kmp_gtid_mode) != 2) {
4655 TCW_4(__kmp_gtid_mode, 2);
4658 if (TCR_4(__kmp_gtid_mode) != 1) {
4659 TCW_4(__kmp_gtid_mode, 1);
4664#ifdef KMP_ADJUST_BLOCKTIME
4667 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4668 if (__kmp_nth > __kmp_avail_proc) {
4669 __kmp_zero_bt = TRUE;
4676 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4677 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4679 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4681 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy the ICVs to the primary thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
4713static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4714 kmp_internal_control_t *new_icvs,
4716 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4719 KMP_DEBUG_ASSERT(team);
4720 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4721 KMP_DEBUG_ASSERT(team->t.t_threads);
4724 team->t.t_master_tid = 0;
4726 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4727 team->t.t_nproc = new_nproc;
4730 team->t.t_next_pool = NULL;
4734 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4735 team->t.t_invoke = NULL;
4738 team->t.t_sched.sched = new_icvs->sched.sched;
4740#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4741 team->t.t_fp_control_saved = FALSE;
4742 team->t.t_x87_fpu_control_word = 0;
4743 team->t.t_mxcsr = 0;
4746 team->t.t_construct = 0;
4748 team->t.t_ordered.dt.t_value = 0;
4749 team->t.t_master_active = FALSE;
4752 team->t.t_copypriv_data = NULL;
4755 team->t.t_copyin_counter = 0;
4758 team->t.t_control_stack_top = NULL;
4760 __kmp_reinitialize_team(team, new_icvs, loc);
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
4766#if KMP_AFFINITY_SUPPORTED
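// Distribute the threads of a team over the places in the primary thread's
// place partition according to the team's proc_bind policy (primary, close,
// or spread). With update_master_only != 0 only the primary thread's
// partition is recomputed.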
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4774 if (KMP_HIDDEN_HELPER_TEAM(team))
4777 kmp_info_t *master_th = team->t.t_threads[0];
4778 KMP_DEBUG_ASSERT(master_th != NULL);
4779 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4780 int first_place = master_th->th.th_first_place;
4781 int last_place = master_th->th.th_last_place;
4782 int masters_place = master_th->th.th_current_place;
4783 int num_masks = __kmp_affinity.num_masks;
4784 team->t.t_first_place = first_place;
4785 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
4792 switch (proc_bind) {
4794 case proc_bind_default:
4797 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4800 case proc_bind_primary: {
4802 int n_th = team->t.t_nproc;
4803 for (f = 1; f < n_th; f++) {
4804 kmp_info_t *th = team->t.t_threads[f];
4805 KMP_DEBUG_ASSERT(th != NULL);
4806 th->th.th_first_place = first_place;
4807 th->th.th_last_place = last_place;
4808 th->th.th_new_place = masters_place;
4809 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4810 team->t.t_display_affinity != 1) {
4811 team->t.t_display_affinity = 1;
      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
4821 case proc_bind_close: {
4823 int n_th = team->t.t_nproc;
4825 if (first_place <= last_place) {
4826 n_places = last_place - first_place + 1;
4828 n_places = num_masks - first_place + last_place + 1;
4830 if (n_th <= n_places) {
4831 int place = masters_place;
4832 for (f = 1; f < n_th; f++) {
4833 kmp_info_t *th = team->t.t_threads[f];
4834 KMP_DEBUG_ASSERT(th != NULL);
        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
4843 th->th.th_first_place = first_place;
4844 th->th.th_last_place = last_place;
4845 th->th.th_new_place = place;
4846 if (__kmp_display_affinity && place != th->th.th_current_place &&
4847 team->t.t_display_affinity != 1) {
4848 team->t.t_display_affinity = 1;
4851 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4852 "partition = [%d,%d]\n",
4853 __kmp_gtid_from_thread(team->t.t_threads[f]),
4854 team->t.t_id, f, place, first_place, last_place));
4857 int S, rem, gap, s_count;
4858 S = n_th / n_places;
4860 rem = n_th - (S * n_places);
4861 gap = rem > 0 ? n_places / rem : n_places;
4862 int place = masters_place;
4864 for (f = 0; f < n_th; f++) {
4865 kmp_info_t *th = team->t.t_threads[f];
4866 KMP_DEBUG_ASSERT(th != NULL);
4868 th->th.th_first_place = first_place;
4869 th->th.th_last_place = last_place;
4870 th->th.th_new_place = place;
4871 if (__kmp_display_affinity && place != th->th.th_current_place &&
4872 team->t.t_display_affinity != 1) {
4873 team->t.t_display_affinity = 1;
4877 if ((s_count == S) && rem && (gap_ct == gap)) {
4879 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4881 if (place == last_place) {
4882 place = first_place;
4883 }
else if (place == (num_masks - 1)) {
4891 }
else if (s_count == S) {
4892 if (place == last_place) {
4893 place = first_place;
4894 }
else if (place == (num_masks - 1)) {
        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
4909 KMP_DEBUG_ASSERT(place == masters_place);
4913 case proc_bind_spread: {
4915 int n_th = team->t.t_nproc;
4918 if (first_place <= last_place) {
4919 n_places = last_place - first_place + 1;
4921 n_places = num_masks - first_place + last_place + 1;
4923 if (n_th <= n_places) {
4926 if (n_places != num_masks) {
4927 int S = n_places / n_th;
4928 int s_count, rem, gap, gap_ct;
4930 place = masters_place;
4931 rem = n_places - n_th * S;
4932 gap = rem ? n_th / rem : 1;
4935 if (update_master_only == 1)
4937 for (f = 0; f < thidx; f++) {
4938 kmp_info_t *th = team->t.t_threads[f];
4939 KMP_DEBUG_ASSERT(th != NULL);
4941 th->th.th_first_place = place;
4942 th->th.th_new_place = place;
4943 if (__kmp_display_affinity && place != th->th.th_current_place &&
4944 team->t.t_display_affinity != 1) {
4945 team->t.t_display_affinity = 1;
4948 while (s_count < S) {
4949 if (place == last_place) {
4950 place = first_place;
4951 }
else if (place == (num_masks - 1)) {
4958 if (rem && (gap_ct == gap)) {
4959 if (place == last_place) {
4960 place = first_place;
4961 }
else if (place == (num_masks - 1)) {
4969 th->th.th_last_place = place;
4972 if (place == last_place) {
4973 place = first_place;
4974 }
else if (place == (num_masks - 1)) {
4981 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4982 "partition = [%d,%d], num_masks: %u\n",
4983 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4984 f, th->th.th_new_place, th->th.th_first_place,
4985 th->th.th_last_place, num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4998 if (update_master_only == 1)
5000 for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
5003 KMP_DEBUG_ASSERT(last >= first);
5004 if (first >= n_places) {
5005 if (masters_place) {
5008 if (first == (masters_place + 1)) {
5009 KMP_DEBUG_ASSERT(f == n_th);
5012 if (last == masters_place) {
5013 KMP_DEBUG_ASSERT(f == (n_th - 1));
5017 KMP_DEBUG_ASSERT(f == n_th);
5022 if (last >= n_places) {
5023 last = (n_places - 1);
5028 KMP_DEBUG_ASSERT(0 <= first);
5029 KMP_DEBUG_ASSERT(n_places > first);
5030 KMP_DEBUG_ASSERT(0 <= last);
5031 KMP_DEBUG_ASSERT(n_places > last);
5032 KMP_DEBUG_ASSERT(last_place >= first_place);
5033 th = team->t.t_threads[f];
5034 KMP_DEBUG_ASSERT(th);
5035 th->th.th_first_place = first;
5036 th->th.th_new_place = place;
5037 th->th.th_last_place = last;
5038 if (__kmp_display_affinity && place != th->th.th_current_place &&
5039 team->t.t_display_affinity != 1) {
5040 team->t.t_display_affinity = 1;
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], spacing = %.4f\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]),
                    team->t.t_id, f, th->th.th_new_place,
                    th->th.th_first_place, th->th.th_last_place, spacing));
5051 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5053 int S, rem, gap, s_count;
5054 S = n_th / n_places;
5056 rem = n_th - (S * n_places);
5057 gap = rem > 0 ? n_places / rem : n_places;
5058 int place = masters_place;
5061 if (update_master_only == 1)
5063 for (f = 0; f < thidx; f++) {
5064 kmp_info_t *th = team->t.t_threads[f];
5065 KMP_DEBUG_ASSERT(th != NULL);
5067 th->th.th_first_place = place;
5068 th->th.th_last_place = place;
5069 th->th.th_new_place = place;
5070 if (__kmp_display_affinity && place != th->th.th_current_place &&
5071 team->t.t_display_affinity != 1) {
5072 team->t.t_display_affinity = 1;
5076 if ((s_count == S) && rem && (gap_ct == gap)) {
5078 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5080 if (place == last_place) {
5081 place = first_place;
5082 }
else if (place == (num_masks - 1)) {
5090 }
else if (s_count == S) {
5091 if (place == last_place) {
5092 place = first_place;
5093 }
else if (place == (num_masks - 1)) {
5102 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5103 "partition = [%d,%d]\n",
5104 __kmp_gtid_from_thread(team->t.t_threads[f]),
5105 team->t.t_id, f, th->th.th_new_place,
5106 th->th.th_first_place, th->th.th_last_place));
5108 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif // KMP_AFFINITY_SUPPORTED
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5131 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5134 int use_hot_team = !root->r.r_active;
5136 int do_place_partition = 1;
  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5139 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5140 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5143#if KMP_NESTED_HOT_TEAMS
5144 kmp_hot_team_ptr_t *hot_teams;
5146 team = master->th.th_team;
5147 level = team->t.t_active_level;
5148 if (master->th.th_teams_microtask) {
5149 if (master->th.th_teams_size.nteams > 1 &&
5152 (microtask_t)__kmp_teams_master ||
5153 master->th.th_teams_level <
5160 if ((master->th.th_teams_size.nteams == 1 &&
5161 master->th.th_teams_level >= team->t.t_level) ||
5162 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5163 do_place_partition = 0;
5165 hot_teams = master->th.th_hot_teams;
5166 if (level < __kmp_hot_teams_max_level && hot_teams &&
5167 hot_teams[level].hot_team) {
5175 KMP_DEBUG_ASSERT(new_nproc == 1);
5179 if (use_hot_team && new_nproc > 1) {
5180 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5181#if KMP_NESTED_HOT_TEAMS
5182 team = hot_teams[level].hot_team;
5184 team = root->r.r_hot_team;
5187 if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
5194 if (team->t.t_nproc != new_nproc &&
5195 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5197 int old_nthr = team->t.t_nproc;
5198 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5203 if (do_place_partition == 0)
5204 team->t.t_proc_bind = proc_bind_default;
5208 if (team->t.t_nproc == new_nproc) {
5209 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5212 if (team->t.t_size_changed == -1) {
5213 team->t.t_size_changed = 1;
5215 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5219 kmp_r_sched_t new_sched = new_icvs->sched;
5221 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5223 __kmp_reinitialize_team(team, new_icvs,
5224 root->r.r_uber_thread->th.th_ident);
5226 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5227 team->t.t_threads[0], team));
5228 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5230#if KMP_AFFINITY_SUPPORTED
5231 if ((team->t.t_size_changed == 0) &&
5232 (team->t.t_proc_bind == new_proc_bind)) {
5233 if (new_proc_bind == proc_bind_spread) {
5234 if (do_place_partition) {
5236 __kmp_partition_places(team, 1);
5239 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5240 "proc_bind = %d, partition = [%d,%d]\n",
5241 team->t.t_id, new_proc_bind, team->t.t_first_place,
5242 team->t.t_last_place));
5244 if (do_place_partition) {
5245 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5246 __kmp_partition_places(team);
5250 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));
5257 team->t.t_size_changed = 1;
5258 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5261 __kmp_add_threads_to_team(team, new_nproc);
5263#if KMP_NESTED_HOT_TEAMS
5264 if (__kmp_hot_teams_mode == 0) {
5267 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5268 hot_teams[level].hot_team_nth = new_nproc;
5271 for (f = new_nproc; f < team->t.t_nproc; f++) {
5272 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5273 if (__kmp_tasking_mode != tskm_immediate_exec) {
5276 team->t.t_threads[f]->th.th_task_team = NULL;
5278 __kmp_free_thread(team->t.t_threads[f]);
5279 team->t.t_threads[f] = NULL;
5281#if KMP_NESTED_HOT_TEAMS
5286 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5287 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5288 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5290 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5291 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5293 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5298 team->t.t_nproc = new_nproc;
5300 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5301 __kmp_reinitialize_team(team, new_icvs,
5302 root->r.r_uber_thread->th.th_ident);
5305 for (f = 0; f < new_nproc; ++f) {
5306 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5311 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5312 team->t.t_threads[0], team));
5314 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5317 for (f = 0; f < team->t.t_nproc; f++) {
5318 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5319 team->t.t_threads[f]->th.th_team_nproc ==
5324 if (do_place_partition) {
5325 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5326#if KMP_AFFINITY_SUPPORTED
5327 __kmp_partition_places(team);
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
5335 int old_nproc = team->t.t_nproc;
5336 team->t.t_size_changed = 1;
5338#if KMP_NESTED_HOT_TEAMS
5339 int avail_threads = hot_teams[level].hot_team_nth;
5340 if (new_nproc < avail_threads)
5341 avail_threads = new_nproc;
5342 kmp_info_t **other_threads = team->t.t_threads;
5343 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5347 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5348 for (b = 0; b < bs_last_barrier; ++b) {
5349 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5350 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5352 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5356 if (hot_teams[level].hot_team_nth >= new_nproc) {
5359 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5360 team->t.t_nproc = new_nproc;
5364 team->t.t_nproc = hot_teams[level].hot_team_nth;
5365 hot_teams[level].hot_team_nth = new_nproc;
5367 if (team->t.t_max_nproc < new_nproc) {
5369 __kmp_reallocate_team_arrays(team, new_nproc);
5370 __kmp_reinitialize_team(team, new_icvs, NULL);
5373#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5379 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5383 for (f = team->t.t_nproc; f < new_nproc; f++) {
5384 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5385 KMP_DEBUG_ASSERT(new_worker);
5386 team->t.t_threads[f] = new_worker;
        KA_TRACE(20, ("__kmp_allocate_team: team %d init T#%d arrived: "
                      "join=%llu, plain=%llu\n",
                      team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id,
                      f, team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                      team->t.t_bar[bs_plain_barrier].b_arrived));
5397 kmp_balign_t *balign = new_worker->th.th_bar;
5398 for (b = 0; b < bs_last_barrier; ++b) {
5399 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5400 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5401 KMP_BARRIER_PARENT_FLAG);
5403 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5409#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5411 new_temp_affinity.restore();
5413#if KMP_NESTED_HOT_TEAMS
5416 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5419 __kmp_add_threads_to_team(team, new_nproc);
5423 __kmp_initialize_team(team, new_nproc, new_icvs,
5424 root->r.r_uber_thread->th.th_ident);
5427 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5428 for (f = 0; f < team->t.t_nproc; ++f)
5429 __kmp_initialize_info(team->t.t_threads[f], team, f,
5430 __kmp_gtid_from_tid(f, team));
5433 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5434 for (f = old_nproc; f < team->t.t_nproc; ++f)
5435 team->t.t_threads[f]->th.th_task_state = old_state;
5438 for (f = 0; f < team->t.t_nproc; ++f) {
5439 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5440 team->t.t_threads[f]->th.th_team_nproc ==
5445 if (do_place_partition) {
5446 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5447#if KMP_AFFINITY_SUPPORTED
5448 __kmp_partition_places(team);
5453 kmp_info_t *master = team->t.t_threads[0];
5454 if (master->th.th_teams_microtask) {
5455 for (f = 1; f < new_nproc; ++f) {
5457 kmp_info_t *thr = team->t.t_threads[f];
5458 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5459 thr->th.th_teams_level = master->th.th_teams_level;
5460 thr->th.th_teams_size = master->th.th_teams_size;
5463#if KMP_NESTED_HOT_TEAMS
5467 for (f = 1; f < new_nproc; ++f) {
5468 kmp_info_t *thr = team->t.t_threads[f];
5470 kmp_balign_t *balign = thr->th.th_bar;
5471 for (b = 0; b < bs_last_barrier; ++b) {
5472 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5473 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5475 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5483 __kmp_alloc_argv_entries(argc, team, TRUE);
5484 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5488 KF_TRACE(10, (
" hot_team = %p\n", team));
5491 if (__kmp_tasking_mode != tskm_immediate_exec) {
5492 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5493 "task_team[1] = %p after reinit\n",
5494 team->t.t_task_team[0], team->t.t_task_team[1]));
5499 __ompt_team_assign_id(team, ompt_parallel_data);
5509 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5512 if (team->t.t_max_nproc >= max_nproc) {
5514 __kmp_team_pool = team->t.t_next_pool;
5516 if (max_nproc > 1 &&
5517 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5519 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5524 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5526 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5527 "task_team[1] %p to NULL\n",
5528 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5529 team->t.t_task_team[0] = NULL;
5530 team->t.t_task_team[1] = NULL;
5533 __kmp_alloc_argv_entries(argc, team, TRUE);
5534 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5537 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5538 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5541 for (b = 0; b < bs_last_barrier; ++b) {
5542 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5544 team->t.t_bar[b].b_master_arrived = 0;
5545 team->t.t_bar[b].b_team_arrived = 0;
5550 team->t.t_proc_bind = new_proc_bind;
5552 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5556 __ompt_team_assign_id(team, ompt_parallel_data);
5568 team = __kmp_reap_team(team);
5569 __kmp_team_pool = team;
5574 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5577 team->t.t_max_nproc = max_nproc;
5578 if (max_nproc > 1 &&
5579 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5581 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5586 __kmp_allocate_team_arrays(team, max_nproc);
5588 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5589 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
5594 team->t.t_task_team[0] = NULL;
5596 team->t.t_task_team[1] = NULL;
5599 if (__kmp_storage_map) {
5600 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5604 __kmp_alloc_argv_entries(argc, team, FALSE);
5605 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5612 for (b = 0; b < bs_last_barrier; ++b) {
5613 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5615 team->t.t_bar[b].b_master_arrived = 0;
5616 team->t.t_bar[b].b_team_arrived = 0;
5621 team->t.t_proc_bind = new_proc_bind;
5624 __ompt_team_assign_id(team, ompt_parallel_data);
5625 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5648 KMP_DEBUG_ASSERT(root);
5649 KMP_DEBUG_ASSERT(team);
5650 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5651 KMP_DEBUG_ASSERT(team->t.t_threads);
5653 int use_hot_team = team == root->r.r_hot_team;
5654#if KMP_NESTED_HOT_TEAMS
5657 level = team->t.t_active_level - 1;
5658 if (master->th.th_teams_microtask) {
5659 if (master->th.th_teams_size.nteams > 1) {
5663 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5664 master->th.th_teams_level == team->t.t_level) {
5670 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5672 if (level < __kmp_hot_teams_max_level) {
5673 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5680 TCW_SYNC_PTR(team->t.t_pkfn,
5683 team->t.t_copyin_counter = 0;
5688 if (!use_hot_team) {
5689 if (__kmp_tasking_mode != tskm_immediate_exec) {
5691 for (f = 1; f < team->t.t_nproc; ++f) {
5692 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5693 kmp_info_t *th = team->t.t_threads[f];
5694 volatile kmp_uint32 *state = &th->th.th_reap_state;
5695 while (*state != KMP_SAFE_TO_REAP) {
5699 if (!__kmp_is_thread_alive(th, &ecode)) {
5700 *state = KMP_SAFE_TO_REAP;
5705 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5706 if (fl.is_sleeping())
5707 fl.resume(__kmp_gtid_from_thread(th));
5714 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5715 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5716 if (task_team != NULL) {
5717 for (f = 0; f < team->t.t_nproc; ++f) {
5718 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5719 team->t.t_threads[f]->th.th_task_team = NULL;
        KA_TRACE(20,
                 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
                  __kmp_get_gtid(), task_team, team->t.t_id));
5725#if KMP_NESTED_HOT_TEAMS
5726 __kmp_free_task_team(master, task_team);
5728 team->t.t_task_team[tt_idx] = NULL;
5734 team->t.t_parent = NULL;
5735 team->t.t_level = 0;
5736 team->t.t_active_level = 0;
5739 for (f = 1; f < team->t.t_nproc; ++f) {
5740 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5741 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5742 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5745 __kmp_free_thread(team->t.t_threads[f]);
5748 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5751 team->t.b->go_release();
5752 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5753 for (f = 1; f < team->t.t_nproc; ++f) {
5754 if (team->t.b->sleep[f].sleep) {
5755 __kmp_atomic_resume_64(
5756 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5757 (kmp_atomic_flag_64<> *)NULL);
5762 for (
int f = 1; f < team->t.t_nproc; ++f) {
5763 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5769 for (f = 1; f < team->t.t_nproc; ++f) {
5770 team->t.t_threads[f] = NULL;
5773 if (team->t.t_max_nproc > 1 &&
5774 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5775 distributedBarrier::deallocate(team->t.b);
5780 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5781 __kmp_team_pool = (
volatile kmp_team_t *)team;
5784 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5785 team->t.t_threads[1]->th.th_cg_roots);
5786 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5788 for (f = 1; f < team->t.t_nproc; ++f) {
5789 kmp_info_t *thr = team->t.t_threads[f];
5790 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5791 thr->th.th_cg_roots->cg_root == thr);
5793 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5794 thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5798 int i = tmp->cg_nthreads--;
5803 if (thr->th.th_cg_roots)
5804 thr->th.th_current_task->td_icvs.thread_limit =
5805 thr->th.th_cg_roots->cg_thread_limit;
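// Free a team taken off the team pool and return the next team in the pool.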
5814kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5815 kmp_team_t *next_pool = team->t.t_next_pool;
5817 KMP_DEBUG_ASSERT(team);
5818 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5819 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5820 KMP_DEBUG_ASSERT(team->t.t_threads);
5821 KMP_DEBUG_ASSERT(team->t.t_argv);
5826 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
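// Return a worker thread to the thread pool. The pool is kept sorted by
// gtid; __kmp_thread_pool_insert_pt caches the last insertion point so the
// scan below usually starts close to the right position.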
5860void __kmp_free_thread(kmp_info_t *this_th) {
  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5867 KMP_DEBUG_ASSERT(this_th);
5872 kmp_balign_t *balign = this_th->th.th_bar;
5873 for (b = 0; b < bs_last_barrier; ++b) {
5874 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5875 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5876 balign[b].bb.team = NULL;
5877 balign[b].bb.leaf_kids = 0;
5879 this_th->th.th_task_state = 0;
5880 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5883 TCW_PTR(this_th->th.th_team, NULL);
5884 TCW_PTR(this_th->th.th_root, NULL);
5885 TCW_PTR(this_th->th.th_dispatch, NULL);
5887 while (this_th->th.th_cg_roots) {
5888 this_th->th.th_cg_roots->cg_nthreads--;
5889 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5890 " %p of thread %p to %d\n",
5891 this_th, this_th->th.th_cg_roots,
5892 this_th->th.th_cg_roots->cg_root,
5893 this_th->th.th_cg_roots->cg_nthreads));
5894 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5895 if (tmp->cg_root == this_th) {
5896 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5899 this_th->th.th_cg_roots = tmp->up;
5902 if (tmp->cg_nthreads == 0) {
5905 this_th->th.th_cg_roots = NULL;
5915 __kmp_free_implicit_task(this_th);
5916 this_th->th.th_current_task = NULL;
5920 gtid = this_th->th.th_info.ds.ds_gtid;
5921 if (__kmp_thread_pool_insert_pt != NULL) {
5922 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5923 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5924 __kmp_thread_pool_insert_pt = NULL;
5933 if (__kmp_thread_pool_insert_pt != NULL) {
5934 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5936 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5938 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5939 scan = &((*scan)->th.th_next_pool))
5944 TCW_PTR(this_th->th.th_next_pool, *scan);
5945 __kmp_thread_pool_insert_pt = *scan = this_th;
5946 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5947 (this_th->th.th_info.ds.ds_gtid <
5948 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5949 TCW_4(this_th->th.th_in_pool, TRUE);
5950 __kmp_suspend_initialize_thread(this_th);
5951 __kmp_lock_suspend_mx(this_th);
5952 if (this_th->th.th_active == TRUE) {
5953 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5954 this_th->th.th_active_in_pool = TRUE;
5958 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5961 __kmp_unlock_suspend_mx(this_th);
5963 TCW_4(__kmp_nth, __kmp_nth - 1);
5965#ifdef KMP_ADJUST_BLOCKTIME
5968 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5969 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5970 if (__kmp_nth <= __kmp_avail_proc) {
5971 __kmp_zero_bt = FALSE;
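// Main loop executed by every worker thread: wait at the fork barrier for
// work, invoke the team's microtask, then wait at the join barrier, until
// the library signals global shutdown (__kmp_global.g.g_done).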
5981void *__kmp_launch_thread(kmp_info_t *this_thr) {
5982#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
5989 int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5996 if (__kmp_env_consistency_check) {
5997 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6001 if (ompd_state & OMPD_ENABLE_BP)
6002 ompd_bp_thread_begin();
6006 ompt_data_t *thread_data =
nullptr;
6007 if (ompt_enabled.enabled) {
6008 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6009 *thread_data = ompt_data_none;
6011 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6012 this_thr->th.ompt_thread_info.wait_id = 0;
6013 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6014 this_thr->th.ompt_thread_info.parallel_flags = 0;
6015 if (ompt_enabled.ompt_callback_thread_begin) {
6016 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6017 ompt_thread_worker, thread_data);
6019 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6024 while (!TCR_4(__kmp_global.g.g_done)) {
6025 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6029 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6032 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6035 if (ompt_enabled.enabled) {
6036 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6040 pteam = &this_thr->th.th_team;
6043 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6045 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6052 updateHWFPControl(*pteam);
6055 if (ompt_enabled.enabled) {
6056 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6060 rc = (*pteam)->t.t_invoke(gtid);
6064 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6065 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6066 (*pteam)->t.t_pkfn));
6069 if (ompt_enabled.enabled) {
6071 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6073 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6077 __kmp_join_barrier(gtid);
6082 if (ompd_state & OMPD_ENABLE_BP)
6083 ompd_bp_thread_end();
6087 if (ompt_enabled.ompt_callback_thread_end) {
6088 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6092 this_thr->th.th_task_team = NULL;
6094 __kmp_common_destroy_gtid(gtid);
6096 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6099#if OMP_PROFILING_SUPPORT
6100 llvm::timeTraceProfilerFinishThread();
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6116 __kmp_internal_end_thread(gtid);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}
#endif
6129void __kmp_internal_end_atexit(
void) {
6130 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
6154 __kmp_internal_end_library(-1);
6156 __kmp_close_console();
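// Reap a single worker thread: wake it from the fork barrier if necessary,
// join the underlying OS thread, and release all per-thread resources.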
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  int gtid;
6165 KMP_DEBUG_ASSERT(thread != NULL);
6167 gtid = thread->th.th_info.ds.ds_gtid;
6170 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
6175 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6177 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6179 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
      __kmp_release_64(&flag);
6190 __kmp_reap_worker(thread);
6202 if (thread->th.th_active_in_pool) {
6203 thread->th.th_active_in_pool = FALSE;
6204 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6205 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6209 __kmp_free_implicit_task(thread);
6213 __kmp_free_fast_memory(thread);
6216 __kmp_suspend_uninitialize_thread(thread);
6218 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6219 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6224#ifdef KMP_ADJUST_BLOCKTIME
6227 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6228 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6229 if (__kmp_nth <= __kmp_avail_proc) {
6230 __kmp_zero_bt = FALSE;
6236 if (__kmp_env_consistency_check) {
6237 if (thread->th.th_cons) {
6238 __kmp_free_cons_stack(thread->th.th_cons);
6239 thread->th.th_cons = NULL;
6243 if (thread->th.th_pri_common != NULL) {
6244 __kmp_free(thread->th.th_pri_common);
6245 thread->th.th_pri_common = NULL;
6248 if (thread->th.th_task_state_memo_stack != NULL) {
6249 __kmp_free(thread->th.th_task_state_memo_stack);
6250 thread->th.th_task_state_memo_stack = NULL;
6254 if (thread->th.th_local.bget_data != NULL) {
6255 __kmp_finalize_bget(thread);
6259#if KMP_AFFINITY_SUPPORTED
6260 if (thread->th.th_affin_mask != NULL) {
6261 KMP_CPU_FREE(thread->th.th_affin_mask);
6262 thread->th.th_affin_mask = NULL;
6266#if KMP_USE_HIER_SCHED
6267 if (thread->th.th_hier_bar_data != NULL) {
6268 __kmp_free(thread->th.th_hier_bar_data);
6269 thread->th.th_hier_bar_data = NULL;
6273 __kmp_reap_team(thread->th.th_serial_team);
6274 thread->th.th_serial_team = NULL;
6281static void __kmp_itthash_clean(kmp_info_t *th) {
6283 if (__kmp_itt_region_domains.count > 0) {
6284 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6285 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6287 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6288 __kmp_thread_free(th, bucket);
6293 if (__kmp_itt_barrier_domains.count > 0) {
6294 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6295 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6297 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6298 __kmp_thread_free(th, bucket);
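// Common shutdown path: unregister the library, reap the monitor (if any)
// and all pooled threads and teams, and mark the runtime as finished.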
static void __kmp_internal_end(void) {
  int i;
6310 __kmp_unregister_library();
6317 __kmp_reclaim_dead_roots();
6321 for (i = 0; i < __kmp_threads_capacity; i++)
6323 if (__kmp_root[i]->r.r_active)
6326 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6328 if (i < __kmp_threads_capacity) {
6340 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6341 if (TCR_4(__kmp_init_monitor)) {
6342 __kmp_reap_monitor(&__kmp_monitor);
6343 TCW_4(__kmp_init_monitor, 0);
6345 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6346 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6352 for (i = 0; i < __kmp_threads_capacity; i++) {
6353 if (__kmp_root[i]) {
6356 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6365 while (__kmp_thread_pool != NULL) {
6367 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6368 __kmp_thread_pool = thread->th.th_next_pool;
6370 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6371 thread->th.th_next_pool = NULL;
6372 thread->th.th_in_pool = FALSE;
6373 __kmp_reap_thread(thread, 0);
6375 __kmp_thread_pool_insert_pt = NULL;
6378 while (__kmp_team_pool != NULL) {
6380 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6381 __kmp_team_pool = team->t.t_next_pool;
6383 team->t.t_next_pool = NULL;
6384 __kmp_reap_team(team);
6387 __kmp_reap_task_teams();
6394 for (i = 0; i < __kmp_threads_capacity; i++) {
6395 kmp_info_t *thr = __kmp_threads[i];
6396 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6401 for (i = 0; i < __kmp_threads_capacity; ++i) {
6408 TCW_SYNC_4(__kmp_init_common, FALSE);
6410 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6418 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6419 if (TCR_4(__kmp_init_monitor)) {
6420 __kmp_reap_monitor(&__kmp_monitor);
6421 TCW_4(__kmp_init_monitor, 0);
6423 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6424 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6427 TCW_4(__kmp_init_gtid, FALSE);
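// Library-level shutdown entry point (atexit/destructor path). gtid_req is
// the caller's gtid if known, or a negative sentinel; an active uber root
// forces an abort-style exit instead of a clean teardown.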
6436void __kmp_internal_end_library(
int gtid_req) {
6443 if (__kmp_global.g.g_abort) {
6444 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6448 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6449 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6454 if (TCR_4(__kmp_init_hidden_helper) &&
6455 !TCR_4(__kmp_hidden_helper_team_done)) {
6456 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6458 __kmp_hidden_helper_main_thread_release();
6460 __kmp_hidden_helper_threads_deinitz_wait();
6466 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6468 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6469 if (gtid == KMP_GTID_SHUTDOWN) {
6470 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6471 "already shutdown\n"));
6473 }
else if (gtid == KMP_GTID_MONITOR) {
6474 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6475 "registered, or system shutdown\n"));
6477 }
else if (gtid == KMP_GTID_DNE) {
6478 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6481 }
else if (KMP_UBER_GTID(gtid)) {
6483 if (__kmp_root[gtid]->r.r_active) {
6484 __kmp_global.g.g_abort = -1;
6485 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6486 __kmp_unregister_library();
6488 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6492 __kmp_itthash_clean(__kmp_threads[gtid]);
6495 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6496 __kmp_unregister_root_current_thread(gtid);
6503#ifdef DUMP_DEBUG_ON_EXIT
6504 if (__kmp_debug_buf)
6505 __kmp_dump_debug_buffer();
6510 __kmp_unregister_library();
6515 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6518 if (__kmp_global.g.g_abort) {
6519 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6521 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6524 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6525 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6534 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6537 __kmp_internal_end();
6539 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6540 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6542 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6544#ifdef DUMP_DEBUG_ON_EXIT
6545 if (__kmp_debug_buf)
6546 __kmp_dump_debug_buffer();
6550 __kmp_close_console();
6553 __kmp_fini_allocator();
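// Per-thread shutdown entry point, used when a root thread exits. Unlike
// __kmp_internal_end_library(), the runtime is only torn down once no other
// uber (root) threads remain alive.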
6557void __kmp_internal_end_thread(
int gtid_req) {
6566 if (__kmp_global.g.g_abort) {
6567 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6571 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6572 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6577 if (TCR_4(__kmp_init_hidden_helper) &&
6578 !TCR_4(__kmp_hidden_helper_team_done)) {
6579 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6581 __kmp_hidden_helper_main_thread_release();
6583 __kmp_hidden_helper_threads_deinitz_wait();
6590 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6592 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6593 if (gtid == KMP_GTID_SHUTDOWN) {
6594 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6595 "already shutdown\n"));
6597 }
else if (gtid == KMP_GTID_MONITOR) {
6598 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6599 "registered, or system shutdown\n"));
6601 }
else if (gtid == KMP_GTID_DNE) {
6602 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6606 }
else if (KMP_UBER_GTID(gtid)) {
6608 if (__kmp_root[gtid]->r.r_active) {
6609 __kmp_global.g.g_abort = -1;
6610 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6612 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6616 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6618 __kmp_unregister_root_current_thread(gtid);
6622 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6625 __kmp_threads[gtid]->th.th_task_team = NULL;
6629 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6635 if (__kmp_pause_status != kmp_hard_paused)
6639 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6644 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6647 if (__kmp_global.g.g_abort) {
6648 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6650 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6653 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6654 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6665 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6667 for (i = 0; i < __kmp_threads_capacity; ++i) {
6668 if (KMP_UBER_GTID(i)) {
6671 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6672 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6673 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6680 __kmp_internal_end();
6682 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6683 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6685 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6687#ifdef DUMP_DEBUG_ON_EXIT
6688 if (__kmp_debug_buf)
6689 __kmp_dump_debug_buffer();
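// Library registration: the first OpenMP runtime in a process publishes a
// flag/value pair (via an environment variable or, with KMP_USE_SHM, a shared
// memory segment) so that a second copy of the library can detect the
// duplicate and honor KMP_DUPLICATE_LIB_OK.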
6696static long __kmp_registration_flag = 0;
6698static char *__kmp_registration_str = NULL;
6701static inline char *__kmp_reg_status_name() {
6707#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6708 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d_%d", (
int)getpid(),
6711 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int)getpid());
6715#if defined(KMP_USE_SHM)
char *temp_reg_status_file_name = nullptr;
void __kmp_register_library_startup(void) {
6722 char *name = __kmp_reg_status_name();
6728#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6729 __kmp_initialize_system_tick();
6731 __kmp_read_system_time(&time.dtime);
6732 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6744#if defined(KMP_USE_SHM)
6745 char *shm_name = __kmp_str_format(
"/%s", name);
6746 int shm_preexist = 0;
6748 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6749 if ((fd1 == -1) && (errno == EEXIST)) {
6752 fd1 = shm_open(shm_name, O_RDWR, 0666);
6755 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM"), KMP_ERR(0),
6761 }
else if (fd1 == -1) {
6766 char *temp_file_name = __kmp_str_format(
"/tmp/%sXXXXXX", name);
6767 fd1 = mkstemp(temp_file_name);
6770 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open TEMP"), KMP_ERR(errno),
6773 temp_reg_status_file_name = temp_file_name;
6775 if (shm_preexist == 0) {
6777 if (ftruncate(fd1, SHM_SIZE) == -1) {
6779 __kmp_fatal(KMP_MSG(FunctionError,
"Can't set size of SHM"),
6780 KMP_ERR(errno), __kmp_msg_null);
    char *data1 =
        (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6785 if (data1 == MAP_FAILED) {
6787 __kmp_fatal(KMP_MSG(FunctionError,
"Can't map SHM"), KMP_ERR(errno),
6790 if (shm_preexist == 0) {
6791 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6794 value = __kmp_str_format(
"%s", data1);
6795 munmap(data1, SHM_SIZE);
6799 __kmp_env_set(name, __kmp_registration_str, 0);
6801 value = __kmp_env_get(name);
6804 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6811 char *flag_addr_str = NULL;
6812 char *flag_val_str = NULL;
6813 char const *file_name = NULL;
    __kmp_str_split(tail, '-', &flag_addr_str, &tail);
    __kmp_str_split(tail, '-', &flag_val_str, &tail);
6818 unsigned long *flag_addr = 0;
6819 unsigned long flag_val = 0;
      KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
      KMP_SSCANF(flag_val_str, "%lx", &flag_val);
      if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6826 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6840 file_name =
"unknown library";
6845 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6846 if (!__kmp_str_match_true(duplicate_ok)) {
6848 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6849 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6851 KMP_INTERNAL_FREE(duplicate_ok);
6852 __kmp_duplicate_library_ok = 1;
6857#if defined(KMP_USE_SHM)
6859 shm_unlink(shm_name);
6862 __kmp_env_unset(name);
6866 KMP_DEBUG_ASSERT(0);
  KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
  KMP_INTERNAL_FREE((void *)shm_name);
#endif
  KMP_INTERNAL_FREE((void *)name);
void __kmp_unregister_library(void) {
6881 char *name = __kmp_reg_status_name();
6884#if defined(KMP_USE_SHM)
  bool use_shm = true;
  char *shm_name = __kmp_str_format("/%s", name);
6887 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6891 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6892 fd1 = open(temp_reg_status_file_name, O_RDONLY);
  char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
  if (data1 != MAP_FAILED) {
    value = __kmp_str_format("%s", data1);
6901 munmap(data1, SHM_SIZE);
6905 value = __kmp_env_get(name);
6908 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6909 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6910 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6912#if defined(KMP_USE_SHM)
6914 shm_unlink(shm_name);
6916 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6917 unlink(temp_reg_status_file_name);
6920 __kmp_env_unset(name);
6924#if defined(KMP_USE_SHM)
6925 KMP_INTERNAL_FREE(shm_name);
6927 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6928 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6932 KMP_INTERNAL_FREE(__kmp_registration_str);
6933 KMP_INTERNAL_FREE(value);
6934 KMP_INTERNAL_FREE(name);
6936 __kmp_registration_flag = 0;
6937 __kmp_registration_str = NULL;
6944#if KMP_MIC_SUPPORTED
6946static void __kmp_check_mic_type() {
6947 kmp_cpuid_t cpuid_state = {0};
6948 kmp_cpuid_t *cs_p = &cpuid_state;
6949 __kmp_x86_cpuid(1, 0, cs_p);
6951 if ((cs_p->eax & 0xff0) == 0xB10) {
6952 __kmp_mic_type = mic2;
6953 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6954 __kmp_mic_type = mic3;
6956 __kmp_mic_type = non_mic;
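// Detect whether user-level mwait-style waiting can be used on this machine
// (waitpkg, or the Xeon Phi user-mwait auxval below) and set the
// corresponding __kmp_*_enabled flags.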
6963static void __kmp_user_level_mwait_init() {
6964 struct kmp_cpuid buf;
6965 __kmp_x86_cpuid(7, 0, &buf);
6966 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6967 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6968 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6969 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6970 __kmp_umwait_enabled));
6973#ifndef AT_INTELPHIUSERMWAIT
6976#define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
6984static void __kmp_user_level_mwait_init() {
6989 if (__kmp_mic_type == mic3) {
6990 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6991 if ((res & 0x1) || __kmp_user_level_mwait) {
6992 __kmp_mwait_enabled = TRUE;
6993 if (__kmp_user_level_mwait) {
6994 KMP_INFORM(EnvMwaitWarn);
6997 __kmp_mwait_enabled = FALSE;
7000 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7001 "__kmp_mwait_enabled = %d\n",
7002 __kmp_mic_type, __kmp_mwait_enabled));
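// One-time serial initialization of the runtime: basic type checks, lock and
// allocator setup, default ICVs and barrier configuration, environment
// processing, allocation of the __kmp_threads/__kmp_root arrays, and
// registration of the initial root thread.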
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7026 __kmp_validate_locks();
7028#if ENABLE_LIBOMPTARGET
7030 __kmp_init_omptarget();
7034 __kmp_init_allocator();
7040 if (__kmp_need_register_serial)
7041 __kmp_register_library_startup();
7044 if (TCR_4(__kmp_global.g.g_done)) {
7045 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7048 __kmp_global.g.g_abort = 0;
7049 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7052#if KMP_USE_ADAPTIVE_LOCKS
7053#if KMP_DEBUG_ADAPTIVE_LOCKS
7054 __kmp_init_speculative_stats();
7057#if KMP_STATS_ENABLED
7060 __kmp_init_lock(&__kmp_global_lock);
7061 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7062 __kmp_init_lock(&__kmp_debug_lock);
7063 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7064 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7065 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7066 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7067 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7068 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7069 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7070 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7071 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7072 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7073 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7074 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7075 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7076 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7077 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7079 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7081 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7085 __kmp_runtime_initialize();
7087#if KMP_MIC_SUPPORTED
7088 __kmp_check_mic_type();
7095 __kmp_abort_delay = 0;
7099 __kmp_dflt_team_nth_ub = __kmp_xproc;
7100 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7101 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7103 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7104 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7106 __kmp_max_nth = __kmp_sys_max_nth;
7107 __kmp_cg_max_nth = __kmp_sys_max_nth;
7108 __kmp_teams_max_nth = __kmp_xproc;
7109 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7110 __kmp_teams_max_nth = __kmp_sys_max_nth;
7115 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7117 __kmp_monitor_wakeups =
7118 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7119 __kmp_bt_intervals =
7120 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7123 __kmp_library = library_throughput;
7125 __kmp_static = kmp_sch_static_balanced;
7132#if KMP_FAST_REDUCTION_BARRIER
7133#define kmp_reduction_barrier_gather_bb ((int)1)
7134#define kmp_reduction_barrier_release_bb ((int)1)
7135#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7136#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7138 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7139 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7140 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7141 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7142 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7143#if KMP_FAST_REDUCTION_BARRIER
7144 if (i == bs_reduction_barrier) {
7146 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7147 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7148 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7149 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7153#if KMP_FAST_REDUCTION_BARRIER
7154#undef kmp_reduction_barrier_release_pat
7155#undef kmp_reduction_barrier_gather_pat
7156#undef kmp_reduction_barrier_release_bb
7157#undef kmp_reduction_barrier_gather_bb
7159#if KMP_MIC_SUPPORTED
7160 if (__kmp_mic_type == mic2) {
7162 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7163 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7165 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7166 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7168#if KMP_FAST_REDUCTION_BARRIER
7169 if (__kmp_mic_type == mic2) {
7170 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7171 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7178 __kmp_env_checks = TRUE;
7180 __kmp_env_checks = FALSE;
7184 __kmp_foreign_tp = TRUE;
7186 __kmp_global.g.g_dynamic = FALSE;
7187 __kmp_global.g.g_dynamic_mode = dynamic_default;
7189 __kmp_init_nesting_mode();
7191 __kmp_env_initialize(NULL);
7193#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7194 __kmp_user_level_mwait_init();
7198 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7199 if (__kmp_str_match_true(val)) {
7200 kmp_str_buf_t buffer;
7201 __kmp_str_buf_init(&buffer);
7202 __kmp_i18n_dump_catalog(&buffer);
7203 __kmp_printf(
"%s", buffer.str);
7204 __kmp_str_buf_free(&buffer);
7206 __kmp_env_free(&val);
7209 __kmp_threads_capacity =
7210 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7212 __kmp_tp_capacity = __kmp_default_tp_capacity(
7213 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7218 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7219 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7220 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7221 __kmp_thread_pool = NULL;
7222 __kmp_thread_pool_insert_pt = NULL;
7223 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7237 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7239 KMP_DEBUG_ASSERT(__kmp_nth == 0);
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
7246 KMP_ASSERT(KMP_UBER_GTID(gtid));
7247 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7251 __kmp_common_initialize();
7255 __kmp_register_atfork();
7258#if !KMP_DYNAMIC_LIB || \
7259 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  int rc = atexit(__kmp_internal_end_atexit);
  if (rc != 0) {
    __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                __kmp_msg_null);
  }
7272#if KMP_HANDLE_SIGNALS
7278 __kmp_install_signals(FALSE);
7281 __kmp_install_signals(TRUE);
7286 __kmp_init_counter++;
7288 __kmp_init_serial = TRUE;
7290 if (__kmp_version) {
7291 __kmp_print_version_1();
7294 if (__kmp_settings) {
7298 if (__kmp_display_env || __kmp_display_env_verbose) {
7299 __kmp_env_print_2();
7308 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
7312 if (__kmp_init_serial) {
7315 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7316 if (__kmp_init_serial) {
7317 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7320 __kmp_do_serial_initialize();
7321 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
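// Middle initialization: runs after serial initialization and before the
// first parallel region; initializes affinity (if supported) and settles the
// default team size (__kmp_dflt_team_nth).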
static void __kmp_do_middle_initialize(void) {
7326 int prev_dflt_team_nth;
7328 if (!__kmp_init_serial) {
7329 __kmp_do_serial_initialize();
7332 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7334 if (UNLIKELY(!__kmp_need_register_serial)) {
7337 __kmp_register_library_startup();
7342 prev_dflt_team_nth = __kmp_dflt_team_nth;
7344#if KMP_AFFINITY_SUPPORTED
7347 __kmp_affinity_initialize(__kmp_affinity);
7351 KMP_ASSERT(__kmp_xproc > 0);
7352 if (__kmp_avail_proc == 0) {
7353 __kmp_avail_proc = __kmp_xproc;
7359 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7360 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7365 if (__kmp_dflt_team_nth == 0) {
7366#ifdef KMP_DFLT_NTH_CORES
7368 __kmp_dflt_team_nth = __kmp_ncores;
7369 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7370 "__kmp_ncores (%d)\n",
7371 __kmp_dflt_team_nth));
7374 __kmp_dflt_team_nth = __kmp_avail_proc;
7375 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7376 "__kmp_avail_proc(%d)\n",
7377 __kmp_dflt_team_nth));
7381 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7382 __kmp_dflt_team_nth = KMP_MIN_NTH;
7384 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7385 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7388 if (__kmp_nesting_mode > 0)
7389 __kmp_set_nesting_mode_threads();
7393 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7395 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7400 for (i = 0; i < __kmp_threads_capacity; i++) {
7401 kmp_info_t *thread = __kmp_threads[i];
7404 if (thread->th.th_current_task->td_icvs.nproc != 0)
7407 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7412 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7413 __kmp_dflt_team_nth));
7415#ifdef KMP_ADJUST_BLOCKTIME
7417 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7418 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7419 if (__kmp_nth > __kmp_avail_proc) {
7420 __kmp_zero_bt = TRUE;
7426 TCW_SYNC_4(__kmp_init_middle, TRUE);
7428 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
7432 if (__kmp_init_middle) {
7435 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7436 if (__kmp_init_middle) {
7437 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7440 __kmp_do_middle_initialize();
7441 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
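// Final initialization step, performed lazily by the first thread that
// actually starts a parallel region: captures FP control state on x86,
// installs signal handlers, and selects the default dynamic mode.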
void __kmp_parallel_initialize(void) {
7445 int gtid = __kmp_entry_gtid();
7448 if (TCR_4(__kmp_init_parallel))
7450 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7451 if (TCR_4(__kmp_init_parallel)) {
7452 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7457 if (TCR_4(__kmp_global.g.g_done)) {
7460 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7461 __kmp_infinite_loop();
7467 if (!__kmp_init_middle) {
7468 __kmp_do_middle_initialize();
7470 __kmp_assign_root_init_mask();
7471 __kmp_resume_if_hard_paused();
7474 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7475 KMP_ASSERT(KMP_UBER_GTID(gtid));
7477#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7480 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7481 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7482 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7486#if KMP_HANDLE_SIGNALS
7488 __kmp_install_signals(TRUE);
7492 __kmp_suspend_initialize();
7494#if defined(USE_LOAD_BALANCE)
7495 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7496 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7499 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7500 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7504 if (__kmp_version) {
7505 __kmp_print_version_2();
7509 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7512 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7514 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
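// One-time initialization of the hidden helper team used to run hidden helper
// tasks. Requires parallel initialization to have completed first and is
// guarded by the same bootstrap lock.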
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;
  // __kmp_parallel_initialize is required before hidden helper initialization
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  // Double check under the bootstrap lock
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif
  // No hidden helper tasks have been executed yet
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
  // Mark that the hidden helper team/threads are being initialized
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
  __kmp_do_initialize_hidden_helper_threads();
  // Wait for the hidden helper teams to finish initialization
  __kmp_hidden_helper_threads_initz_wait();
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

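// Per-invocation bookkeeping around the call into the outlined microtask:
// reset the single/dispatch state on entry, and pop the consistency-check
// stack and finish the implicit task on exit.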
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0;
  dispatch->th_doacross_buf_idx = 0;
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}

int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}

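// __kmp_teams_master is run by each primary thread of the league created by a
// teams construct: it sets up a new contention-group root and forks the inner
// parallel team that executes the teams microtask.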
void __kmp_teams_master(int gtid) {
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
  // This thread is a new CG root.  Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // Make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // Init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last parameter "1" eliminates the join barrier, which won't work
  // because worker threads are in a fork barrier waiting for more parallel
  // regions.
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}

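// The __kmp_push_* routines below record clause values (num_threads,
// num_teams, thread_limit, proc_bind) on the encountering thread so that the
// next fork or teams call picks them up.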
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads w/o warning as it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var);
    // no thread_limit clause specified, so do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1;
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

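// Sets the requested number of teams for an upcoming teams construct and the
// thread limit for the parallel regions nested inside it.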
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires requested values to be positive,
    // but users can send any value, so check it here.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // exact number of teams requested
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}

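// __kmp_internal_fork releases the workers of an already formed team into the
// microtask via the fork barrier; __kmp_internal_join gathers them again at
// the join barrier once the parallel region completes.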
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  int f;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset of team-specific dispatch buffers */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_ASSERT(this_thr->th.th_team == team);

  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_ASSERT(this_thr->th.th_team == team);
}

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism.  Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads in the pool, threads active in this root's hot team, and the
  // currently executing thread are available to add to the new team, but they
  // currently contribute to the system load and must be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once g_dynamic_mode is set to
    // dynamic_thread_limit, we should not wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // The real system load at this instant should be at least as large as the
  // number of active OMP threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}
#endif /* USE_LOAD_BALANCE */

// Free all runtime-allocated resources; called during library shutdown once no
// parallel work remains.
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;
  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) {
    /* Must be called in the serial section of the top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}

void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  if (arg & (0x1000 - 1)) { // round up to a multiple of the page size (0x1000)
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) // check for overflow if we round up
      arg += 0x1000;
  }

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Team information common to all team API; returns NULL if not in a teams
   construct. */
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 0; // teams region is serialized (1 team of 1 thread)
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 1;
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}

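// OMP_AFFINITY_FORMAT / omp_capture_affinity support: a small table-driven
// parser that expands %-fields (thread_num, nesting_level, thread_affinity,
// and so on) into a string buffer.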
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L, N, A, etc.
  const char *long_name; // from spec e.g., nesting_level, num_threads, etc.
  char field_format; // data type for snprintf, typically 'd' or 's'
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};

// Expand one %-field of the affinity format into field_buffer and return the
// number of characters needed to hold the formatted string (not including the
// null byte).
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If an implementation does not have info for the field type, then
    // "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}

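// Expand a whole affinity format string: literal characters are copied
// through, %-fields are expanded via __kmp_aux_capture_affinity_field, and
// the number of characters needed to hold the result is returned.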
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg;
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  bool bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was explicitly set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}

void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults

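// Select the reduction implementation (critical section, atomics, or tree
// reduction with barrier) for __kmpc_reduce*, based on what the compiler
// generated (loc flags, reduce_data/reduce_func), the team size, the
// architecture/OS, and any KMP_FORCE_REDUCTION override.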
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL).
  // If reduce_data != NULL && reduce_func != NULL: the tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by the RTL.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION: if the team is serialized (team_size == 1), ignore
  // the forced reduction method and stay with the unsynchronized method
  // (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}

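// omp_pause_resource support: a soft pause puts threads to sleep but keeps
// the runtime alive; a hard pause additionally shuts the runtime down.
// __kmp_resume_if_soft_paused wakes everything back up.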
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// Called via __kmpc_pause_resource. Returns 0 on success, 1 otherwise.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // runtime is not paused, so cannot resume
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}

void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

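// Helpers for resizing a team that uses the distributed barrier: workers are
// first transitioned out of the barrier while its size is changed
// (__kmp_resize_dist_barrier), then moved back into the team
// (__kmp_add_threads_to_team).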
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If a thread is still transitioning to the in-use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in use now; transition it to the unused state
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Workers should see transition status 2 and move to 0; they may need to be
  // woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}

// Add the threads back to the team
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Set th_used_in_team to 3 to indicate to the threads that they should
  // transition themselves back into the team; if blocktime isn't infinite a
  // thread could be sleeping, so send a resume to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }

  // Wait until all threads have moved into the team and set th_used_in_team
  // to 1.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}

// Globals and functions for hidden helper tasks
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads in case
  // a regular thread pushes a hidden helper task to a hidden helper thread
  // that has not been initialized yet. Wait here until all hidden helper
  // threads have been initialized.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If primary thread, then wait for the signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag back to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}

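// KMP_NESTING_MODE support: size the per-level nthreads table and, once the
// topology is known, derive the number of threads to use at each nesting
// level.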
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}

// Set number of threads for the top levels of nesting based on the topology
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--;
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't lose nesting levels
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable guess
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}

// Empty symbols to export (see exports_so.txt) when a feature is disabled
extern "C" {
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif
} // extern "C"