30#include "kmp_dispatch.h"
32#include "kmp_dispatch_hier.h"
36#include "ompt-specific.h"
void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(gtid_ref);

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_root->r.r_active &&
        (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }
}
void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  kmp_info_t *th;

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
      __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
    }
  }
}
// Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC
static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
                                         bool use_hier = false) {
  // Nonmonotonic is the default for dynamic schedules with no modifier.
  int monotonicity = SCHEDULE_NONMONOTONIC;

  // Code compiled for OpenMP 4.5 or earlier defaults to monotonic.
  if (loc != NULL && loc->get_openmp_version() < 50)
    monotonicity = SCHEDULE_MONOTONIC;

  if (use_hier || __kmp_force_monotonic)
    monotonicity = SCHEDULE_MONOTONIC;
  else if (SCHEDULE_HAS_NONMONOTONIC(schedule))
    monotonicity = SCHEDULE_NONMONOTONIC;
  else if (SCHEDULE_HAS_MONOTONIC(schedule))
    monotonicity = SCHEDULE_MONOTONIC;

  return monotonicity;
}
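// Putting the rules above together: an explicit nonmonotonic or monotonic
// modifier wins when present (unless hierarchical scheduling or
// __kmp_force_monotonic forces monotonic), a plain schedule(dynamic) defaults
// to nonmonotonic, and code reporting an OpenMP version below 5.0 defaults to
// monotonic; callers additionally force monotonic for ordered loops.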
#if KMP_STATIC_STEAL_ENABLED
template <typename T>
void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
                                   dispatch_private_info_template<T> *pr,
                                   enum sched_type schedule, T lb, T ub,
                                   typename traits_t<T>::signed_t st,
#if USE_ITT_BUILD
                                   kmp_uint64 *cur_chunk,
#endif
                                   typename traits_t<T>::signed_t chunk,
                                   T nproc, T tid) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::floating_t DBL;

  int active;
  T tc;
  kmp_info_t *th;
  kmp_team_t *team;
  int monotonicity;
  bool use_hier;

#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called "
                            "pr:%%p lb:%%%s ub:%%%s st:%%%s "
                            "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid));
    __kmp_str_free(&buff);
  }
#endif
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;

#if USE_ITT_BUILD
  int itt_need_metadata_reporting =
      __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
      KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1;
#endif
#if KMP_USE_HIER_SCHED
  use_hier = pr->flags.use_hier;
#else
  use_hier = false;
#endif

  /* Pick up the nonmonotonic/monotonic bits from the scheduling type */
  monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
  schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
178 pr->flags.nomerge = TRUE;
182 pr->flags.nomerge = FALSE;
184 pr->type_size = traits_t<T>::type_size;
186 pr->flags.ordered = TRUE;
190 pr->flags.ordered = FALSE;
193 if (pr->flags.ordered) {
194 monotonicity = SCHEDULE_MONOTONIC;
198 schedule = __kmp_static;
200 if (schedule == kmp_sch_runtime) {
203 schedule = team->t.t_sched.r_sched_type;
204 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
205 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
206 if (pr->flags.ordered)
207 monotonicity = SCHEDULE_MONOTONIC;
211 schedule = __kmp_guided;
213 schedule = __kmp_static;
217 chunk = team->t.t_sched.chunk;
226 buff = __kmp_str_format(
"__kmp_dispatch_init_algorithm: T#%%d new: "
227 "schedule:%%d chunk:%%%s\n",
229 KD_TRACE(10, (buff, gtid, schedule, chunk));
230 __kmp_str_free(&buff);
235 schedule = __kmp_guided;
238 chunk = KMP_DEFAULT_CHUNK;
244 schedule = __kmp_auto;
249 buff = __kmp_str_format(
250 "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
251 "schedule:%%d chunk:%%%s\n",
253 KD_TRACE(10, (buff, gtid, schedule, chunk));
254 __kmp_str_free(&buff);
258#if KMP_STATIC_STEAL_ENABLED
260 if (schedule == kmp_sch_dynamic_chunked) {
261 if (monotonicity == SCHEDULE_NONMONOTONIC)
262 schedule = kmp_sch_static_steal;
266 if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
267 schedule = kmp_sch_guided_iterative_chunked;
268 KMP_WARNING(DispatchManyThreads);
272 schedule = team->t.t_sched.r_sched_type;
273 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
274 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
278 schedule == __kmp_static) {
279 schedule = kmp_sch_static_balanced_chunked;
284 chunk = team->t.t_sched.chunk * chunk;
294 buff = __kmp_str_format(
295 "__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
298 KD_TRACE(10, (buff, gtid, schedule, chunk));
299 __kmp_str_free(&buff);
303 pr->u.p.parm1 = chunk;
306 "unknown scheduling type");
310 if (__kmp_env_consistency_check) {
312 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
313 (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc);
327 tc = (UT)(lb - ub) / (-st) + 1;
335 tc = (UT)(ub - lb) / st + 1;
342 if (KMP_MASTER_GTID(gtid)) {
353 pr->u.p.last_upper = ub + st;
359 if (pr->flags.ordered) {
360 pr->ordered_bumped = 0;
361 pr->u.p.ordered_lower = 1;
362 pr->u.p.ordered_upper = 0;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal: {
    T ntc, init = 0;

    KD_TRACE(100,
             ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
              gtid));

    ntc = (tc % chunk ? 1 : 0) + tc / chunk;
    if (nproc > 1 && ntc >= nproc) {
      T id = tid;
      T small_chunk, extras;
      kmp_uint32 old = UNUSED;
      int claimed = pr->steal_flag.compare_exchange_strong(old, CLAIMED);
      if (traits_t<T>::type_size > 4) {
        // Use a dynamically allocated per-buffer lock for 64-bit index types;
        // it is freed in __kmp_dispatch_next when the loop finishes.
        pr->u.p.steal_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
        __kmp_init_lock(pr->u.p.steal_lock);
      }
      small_chunk = ntc / nproc;
      extras = ntc % nproc;

      init = id * small_chunk + (id < extras ? id : extras);
      pr->u.p.count = init;
      if (claimed) { // did we succeed in claiming our own buffer?
        pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
        // READY: other threads may steal from this thread from now on
        KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
      } else { // a thief has already stolen our whole range
        KMP_DEBUG_ASSERT(pr->steal_flag == THIEF);
        pr->u.p.ub = init; // no iterations of our own to work on
      }
      pr->u.p.parm2 = ntc; // save number of chunks
      // parm3 is the number of times to attempt stealing (a heuristic)
      pr->u.p.parm3 = nproc;
      pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
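      // Chunk ownership above follows a balanced block distribution: with
      // ntc = 34 chunks over nproc = 4 threads, small_chunk = 8 and extras = 2,
      // so threads 0..3 initially own chunk ranges [0,9), [9,18), [18,26) and
      // [26,34) respectively (the first 'extras' threads get one extra chunk).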
    } else {
      /* too few chunks: switch to kmp_sch_dynamic_chunked */
      schedule = kmp_sch_dynamic_chunked;
      KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "
                     "kmp_sch_dynamic_chunked\n",
                     gtid));
    }
    break;
  } // case
#endif
423 case kmp_sch_static_balanced: {
428 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",
438 pr->u.p.parm1 = (
id == tc - 1);
441 pr->u.p.parm1 = FALSE;
445 T small_chunk = tc / nproc;
446 T extras = tc % nproc;
447 init =
id * small_chunk + (
id < extras ? id : extras);
448 limit = init + small_chunk - (
id < extras ? 0 : 1);
449 pr->u.p.parm1 = (
id == nproc - 1);
455 pr->u.p.parm1 = TRUE;
459 pr->u.p.parm1 = FALSE;
465 if (itt_need_metadata_reporting)
467 *cur_chunk = limit - init + 1;
470 pr->u.p.lb = lb + init;
471 pr->u.p.ub = lb + limit;
474 T ub_tmp = lb + limit * st;
475 pr->u.p.lb = lb + init * st;
479 pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
481 pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
484 if (pr->flags.ordered) {
485 pr->u.p.ordered_lower = init;
486 pr->u.p.ordered_upper = limit;
490 case kmp_sch_static_balanced_chunked: {
493 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
494 " -> falling-through to static_greedy\n",
496 schedule = kmp_sch_static_greedy;
498 pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
  case kmp_sch_guided_iterative_chunked: {
    KD_TRACE(
        100,
        ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
         " case\n",
         gtid));

    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        // when remaining iters become less than parm2 - switch to dynamic
        pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
        *(double *)&pr->u.p.parm3 =
            guided_flt_param / (double)nproc; // may occupy parm3 and parm4
      }
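      // In __kmp_dispatch_next_algorithm each grab under this schedule takes
      // roughly remaining * (guided_flt_param / nproc) iterations (the factor
      // cached in parm3); once fewer than parm2 iterations remain, threads
      // fall back to claiming fixed chunks of the requested size.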
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
    }
530 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
536 case kmp_sch_guided_analytical_chunked: {
537 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d "
538 "kmp_sch_guided_analytical_chunked case\n",
542 if ((2L * chunk + 1) * nproc >= tc) {
544 schedule = kmp_sch_dynamic_chunked;
550#if KMP_USE_X87CONTROL
560 unsigned int oldFpcw = _control87(0, 0);
561 _control87(_PC_64, _MCW_PC);
565 long double target = ((
long double)chunk * 2 + 1) * nproc / tc;
572 x = 1.0 - 0.5 / (double)nproc;
583 ptrdiff_t natural_alignment =
584 (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
588 (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
593 *(DBL *)&pr->u.p.parm3 = x;
606 p = __kmp_pow<UT>(x, right);
611 }
while (p > target && right < (1 << 27));
619 while (left + 1 < right) {
620 mid = (left + right) / 2;
621 if (__kmp_pow<UT>(x, mid) > target) {
    KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
               __kmp_pow<UT>(x, cross) <= target);

    pr->u.p.parm2 = cross;
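    // cross is found by bisection as the smallest chunk index for which
    // x^cross <= target, where x = 1 - 1/(2*nproc) and
    // target = ((2*chunk + 1) * nproc) / tc, i.e. the point at which the
    // analytically guided chunk size decays to the user-requested chunk;
    // chunks from index parm2 onward are handed out with the plain chunk size.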
637#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
638#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
640#define GUIDED_ANALYTICAL_WORKAROUND (x)
644 __kmp_dispatch_guided_remaining(
645 tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
647#if KMP_USE_X87CONTROL
649 _control87(oldFpcw, _MCW_PC);
653 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d falling-through to "
654 "kmp_sch_static_greedy\n",
656 schedule = kmp_sch_static_greedy;
662 case kmp_sch_static_greedy:
665 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
667 pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
669 case kmp_sch_static_chunked:
670 case kmp_sch_dynamic_chunked:
    if (pr->u.p.parm1 <= 0)
      pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
    else if (pr->u.p.parm1 > tc)
      pr->u.p.parm1 = tc;
    pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
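    // parm2 caches the total number of chunks, ceil(tc / chunk): e.g. a trip
    // count of 10 with chunk 3 gives 10/3 + 1 = 4 chunks. Storing the count up
    // front lets the next-chunk routine compare a claimed chunk number against
    // parm2 directly instead of recomputing bounds from the trip count.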
681 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d "
682 "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
685 case kmp_sch_trapezoidal: {
688 T parm1, parm2, parm3, parm4;
690 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",
696 parm2 = (tc / (2 * nproc));
706 }
else if (parm1 > parm2) {
711 parm3 = (parm2 + parm1);
712 parm3 = (2 * tc + parm3 - 1) / parm3;
720 parm4 = (parm2 - parm1) / parm4;
    pr->u.p.parm1 = parm1;
    pr->u.p.parm2 = parm2;
    pr->u.p.parm3 = parm3;
    pr->u.p.parm4 = parm4;
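    // Trapezoidal self-scheduling: chunk sizes shrink linearly from the first
    // chunk of about tc / (2 * nproc) iterations (parm2) down to the minimum
    // chunk (parm1), decreasing by parm4 each time; parm3 is the resulting
    // number of chunks, 2 * tc / (parm1 + parm2) rounded up, so the sum of the
    // arithmetic series covers the whole trip count.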
735 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected),
736 KMP_HNT(GetNewerLibrary),
741 pr->schedule = schedule;
744#if KMP_USE_HIER_SCHED
746inline void __kmp_dispatch_init_hier_runtime(
ident_t *loc, T lb, T ub,
747 typename traits_t<T>::signed_t st);
750__kmp_dispatch_init_hier_runtime<kmp_int32>(
ident_t *loc, kmp_int32 lb,
751 kmp_int32 ub, kmp_int32 st) {
752 __kmp_dispatch_init_hierarchy<kmp_int32>(
753 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
754 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
758__kmp_dispatch_init_hier_runtime<kmp_uint32>(
ident_t *loc, kmp_uint32 lb,
759 kmp_uint32 ub, kmp_int32 st) {
760 __kmp_dispatch_init_hierarchy<kmp_uint32>(
761 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
762 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
766__kmp_dispatch_init_hier_runtime<kmp_int64>(
ident_t *loc, kmp_int64 lb,
767 kmp_int64 ub, kmp_int64 st) {
768 __kmp_dispatch_init_hierarchy<kmp_int64>(
769 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
770 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
774__kmp_dispatch_init_hier_runtime<kmp_uint64>(
ident_t *loc, kmp_uint64 lb,
775 kmp_uint64 ub, kmp_int64 st) {
776 __kmp_dispatch_init_hierarchy<kmp_uint64>(
777 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
778 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  for (int i = 0; i < num_disp_buff; ++i) {
    // type does not matter here so use kmp_int32 for the shared buffer view
    auto sh =
        reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
            &team->t.t_disp_buffer[i]);
    if (sh->hier) {
      sh->hier->deallocate();
      __kmp_free(sh->hier);
    }
  }
}
#endif
template <typename T>
static void
__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
                    T ub, typename traits_t<T>::signed_t st,
                    typename traits_t<T>::signed_t chunk, int push_ws) {
  typedef typename traits_t<T>::unsigned_t UT;

  int active;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_private_info_template<T> *pr;
  dispatch_shared_info_template<T> volatile *sh;

  KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
                   sizeof(dispatch_private_info));
  KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
                   sizeof(dispatch_shared_info));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
                            "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
                            traits_t<ST>::spec, traits_t<T>::spec,
                            traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
    __kmp_str_free(&buff);
  }
#endif
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
  th->th.th_ident = loc;
849 if (schedule == __kmp_static) {
855#if KMP_USE_HIER_SCHED
861 my_buffer_index = th->th.th_dispatch->th_disp_index;
862 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
864 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
865 my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched);
870 if (pr->flags.use_hier) {
872 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d ordered loop detected. "
873 "Disabling hierarchical scheduling.\n",
875 pr->flags.use_hier = FALSE;
878 if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
881 if (!ordered && !pr->flags.use_hier)
882 __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
887 kmp_uint64 cur_chunk = chunk;
888 int itt_need_metadata_reporting =
889 __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
890 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
891 team->t.t_active_level == 1;
  if (!active) {
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
  } else {
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        &th->th.th_dispatch
             ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
                  my_buffer_index));
    if (sh->buffer_index != my_buffer_index) { // too many loops in progress?
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
                     " sh->buffer_index:%d\n",
                     gtid, my_buffer_index, sh->buffer_index));
      __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                             __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
                     "sh->buffer_index:%d\n",
                     gtid, my_buffer_index, sh->buffer_index));
    }
  }
  __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
#if USE_ITT_BUILD
                                &cur_chunk,
#endif
                                chunk, (T)th->th.th_team_nproc,
                                (T)th->th.th_info.ds.ds_tid);
931 if (pr->flags.ordered == 0) {
932 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
933 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
935 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
936 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
938 th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
939 th->th.th_dispatch->th_dispatch_sh_current =
940 CCAST(dispatch_shared_info_t *, (
volatile dispatch_shared_info_t *)sh);
942 if (pr->flags.ordered) {
943 __kmp_itt_ordered_init(gtid);
946 if (itt_need_metadata_reporting) {
948 kmp_uint64 schedtype = 0;
950 case kmp_sch_static_chunked:
951 case kmp_sch_static_balanced:
953 case kmp_sch_static_greedy:
954 cur_chunk = pr->u.p.parm1;
956 case kmp_sch_dynamic_chunked:
959 case kmp_sch_guided_iterative_chunked:
960 case kmp_sch_guided_analytical_chunked:
970 __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
972#if KMP_USE_HIER_SCHED
973 if (pr->flags.use_hier) {
975 pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
985 buff = __kmp_str_format(
986 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
988 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
989 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
990 traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
991 traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
992 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
993 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
994 KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
995 pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
996 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
997 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
998 __kmp_str_free(&buff);
1001#if OMPT_SUPPORT && OMPT_OPTIONAL
1002 if (ompt_enabled.ompt_callback_work) {
1003 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1004 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1005 ompt_callbacks.ompt_callback(ompt_callback_work)(
1006 ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
1007 &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
1010 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {

    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1038 if (pr->ordered_bumped) {
1041 (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1043 pr->ordered_bumped = 0;
1045 UT lower = pr->u.p.ordered_lower;
1051 buff = __kmp_str_format(
"__kmp_dispatch_finish: T#%%d before wait: "
1052 "ordered_iteration:%%%s lower:%%%s\n",
1053 traits_t<UT>::spec, traits_t<UT>::spec);
1054 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
1055 __kmp_str_free(&buff);
1059 __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
1060 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
1066 buff = __kmp_str_format(
"__kmp_dispatch_finish: T#%%d after wait: "
1067 "ordered_iteration:%%%s lower:%%%s\n",
1068 traits_t<UT>::spec, traits_t<UT>::spec);
1069 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
1070 __kmp_str_free(&buff);
1074 test_then_inc<ST>((
volatile ST *)&sh->u.s.ordered_iteration);
1077 KD_TRACE(100, (
"__kmp_dispatch_finish: T#%d returned\n", gtid));
1080#ifdef KMP_GOMP_COMPAT
1082template <
typename UT>
1083static void __kmp_dispatch_finish_chunk(
int gtid,
ident_t *loc) {
1084 typedef typename traits_t<UT>::signed_t ST;
1085 __kmp_assert_valid_gtid(gtid);
1086 kmp_info_t *th = __kmp_threads[gtid];
1088 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
1089 if (!th->th.th_team->t.t_serialized) {
1090 dispatch_private_info_template<UT> *pr =
1091 reinterpret_cast<dispatch_private_info_template<UT> *
>(
1092 th->th.th_dispatch->th_dispatch_pr_current);
1093 dispatch_shared_info_template<UT>
volatile *sh =
1094 reinterpret_cast<dispatch_shared_info_template<UT>
volatile *
>(
1095 th->th.th_dispatch->th_dispatch_sh_current);
1096 KMP_DEBUG_ASSERT(pr);
1097 KMP_DEBUG_ASSERT(sh);
1098 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1099 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1101 UT lower = pr->u.p.ordered_lower;
1102 UT upper = pr->u.p.ordered_upper;
1103 UT inc = upper - lower + 1;
1105 if (pr->ordered_bumped == inc) {
1108 (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1110 pr->ordered_bumped = 0;
1112 inc -= pr->ordered_bumped;
1118 buff = __kmp_str_format(
1119 "__kmp_dispatch_finish_chunk: T#%%d before wait: "
1120 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
1121 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
1122 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
1123 __kmp_str_free(&buff);
1127 __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
1128 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
1131 KD_TRACE(1000, (
"__kmp_dispatch_finish_chunk: T#%d resetting "
1132 "ordered_bumped to zero\n",
1134 pr->ordered_bumped = 0;
1140 buff = __kmp_str_format(
1141 "__kmp_dispatch_finish_chunk: T#%%d after wait: "
1142 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
1143 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
1144 traits_t<UT>::spec);
1146 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
1147 __kmp_str_free(&buff);
1151 test_then_add<ST>((
volatile ST *)&sh->u.s.ordered_iteration, inc);
1155 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
template <typename T>
int __kmp_dispatch_next_algorithm(int gtid,
                                  dispatch_private_info_template<T> *pr,
                                  dispatch_shared_info_template<T> volatile *sh,
                                  kmp_int32 *p_last, T *p_lb, T *p_ub,
                                  typename traits_t<T>::signed_t *p_st, T nproc,
                                  T tid) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;
  int status = 0;
  bool last = false;
  T start;
  ST incr;
1174 UT limit, trip, init;
1175 kmp_info_t *th = __kmp_threads[gtid];
1176 kmp_team_t *team = th->th.th_team;
1178 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1179 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1180 KMP_DEBUG_ASSERT(pr);
1181 KMP_DEBUG_ASSERT(sh);
1182 KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc);
1188 __kmp_str_format(
"__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
1189 "sh:%%p nproc:%%%s tid:%%%s\n",
1190 traits_t<T>::spec, traits_t<T>::spec);
1191 KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid));
1192 __kmp_str_free(&buff);
1197 if (pr->u.p.tc == 0) {
1199 (
"__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
1205 switch (pr->schedule) {
1206#if KMP_STATIC_STEAL_ENABLED
1207 case kmp_sch_static_steal: {
1208 T chunk = pr->u.p.parm1;
1209 UT nchunks = pr->u.p.parm2;
1211 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",
1214 trip = pr->u.p.tc - 1;
1216 if (traits_t<T>::type_size > 4) {
1219 kmp_lock_t *lck = pr->u.p.steal_lock;
1220 KMP_DEBUG_ASSERT(lck != NULL);
1221 if (pr->u.p.count < (UT)pr->u.p.ub) {
1222 KMP_DEBUG_ASSERT(pr->steal_flag == READY);
1223 __kmp_acquire_lock(lck, gtid);
1225 init = (pr->u.p.count)++;
1226 status = (init < (UT)pr->u.p.ub);
1227 __kmp_release_lock(lck, gtid);
1233 T while_limit = pr->u.p.parm3;
1235 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1236 __kmp_dispatch_num_buffers;
1238 KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF);
1239 while ((!status) && (while_limit != ++while_index)) {
1240 dispatch_private_info_template<T> *v;
1242 T victimId = pr->u.p.parm4;
1243 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1244 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1245 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1246 KMP_DEBUG_ASSERT(v);
1247 while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
1248 oldVictimId != victimId) {
1249 victimId = (victimId + 1) % nproc;
1250 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1251 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1252 KMP_DEBUG_ASSERT(v);
1254 if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
1257 if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
1258 kmp_uint32 old = UNUSED;
1260 status = v->steal_flag.compare_exchange_strong(old, THIEF);
1264 T small_chunk, extras;
1265 small_chunk = nchunks / nproc;
1266 extras = nchunks % nproc;
1267 init =
id * small_chunk + (
id < extras ? id : extras);
1268 __kmp_acquire_lock(lck, gtid);
1269 pr->u.p.count = init + 1;
1270 pr->u.p.ub = init + small_chunk + (
id < extras ? 1 : 0);
1271 __kmp_release_lock(lck, gtid);
1272 pr->u.p.parm4 = (
id + 1) % nproc;
1278 buff = __kmp_str_format(
1279 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1280 "count:%%%s ub:%%%s\n",
1281 traits_t<UT>::spec, traits_t<T>::spec);
1282 KD_TRACE(10, (buff, gtid,
id, pr->u.p.count, pr->u.p.ub));
1283 __kmp_str_free(&buff);
1287 if (pr->u.p.count < (UT)pr->u.p.ub)
1288 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1292 if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY ||
1293 v->u.p.count >= (UT)v->u.p.ub) {
1294 pr->u.p.parm4 = (victimId + 1) % nproc;
1297 lckv = v->u.p.steal_lock;
1298 KMP_ASSERT(lckv != NULL);
1299 __kmp_acquire_lock(lckv, gtid);
1301 if (v->u.p.count >= limit) {
1302 __kmp_release_lock(lckv, gtid);
1303 pr->u.p.parm4 = (victimId + 1) % nproc;
1309 remaining = limit - v->u.p.count;
1310 if (remaining > 7) {
1312 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2);
1313 init = (v->u.p.ub -= (remaining >> 2));
1316 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1);
1317 init = (v->u.p.ub -= 1);
1319 __kmp_release_lock(lckv, gtid);
1324 buff = __kmp_str_format(
1325 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1326 "count:%%%s ub:%%%s\n",
1327 traits_t<UT>::spec, traits_t<UT>::spec);
1328 KD_TRACE(10, (buff, gtid, victimId, init, limit));
1329 __kmp_str_free(&buff);
1332 KMP_DEBUG_ASSERT(init + 1 <= limit);
1333 pr->u.p.parm4 = victimId;
1336 __kmp_acquire_lock(lck, gtid);
1337 pr->u.p.count = init + 1;
1339 __kmp_release_lock(lck, gtid);
1341 if (init + 1 < limit)
1342 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1355 union_i4 vold, vnew;
1356 if (pr->u.p.count < (UT)pr->u.p.ub) {
1357 KMP_DEBUG_ASSERT(pr->steal_flag == READY);
1358 vold.b = *(
volatile kmp_int64 *)(&pr->u.p.count);
1361 while (!KMP_COMPARE_AND_STORE_REL64(
1362 (
volatile kmp_int64 *)&pr->u.p.count,
1363 *VOLATILE_CAST(kmp_int64 *) & vold.b,
1364 *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
1366 vold.b = *(
volatile kmp_int64 *)(&pr->u.p.count);
1370 init = vold.p.count;
1371 status = (init < (UT)vold.p.ub);
1376 T while_limit = pr->u.p.parm3;
1378 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1379 __kmp_dispatch_num_buffers;
1381 KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF);
1382 while ((!status) && (while_limit != ++while_index)) {
1383 dispatch_private_info_template<T> *v;
1385 T victimId = pr->u.p.parm4;
1386 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1387 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1388 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1389 KMP_DEBUG_ASSERT(v);
1390 while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
1391 oldVictimId != victimId) {
1392 victimId = (victimId + 1) % nproc;
1393 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1394 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1395 KMP_DEBUG_ASSERT(v);
1397 if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
1400 if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
1401 kmp_uint32 old = UNUSED;
1403 status = v->steal_flag.compare_exchange_strong(old, THIEF);
1407 T small_chunk, extras;
1408 small_chunk = nchunks / nproc;
1409 extras = nchunks % nproc;
1410 init =
id * small_chunk + (
id < extras ? id : extras);
1411 vnew.p.count = init + 1;
1412 vnew.p.ub = init + small_chunk + (
id < extras ? 1 : 0);
1415 KMP_XCHG_FIXED64((
volatile kmp_int64 *)(&pr->u.p.count), vnew.b);
1417 *(
volatile kmp_int64 *)(&pr->u.p.count) = vnew.b;
1419 pr->u.p.parm4 = (
id + 1) % nproc;
1425 buff = __kmp_str_format(
1426 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1427 "count:%%%s ub:%%%s\n",
1428 traits_t<UT>::spec, traits_t<T>::spec);
1429 KD_TRACE(10, (buff, gtid,
id, pr->u.p.count, pr->u.p.ub));
1430 __kmp_str_free(&buff);
1434 if (pr->u.p.count < (UT)pr->u.p.ub)
1435 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1441 vold.b = *(
volatile kmp_int64 *)(&v->u.p.count);
1442 if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY ||
1443 vold.p.count >= (UT)vold.p.ub) {
1444 pr->u.p.parm4 = (victimId + 1) % nproc;
1448 remaining = vold.p.ub - vold.p.count;
1451 if (remaining > 7) {
1452 vnew.p.ub -= remaining >> 2;
1456 KMP_DEBUG_ASSERT(vnew.p.ub * (UT)chunk <= trip);
1457 if (KMP_COMPARE_AND_STORE_REL64(
1458 (
volatile kmp_int64 *)&v->u.p.count,
1459 *VOLATILE_CAST(kmp_int64 *) & vold.b,
1460 *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
1466 buff = __kmp_str_format(
1467 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1468 "count:%%%s ub:%%%s\n",
1469 traits_t<T>::spec, traits_t<T>::spec);
1470 KD_TRACE(10, (buff, gtid, victimId, vnew.p.ub, vold.p.ub));
1471 __kmp_str_free(&buff);
1474 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
1475 vold.p.ub - vnew.p.ub);
1477 pr->u.p.parm4 = victimId;
1480 vold.p.count = init + 1;
1482 KMP_XCHG_FIXED64((
volatile kmp_int64 *)(&pr->u.p.count), vold.b);
1484 *(
volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
1487 if (vold.p.count < (UT)vold.p.ub)
1488 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1504 limit = chunk + init - 1;
1506 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1);
1508 KMP_DEBUG_ASSERT(init <= trip);
1512 if ((last = (limit >= trip)) != 0)
1518 *p_lb = start + init;
1519 *p_ub = start + limit;
1521 *p_lb = start + init * incr;
1522 *p_ub = start + limit * incr;
1528 case kmp_sch_static_balanced: {
1531 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
1534 if ((status = !pr->u.p.count) != 0) {
1538 last = (pr->u.p.parm1 != 0);
1542 pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
1546 case kmp_sch_static_greedy:
1548 case kmp_sch_static_chunked: {
1551 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d "
1552 "kmp_sch_static_[affinity|chunked] case\n",
1554 parm1 = pr->u.p.parm1;
1556 trip = pr->u.p.tc - 1;
1557 init = parm1 * (pr->u.p.count + tid);
1559 if ((status = (init <= trip)) != 0) {
1562 limit = parm1 + init - 1;
1564 if ((last = (limit >= trip)) != 0)
1570 pr->u.p.count += nproc;
1573 *p_lb = start + init;
1574 *p_ub = start + limit;
1576 *p_lb = start + init * incr;
1577 *p_ub = start + limit * incr;
1580 if (pr->flags.ordered) {
1581 pr->u.p.ordered_lower = init;
1582 pr->u.p.ordered_upper = limit;
1588 case kmp_sch_dynamic_chunked: {
1590 UT chunk_size = pr->u.p.parm1;
1591 UT nchunks = pr->u.p.parm2;
1595 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
1598 chunk_number = test_then_inc_acq<ST>((
volatile ST *)&sh->u.s.iteration);
1599 status = (chunk_number < nchunks);
1606 init = chunk_size * chunk_number;
1607 trip = pr->u.p.tc - 1;
1611 if ((last = (trip - init < (UT)chunk_size)))
1614 limit = chunk_size + init - 1;
1620 *p_lb = start + init;
1621 *p_ub = start + limit;
1623 *p_lb = start + init * incr;
1624 *p_ub = start + limit * incr;
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
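      // Under kmp_sch_dynamic_chunked each call atomically bumps the shared
      // iteration counter to claim one chunk number; the claim succeeds while
      // chunk_number < nchunks (cached in parm2), the chunk starts at
      // chunk_size * chunk_number, and 'last' is set when fewer than
      // chunk_size iterations remain beyond that start.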
1635 case kmp_sch_guided_iterative_chunked: {
1636 T chunkspec = pr->u.p.parm1;
1637 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
1644 init = sh->u.s.iteration;
1645 remaining = trip - init;
1646 if (remaining <= 0) {
1655 init = test_then_add<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1657 remaining = trip - init;
1658 if (remaining <= 0) {
1663 if ((T)remaining > chunkspec) {
1664 limit = init + chunkspec - 1;
1667 limit = init + remaining - 1;
1672 limit = init + (UT)((
double)remaining *
1673 *(
double *)&pr->u.p.parm3);
1674 if (compare_and_swap<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1675 (ST)init, (ST)limit)) {
1687 *p_lb = start + init * incr;
1688 *p_ub = start + limit * incr;
1689 if (pr->flags.ordered) {
1690 pr->u.p.ordered_lower = init;
1691 pr->u.p.ordered_upper = limit;
1705 T chunk = pr->u.p.parm1;
1707 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",
1713 init = sh->u.s.iteration;
1714 remaining = trip - init;
1715 if (remaining <= 0) {
1719 KMP_DEBUG_ASSERT(chunk && init % chunk == 0);
1721 if ((T)remaining < pr->u.p.parm2) {
1724 init = test_then_add<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1726 remaining = trip - init;
1727 if (remaining <= 0) {
1732 if ((T)remaining > chunk) {
1733 limit = init + chunk - 1;
1736 limit = init + remaining - 1;
1743 __kmp_type_convert((
double)remaining * (*(
double *)&pr->u.p.parm3),
1745 UT rem = span % chunk;
1747 span += chunk - rem;
1748 limit = init + span;
1749 if (compare_and_swap<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1750 (ST)init, (ST)limit)) {
1762 *p_lb = start + init * incr;
1763 *p_ub = start + limit * incr;
1764 if (pr->flags.ordered) {
1765 pr->u.p.ordered_lower = init;
1766 pr->u.p.ordered_upper = limit;
1777 case kmp_sch_guided_analytical_chunked: {
1778 T chunkspec = pr->u.p.parm1;
1780#if KMP_USE_X87CONTROL
1783 unsigned int oldFpcw;
1784 unsigned int fpcwSet = 0;
1786 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d "
1787 "kmp_sch_guided_analytical_chunked case\n",
1792 KMP_DEBUG_ASSERT(nproc > 1);
1793 KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip);
1797 chunkIdx = test_then_inc_acq<ST>((
volatile ST *)&sh->u.s.iteration);
1798 if (chunkIdx >= (UT)pr->u.p.parm2) {
1801 init = chunkIdx * chunkspec + pr->u.p.count;
1804 if ((status = (init > 0 && init <= trip)) != 0) {
1805 limit = init + chunkspec - 1;
1807 if ((last = (limit >= trip)) != 0)
1817#if KMP_USE_X87CONTROL
1822 oldFpcw = _control87(0, 0);
1823 _control87(_PC_64, _MCW_PC);
1828 init = __kmp_dispatch_guided_remaining<T>(
1829 trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
1830 KMP_DEBUG_ASSERT(init);
1834 limit = trip - __kmp_dispatch_guided_remaining<T>(
1835 trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
1836 KMP_ASSERT(init <= limit);
1838 KMP_DEBUG_ASSERT(limit <= trip);
1845#if KMP_USE_X87CONTROL
1849 if (fpcwSet && (oldFpcw & fpcwSet))
1850 _control87(oldFpcw, _MCW_PC);
1857 *p_lb = start + init * incr;
1858 *p_ub = start + limit * incr;
1859 if (pr->flags.ordered) {
1860 pr->u.p.ordered_lower = init;
1861 pr->u.p.ordered_upper = limit;
1872 case kmp_sch_trapezoidal: {
1874 T parm2 = pr->u.p.parm2;
1875 T parm3 = pr->u.p.parm3;
1876 T parm4 = pr->u.p.parm4;
1878 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",
1881 index = test_then_inc<ST>((
volatile ST *)&sh->u.s.iteration);
1883 init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
1884 trip = pr->u.p.tc - 1;
1886 if ((status = ((T)index < parm3 && init <= trip)) == 0) {
      limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
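      // init and limit are partial sums of the arithmetic sequence of chunk
      // sizes parm2, parm2 - parm4, parm2 - 2*parm4, ...: chunk k starts at
      // k*(2*parm2 - (k-1)*parm4)/2, and the last valid chunk index is
      // parm3 - 1.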
1896 if ((last = (limit >= trip)) != 0)
1903 *p_lb = start + init;
1904 *p_ub = start + limit;
1906 *p_lb = start + init * incr;
1907 *p_ub = start + limit * incr;
1910 if (pr->flags.ordered) {
1911 pr->u.p.ordered_lower = init;
1912 pr->u.p.ordered_upper = limit;
1919 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected),
1920 KMP_HNT(GetNewerLibrary),
1928 if (pr->flags.ordered) {
1931 buff = __kmp_str_format(
"__kmp_dispatch_next_algorithm: T#%%d "
1932 "ordered_lower:%%%s ordered_upper:%%%s\n",
1933 traits_t<UT>::spec, traits_t<UT>::spec);
1934 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper));
1935 __kmp_str_free(&buff);
1940 buff = __kmp_str_format(
1941 "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
1942 "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
1943 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1944 KMP_DEBUG_ASSERT(p_last);
1945 KMP_DEBUG_ASSERT(p_st);
1946 KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st));
1947 __kmp_str_free(&buff);
1956#if OMPT_SUPPORT && OMPT_OPTIONAL
1957#define OMPT_LOOP_END \
1958 if (status == 0) { \
1959 if (ompt_enabled.ompt_callback_work) { \
1960 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
1961 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1962 ompt_callbacks.ompt_callback(ompt_callback_work)( \
1963 ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
1964 &(task_info->task_data), 0, codeptr); \
1967#define OMPT_LOOP_DISPATCH(lb, ub, st, status) \
1968 if (ompt_enabled.ompt_callback_dispatch && status) { \
1969 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
1970 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1971 ompt_dispatch_chunk_t chunk; \
1972 ompt_data_t instance = ompt_data_none; \
1973 OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st); \
1974 instance.ptr = &chunk; \
1975 ompt_callbacks.ompt_callback(ompt_callback_dispatch)( \
1976 &(team_info->parallel_data), &(task_info->task_data), \
1977 ompt_dispatch_ws_loop_chunk, instance); \
1981#define OMPT_LOOP_END
1982#define OMPT_LOOP_DISPATCH(lb, ub, st, status)
1985#if KMP_STATS_ENABLED
1986#define KMP_STATS_LOOP_END \
1988 kmp_int64 u, l, t, i; \
1989 l = (kmp_int64)(*p_lb); \
1990 u = (kmp_int64)(*p_ub); \
1991 i = (kmp_int64)(pr->u.p.st); \
1992 if (status == 0) { \
1994 KMP_POP_PARTITIONED_TIMER(); \
1995 } else if (i == 1) { \
2000 } else if (i < 0) { \
2002 t = (l - u) / (-i) + 1; \
2007 t = (u - l) / i + 1; \
2011 KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t); \
2014#define KMP_STATS_LOOP_END
2017template <
typename T>
2018static int __kmp_dispatch_next(
ident_t *loc,
int gtid, kmp_int32 *p_last,
2020 typename traits_t<T>::signed_t *p_st
2021#
if OMPT_SUPPORT && OMPT_OPTIONAL
2027 typedef typename traits_t<T>::unsigned_t UT;
2028 typedef typename traits_t<T>::signed_t ST;
2033 KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);
2036 dispatch_private_info_template<T> *pr;
2037 __kmp_assert_valid_gtid(gtid);
2038 kmp_info_t *th = __kmp_threads[gtid];
2039 kmp_team_t *team = th->th.th_team;
2041 KMP_DEBUG_ASSERT(p_lb && p_ub && p_st);
2044 (
"__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
2045 gtid, p_lb, p_ub, p_st, p_last));
2047 if (team->t.t_serialized) {
2049 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
2050 th->th.th_dispatch->th_disp_buffer);
2051 KMP_DEBUG_ASSERT(pr);
2053 if ((status = (pr->u.p.tc != 0)) == 0) {
2060 if (__kmp_env_consistency_check) {
2061 if (pr->pushed_ws != ct_none) {
2062 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2065 }
else if (pr->flags.nomerge) {
2068 UT limit, trip, init;
2070 T chunk = pr->u.p.parm1;
2072 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
2075 init = chunk * pr->u.p.count++;
2076 trip = pr->u.p.tc - 1;
2078 if ((status = (init <= trip)) == 0) {
2085 if (__kmp_env_consistency_check) {
2086 if (pr->pushed_ws != ct_none) {
2087 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2092 limit = chunk + init - 1;
2095 if ((last = (limit >= trip)) != 0) {
2098 pr->u.p.last_upper = pr->u.p.ub;
2106 *p_lb = start + init;
2107 *p_ub = start + limit;
2109 *p_lb = start + init * incr;
2110 *p_ub = start + limit * incr;
2113 if (pr->flags.ordered) {
2114 pr->u.p.ordered_lower = init;
2115 pr->u.p.ordered_upper = limit;
2120 buff = __kmp_str_format(
"__kmp_dispatch_next: T#%%d "
2121 "ordered_lower:%%%s ordered_upper:%%%s\n",
2122 traits_t<UT>::spec, traits_t<UT>::spec);
2123 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
2124 pr->u.p.ordered_upper));
2125 __kmp_str_free(&buff);
2135 pr->u.p.last_upper = *p_ub;
2146 buff = __kmp_str_format(
2147 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
2148 "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
2149 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2150 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,
2151 (p_last ? *p_last : 0), status));
2152 __kmp_str_free(&buff);
2155#if INCLUDE_SSC_MARKS
2156 SSC_MARK_DISPATCH_NEXT();
2158 OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
2164 dispatch_shared_info_template<T>
volatile *sh;
2166 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
2167 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2169 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
2170 th->th.th_dispatch->th_dispatch_pr_current);
2171 KMP_DEBUG_ASSERT(pr);
2172 sh =
reinterpret_cast<dispatch_shared_info_template<T>
volatile *
>(
2173 th->th.th_dispatch->th_dispatch_sh_current);
2174 KMP_DEBUG_ASSERT(sh);
2176#if KMP_USE_HIER_SCHED
2177 if (pr->flags.use_hier)
2178 status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
2181 status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
2182 p_st, th->th.th_team_nproc,
2183 th->th.th_info.ds.ds_tid);
2187 num_done = test_then_inc<ST>(&sh->u.s.num_done);
2192 buff = __kmp_str_format(
2193 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
2194 traits_t<ST>::spec);
2195 KD_TRACE(10, (buff, gtid, sh->u.s.num_done));
2196 __kmp_str_free(&buff);
2200#if KMP_USE_HIER_SCHED
2201 pr->flags.use_hier = FALSE;
2203 if (num_done == th->th.th_team_nproc - 1) {
2204#if KMP_STATIC_STEAL_ENABLED
2205 if (pr->schedule == kmp_sch_static_steal) {
2207 int idx = (th->th.th_dispatch->th_disp_index - 1) %
2208 __kmp_dispatch_num_buffers;
2210 for (i = 0; i < th->th.th_team_nproc; ++i) {
2211 dispatch_private_info_template<T> *buf =
2212 reinterpret_cast<dispatch_private_info_template<T> *
>(
2213 &team->t.t_dispatch[i].th_disp_buffer[idx]);
2214 KMP_ASSERT(buf->steal_flag == THIEF);
2215 KMP_ATOMIC_ST_RLX(&buf->steal_flag, UNUSED);
2216 if (traits_t<T>::type_size > 4) {
2218 kmp_lock_t *lck = buf->u.p.steal_lock;
2219 KMP_ASSERT(lck != NULL);
2220 __kmp_destroy_lock(lck);
2222 buf->u.p.steal_lock = NULL;
2231 sh->u.s.num_done = 0;
2232 sh->u.s.iteration = 0;
2235 if (pr->flags.ordered) {
2236 sh->u.s.ordered_iteration = 0;
2239 sh->buffer_index += __kmp_dispatch_num_buffers;
2240 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d change buffer_index:%d\n",
2241 gtid, sh->buffer_index));
2246 if (__kmp_env_consistency_check) {
2247 if (pr->pushed_ws != ct_none) {
2248 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2252 th->th.th_dispatch->th_deo_fcn = NULL;
2253 th->th.th_dispatch->th_dxo_fcn = NULL;
2254 th->th.th_dispatch->th_dispatch_sh_current = NULL;
2255 th->th.th_dispatch->th_dispatch_pr_current = NULL;
2259 pr->u.p.last_upper = pr->u.p.ub;
2262 if (p_last != NULL && status != 0)
2270 buff = __kmp_str_format(
2271 "__kmp_dispatch_next: T#%%d normal case: "
2272 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
2273 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2274 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,
2275 (p_last ? *p_last : 0), status));
2276 __kmp_str_free(&buff);
2279#if INCLUDE_SSC_MARKS
2280 SSC_MARK_DISPATCH_NEXT();
2282 OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
2308 kmp_uint32 my_buffer_index;
2309 dispatch_shared_info_template<kmp_int32>
volatile *sh;
2311 KMP_DEBUG_ASSERT(__kmp_init_serial);
2313 if (!TCR_4(__kmp_init_parallel))
2314 __kmp_parallel_initialize();
2315 __kmp_resume_if_soft_paused();
2318 th = __kmp_threads[gtid];
2319 team = th->th.th_team;
2320 active = !team->t.t_serialized;
2321 th->th.th_ident = loc;
2324 KD_TRACE(10, (
"__kmpc_sections: called by T#%d\n", gtid));
2331 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
2332 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2334 my_buffer_index = th->th.th_dispatch->th_disp_index++;
2337 sh =
reinterpret_cast<dispatch_shared_info_template<kmp_int32>
volatile *
>(
2338 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
2339 KD_TRACE(10, (
"__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
2342 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
2343 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
2345 KD_TRACE(100, (
"__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
2346 "sh->buffer_index:%d\n",
2347 gtid, my_buffer_index, sh->buffer_index));
2348 __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
2349 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
2353 KD_TRACE(100, (
"__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
2354 "sh->buffer_index:%d\n",
2355 gtid, my_buffer_index, sh->buffer_index));
2357 th->th.th_dispatch->th_dispatch_pr_current =
2359 th->th.th_dispatch->th_dispatch_sh_current =
2360 CCAST(dispatch_shared_info_t *, (
volatile dispatch_shared_info_t *)sh);
2363#if OMPT_SUPPORT && OMPT_OPTIONAL
2364 if (ompt_enabled.ompt_callback_work) {
2365 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2366 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2367 ompt_callbacks.ompt_callback(ompt_callback_work)(
2368 ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
2369 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2372 KMP_PUSH_PARTITIONED_TIMER(OMP_sections);
2388 kmp_int32 numberOfSections) {
2390 KMP_TIME_PARTITIONED_BLOCK(OMP_sections_overhead);
2392 kmp_info_t *th = __kmp_threads[gtid];
2394 kmp_team_t *team = th->th.th_team;
2397 KD_TRACE(1000, (
"__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,
2401 KMP_DEBUG_ASSERT(!team->t.t_serialized);
2403 dispatch_shared_info_template<kmp_int32>
volatile *sh;
2405 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
2406 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2408 KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
2409 sh =
reinterpret_cast<dispatch_shared_info_template<kmp_int32>
volatile *
>(
2410 th->th.th_dispatch->th_dispatch_sh_current);
2411 KMP_DEBUG_ASSERT(sh);
2413 kmp_int32 sectionIndex = 0;
2414 bool moreSectionsToExecute =
true;
2417 sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
2418 if (sectionIndex >= numberOfSections) {
2419 moreSectionsToExecute =
false;
2424 if (!moreSectionsToExecute) {
2427 num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
2429 if (num_done == th->th.th_team_nproc - 1) {
2434 sh->u.s.num_done = 0;
2435 sh->u.s.iteration = 0;
2439 sh->buffer_index += __kmp_dispatch_num_buffers;
2440 KD_TRACE(100, (
"__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
2447 th->th.th_dispatch->th_deo_fcn = NULL;
2448 th->th.th_dispatch->th_dxo_fcn = NULL;
2449 th->th.th_dispatch->th_dispatch_sh_current = NULL;
2450 th->th.th_dispatch->th_dispatch_pr_current = NULL;
2452#if OMPT_SUPPORT && OMPT_OPTIONAL
2453 if (ompt_enabled.ompt_callback_dispatch) {
2454 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2455 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2456 ompt_data_t instance = ompt_data_none;
2457 instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
2458 ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
2459 &(team_info->parallel_data), &(task_info->task_data),
2460 ompt_dispatch_section, instance);
2465 return sectionIndex;
void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
  kmp_info_t *th = __kmp_threads[gtid];
  int active = !th->th.th_team->t.t_serialized;

  KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));
  if (!active) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
          &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
    }
#endif
  }

  KMP_POP_PARTITIONED_TIMER();
  KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
}
2500template <
typename T>
2501static void __kmp_dist_get_bounds(
ident_t *loc, kmp_int32 gtid,
2502 kmp_int32 *plastiter, T *plower, T *pupper,
2503 typename traits_t<T>::signed_t incr) {
2504 typedef typename traits_t<T>::unsigned_t UT;
2511 KMP_DEBUG_ASSERT(plastiter && plower && pupper);
2512 KE_TRACE(10, (
"__kmpc_dist_get_bounds called (%d)\n", gtid));
2514 typedef typename traits_t<T>::signed_t ST;
2518 buff = __kmp_str_format(
"__kmpc_dist_get_bounds: T#%%d liter=%%d "
2519 "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
2520 traits_t<T>::spec, traits_t<T>::spec,
2521 traits_t<ST>::spec, traits_t<T>::spec);
2522 KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
2523 __kmp_str_free(&buff);
2527 if (__kmp_env_consistency_check) {
2529 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
2532 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
2542 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
2545 __kmp_assert_valid_gtid(gtid);
2546 th = __kmp_threads[gtid];
2547 team = th->th.th_team;
2548 KMP_DEBUG_ASSERT(th->th.th_teams_microtask);
2549 nteams = th->th.th_teams_size.nteams;
2550 team_id = team->t.t_master_tid;
2551 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
2555 trip_count = *pupper - *plower + 1;
2556 }
else if (incr == -1) {
2557 trip_count = *plower - *pupper + 1;
2558 }
else if (incr > 0) {
2560 trip_count = (UT)(*pupper - *plower) / incr + 1;
2562 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
2565 if (trip_count <= nteams) {
2567 __kmp_static == kmp_sch_static_greedy ||
2569 kmp_sch_static_balanced);
2571 if (team_id < trip_count) {
2572 *pupper = *plower = *plower + team_id * incr;
2574 *plower = *pupper + incr;
2576 if (plastiter != NULL)
2577 *plastiter = (team_id == trip_count - 1);
2579 if (__kmp_static == kmp_sch_static_balanced) {
2580 UT chunk = trip_count / nteams;
2581 UT extras = trip_count % nteams;
2583 incr * (team_id * chunk + (team_id < extras ? team_id : extras));
2584 *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
2585 if (plastiter != NULL)
2586 *plastiter = (team_id == nteams - 1);
2589 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      *plower += team_id * chunk_inc_count;
      *pupper = *plower + chunk_inc_count - incr;
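      // Greedy distribution across teams: chunk_inc_count equals
      // ceil(trip_count / nteams) * incr, so each team gets a contiguous block
      // of ceil(trip_count / nteams) iterations offset by team_id blocks; the
      // resulting bounds are then clipped to the original upper bound (with
      // overflow checks) just below.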
2597 if (*pupper < *plower)
2598 *pupper = traits_t<T>::max_value;
2599 if (plastiter != NULL)
2600 *plastiter = *plower <= upper && *pupper > upper - incr;
2601 if (*pupper > upper)
2604 if (*pupper > *plower)
2605 *pupper = traits_t<T>::min_value;
2606 if (plastiter != NULL)
2607 *plastiter = *plower >= upper && *pupper < upper - incr;
2608 if (*pupper < upper)
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int32 lb,
                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint32 lb,
                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int64 lb,
                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint64 lb,
                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                                 kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                                  kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                                 kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                                  kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        , OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                            kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         , OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        , OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                            kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         , OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}

void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}
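/*
  Illustrative sketch only (not part of the runtime): a compiler lowering
  "#pragma omp for schedule(dynamic, 4)" over i = 0..n-1 inside a parallel
  region would drive the 32-bit entry points above roughly like this, where
  'loc' and 'gtid' are the location struct and global thread id it already has:

    kmp_int32 lb, ub, st, last;
    __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, 0, n - 1, 1, 4);
    while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
      for (kmp_int32 i = lb; i <= ub; i += st)
        body(i);
    }

  __kmpc_dispatch_next_4 returns nonzero while it hands out chunks and zero
  once the loop is exhausted; 'body', the bounds and the chunk size here are
  made up for the example.
*/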
kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value == checker;
}

kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value != checker;
}

kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
  return value < checker;
}

kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
  return value >= checker;
}

kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
  return value <= checker;
}

kmp_uint32
__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
             kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), void *obj) {
  // note: we may not belong to a team at this point
  volatile kmp_uint32 *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
  kmp_uint32 r;
  kmp_uint64 time;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  // main wait spin loop
  while (!f(r = TCR_4(*spin), check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // if we are oversubscribed, or have waited a bit, yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}

void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
                      kmp_uint32 (*pred)(void *, kmp_uint32), void *obj) {
  // note: we may not belong to a team at this point
  void *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(void *, kmp_uint32) = pred;
  kmp_uint64 time;

  KMP_FSYNC_SPIN_INIT(obj, spin);
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  // main wait spin loop
  while (!f(spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // if we are oversubscribed, or have waited a bit, yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
}
#ifdef KMP_GOMP_COMPAT

void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int32 lb,
                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint32 lb,
                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int64 lb,
                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint64 lb,
                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

#endif /* KMP_GOMP_COMPAT */