1/*
2 * kmp_affinity.h -- header for affinity management
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_AFFINITY_H
14#define KMP_AFFINITY_H
15
16#include "kmp.h"
17#include "kmp_os.h"
18#include <limits>
19
20#if KMP_AFFINITY_SUPPORTED
21#if KMP_USE_HWLOC
22class KMPHwlocAffinity : public KMPAffinity {
23public:
24 class Mask : public KMPAffinity::Mask {
25 hwloc_cpuset_t mask;
26
27 public:
28 Mask() {
29 mask = hwloc_bitmap_alloc();
30 this->zero();
31 }
32 ~Mask() { hwloc_bitmap_free(mask); }
33 void set(int i) override { hwloc_bitmap_set(mask, i); }
34 bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
35 void clear(int i) override { hwloc_bitmap_clr(mask, i); }
36 void zero() override { hwloc_bitmap_zero(mask); }
37 void copy(const KMPAffinity::Mask *src) override {
38 const Mask *convert = static_cast<const Mask *>(src);
39 hwloc_bitmap_copy(mask, convert->mask);
40 }
41 void bitwise_and(const KMPAffinity::Mask *rhs) override {
42 const Mask *convert = static_cast<const Mask *>(rhs);
43 hwloc_bitmap_and(mask, mask, convert->mask);
44 }
45 void bitwise_or(const KMPAffinity::Mask *rhs) override {
46 const Mask *convert = static_cast<const Mask *>(rhs);
47 hwloc_bitmap_or(mask, mask, convert->mask);
48 }
49 void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
50 int begin() const override { return hwloc_bitmap_first(mask); }
51 int end() const override { return -1; }
52 int next(int previous) const override {
53 return hwloc_bitmap_next(mask, previous);
54 }
55 int get_system_affinity(bool abort_on_error) override {
56 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
57 "Illegal get affinity operation when not capable");
58 long retval =
59 hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
60 if (retval >= 0) {
61 return 0;
62 }
63 int error = errno;
64 if (abort_on_error) {
65 __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
66 KMP_ERR(error), __kmp_msg_null);
67 }
68 return error;
69 }
70 int set_system_affinity(bool abort_on_error) const override {
71 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
72 "Illegal set affinity operation when not capable");
73 long retval =
74 hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
75 if (retval >= 0) {
76 return 0;
77 }
78 int error = errno;
79 if (abort_on_error) {
80 __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
81 KMP_ERR(error), __kmp_msg_null);
82 }
83 return error;
84 }
85#if KMP_OS_WINDOWS
86 int set_process_affinity(bool abort_on_error) const override {
87 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
88 "Illegal set process affinity operation when not capable");
89 int error = 0;
90 const hwloc_topology_support *support =
91 hwloc_topology_get_support(__kmp_hwloc_topology);
92 if (support->cpubind->set_proc_cpubind) {
93 int retval;
94 retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
95 HWLOC_CPUBIND_PROCESS);
96 if (retval >= 0)
97 return 0;
98 error = errno;
99 if (abort_on_error)
100 __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
101 KMP_ERR(error), __kmp_msg_null);
102 }
103 return error;
104 }
105#endif
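// Return the single Windows processor group covered by this mask, or -1 if the
// mask spans more than one group (or none). Each 64-processor group corresponds
// to two 32-bit hwloc ulongs, so group i maps to ulongs 2*i and 2*i + 1 of the
// bitmap.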
106 int get_proc_group() const override {
107 int group = -1;
108#if KMP_OS_WINDOWS
109 if (__kmp_num_proc_groups == 1) {
110 return 1;
111 }
112 for (int i = 0; i < __kmp_num_proc_groups; i++) {
113 // On Windows, the long type is always 32 bits
114 unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
115 unsigned long second_32_bits =
116 hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
117 if (first_32_bits == 0 && second_32_bits == 0) {
118 continue;
119 }
120 if (group >= 0) {
121 return -1;
122 }
123 group = i;
124 }
125#endif /* KMP_OS_WINDOWS */
126 return group;
127 }
128 };
129 void determine_capable(const char *var) override {
130 const hwloc_topology_support *topology_support;
131 if (__kmp_hwloc_topology == NULL) {
132 if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
133 __kmp_hwloc_error = TRUE;
134 if (__kmp_affinity.flags.verbose) {
135 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
136 }
137 }
138 if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
139 __kmp_hwloc_error = TRUE;
140 if (__kmp_affinity.flags.verbose) {
141 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
142 }
143 }
144 }
145 topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
146 // Is the system capable of setting/getting this thread's affinity?
147 // Also, is topology discovery possible? (pu indicates ability to discover
148 // processing units). And finally, were there no errors when calling any
149 // hwloc_* API functions?
150 if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
151 topology_support->cpubind->get_thisthread_cpubind &&
152 topology_support->discovery->pu && !__kmp_hwloc_error) {
153 // enables affinity according to KMP_AFFINITY_CAPABLE() macro
154 KMP_AFFINITY_ENABLE(TRUE);
155 } else {
156 // indicate that hwloc didn't work and disable affinity
157 __kmp_hwloc_error = TRUE;
158 KMP_AFFINITY_DISABLE();
159 }
160 }
161 void bind_thread(int which) override {
162 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
163 "Illegal set affinity operation when not capable");
164 KMPAffinity::Mask *mask;
165 KMP_CPU_ALLOC_ON_STACK(mask);
166 KMP_CPU_ZERO(mask);
167 KMP_CPU_SET(which, mask);
168 __kmp_set_system_affinity(mask, TRUE);
169 KMP_CPU_FREE_FROM_STACK(mask);
170 }
171 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
172 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
173 KMPAffinity::Mask *allocate_mask_array(int num) override {
174 return new Mask[num];
175 }
176 void deallocate_mask_array(KMPAffinity::Mask *array) override {
177 Mask *hwloc_array = static_cast<Mask *>(array);
178 delete[] hwloc_array;
179 }
180 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
181 int index) override {
182 Mask *hwloc_array = static_cast<Mask *>(array);
183 return &(hwloc_array[index]);
184 }
185 api_type get_api_type() const override { return HWLOC; }
186};
187#endif /* KMP_USE_HWLOC */
188
189#if KMP_OS_LINUX || KMP_OS_FREEBSD
190#if KMP_OS_LINUX
191/* On some of the older OSes that we build on, these constants aren't present
192 in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
193 all systems of the same arch where they are defined, and they cannot change;
194 they are set in stone forever. */
195#include <sys/syscall.h>
196#if KMP_ARCH_X86 || KMP_ARCH_ARM
197#ifndef __NR_sched_setaffinity
198#define __NR_sched_setaffinity 241
199#elif __NR_sched_setaffinity != 241
200#error Wrong code for setaffinity system call.
201#endif /* __NR_sched_setaffinity */
202#ifndef __NR_sched_getaffinity
203#define __NR_sched_getaffinity 242
204#elif __NR_sched_getaffinity != 242
205#error Wrong code for getaffinity system call.
206#endif /* __NR_sched_getaffinity */
207#elif KMP_ARCH_AARCH64
208#ifndef __NR_sched_setaffinity
209#define __NR_sched_setaffinity 122
210#elif __NR_sched_setaffinity != 122
211#error Wrong code for setaffinity system call.
212#endif /* __NR_sched_setaffinity */
213#ifndef __NR_sched_getaffinity
214#define __NR_sched_getaffinity 123
215#elif __NR_sched_getaffinity != 123
216#error Wrong code for getaffinity system call.
217#endif /* __NR_sched_getaffinity */
218#elif KMP_ARCH_RISCV64
219#ifndef __NR_sched_setaffinity
220#define __NR_sched_setaffinity 122
221#elif __NR_sched_setaffinity != 122
222#error Wrong code for setaffinity system call.
223#endif /* __NR_sched_setaffinity */
224#ifndef __NR_sched_getaffinity
225#define __NR_sched_getaffinity 123
226#elif __NR_sched_getaffinity != 123
227#error Wrong code for getaffinity system call.
228#endif /* __NR_sched_getaffinity */
229#elif KMP_ARCH_X86_64
230#ifndef __NR_sched_setaffinity
231#define __NR_sched_setaffinity 203
232#elif __NR_sched_setaffinity != 203
233#error Wrong code for setaffinity system call.
234#endif /* __NR_sched_setaffinity */
235#ifndef __NR_sched_getaffinity
236#define __NR_sched_getaffinity 204
237#elif __NR_sched_getaffinity != 204
238#error Wrong code for getaffinity system call.
239#endif /* __NR_sched_getaffinity */
240#elif KMP_ARCH_PPC64
241#ifndef __NR_sched_setaffinity
242#define __NR_sched_setaffinity 222
243#elif __NR_sched_setaffinity != 222
244#error Wrong code for setaffinity system call.
245#endif /* __NR_sched_setaffinity */
246#ifndef __NR_sched_getaffinity
247#define __NR_sched_getaffinity 223
248#elif __NR_sched_getaffinity != 223
249#error Wrong code for getaffinity system call.
250#endif /* __NR_sched_getaffinity */
251#elif KMP_ARCH_MIPS
252#ifndef __NR_sched_setaffinity
253#define __NR_sched_setaffinity 4239
254#elif __NR_sched_setaffinity != 4239
255#error Wrong code for setaffinity system call.
256#endif /* __NR_sched_setaffinity */
257#ifndef __NR_sched_getaffinity
258#define __NR_sched_getaffinity 4240
259#elif __NR_sched_getaffinity != 4240
260#error Wrong code for getaffinity system call.
261#endif /* __NR_sched_getaffinity */
262#elif KMP_ARCH_MIPS64
263#ifndef __NR_sched_setaffinity
264#define __NR_sched_setaffinity 5195
265#elif __NR_sched_setaffinity != 5195
266#error Wrong code for setaffinity system call.
267#endif /* __NR_sched_setaffinity */
268#ifndef __NR_sched_getaffinity
269#define __NR_sched_getaffinity 5196
270#elif __NR_sched_getaffinity != 5196
271#error Wrong code for getaffinity system call.
272#endif /* __NR_sched_getaffinity */
273#elif KMP_ARCH_LOONGARCH64
274#ifndef __NR_sched_setaffinity
275#define __NR_sched_setaffinity 122
276#elif __NR_sched_setaffinity != 122
277#error Wrong code for setaffinity system call.
278#endif /* __NR_sched_setaffinity */
279#ifndef __NR_sched_getaffinity
280#define __NR_sched_getaffinity 123
281#elif __NR_sched_getaffinity != 123
282#error Wrong code for getaffinity system call.
283#endif /* __NR_sched_getaffinity */
295#else
296#error Unknown or unsupported architecture
297#endif /* KMP_ARCH_* */
298#elif KMP_OS_FREEBSD
299#include <pthread.h>
300#include <pthread_np.h>
301#endif
302class KMPNativeAffinity : public KMPAffinity {
303 class Mask : public KMPAffinity::Mask {
304 typedef unsigned long mask_t;
305 typedef decltype(__kmp_affin_mask_size) mask_size_type;
306 static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
307 static const mask_t ONE = 1;
308 mask_size_type get_num_mask_types() const {
309 return __kmp_affin_mask_size / sizeof(mask_t);
310 }
311
312 public:
313 mask_t *mask;
314 Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
315 ~Mask() {
316 if (mask)
317 __kmp_free(mask);
318 }
319 void set(int i) override {
320 mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
321 }
322 bool is_set(int i) const override {
323 return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
324 }
325 void clear(int i) override {
326 mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
327 }
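// Bit i of the affinity mask lives in word i / BITS_PER_MASK_T at bit position
// i % BITS_PER_MASK_T; e.g., with a 64-bit mask_t, set(70) performs
// mask[1] |= ONE << 6.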
328 void zero() override {
329 mask_size_type e = get_num_mask_types();
330 for (mask_size_type i = 0; i < e; ++i)
331 mask[i] = (mask_t)0;
332 }
333 void copy(const KMPAffinity::Mask *src) override {
334 const Mask *convert = static_cast<const Mask *>(src);
335 mask_size_type e = get_num_mask_types();
336 for (mask_size_type i = 0; i < e; ++i)
337 mask[i] = convert->mask[i];
338 }
339 void bitwise_and(const KMPAffinity::Mask *rhs) override {
340 const Mask *convert = static_cast<const Mask *>(rhs);
341 mask_size_type e = get_num_mask_types();
342 for (mask_size_type i = 0; i < e; ++i)
343 mask[i] &= convert->mask[i];
344 }
345 void bitwise_or(const KMPAffinity::Mask *rhs) override {
346 const Mask *convert = static_cast<const Mask *>(rhs);
347 mask_size_type e = get_num_mask_types();
348 for (mask_size_type i = 0; i < e; ++i)
349 mask[i] |= convert->mask[i];
350 }
351 void bitwise_not() override {
352 mask_size_type e = get_num_mask_types();
353 for (mask_size_type i = 0; i < e; ++i)
354 mask[i] = ~(mask[i]);
355 }
356 int begin() const override {
357 int retval = 0;
358 while (retval < end() && !is_set(retval))
359 ++retval;
360 return retval;
361 }
362 int end() const override {
363 int e;
364 __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
365 return e;
366 }
367 int next(int previous) const override {
368 int retval = previous + 1;
369 while (retval < end() && !is_set(retval))
370 ++retval;
371 return retval;
372 }
373 int get_system_affinity(bool abort_on_error) override {
374 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
375 "Illegal get affinity operation when not capable");
376#if KMP_OS_LINUX
377 long retval =
378 syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
379#elif KMP_OS_FREEBSD
380 int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
381 reinterpret_cast<cpuset_t *>(mask));
382 int retval = (r == 0 ? 0 : -1);
383#endif
384 if (retval >= 0) {
385 return 0;
386 }
387 int error = errno;
388 if (abort_on_error) {
389 __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
390 KMP_ERR(error), __kmp_msg_null);
391 }
392 return error;
393 }
394 int set_system_affinity(bool abort_on_error) const override {
395 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
396 "Illegal set affinity operation when not capable");
397#if KMP_OS_LINUX
398 long retval =
399 syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
400#elif KMP_OS_FREEBSD
401 int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
402 reinterpret_cast<cpuset_t *>(mask));
403 int retval = (r == 0 ? 0 : -1);
404#endif
405 if (retval >= 0) {
406 return 0;
407 }
408 int error = errno;
409 if (abort_on_error) {
410 __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
411 KMP_ERR(error), __kmp_msg_null);
412 }
413 return error;
414 }
415 };
416 void determine_capable(const char *env_var) override {
417 __kmp_affinity_determine_capable(env_var);
418 }
419 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
420 KMPAffinity::Mask *allocate_mask() override {
421 KMPNativeAffinity::Mask *retval = new Mask();
422 return retval;
423 }
424 void deallocate_mask(KMPAffinity::Mask *m) override {
425 KMPNativeAffinity::Mask *native_mask =
426 static_cast<KMPNativeAffinity::Mask *>(m);
427 delete native_mask;
428 }
429 KMPAffinity::Mask *allocate_mask_array(int num) override {
430 return new Mask[num];
431 }
432 void deallocate_mask_array(KMPAffinity::Mask *array) override {
433 Mask *linux_array = static_cast<Mask *>(array);
434 delete[] linux_array;
435 }
436 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
437 int index) override {
438 Mask *linux_array = static_cast<Mask *>(array);
439 return &(linux_array[index]);
440 }
441 api_type get_api_type() const override { return NATIVE_OS; }
442};
443#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
444
445#if KMP_OS_WINDOWS
446class KMPNativeAffinity : public KMPAffinity {
447 class Mask : public KMPAffinity::Mask {
448 typedef ULONG_PTR mask_t;
449 static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
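// One mask_t per Windows processor group: bit i of the overall mask refers to
// logical processor i % BITS_PER_MASK_T within group i / BITS_PER_MASK_T.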
450 mask_t *mask;
451
452 public:
453 Mask() {
454 mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
455 }
456 ~Mask() {
457 if (mask)
458 __kmp_free(mask);
459 }
460 void set(int i) override {
461 mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
462 }
463 bool is_set(int i) const override {
464 return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
465 }
466 void clear(int i) override {
467 mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
468 }
469 void zero() override {
470 for (int i = 0; i < __kmp_num_proc_groups; ++i)
471 mask[i] = 0;
472 }
473 void copy(const KMPAffinity::Mask *src) override {
474 const Mask *convert = static_cast<const Mask *>(src);
475 for (int i = 0; i < __kmp_num_proc_groups; ++i)
476 mask[i] = convert->mask[i];
477 }
478 void bitwise_and(const KMPAffinity::Mask *rhs) override {
479 const Mask *convert = static_cast<const Mask *>(rhs);
480 for (int i = 0; i < __kmp_num_proc_groups; ++i)
481 mask[i] &= convert->mask[i];
482 }
483 void bitwise_or(const KMPAffinity::Mask *rhs) override {
484 const Mask *convert = static_cast<const Mask *>(rhs);
485 for (int i = 0; i < __kmp_num_proc_groups; ++i)
486 mask[i] |= convert->mask[i];
487 }
488 void bitwise_not() override {
489 for (int i = 0; i < __kmp_num_proc_groups; ++i)
490 mask[i] = ~(mask[i]);
491 }
492 int begin() const override {
493 int retval = 0;
494 while (retval < end() && !is_set(retval))
495 ++retval;
496 return retval;
497 }
498 int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
499 int next(int previous) const override {
500 int retval = previous + 1;
501 while (retval < end() && !is_set(retval))
502 ++retval;
503 return retval;
504 }
505 int set_process_affinity(bool abort_on_error) const override {
506 if (__kmp_num_proc_groups <= 1) {
507 if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
508 DWORD error = GetLastError();
509 if (abort_on_error) {
510 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
511 __kmp_msg_null);
512 }
513 return error;
514 }
515 }
516 return 0;
517 }
518 int set_system_affinity(bool abort_on_error) const override {
519 if (__kmp_num_proc_groups > 1) {
520 // Check for a valid mask.
521 GROUP_AFFINITY ga;
522 int group = get_proc_group();
523 if (group < 0) {
524 if (abort_on_error) {
525 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
526 }
527 return -1;
528 }
529 // Transform the bit vector into a GROUP_AFFINITY struct
530 // and make the system call to set affinity.
531 ga.Group = group;
532 ga.Mask = mask[group];
533 ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
534
535 KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
536 if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
537 DWORD error = GetLastError();
538 if (abort_on_error) {
539 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
540 __kmp_msg_null);
541 }
542 return error;
543 }
544 } else {
545 if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
546 DWORD error = GetLastError();
547 if (abort_on_error) {
548 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
549 __kmp_msg_null);
550 }
551 return error;
552 }
553 }
554 return 0;
555 }
556 int get_system_affinity(bool abort_on_error) override {
557 if (__kmp_num_proc_groups > 1) {
558 this->zero();
559 GROUP_AFFINITY ga;
560 KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
561 if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
562 DWORD error = GetLastError();
563 if (abort_on_error) {
564 __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
565 KMP_ERR(error), __kmp_msg_null);
566 }
567 return error;
568 }
569 if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
570 (ga.Mask == 0)) {
571 return -1;
572 }
573 mask[ga.Group] = ga.Mask;
574 } else {
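// Win32 provides no GetThreadAffinityMask(), so the current thread mask is
// read indirectly: SetThreadAffinityMask() returns the previous mask, so set
// the thread to the full process mask, then restore the returned previous
// mask and record it as the result.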
575 mask_t newMask, sysMask, retval;
576 if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
577 DWORD error = GetLastError();
578 if (abort_on_error) {
579 __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
580 KMP_ERR(error), __kmp_msg_null);
581 }
582 return error;
583 }
584 retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
585 if (!retval) {
586 DWORD error = GetLastError();
587 if (abort_on_error) {
588 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
589 KMP_ERR(error), __kmp_msg_null);
590 }
591 return error;
592 }
593 newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
594 if (!newMask) {
595 DWORD error = GetLastError();
596 if (abort_on_error) {
597 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
598 KMP_ERR(error), __kmp_msg_null);
599 }
600 }
601 *mask = retval;
602 }
603 return 0;
604 }
605 int get_proc_group() const override {
606 int group = -1;
607 if (__kmp_num_proc_groups == 1) {
608 return 1;
609 }
610 for (int i = 0; i < __kmp_num_proc_groups; i++) {
611 if (mask[i] == 0)
612 continue;
613 if (group >= 0)
614 return -1;
615 group = i;
616 }
617 return group;
618 }
619 };
620 void determine_capable(const char *env_var) override {
621 __kmp_affinity_determine_capable(env_var);
622 }
623 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
624 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
625 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
626 KMPAffinity::Mask *allocate_mask_array(int num) override {
627 return new Mask[num];
628 }
629 void deallocate_mask_array(KMPAffinity::Mask *array) override {
630 Mask *windows_array = static_cast<Mask *>(array);
631 delete[] windows_array;
632 }
633 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
634 int index) override {
635 Mask *windows_array = static_cast<Mask *>(array);
636 return &(windows_array[index]);
637 }
638 api_type get_api_type() const override { return NATIVE_OS; }
639};
640#endif /* KMP_OS_WINDOWS */
641#endif /* KMP_AFFINITY_SUPPORTED */
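// Typical use of the mask abstraction above -- a minimal sketch built from the
// KMP_CPU_* helpers already used in bind_thread() (the macros live in kmp.h):
//   kmp_affin_mask_t *mask;
//   KMP_CPU_ALLOC(mask);
//   KMP_CPU_ZERO(mask);
//   KMP_CPU_SET(proc, mask); // select a single logical processor 'proc'
//   __kmp_set_system_affinity(mask, TRUE); // bind the calling thread
//   KMP_CPU_FREE(mask);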
642
643// Describe an attribute for a level in the machine topology
644struct kmp_hw_attr_t {
645 int core_type : 8;
646 int core_eff : 8;
647 unsigned valid : 1;
648 unsigned reserved : 15;
649
650 static const int UNKNOWN_CORE_EFF = -1;
651
652 kmp_hw_attr_t()
653 : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
654 valid(0), reserved(0) {}
655 void set_core_type(kmp_hw_core_type_t type) {
656 valid = 1;
657 core_type = type;
658 }
659 void set_core_eff(int eff) {
660 valid = 1;
661 core_eff = eff;
662 }
663 kmp_hw_core_type_t get_core_type() const {
664 return (kmp_hw_core_type_t)core_type;
665 }
666 int get_core_eff() const { return core_eff; }
667 bool is_core_type_valid() const {
668 return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
669 }
670 bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
671 operator bool() const { return valid; }
672 void clear() {
673 core_type = KMP_HW_CORE_TYPE_UNKNOWN;
674 core_eff = UNKNOWN_CORE_EFF;
675 valid = 0;
676 }
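// x.contains(y) holds when every attribute y specifies (core type and/or core
// efficiency) is also specified by x with the same value; e.g., an attr naming
// only a core type contains one naming the same core type, but not one that
// additionally requires a particular core efficiency.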
677 bool contains(const kmp_hw_attr_t &other) const {
678 if (!valid && !other.valid)
679 return true;
680 if (valid && other.valid) {
681 if (other.is_core_type_valid()) {
682 if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
683 return false;
684 }
685 if (other.is_core_eff_valid()) {
686 if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
687 return false;
688 }
689 return true;
690 }
691 return false;
692 }
693 bool operator==(const kmp_hw_attr_t &rhs) const {
694 return (rhs.valid == valid && rhs.core_eff == core_eff &&
695 rhs.core_type == core_type);
696 }
697 bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
698};
699
700#if KMP_AFFINITY_SUPPORTED
701KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
702#endif
703
704class kmp_hw_thread_t {
705public:
706 static const int UNKNOWN_ID = -1;
707 static const int MULTIPLE_ID = -2;
708 static int compare_ids(const void *a, const void *b);
709 static int compare_compact(const void *a, const void *b);
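// ids[level] is this hardware thread's id at each topology level (UNKNOWN_ID
// when not known); sub_ids[level] is its index relative to the parent at that
// level, filled in by kmp_topology_t::_set_sub_ids(); os_id is the operating
// system's logical processor number.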
710 int ids[KMP_HW_LAST];
711 int sub_ids[KMP_HW_LAST];
712 bool leader;
713 int os_id;
714 kmp_hw_attr_t attrs;
715
716 void print() const;
717 void clear() {
718 for (int i = 0; i < (int)KMP_HW_LAST; ++i)
719 ids[i] = UNKNOWN_ID;
720 leader = false;
721 attrs.clear();
722 }
723};
724
725class kmp_topology_t {
726
727 struct flags_t {
728 int uniform : 1;
729 int reserved : 31;
730 };
731
732 int depth;
733
734 // The following arrays are all 'depth' long, but they are allocated to hold
735 // up to KMP_HW_LAST objects so that layers can be added later without
736 // reallocating any of them.
737
738 // Ordered array of the types in the topology
739 kmp_hw_t *types;
740
741 // Quick topology ratios; for a non-uniform topology, each entry holds the
742 // maximum number of items at that level per item of the enclosing level,
743 // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
744 int *ratio;
745
746 // Storage containing the absolute number of each topology layer
747 int *count;
748
749 // The number of core efficiencies. This is only useful for hybrid
750 // topologies. Core efficiencies range from 0 to num_core_efficiencies - 1.
751 int num_core_efficiencies;
752 int num_core_types;
753 kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
754
755 // The hardware threads array
756 // hw_threads is num_hw_threads long
757 // Each hw_thread's ids and sub_ids are depth deep
758 int num_hw_threads;
759 kmp_hw_thread_t *hw_threads;
760
761 // Equivalence hash where the key is the hardware topology item
762 // and the value is the equivalent hardware topology type in the
763 // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
764 // known equivalence for the topology type
765 kmp_hw_t equivalent[KMP_HW_LAST];
766
767 // Flags describing the topology
768 flags_t flags;
769
770 // Compact value used during sort_compact()
771 int compact;
772
773 // Insert a new topology layer after allocation
774 void _insert_layer(kmp_hw_t type, const int *ids);
775
776#if KMP_GROUP_AFFINITY
777 // Insert topology information about Windows Processor groups
778 void _insert_windows_proc_groups();
779#endif
780
781 // Count each item & get the num x's per y
782 // e.g., get the number of cores and the number of threads per core
783 // for each (x, y) in (KMP_HW_* , KMP_HW_*)
784 void _gather_enumeration_information();
785
786 // Remove layers that don't add information to the topology.
787 // This is done by having the layer take on the id = UNKNOWN_ID (-1)
788 void _remove_radix1_layers();
789
790 // Find out if the topology is uniform
791 void _discover_uniformity();
792
793 // Set all the sub_ids for each hardware thread
794 void _set_sub_ids();
795
796 // Set global affinity variables describing the number of threads per
797 // core, the number of packages, the number of cores per package, and
798 // the number of cores.
799 void _set_globals();
800
801 // Set the last level cache equivalent type
802 void _set_last_level_cache();
803
804 // Return the number of cores with a particular attribute, 'attr'.
805 // If 'find_all' is true, then find all cores on the machine, otherwise find
806 // all cores per the layer 'above'
807 int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
808 bool find_all = false) const;
809
810public:
811 // Force use of allocate()/deallocate()
812 kmp_topology_t() = delete;
813 kmp_topology_t(const kmp_topology_t &t) = delete;
814 kmp_topology_t(kmp_topology_t &&t) = delete;
815 kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
816 kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
817
818 static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
819 static void deallocate(kmp_topology_t *);
820
821 // Functions used in create_map() routines
822 kmp_hw_thread_t &at(int index) {
823 KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
824 return hw_threads[index];
825 }
826 const kmp_hw_thread_t &at(int index) const {
827 KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
828 return hw_threads[index];
829 }
830 int get_num_hw_threads() const { return num_hw_threads; }
831 void sort_ids() {
832 qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
833 kmp_hw_thread_t::compare_ids);
834 }
835 // Check whether the hardware ids are unique; return true if they are,
836 // false otherwise
837 bool check_ids() const;
838
839 // Function to call after the create_map() routine
840 void canonicalize();
841 void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
842
843// Functions used after canonicalize() called
844
845#if KMP_AFFINITY_SUPPORTED
846 // Set the granularity for affinity settings
847 void set_granularity(kmp_affinity_t &stgs) const;
848#endif
849 bool filter_hw_subset();
850 bool is_close(int hwt1, int hwt2, int level) const;
851 bool is_uniform() const { return flags.uniform; }
852 // Return the equivalent type for 'type' in the topology;
853 // returns KMP_HW_UNKNOWN when there is no equivalent type
854 kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
855 // Set type1 = type2
856 void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
857 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
858 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
859 kmp_hw_t real_type2 = equivalent[type2];
860 if (real_type2 == KMP_HW_UNKNOWN)
861 real_type2 = type2;
862 equivalent[type1] = real_type2;
863 // This loop is required since any of the types may have been set to
864 // be equivalent to type1. They all must be checked and reset to type2.
865 KMP_FOREACH_HW_TYPE(type) {
866 if (equivalent[type] == type1) {
867 equivalent[type] = real_type2;
868 }
869 }
870 }
871 // Calculate number of types corresponding to level1
872 // per types corresponding to level2 (e.g., number of threads per core)
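// e.g., for types = { package, core, thread } with ratio = { 4, 6, 2 } (a
// hypothetical machine), calculate_ratio(2, 0) returns 2 * 6 = 12 hardware
// threads per package.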
873 int calculate_ratio(int level1, int level2) const {
874 KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
875 KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
876 int r = 1;
877 for (int level = level1; level > level2; --level)
878 r *= ratio[level];
879 return r;
880 }
881 int get_ratio(int level) const {
882 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
883 return ratio[level];
884 }
885 int get_depth() const { return depth; }
886 kmp_hw_t get_type(int level) const {
887 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
888 return types[level];
889 }
890 int get_level(kmp_hw_t type) const {
891 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
892 int eq_type = equivalent[type];
893 if (eq_type == KMP_HW_UNKNOWN)
894 return -1;
895 for (int i = 0; i < depth; ++i)
896 if (types[i] == eq_type)
897 return i;
898 return -1;
899 }
900 int get_count(int level) const {
901 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
902 return count[level];
903 }
904 // Return the total number of cores with attribute 'attr'
905 int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
906 return _get_ncores_with_attr(attr, -1, true);
907 }
908 // Return the number of cores with attribute
909 // 'attr' per topology level 'above'
910 int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
911 return _get_ncores_with_attr(attr, above, false);
912 }
913
914#if KMP_AFFINITY_SUPPORTED
915 friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
916 void sort_compact(kmp_affinity_t &affinity) {
917 compact = affinity.compact;
918 qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
919 kmp_hw_thread_t::compare_compact);
920 }
921#endif
922 void print(const char *env_var = "KMP_AFFINITY") const;
923 void dump() const;
924};
925extern kmp_topology_t *__kmp_topology;
926
927class kmp_hw_subset_t {
928 const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
929
930public:
931 // Describe a machine topology item in KMP_HW_SUBSET
932 struct item_t {
933 kmp_hw_t type;
934 int num_attrs;
935 int num[MAX_ATTRS];
936 int offset[MAX_ATTRS];
937 kmp_hw_attr_t attr[MAX_ATTRS];
938 };
939 // Put parentheses around max to avoid accidental expansion of the Windows max macro.
940 const static int USE_ALL = (std::numeric_limits<int>::max)();
941
942private:
943 int depth;
944 int capacity;
945 item_t *items;
946 kmp_uint64 set;
947 bool absolute;
948 // The set must be able to handle up to KMP_HW_LAST number of layers
949 KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
950 // Comparator for sorting the KMP_HW_SUBSET items into topology order;
951 // all unknown topology types sort to the beginning of the subset
952 static int hw_subset_compare(const void *i1, const void *i2) {
953 kmp_hw_t type1 = ((const item_t *)i1)->type;
954 kmp_hw_t type2 = ((const item_t *)i2)->type;
955 int level1 = __kmp_topology->get_level(type1);
956 int level2 = __kmp_topology->get_level(type2);
957 return level1 - level2;
958 }
959
960public:
961 // Force use of allocate()/deallocate()
962 kmp_hw_subset_t() = delete;
963 kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
964 kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
965 kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
966 kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
967
968 static kmp_hw_subset_t *allocate() {
969 int initial_capacity = 5;
970 kmp_hw_subset_t *retval =
971 (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
972 retval->depth = 0;
973 retval->capacity = initial_capacity;
974 retval->set = 0ull;
975 retval->absolute = false;
976 retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
977 return retval;
978 }
979 static void deallocate(kmp_hw_subset_t *subset) {
980 __kmp_free(subset->items);
981 __kmp_free(subset);
982 }
983 void set_absolute() { absolute = true; }
984 bool is_absolute() const { return absolute; }
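// push_back() keeps one item per topology layer: if an item of 'type' already
// exists, the new num/offset/attr triple is appended as an additional
// attribute entry of that item (at most MAX_ATTRS entries are kept); otherwise
// a new item is created, doubling the items array once capacity is reached.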
985 void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
986 for (int i = 0; i < depth; ++i) {
987 // Found an existing item for this layer type
988 // Add the num, offset, and attr to this item
989 if (items[i].type == type) {
990 int idx = items[i].num_attrs++;
991 if ((size_t)idx >= MAX_ATTRS)
992 return;
993 items[i].num[idx] = num;
994 items[i].offset[idx] = offset;
995 items[i].attr[idx] = attr;
996 return;
997 }
998 }
999 if (depth == capacity - 1) {
1000 capacity *= 2;
1001 item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
1002 for (int i = 0; i < depth; ++i)
1003 new_items[i] = items[i];
1004 __kmp_free(items);
1005 items = new_items;
1006 }
1007 items[depth].num_attrs = 1;
1008 items[depth].type = type;
1009 items[depth].num[0] = num;
1010 items[depth].offset[0] = offset;
1011 items[depth].attr[0] = attr;
1012 depth++;
1013 set |= (1ull << type);
1014 }
1015 int get_depth() const { return depth; }
1016 const item_t &at(int index) const {
1017 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1018 return items[index];
1019 }
1020 item_t &at(int index) {
1021 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1022 return items[index];
1023 }
1024 void remove(int index) {
1025 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1026 set &= ~(1ull << items[index].type);
1027 for (int j = index + 1; j < depth; ++j) {
1028 items[j - 1] = items[j];
1029 }
1030 depth--;
1031 }
1032 void sort() {
1033 KMP_DEBUG_ASSERT(__kmp_topology);
1034 qsort(items, depth, sizeof(item_t), hw_subset_compare);
1035 }
1036 bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
1037 void dump() const {
1038 printf("**********************\n");
1039 printf("*** kmp_hw_subset: ***\n");
1040 printf("* depth: %d\n", depth);
1041 printf("* items:\n");
1042 for (int i = 0; i < depth; ++i) {
1043 printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
1044 for (int j = 0; j < items[i].num_attrs; ++j) {
1045 printf(" num: %d, offset: %d, attr: ", items[i].num[j],
1046 items[i].offset[j]);
1047 if (!items[i].attr[j]) {
1048 printf(" (none)\n");
1049 } else {
1050 printf(
1051 " core_type = %s, core_eff = %d\n",
1052 __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
1053 items[i].attr[j].get_core_eff());
1054 }
1055 }
1056 }
1057 printf("* set: 0x%llx\n", set);
1058 printf("* absolute: %d\n", absolute);
1059 printf("**********************\n");
1060 }
1061};
1062extern kmp_hw_subset_t *__kmp_hw_subset;
1063
1064/* A structure for holding machine-specific hierarchy info to be computed once
1065 at init. This structure represents a mapping of threads to the actual machine
1066 hierarchy, or to our best guess at what the hierarchy might be, for the
1067 purpose of performing an efficient barrier. In the worst case, when there is
1068 no machine hierarchy information, it produces a tree suitable for a barrier,
1069 similar to the tree used in the hyper barrier. */
1070class hierarchy_info {
1071public:
1072 /* Good default values for the number of leaves and branching factor, given
1073 no affinity information. Behaves a bit like the hyper barrier. */
1074 static const kmp_uint32 maxLeaves = 4;
1075 static const kmp_uint32 minBranch = 4;
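// Capacity, in levels, of the numPerLevel/skipPerLevel arrays. init() starts
// at 7 levels; resize() grows it, each added level doubling the thread
// capacity of the hierarchy.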
1081 kmp_uint32 maxLevels;
1082
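// Number of hierarchy levels currently in use, i.e., the meaningful prefix of
// numPerLevel/skipPerLevel.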
1087 kmp_uint32 depth;
1088 kmp_uint32 base_num_threads;
1089 enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
1090 volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
1091 // 2=initialization in progress
1092 volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
1093
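// numPerLevel[i] is the branching factor at level i: the number of level-i
// entities under each level-(i+1) entity, with level 0 being the leaves
// (threads). skipPerLevel[i] is the stride, in leaf indices, between adjacent
// level-i subtrees (skipPerLevel[0] == 1 and skipPerLevel[i] ==
// numPerLevel[i-1] * skipPerLevel[i-1]). Both arrays share one allocation of
// 2 * maxLevels entries, with skipPerLevel pointing at the second half.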
1098 kmp_uint32 *numPerLevel;
1099 kmp_uint32 *skipPerLevel;
1100
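// Fill numPerLevel from the detected machine topology, innermost level first;
// e.g., a topology of [ 4 packages | 6 cores / package | 2 threads / core ]
// yields numPerLevel = { 2, 6, 4 }.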
1101 void deriveLevels() {
1102 int hier_depth = __kmp_topology->get_depth();
1103 for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1104 numPerLevel[level] = __kmp_topology->get_ratio(i);
1105 }
1106 }
1107
1108 hierarchy_info()
1109 : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
1110
1111 void fini() {
1112 if (!uninitialized && numPerLevel) {
1113 __kmp_free(numPerLevel);
1114 numPerLevel = NULL;
1115 uninitialized = not_initialized;
1116 }
1117 }
1118
1119 void init(int num_addrs) {
1120 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
1121 &uninitialized, not_initialized, initializing);
1122 if (bool_result == 0) { // Wait for initialization
1123 while (TCR_1(uninitialized) != initialized)
1124 KMP_CPU_PAUSE();
1125 return;
1126 }
1127 KMP_DEBUG_ASSERT(bool_result == 1);
1128
1129 /* Explicitly initialize the data fields here to prevent use of a dirty
1130 value observed when the static library is re-initialized multiple times
1131 (e.g., when a non-OpenMP thread repeatedly launches/joins a thread that
1132 uses OpenMP). */
1133 depth = 1;
1134 resizing = 0;
1135 maxLevels = 7;
1136 numPerLevel =
1137 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1138 skipPerLevel = &(numPerLevel[maxLevels]);
1139 for (kmp_uint32 i = 0; i < maxLevels;
1140 ++i) { // init numPerLevel[*] to 1 item per level
1141 numPerLevel[i] = 1;
1142 skipPerLevel[i] = 1;
1143 }
1144
1145 // Build the hierarchy levels from the machine topology when available;
1146 if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1147 deriveLevels();
1148 } else {
1149 numPerLevel[0] = maxLeaves;
1150 numPerLevel[1] = num_addrs / maxLeaves;
1151 if (num_addrs % maxLeaves)
1152 numPerLevel[1]++;
1153 }
1154
1155 base_num_threads = num_addrs;
1156 for (int i = maxLevels - 1; i >= 0;
1157 --i) // count non-empty levels to get depth
1158 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
1159 depth++;
1160
1161 kmp_uint32 branch = minBranch;
1162 if (numPerLevel[0] == 1)
1163 branch = num_addrs / maxLeaves;
1164 if (branch < minBranch)
1165 branch = minBranch;
1166 for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
1167 while (numPerLevel[d] > branch ||
1168 (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
1169 if (numPerLevel[d] & 1)
1170 numPerLevel[d]++;
1171 numPerLevel[d] = numPerLevel[d] >> 1;
1172 if (numPerLevel[d + 1] == 1)
1173 depth++;
1174 numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
1175 }
1176 if (numPerLevel[0] == 1) {
1177 branch = branch >> 1;
1178 if (branch < 4)
1179 branch = minBranch;
1180 }
1181 }
1182
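// e.g., numPerLevel = { 4, 2, 2 } gives skipPerLevel = { 1, 4, 8 }; the levels
// at and beyond 'depth' keep doubling below to absorb oversubscription.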
1183 for (kmp_uint32 i = 1; i < depth; ++i)
1184 skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
1185 // Fill in hierarchy in the case of oversubscription
1186 for (kmp_uint32 i = depth; i < maxLevels; ++i)
1187 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1188
1189 uninitialized = initialized; // One writer
1190 }
1191
1192 // Resize the hierarchy if nproc changes to something larger than before
1193 void resize(kmp_uint32 nproc) {
1194 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1195 while (bool_result == 0) { // someone else is trying to resize
1196 KMP_CPU_PAUSE();
1197 if (nproc <= base_num_threads) // happy with other thread's resize
1198 return;
1199 else // try to resize
1200 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1201 }
1202 KMP_DEBUG_ASSERT(bool_result != 0);
1203 if (nproc <= base_num_threads)
1204 return; // happy with other thread's resize
1205
1206 // Calculate new maxLevels
1207 kmp_uint32 old_sz = skipPerLevel[depth - 1];
1208 kmp_uint32 incs = 0, old_maxLevels = maxLevels;
1209 // First see if old maxLevels is enough to contain new size
1210 for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
1211 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1212 numPerLevel[i - 1] *= 2;
1213 old_sz *= 2;
1214 depth++;
1215 }
1216 if (nproc > old_sz) { // Not enough space, need to expand hierarchy
1217 while (nproc > old_sz) {
1218 old_sz *= 2;
1219 incs++;
1220 depth++;
1221 }
1222 maxLevels += incs;
1223
1224 // Resize arrays
1225 kmp_uint32 *old_numPerLevel = numPerLevel;
1226 kmp_uint32 *old_skipPerLevel = skipPerLevel;
1227 numPerLevel = skipPerLevel = NULL;
1228 numPerLevel =
1229 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1230 skipPerLevel = &(numPerLevel[maxLevels]);
1231
1232 // Copy old elements from old arrays
1233 for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1234 // init numPerLevel[*] to 1 item per level
1235 numPerLevel[i] = old_numPerLevel[i];
1236 skipPerLevel[i] = old_skipPerLevel[i];
1237 }
1238
1239 // Init new elements in arrays to 1
1240 for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1241 // init numPerLevel[*] to 1 item per level
1242 numPerLevel[i] = 1;
1243 skipPerLevel[i] = 1;
1244 }
1245
1246 // Free old arrays
1247 __kmp_free(old_numPerLevel);
1248 }
1249
1250 // Fill in oversubscription levels of hierarchy
1251 for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
1252 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1253
1254 base_num_threads = nproc;
1255 resizing = 0; // One writer
1256 }
1257};
1258#endif // KMP_AFFINITY_H