Merge ^/vendor/llvm-openmp/dist up to its last change, and resolve conflicts.
This commit is contained in:
commit
7b407583d8
@ -2181,10 +2181,9 @@ struct kmp_dephash_entry {
|
||||
typedef struct kmp_dephash {
|
||||
kmp_dephash_entry_t **buckets;
|
||||
size_t size;
|
||||
#ifdef KMP_DEBUG
|
||||
size_t generation;
|
||||
kmp_uint32 nelements;
|
||||
kmp_uint32 nconflicts;
|
||||
#endif
|
||||
} kmp_dephash_t;
|
||||
|
||||
typedef struct kmp_task_affinity_info {
|
||||
@ -3342,7 +3341,7 @@ extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
|
||||
extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
|
||||
extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
|
||||
extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
|
||||
#if KMP_OS_LINUX
|
||||
#if KMP_OS_LINUX || KMP_OS_FREEBSD
|
||||
extern int kmp_set_thread_affinity_mask_initial(void);
|
||||
#endif
|
||||
#endif /* KMP_AFFINITY_SUPPORTED */
|
||||
|
@ -1968,7 +1968,7 @@ static void __kmp_dispatch_set_hierarchy_values() {
|
||||
__kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
|
||||
nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
|
||||
__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
|
||||
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
|
||||
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
|
||||
if (__kmp_mic_type >= mic3)
|
||||
__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
|
||||
else
|
||||
@ -1982,7 +1982,7 @@ static void __kmp_dispatch_set_hierarchy_values() {
|
||||
__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
|
||||
__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
|
||||
__kmp_nThreadsPerCore;
|
||||
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
|
||||
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
|
||||
if (__kmp_mic_type >= mic3)
|
||||
__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
|
||||
2 * __kmp_nThreadsPerCore;
|
||||
|
@ -160,6 +160,7 @@ class KMPHwlocAffinity : public KMPAffinity {
|
||||
};
|
||||
#endif /* KMP_USE_HWLOC */
|
||||
|
||||
#if KMP_OS_LINUX || KMP_OS_FREEBSD
|
||||
#if KMP_OS_LINUX
|
||||
/* On some of the older OS's that we build on, these constants aren't present
|
||||
in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
|
||||
@ -234,6 +235,10 @@ class KMPHwlocAffinity : public KMPAffinity {
|
||||
#endif /* __NR_sched_getaffinity */
|
||||
#error Unknown or unsupported architecture
|
||||
#endif /* KMP_ARCH_* */
|
||||
#elif KMP_OS_FREEBSD
|
||||
#include <pthread.h>
|
||||
#include <pthread_np.h>
|
||||
#endif
|
||||
class KMPNativeAffinity : public KMPAffinity {
|
||||
class Mask : public KMPAffinity::Mask {
|
||||
typedef unsigned char mask_t;
|
||||
@ -294,8 +299,13 @@ class KMPNativeAffinity : public KMPAffinity {
|
||||
int get_system_affinity(bool abort_on_error) override {
|
||||
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
|
||||
"Illegal get affinity operation when not capable");
|
||||
#if KMP_OS_LINUX
|
||||
int retval =
|
||||
syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
|
||||
#elif KMP_OS_FREEBSD
|
||||
int retval =
|
||||
pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
|
||||
#endif
|
||||
if (retval >= 0) {
|
||||
return 0;
|
||||
}
|
||||
@ -308,8 +318,13 @@ class KMPNativeAffinity : public KMPAffinity {
|
||||
int set_system_affinity(bool abort_on_error) const override {
|
||||
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
|
||||
"Illegal get affinity operation when not capable");
|
||||
#if KMP_OS_LINUX
|
||||
int retval =
|
||||
syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
|
||||
#elif KMP_OS_FREEBSD
|
||||
int retval =
|
||||
pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
|
||||
#endif
|
||||
if (retval >= 0) {
|
||||
return 0;
|
||||
}
|
||||
@ -347,7 +362,7 @@ class KMPNativeAffinity : public KMPAffinity {
|
||||
}
|
||||
api_type get_api_type() const override { return NATIVE_OS; }
|
||||
};
|
||||
#endif /* KMP_OS_LINUX */
|
||||
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
|
||||
|
||||
#if KMP_OS_WINDOWS
|
||||
class KMPNativeAffinity : public KMPAffinity {
|
||||
|
@ -545,7 +545,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
|
||||
if (ompt_enabled.ompt_callback_parallel_end) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
|
||||
&(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
|
||||
ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
|
||||
ompt_parallel_invoker_program | ompt_parallel_team,
|
||||
OMPT_LOAD_RETURN_ADDRESS(global_tid));
|
||||
}
|
||||
__ompt_lw_taskteam_unlink(this_thr);
|
||||
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
@ -676,7 +677,8 @@ void __kmpc_flush(ident_t *loc) {
|
||||
#endif // KMP_COMPILER_ICC
|
||||
}
|
||||
#endif // KMP_MIC
|
||||
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
|
||||
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
|
||||
KMP_ARCH_RISCV64)
|
||||
// Nothing to see here move along
|
||||
#elif KMP_ARCH_PPC64
|
||||
// Nothing needed here (we have a real MB above).
|
||||
|
@ -633,5 +633,25 @@
|
||||
GOMP_loop_ull_doacross_guided_start
|
||||
#define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START \
|
||||
GOMP_loop_ull_doacross_runtime_start
|
||||
#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT \
|
||||
GOMP_loop_nonmonotonic_dynamic_next
|
||||
#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START \
|
||||
GOMP_loop_nonmonotonic_dynamic_start
|
||||
#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT \
|
||||
GOMP_loop_nonmonotonic_guided_next
|
||||
#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START \
|
||||
GOMP_loop_nonmonotonic_guided_start
|
||||
#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT \
|
||||
GOMP_loop_ull_nonmonotonic_dynamic_next
|
||||
#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START \
|
||||
GOMP_loop_ull_nonmonotonic_dynamic_start
|
||||
#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT \
|
||||
GOMP_loop_ull_nonmonotonic_guided_next
|
||||
#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START \
|
||||
GOMP_loop_ull_nonmonotonic_guided_start
|
||||
#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC \
|
||||
GOMP_parallel_loop_nonmonotonic_dynamic
|
||||
#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED \
|
||||
GOMP_parallel_loop_nonmonotonic_guided
|
||||
|
||||
#endif /* KMP_FTN_OS_H */
|
||||
|
@ -431,7 +431,7 @@ std::atomic<int> __kmp_thread_pool_active_nth = ATOMIC_VAR_INIT(0);
|
||||
/* -------------------------------------------------
|
||||
* GLOBAL/ROOT STATE */
|
||||
KMP_ALIGN_CACHE
|
||||
kmp_global_t __kmp_global = {{0}};
|
||||
kmp_global_t __kmp_global;
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* GLOBAL SYNCHRONIZATION LOCKS */
|
||||
|
@ -22,7 +22,7 @@ extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
#define MKLOC(loc, routine) \
|
||||
static ident_t(loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
|
||||
static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
|
||||
|
||||
#include "kmp_ftn_os.h"
|
||||
|
||||
@ -622,10 +622,16 @@ LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
|
||||
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
|
||||
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),
|
||||
kmp_sch_dynamic_chunked)
|
||||
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START),
|
||||
kmp_sch_dynamic_chunked)
|
||||
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
|
||||
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {})
|
||||
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),
|
||||
kmp_sch_guided_chunked)
|
||||
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START),
|
||||
kmp_sch_guided_chunked)
|
||||
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
|
||||
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})
|
||||
LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
|
||||
kmp_sch_runtime)
|
||||
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
|
||||
@ -892,6 +898,16 @@ LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
|
||||
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),
|
||||
kmp_sch_guided_chunked)
|
||||
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
|
||||
LOOP_START_ULL(
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START),
|
||||
kmp_sch_dynamic_chunked)
|
||||
LOOP_NEXT_ULL(
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {})
|
||||
LOOP_START_ULL(
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START),
|
||||
kmp_sch_guided_chunked)
|
||||
LOOP_NEXT_ULL(
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {})
|
||||
LOOP_RUNTIME_START_ULL(
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
|
||||
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
|
||||
@ -1487,6 +1503,12 @@ PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),
|
||||
kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),
|
||||
kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP(
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED),
|
||||
kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP(
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC),
|
||||
kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
|
||||
kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
|
||||
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
|
||||
@ -1942,6 +1964,26 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,
|
||||
"GOMP_4.5");
|
||||
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,
|
||||
"GOMP_4.5");
|
||||
|
||||
#endif // KMP_USE_VERSION_SYMBOLS
|
||||
|
||||
|
@ -2943,10 +2943,10 @@ static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
|
||||
#undef expand
|
||||
|
||||
// Exposes only one set of jump tables (*lock or *lock_with_checks).
|
||||
void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0;
|
||||
int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
|
||||
int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
|
||||
int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
|
||||
void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
|
||||
int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
|
||||
int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
|
||||
int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
|
||||
|
||||
// Jump tables for the indirect lock functions
|
||||
#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
|
||||
@ -2993,10 +2993,10 @@ static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
|
||||
#undef expand
|
||||
|
||||
// Exposes only one set of jump tables (*lock or *lock_with_checks).
|
||||
void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0;
|
||||
int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
|
||||
int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
|
||||
int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
|
||||
void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
|
||||
int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
|
||||
int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
|
||||
int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
|
||||
|
||||
// Lock index table.
|
||||
kmp_indirect_lock_table_t __kmp_i_lock_table;
|
||||
|
@ -1122,18 +1122,18 @@ typedef struct {
|
||||
// Function tables for direct locks. Set/unset/test differentiate functions
|
||||
// with/without consistency checking.
|
||||
extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
|
||||
extern void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *);
|
||||
extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
|
||||
extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
|
||||
extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
|
||||
extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
|
||||
extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
|
||||
extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
|
||||
extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
|
||||
|
||||
// Function tables for indirect locks. Set/unset/test differentiate functions
|
||||
// with/without consistency checking.
|
||||
extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
|
||||
extern void (*(*__kmp_indirect_destroy))(kmp_user_lock_p);
|
||||
extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
|
||||
extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
|
||||
extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
|
||||
extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
|
||||
extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
|
||||
extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
|
||||
extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
|
||||
|
||||
// Extracts direct lock tag from a user lock pointer
|
||||
#define KMP_EXTRACT_D_TAG(l) \
|
||||
|
@ -69,7 +69,7 @@
|
||||
#error Unknown compiler
|
||||
#endif
|
||||
|
||||
#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK
|
||||
#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_CNK
|
||||
#define KMP_AFFINITY_SUPPORTED 1
|
||||
#if KMP_OS_WINDOWS && KMP_ARCH_X86_64
|
||||
#define KMP_GROUP_AFFINITY 1
|
||||
@ -165,7 +165,8 @@ typedef unsigned long long kmp_uint64;
|
||||
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
|
||||
#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
|
||||
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
|
||||
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
|
||||
#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
|
||||
#else
|
||||
#error "Can't determine size_t printf format specifier."
|
||||
@ -840,7 +841,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
|
||||
#endif /* KMP_OS_WINDOWS */
|
||||
|
||||
#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
|
||||
KMP_ARCH_MIPS64
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
|
||||
#define KMP_MB() __sync_synchronize()
|
||||
#endif
|
||||
|
||||
|
@ -98,6 +98,7 @@
|
||||
#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_ELFv2 || KMP_ARCH_PPC64_ELFv1)
|
||||
#define KMP_ARCH_MIPS 0
|
||||
#define KMP_ARCH_MIPS64 0
|
||||
#define KMP_ARCH_RISCV64 0
|
||||
|
||||
#if KMP_OS_WINDOWS
|
||||
#if defined(_M_AMD64) || defined(__x86_64)
|
||||
@ -135,6 +136,9 @@
|
||||
#undef KMP_ARCH_MIPS
|
||||
#define KMP_ARCH_MIPS 1
|
||||
#endif
|
||||
#elif defined __riscv && __riscv_xlen == 64
|
||||
#undef KMP_ARCH_RISCV64
|
||||
#define KMP_ARCH_RISCV64 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -199,7 +203,7 @@
|
||||
// TODO: Fixme - This is clever, but really fugly
|
||||
#if (1 != \
|
||||
KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \
|
||||
KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64)
|
||||
KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + KMP_ARCH_RISCV64)
|
||||
#error Unknown or unsupported architecture
|
||||
#endif
|
||||
|
||||
|
@ -1190,8 +1190,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
|
||||
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
|
||||
&(parent_task_info->task_data), &(parent_task_info->frame),
|
||||
&ompt_parallel_data, team_size, ompt_parallel_invoker_program,
|
||||
codeptr);
|
||||
&ompt_parallel_data, team_size,
|
||||
ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
|
||||
}
|
||||
}
|
||||
#endif // OMPT_SUPPORT
|
||||
@ -1481,9 +1481,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
int team_size = master_set_numthreads
|
||||
? master_set_numthreads
|
||||
: get__nproc_2(parent_team, master_tid);
|
||||
int flags = OMPT_INVOKER(call_context) |
|
||||
((microtask == (microtask_t)__kmp_teams_master)
|
||||
? ompt_parallel_league
|
||||
: ompt_parallel_team);
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
|
||||
parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
|
||||
OMPT_INVOKER(call_context), return_address);
|
||||
parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
|
||||
return_address);
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
@ -1512,19 +1516,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
// AC: we are in serialized parallel
|
||||
__kmpc_serialized_parallel(loc, gtid);
|
||||
KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
|
||||
// AC: need this in order enquiry functions work
|
||||
// correctly, will restore at join time
|
||||
parent_team->t.t_serialized--;
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
void *dummy;
|
||||
void **exit_runtime_p;
|
||||
void **exit_frame_p;
|
||||
|
||||
ompt_lw_taskteam_t lw_taskteam;
|
||||
|
||||
if (ompt_enabled.enabled) {
|
||||
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
|
||||
&ompt_parallel_data, return_address);
|
||||
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
|
||||
exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
|
||||
|
||||
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
|
||||
// don't use lw_taskteam after linking. Content was swapped.
|
||||
@ -1532,19 +1534,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
/* OMPT implicit task begin */
|
||||
implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
|
||||
implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
OMPT_CUR_TASK_INFO(master_th)
|
||||
->thread_num = __kmp_tid_from_gtid(gtid);
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
|
||||
implicit_task_data, 1,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
|
||||
}
|
||||
|
||||
/* OMPT state */
|
||||
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
|
||||
} else {
|
||||
exit_runtime_p = &dummy;
|
||||
exit_frame_p = &dummy;
|
||||
}
|
||||
#endif
|
||||
// AC: need to decrement t_serialized for enquiry functions to work
|
||||
// correctly, will restore at join time
|
||||
parent_team->t.t_serialized--;
|
||||
|
||||
{
|
||||
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
|
||||
@ -1552,26 +1558,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
__kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
|
||||
#if OMPT_SUPPORT
|
||||
,
|
||||
exit_runtime_p
|
||||
exit_frame_p
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
*exit_runtime_p = NULL;
|
||||
if (ompt_enabled.enabled) {
|
||||
*exit_frame_p = NULL;
|
||||
OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_end, NULL, implicit_task_data, 1,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
|
||||
}
|
||||
ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
|
||||
__ompt_lw_taskteam_unlink(master_th);
|
||||
|
||||
if (ompt_enabled.ompt_callback_parallel_end) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
|
||||
OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
|
||||
OMPT_INVOKER(call_context), return_address);
|
||||
&ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
|
||||
OMPT_INVOKER(call_context) | ompt_parallel_team,
|
||||
return_address);
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
@ -1586,6 +1593,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
parent_team->t.t_level++;
|
||||
parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_enabled.enabled) {
|
||||
ompt_lw_taskteam_t lw_taskteam;
|
||||
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
|
||||
&ompt_parallel_data, return_address);
|
||||
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Change number of threads in the team if requested */
|
||||
if (master_set_numthreads) { // The parallel has num_threads clause
|
||||
if (master_set_numthreads < master_th->th.th_teams_size.nth) {
|
||||
@ -1714,7 +1730,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
void *dummy;
|
||||
void **exit_runtime_p;
|
||||
void **exit_frame_p;
|
||||
ompt_task_info_t *task_info;
|
||||
|
||||
ompt_lw_taskteam_t lw_taskteam;
|
||||
@ -1727,19 +1743,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
// don't use lw_taskteam after linking. Content was swapped.
|
||||
|
||||
task_info = OMPT_CUR_TASK_INFO(master_th);
|
||||
exit_runtime_p = &(task_info->frame.exit_frame.ptr);
|
||||
exit_frame_p = &(task_info->frame.exit_frame.ptr);
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
|
||||
&(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
OMPT_CUR_TASK_INFO(master_th)
|
||||
->thread_num = __kmp_tid_from_gtid(gtid);
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
|
||||
&(task_info->task_data), 1,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num,
|
||||
ompt_task_implicit);
|
||||
}
|
||||
|
||||
/* OMPT state */
|
||||
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
|
||||
} else {
|
||||
exit_runtime_p = &dummy;
|
||||
exit_frame_p = &dummy;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1750,25 +1768,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
parent_team->t.t_argv
|
||||
#if OMPT_SUPPORT
|
||||
,
|
||||
exit_runtime_p
|
||||
exit_frame_p
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_enabled.enabled) {
|
||||
exit_runtime_p = NULL;
|
||||
*exit_frame_p = NULL;
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_end, NULL, &(task_info->task_data), 1,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num,
|
||||
ompt_task_implicit);
|
||||
}
|
||||
|
||||
ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
|
||||
__ompt_lw_taskteam_unlink(master_th);
|
||||
if (ompt_enabled.ompt_callback_parallel_end) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
|
||||
OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
|
||||
OMPT_INVOKER(call_context), return_address);
|
||||
&ompt_parallel_data, parent_task_data,
|
||||
OMPT_INVOKER(call_context) | ompt_parallel_team,
|
||||
return_address);
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
@ -1800,6 +1820,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
team->t.t_level--;
|
||||
// AC: call special invoker for outer "parallel" of teams construct
|
||||
invoker(gtid);
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_enabled.enabled) {
|
||||
ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_end, NULL, &(task_info->task_data), 0,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
|
||||
}
|
||||
if (ompt_enabled.ompt_callback_parallel_end) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
|
||||
&ompt_parallel_data, parent_task_data,
|
||||
OMPT_INVOKER(call_context) | ompt_parallel_league,
|
||||
return_address);
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
argv = args;
|
||||
for (i = argc - 1; i >= 0; --i)
|
||||
@ -1813,7 +1850,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
void *dummy;
|
||||
void **exit_runtime_p;
|
||||
void **exit_frame_p;
|
||||
ompt_task_info_t *task_info;
|
||||
|
||||
ompt_lw_taskteam_t lw_taskteam;
|
||||
@ -1824,14 +1861,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
|
||||
// don't use lw_taskteam after linking. Content was swapped.
|
||||
task_info = OMPT_CUR_TASK_INFO(master_th);
|
||||
exit_runtime_p = &(task_info->frame.exit_frame.ptr);
|
||||
exit_frame_p = &(task_info->frame.exit_frame.ptr);
|
||||
|
||||
/* OMPT implicit task begin */
|
||||
implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
|
||||
implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
|
||||
ompt_task_implicit);
|
||||
OMPT_CUR_TASK_INFO(master_th)
|
||||
->thread_num = __kmp_tid_from_gtid(gtid);
|
||||
}
|
||||
@ -1839,7 +1877,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
/* OMPT state */
|
||||
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
|
||||
} else {
|
||||
exit_runtime_p = &dummy;
|
||||
exit_frame_p = &dummy;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1849,18 +1887,19 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
__kmp_invoke_microtask(microtask, gtid, 0, argc, args
|
||||
#if OMPT_SUPPORT
|
||||
,
|
||||
exit_runtime_p
|
||||
exit_frame_p
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_enabled.enabled) {
|
||||
*exit_runtime_p = NULL;
|
||||
*exit_frame_p = NULL;
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_end, NULL, &(task_info->task_data), 1,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num,
|
||||
ompt_task_implicit);
|
||||
}
|
||||
|
||||
ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
|
||||
@ -1868,7 +1907,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
|
||||
if (ompt_enabled.ompt_callback_parallel_end) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
|
||||
&ompt_parallel_data, parent_task_data,
|
||||
OMPT_INVOKER(call_context), return_address);
|
||||
OMPT_INVOKER(call_context) | ompt_parallel_team,
|
||||
return_address);
|
||||
}
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
@ -2225,12 +2265,11 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
|
||||
|
||||
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
|
||||
kmp_team_t *team, ompt_data_t *parallel_data,
|
||||
fork_context_e fork_context, void *codeptr) {
|
||||
int flags, void *codeptr) {
|
||||
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
|
||||
if (ompt_enabled.ompt_callback_parallel_end) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
|
||||
parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
|
||||
codeptr);
|
||||
parallel_data, &(task_info->task_data), flags, codeptr);
|
||||
}
|
||||
|
||||
task_info->frame.enter_frame = ompt_data_none;
|
||||
@ -2263,6 +2302,7 @@ void __kmp_join_call(ident_t *loc, int gtid
|
||||
master_th->th.th_ident = loc;
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
void *team_microtask = (void *)team->t.t_pkfn;
|
||||
if (ompt_enabled.enabled) {
|
||||
master_th->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
}
|
||||
@ -2352,10 +2392,25 @@ void __kmp_join_call(ident_t *loc, int gtid
|
||||
if (master_th->th.th_teams_microtask && !exit_teams &&
|
||||
team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
|
||||
team->t.t_level == master_th->th.th_teams_level + 1) {
|
||||
// AC: We need to leave the team structure intact at the end of parallel
|
||||
// inside the teams construct, so that at the next parallel same (hot) team
|
||||
// works, only adjust nesting levels
|
||||
|
||||
// AC: We need to leave the team structure intact at the end of parallel
|
||||
// inside the teams construct, so that at the next parallel same (hot) team
|
||||
// works, only adjust nesting levels
|
||||
#if OMPT_SUPPORT
|
||||
ompt_data_t ompt_parallel_data = ompt_data_none;
|
||||
if (ompt_enabled.enabled) {
|
||||
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
int ompt_team_size = team->t.t_nproc;
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
|
||||
}
|
||||
task_info->frame.exit_frame = ompt_data_none;
|
||||
task_info->task_data = ompt_data_none;
|
||||
ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
|
||||
__ompt_lw_taskteam_unlink(master_th);
|
||||
}
|
||||
#endif
|
||||
/* Decrement our nested depth level */
|
||||
team->t.t_level--;
|
||||
team->t.t_active_level--;
|
||||
@ -2394,8 +2449,8 @@ void __kmp_join_call(ident_t *loc, int gtid
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_enabled.enabled) {
|
||||
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
|
||||
codeptr);
|
||||
__kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
|
||||
OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2424,12 +2479,14 @@ void __kmp_join_call(ident_t *loc, int gtid
|
||||
if (ompt_enabled.enabled) {
|
||||
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
int ompt_team_size = team->t.t_nproc;
|
||||
int flags = (team_microtask == (void *)__kmp_teams_master)
|
||||
? ompt_task_initial
|
||||
: ompt_task_implicit;
|
||||
int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
|
||||
}
|
||||
|
||||
task_info->frame.exit_frame = ompt_data_none;
|
||||
task_info->task_data = ompt_data_none;
|
||||
}
|
||||
@ -2503,8 +2560,12 @@ void __kmp_join_call(ident_t *loc, int gtid
|
||||
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
int flags =
|
||||
OMPT_INVOKER(fork_context) |
|
||||
((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
|
||||
: ompt_parallel_team);
|
||||
if (ompt_enabled.enabled) {
|
||||
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
|
||||
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
|
||||
codeptr);
|
||||
}
|
||||
#endif
|
||||
@ -4432,7 +4493,7 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
|
||||
KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
|
||||
}
|
||||
|
||||
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
|
||||
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
|
||||
/* Sets full mask for thread and returns old mask, no changes to structures. */
|
||||
static void
|
||||
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
|
||||
@ -4980,7 +5041,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
|
||||
__kmp_partition_places(team);
|
||||
#endif
|
||||
} else { // team->t.t_nproc < new_nproc
|
||||
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
|
||||
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
|
||||
kmp_affin_mask_t *old_mask;
|
||||
if (KMP_AFFINITY_CAPABLE()) {
|
||||
KMP_CPU_ALLOC(old_mask);
|
||||
@ -5029,7 +5090,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
|
||||
__kmp_reinitialize_team(team, new_icvs, NULL);
|
||||
}
|
||||
|
||||
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
|
||||
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
|
||||
/* Temporarily set full mask for master thread before creation of
|
||||
workers. The reason is that workers inherit the affinity from master,
|
||||
so if a lot of workers are created on the single core quickly, they
|
||||
@ -5064,7 +5125,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
|
||||
}
|
||||
}
|
||||
|
||||
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
|
||||
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
|
||||
if (KMP_AFFINITY_CAPABLE()) {
|
||||
/* Restore initial master thread's affinity mask */
|
||||
__kmp_set_system_affinity(old_mask, TRUE);
|
||||
@ -5600,7 +5661,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
|
||||
void *__kmp_launch_thread(kmp_info_t *this_thr) {
|
||||
int gtid = this_thr->th.th_info.ds.ds_gtid;
|
||||
/* void *stack_data;*/
|
||||
kmp_team_t *(*volatile pteam);
|
||||
kmp_team_t **volatile pteam;
|
||||
|
||||
KMP_MB();
|
||||
KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
|
||||
@ -5618,18 +5679,15 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
|
||||
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
|
||||
this_thr->th.ompt_thread_info.wait_id = 0;
|
||||
this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
|
||||
this_thr->th.ompt_thread_info.parallel_flags = 0;
|
||||
if (ompt_enabled.ompt_callback_thread_begin) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
|
||||
ompt_thread_worker, thread_data);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_enabled.enabled) {
|
||||
this_thr->th.ompt_thread_info.state = ompt_state_idle;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* This is the place where threads wait for work */
|
||||
while (!TCR_4(__kmp_global.g.g_done)) {
|
||||
KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
|
||||
@ -5647,7 +5705,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
|
||||
}
|
||||
#endif
|
||||
|
||||
pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
|
||||
pteam = &this_thr->th.th_team;
|
||||
|
||||
/* have we been allocated? */
|
||||
if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
|
||||
@ -6956,16 +7014,16 @@ int __kmp_invoke_task_func(int gtid) {
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
void *dummy;
|
||||
void **exit_runtime_p;
|
||||
void **exit_frame_p;
|
||||
ompt_data_t *my_task_data;
|
||||
ompt_data_t *my_parallel_data;
|
||||
int ompt_team_size;
|
||||
|
||||
if (ompt_enabled.enabled) {
|
||||
exit_runtime_p = &(
|
||||
exit_frame_p = &(
|
||||
team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
|
||||
} else {
|
||||
exit_runtime_p = &dummy;
|
||||
exit_frame_p = &dummy;
|
||||
}
|
||||
|
||||
my_task_data =
|
||||
@ -6975,7 +7033,7 @@ int __kmp_invoke_task_func(int gtid) {
|
||||
ompt_team_size = team->t.t_nproc;
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
|
||||
__kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
|
||||
__kmp_tid_from_gtid(gtid), ompt_task_implicit);
|
||||
OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
|
||||
}
|
||||
#endif
|
||||
@ -6994,11 +7052,12 @@ int __kmp_invoke_task_func(int gtid) {
|
||||
tid, (int)team->t.t_argc, (void **)team->t.t_argv
|
||||
#if OMPT_SUPPORT
|
||||
,
|
||||
exit_runtime_p
|
||||
exit_frame_p
|
||||
#endif
|
||||
);
|
||||
#if OMPT_SUPPORT
|
||||
*exit_runtime_p = NULL;
|
||||
*exit_frame_p = NULL;
|
||||
this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
|
||||
#endif
|
||||
|
||||
#if KMP_STATS_ENABLED
|
||||
@ -7077,7 +7136,22 @@ int __kmp_invoke_teams_master(int gtid) {
|
||||
(void *)__kmp_teams_master);
|
||||
#endif
|
||||
__kmp_run_before_invoked_task(gtid, 0, this_thr, team);
|
||||
#if OMPT_SUPPORT
|
||||
int tid = __kmp_tid_from_gtid(gtid);
|
||||
ompt_data_t *task_data =
|
||||
&team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
|
||||
ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
|
||||
ompt_task_initial);
|
||||
OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
|
||||
}
|
||||
#endif
|
||||
__kmp_teams_master(gtid);
|
||||
#if OMPT_SUPPORT
|
||||
this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
|
||||
#endif
|
||||
__kmp_run_after_invoked_task(gtid, 0, this_thr, team);
|
||||
return 1;
|
||||
}
|
||||
@ -7118,19 +7192,32 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
|
||||
thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
|
||||
|
||||
// Remember the number of threads for inner parallel regions
|
||||
if (!TCR_4(__kmp_init_middle))
|
||||
__kmp_middle_initialize(); // get internal globals calculated
|
||||
KMP_DEBUG_ASSERT(__kmp_avail_proc);
|
||||
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
|
||||
if (num_threads == 0) {
|
||||
if (!TCR_4(__kmp_init_middle))
|
||||
__kmp_middle_initialize(); // get __kmp_avail_proc calculated
|
||||
num_threads = __kmp_avail_proc / num_teams;
|
||||
// adjust num_threads w/o warning as it is not user setting
|
||||
// num_threads = min(num_threads, nthreads-var, thread-limit-var)
|
||||
// no thread_limit clause specified - do not change thread-limit-var ICV
|
||||
if (num_threads > __kmp_dflt_team_nth) {
|
||||
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
|
||||
}
|
||||
if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
|
||||
num_threads = thr->th.th_current_task->td_icvs.thread_limit;
|
||||
} // prevent team size to exceed thread-limit-var
|
||||
if (num_teams * num_threads > __kmp_teams_max_nth) {
|
||||
// adjust num_threads w/o warning as it is not user setting
|
||||
num_threads = __kmp_teams_max_nth / num_teams;
|
||||
}
|
||||
} else {
|
||||
// This thread will be the master of the league masters
|
||||
// Store new thread limit; old limit is saved in th_cg_roots list
|
||||
thr->th.th_current_task->td_icvs.thread_limit = num_threads;
|
||||
|
||||
// num_threads = min(num_threads, nthreads-var)
|
||||
if (num_threads > __kmp_dflt_team_nth) {
|
||||
num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
|
||||
}
|
||||
if (num_teams * num_threads > __kmp_teams_max_nth) {
|
||||
int new_threads = __kmp_teams_max_nth / num_teams;
|
||||
if (!__kmp_reserve_warn) { // user asked for too many threads
|
||||
@ -8023,7 +8110,8 @@ __kmp_determine_reduction_method(
|
||||
|
||||
int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
|
||||
|
||||
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
|
||||
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
|
||||
|
||||
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
|
||||
KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
|
||||
|
@ -164,7 +164,7 @@ void *kmp_aligned_malloc(size_t sz, size_t a) {
|
||||
#if KMP_OS_WINDOWS
|
||||
res = _aligned_malloc(sz, a);
|
||||
#else
|
||||
if (err = posix_memalign(&res, a, sz)) {
|
||||
if ((err = posix_memalign(&res, a, sz))) {
|
||||
errno = err; // can be EINVAL or ENOMEM
|
||||
res = NULL;
|
||||
}
|
||||
@ -277,7 +277,7 @@ void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) {
|
||||
|
||||
kmp_proc_bind_t __kmps_get_proc_bind(void) {
|
||||
i;
|
||||
return 0;
|
||||
return proc_bind_false;
|
||||
} // __kmps_get_proc_bind
|
||||
|
||||
double __kmps_get_wtime(void) {
|
||||
|
@ -54,12 +54,64 @@ static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
|
||||
|
||||
enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
|
||||
|
||||
// Bucket counts used for successive generations of a dependence hash table.
// __kmp_dephash_extend() indexes this array with the generation number of the
// table it is about to create. The table is read-only and kept file-local so
// that a symbol with the generic name `sizes` is not exported.
// NOTE(review): assumes no other translation unit refers to `sizes` - confirm.
static const size_t sizes[] = {997,   2003,  4001,   8191,  16001,
                               32003, 64007, 131071, 270029};
// Growth stops once the next generation would reach MAX_GEN, so the last
// generation actually used is MAX_GEN - 1 (131071 buckets); the final entry of
// sizes[] is currently never selected.
const size_t MAX_GEN = 8;
|
||||
|
||||
// Hash a dependence address into a bucket index: two right-shifted copies of
// the address are XOR-ed together and the result is reduced modulo the number
// of buckets (hsize).
static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
  // m_num_sets );
  kmp_intptr_t folded = (addr >> 6) ^ (addr >> 2);
  return folded % hsize;
}
|
||||
|
||||
// Grow a dependence hash table to the next size in sizes[] and move every
// entry of the current table into the new one.
//
// thread          - thread passed to the allocation / free routines
// current_dephash - table to grow; it is freed when a new table is created,
//                   so the caller must use the returned pointer afterwards
//
// Returns the newly created table, or current_dephash itself (untouched) when
// the next generation would reach MAX_GEN.
static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
                                           kmp_dephash_t *current_dephash) {
  kmp_dephash_t *h;

  size_t gen = current_dephash->generation + 1;
  if (gen >= MAX_GEN)
    return current_dephash;
  size_t new_size = sizes[gen];

  // The table header and its bucket array share one allocation.
  kmp_int32 size_to_allocate =
      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
#endif

  h->size = new_size;
  h->nelements = current_dephash->nelements;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->generation = gen;
  // Conflicts are recounted from scratch while re-inserting below.
  h->nconflicts = 0;

  // Do not rely on the allocator handing back zeroed memory: every bucket is
  // read as the head of a chain during re-insertion, so start them all empty.
  for (size_t i = 0; i < new_size; i++)
    h->buckets[i] = NULL;

  // insert existing elements in the new table
  for (size_t i = 0; i < current_dephash->size; i++) {
    kmp_dephash_entry_t *next;
    for (kmp_dephash_entry_t *entry = current_dephash->buckets[i]; entry;
         entry = next) {
      // Remember the old chain link before it is overwritten.
      next = entry->next_in_bucket;
      // Compute the new hash using the new size, and insert the entry in
      // the new bucket.
      kmp_int32 new_bucket = __kmp_dephash_hash(entry->addr, h->size);
      entry->next_in_bucket = h->buckets[new_bucket];
      // A conflict exists only if the *new* bucket was already occupied.
      if (entry->next_in_bucket) {
        h->nconflicts++;
      }
      h->buckets[new_bucket] = entry;
    }
  }

  // Free old hash table
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, current_dephash);
#else
  __kmp_thread_free(thread, current_dephash);
#endif

  return h;
}
|
||||
|
||||
static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
|
||||
kmp_taskdata_t *current_task) {
|
||||
kmp_dephash_t *h;
|
||||
@ -81,10 +133,9 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
|
||||
#endif
|
||||
h->size = h_size;
|
||||
|
||||
#ifdef KMP_DEBUG
|
||||
h->generation = 0;
|
||||
h->nelements = 0;
|
||||
h->nconflicts = 0;
|
||||
#endif
|
||||
h->buckets = (kmp_dephash_entry **)(h + 1);
|
||||
|
||||
for (size_t i = 0; i < h_size; i++)
|
||||
@ -97,7 +148,13 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
|
||||
#define ENTRY_LAST_MTXS 1
|
||||
|
||||
static kmp_dephash_entry *
|
||||
__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
|
||||
__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t **hash, kmp_intptr_t addr) {
|
||||
kmp_dephash_t *h = *hash;
|
||||
if (h->nelements != 0
|
||||
&& h->nconflicts/h->size >= 1) {
|
||||
*hash = __kmp_dephash_extend(thread, h);
|
||||
h = *hash;
|
||||
}
|
||||
kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
|
||||
|
||||
kmp_dephash_entry_t *entry;
|
||||
@ -122,11 +179,9 @@ __kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
|
||||
entry->mtx_lock = NULL;
|
||||
entry->next_in_bucket = h->buckets[bucket];
|
||||
h->buckets[bucket] = entry;
|
||||
#ifdef KMP_DEBUG
|
||||
h->nelements++;
|
||||
if (entry->next_in_bucket)
|
||||
h->nconflicts++;
|
||||
#endif
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
@ -232,7 +287,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
|
||||
|
||||
template <bool filter>
|
||||
static inline kmp_int32
|
||||
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
|
||||
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
|
||||
bool dep_barrier, kmp_int32 ndeps,
|
||||
kmp_depend_info_t *dep_list, kmp_task_t *task) {
|
||||
KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
|
||||
@ -352,7 +407,7 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
|
||||
|
||||
// returns true if the task has any outstanding dependence
|
||||
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
|
||||
kmp_task_t *task, kmp_dephash_t *hash,
|
||||
kmp_task_t *task, kmp_dephash_t **hash,
|
||||
bool dep_barrier, kmp_int32 ndeps,
|
||||
kmp_depend_info_t *dep_list,
|
||||
kmp_int32 ndeps_noalias,
|
||||
@ -552,7 +607,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
|
||||
__kmp_init_node(node);
|
||||
new_taskdata->td_depnode = node;
|
||||
|
||||
if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
|
||||
if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
|
||||
NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
|
||||
noalias_dep_list)) {
|
||||
KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
|
||||
@ -633,7 +688,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
|
||||
kmp_depnode_t node = {0};
|
||||
__kmp_init_node(&node);
|
||||
|
||||
if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash,
|
||||
if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
|
||||
DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
|
||||
noalias_dep_list)) {
|
||||
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
|
||||
|
@ -140,8 +140,11 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
|
||||
#endif
|
||||
if (!KMP_MASTER_TID(ds_tid)) {
|
||||
if (ompt_enabled.ompt_callback_implicit_task) {
|
||||
int flags = this_thr->th.ompt_thread_info.parallel_flags;
|
||||
flags = (flags & ompt_parallel_league) ? ompt_task_initial
|
||||
: ompt_task_implicit;
|
||||
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
|
||||
ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
|
||||
ompt_scope_end, NULL, tId, 0, ds_tid, flags);
|
||||
}
|
||||
// return to idle state
|
||||
this_thr->th.ompt_thread_info.state = ompt_state_idle;
|
||||
|
@ -430,10 +430,8 @@ OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which,
|
||||
|
||||
#define ompt_event_macro(event_name, callback_type, event_id) \
|
||||
case event_name: \
|
||||
if (ompt_event_implementation_status(event_name)) { \
|
||||
ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \
|
||||
ompt_enabled.event_name = (callback != 0); \
|
||||
} \
|
||||
ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \
|
||||
ompt_enabled.event_name = (callback != 0); \
|
||||
if (callback) \
|
||||
return ompt_event_implementation_status(event_name); \
|
||||
else \
|
||||
@ -456,16 +454,15 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
|
||||
switch (which) {
|
||||
|
||||
#define ompt_event_macro(event_name, callback_type, event_id) \
|
||||
case event_name: \
|
||||
if (ompt_event_implementation_status(event_name)) { \
|
||||
ompt_callback_t mycb = \
|
||||
(ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \
|
||||
if (ompt_enabled.event_name && mycb) { \
|
||||
*callback = mycb; \
|
||||
return ompt_get_callback_success; \
|
||||
} \
|
||||
case event_name: { \
|
||||
ompt_callback_t mycb = \
|
||||
(ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \
|
||||
if (ompt_enabled.event_name && mycb) { \
|
||||
*callback = mycb; \
|
||||
return ompt_get_callback_success; \
|
||||
} \
|
||||
return ompt_get_callback_failure;
|
||||
return ompt_get_callback_failure; \
|
||||
}
|
||||
|
||||
FOREACH_OMPT_EVENT(ompt_event_macro)
|
||||
|
||||
|
@ -81,6 +81,7 @@ typedef struct {
|
||||
ompt_state_t state;
|
||||
ompt_wait_id_t wait_id;
|
||||
int ompt_task_yielded;
|
||||
int parallel_flags; // information for the last parallel region invoked
|
||||
void *idle_frame;
|
||||
} ompt_thread_info_t;
|
||||
|
||||
|
@ -269,10 +269,11 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
|
||||
}
|
||||
|
||||
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
|
||||
int on_heap) {
|
||||
int on_heap, bool always) {
|
||||
ompt_lw_taskteam_t *link_lwt = lwt;
|
||||
if (thr->th.th_team->t.t_serialized >
|
||||
1) { // we already have a team, so link the new team and swap values
|
||||
if (always ||
|
||||
thr->th.th_team->t.t_serialized >
|
||||
1) { // we already have a team, so link the new team and swap values
|
||||
if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap
|
||||
link_lwt =
|
||||
(ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
|
||||
|
@ -26,7 +26,7 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
|
||||
int gtid, ompt_data_t *ompt_pid, void *codeptr);
|
||||
|
||||
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
|
||||
int on_heap);
|
||||
int on_heap, bool always = false);
|
||||
|
||||
void __ompt_lw_taskteam_unlink(kmp_info_t *thr);
|
||||
|
||||
|
@ -161,6 +161,10 @@
|
||||
# define ITT_ARCH_MIPS64 6
|
||||
#endif /* ITT_ARCH_MIPS64 */
|
||||
|
||||
#ifndef ITT_ARCH_RISCV64
|
||||
# define ITT_ARCH_RISCV64 7
|
||||
#endif /* ITT_ARCH_RISCV64 */
|
||||
|
||||
#ifndef ITT_ARCH
|
||||
# if defined _M_IX86 || defined __i386__
|
||||
# define ITT_ARCH ITT_ARCH_IA32
|
||||
@ -178,6 +182,8 @@
|
||||
# define ITT_ARCH ITT_ARCH_MIPS
|
||||
# elif defined __mips__ && defined __mips64
|
||||
# define ITT_ARCH ITT_ARCH_MIPS64
|
||||
# elif defined __riscv && __riscv_xlen == 64
|
||||
# define ITT_ARCH ITT_ARCH_RISCV64
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -330,7 +336,9 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
|
||||
: "memory");
|
||||
return result;
|
||||
}
|
||||
#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64 || ITT_ARCH==ITT_ARCH_MIPS || ITT_ARCH==ITT_ARCH_MIPS64
|
||||
#elif ITT_ARCH == ITT_ARCH_ARM || ITT_ARCH == ITT_ARCH_PPC64 || \
|
||||
ITT_ARCH == ITT_ARCH_AARCH64 || ITT_ARCH == ITT_ARCH_MIPS || \
|
||||
ITT_ARCH == ITT_ARCH_MIPS64 || ITT_ARCH == ITT_ARCH_RISCV64
|
||||
#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
|
||||
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
|
||||
#ifndef ITT_SIMPLE_INIT
|
||||
|
@ -8,6 +8,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "kmp_config.h"
|
||||
#include "kmp_os.h"
|
||||
#include "ittnotify_config.h"
|
||||
|
||||
#if ITT_PLATFORM==ITT_PLATFORM_WIN
|
||||
@ -226,8 +227,6 @@ static __itt_api_info api_list[] = {
|
||||
#pragma warning(pop)
|
||||
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
|
||||
|
||||
static const char dll_path[PATH_MAX] = { 0 };
|
||||
|
||||
/* static part descriptor which handles all notification api attributes. */
|
||||
__itt_global _N_(_ittapi_global) = {
|
||||
ITT_MAGIC, /* identification info */
|
||||
@ -238,7 +237,7 @@ __itt_global _N_(_ittapi_global) = {
|
||||
MUTEX_INITIALIZER, /* mutex */
|
||||
NULL, /* dynamic library handle */
|
||||
NULL, /* error_handler */
|
||||
(const char**)&dll_path, /* dll_path_ptr */
|
||||
NULL, /* dll_path_ptr */
|
||||
(__itt_api_info*)&api_list, /* api_list_ptr */
|
||||
NULL, /* next __itt_global */
|
||||
NULL, /* thread_list */
|
||||
@ -1098,6 +1097,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou
|
||||
switch (lib_version) {
|
||||
case 0:
|
||||
groups = __itt_group_legacy;
|
||||
KMP_FALLTHROUGH();
|
||||
case 1:
|
||||
/* Fill all pointers from dynamic library */
|
||||
for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
|
@ -495,13 +495,21 @@ __kmp_unnamed_critical_addr:
|
||||
# endif /* !KMP_ASM_INTRINS */
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
|
||||
//
|
||||
// int
|
||||
// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
|
||||
// int argc, void *p_argv[] ) {
|
||||
// (*pkfn)( & gtid, & gtid, argv[0], ... );
|
||||
// return 1;
|
||||
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[]
|
||||
// #if OMPT_SUPPORT
|
||||
// ,
|
||||
// void **exit_frame_ptr
|
||||
// #endif
|
||||
// ) {
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
// #endif
|
||||
//
|
||||
// (*pkfn)( & gtid, & tid, argv[0], ... );
|
||||
// return 1;
|
||||
// }
|
||||
|
||||
// -- Begin __kmp_invoke_microtask
|
||||
@ -991,14 +999,21 @@ KMP_LABEL(invoke_3):
|
||||
# endif /* !KMP_ASM_INTRINS */
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
|
||||
//
|
||||
// int
|
||||
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[] ) {
|
||||
// (*pkfn)( & gtid, & tid, argv[0], ... );
|
||||
// return 1;
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[]
|
||||
// #if OMPT_SUPPORT
|
||||
// ,
|
||||
// void **exit_frame_ptr
|
||||
// #endif
|
||||
// ) {
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
// #endif
|
||||
//
|
||||
// (*pkfn)( & gtid, & tid, argv[0], ... );
|
||||
// return 1;
|
||||
// }
|
||||
//
|
||||
// note: at call to pkfn must have %rsp 128-byte aligned for compiler
|
||||
@ -1192,15 +1207,27 @@ KMP_LABEL(kmp_1_exit):
|
||||
#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
|
||||
//
|
||||
// int
|
||||
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[] ) {
|
||||
// (*pkfn)( & gtid, & tid, argv[0], ... );
|
||||
// return 1;
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[]
|
||||
// #if OMPT_SUPPORT
|
||||
// ,
|
||||
// void **exit_frame_ptr
|
||||
// #endif
|
||||
// ) {
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
// #endif
|
||||
//
|
||||
// (*pkfn)( & gtid, & tid, argv[0], ... );
|
||||
//
|
||||
// // FIXME: This is done at call-site and can be removed here.
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = 0;
|
||||
// #endif
|
||||
//
|
||||
// return 1;
|
||||
// }
|
||||
//
|
||||
// parameters:
|
||||
@ -1306,15 +1333,27 @@ KMP_LABEL(kmp_1):
|
||||
#if KMP_ARCH_PPC64
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
|
||||
//
|
||||
// int
|
||||
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[] ) {
|
||||
// (*pkfn)( & gtid, & tid, argv[0], ... );
|
||||
// return 1;
|
||||
// int gtid, int tid,
|
||||
// int argc, void *p_argv[]
|
||||
// #if OMPT_SUPPORT
|
||||
// ,
|
||||
// void **exit_frame_ptr
|
||||
// #endif
|
||||
// ) {
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
// #endif
|
||||
//
|
||||
// (*pkfn)( & gtid, & tid, argv[0], ... );
|
||||
//
|
||||
// // FIXME: This is done at call-site and can be removed here.
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = 0;
|
||||
// #endif
|
||||
//
|
||||
// return 1;
|
||||
// }
|
||||
//
|
||||
// parameters:
|
||||
@ -1524,6 +1563,173 @@ __kmp_invoke_microtask:
|
||||
|
||||
#endif /* KMP_ARCH_PPC64 */
|
||||
|
||||
#if KMP_ARCH_RISCV64
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
|
||||
//
|
||||
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
|
||||
// void *p_argv[]
|
||||
// #if OMPT_SUPPORT
|
||||
// ,
|
||||
// void **exit_frame_ptr
|
||||
// #endif
|
||||
// ) {
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
// #endif
|
||||
//
|
||||
// (*pkfn)(&gtid, &tid, argv[0], ...);
|
||||
//
|
||||
// return 1;
|
||||
// }
|
||||
//
|
||||
// Parameters:
|
||||
// a0: pkfn
|
||||
// a1: gtid
|
||||
// a2: tid
|
||||
// a3: argc
|
||||
// a4: p_argv
|
||||
// a5: exit_frame_ptr
|
||||
//
|
||||
// Locals:
|
||||
// __gtid: gtid param pushed on stack so can pass &gtid to pkfn
|
||||
// __tid: tid param pushed on stack so can pass &tid to pkfn
|
||||
//
|
||||
// Temp. registers:
|
||||
//
|
||||
// t0: used to calculate the dynamic stack size / used to hold pkfn address
|
||||
// t1: used as temporary for stack placement calculation
|
||||
// t2: used as temporary for stack arguments
|
||||
// t3: used as temporary for number of remaining pkfn parms
|
||||
// t4: used to traverse p_argv array
|
||||
//
|
||||
// return: a0 (always 1/TRUE)
|
||||
//
|
||||
|
||||
__gtid = -20
|
||||
__tid = -24
|
||||
|
||||
// -- Begin __kmp_invoke_microtask
|
||||
// mark_begin;
|
||||
.text
|
||||
.globl __kmp_invoke_microtask
|
||||
.p2align 1
|
||||
.type __kmp_invoke_microtask,@function
|
||||
__kmp_invoke_microtask:
|
||||
.cfi_startproc
|
||||
|
||||
// First, save ra and fp
|
||||
addi sp, sp, -16
|
||||
sd ra, 8(sp)
|
||||
sd fp, 0(sp)
|
||||
addi fp, sp, 16
|
||||
.cfi_def_cfa fp, 0
|
||||
.cfi_offset ra, -8
|
||||
.cfi_offset fp, -16
|
||||
|
||||
// Compute the dynamic stack size:
|
||||
//
|
||||
// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
|
||||
// reference
|
||||
// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
|
||||
// function by register. Given that we have 8 of such registers (a[0-7])
|
||||
// and two + 'argc' arguments (consider &gtid and &tid), we need to
|
||||
// reserve max(0, argc - 6)*8 extra bytes
|
||||
//
|
||||
// The total number of bytes is then max(0, argc - 6)*8 + 8
|
||||
|
||||
// Compute max(0, argc - 6) using the following bithack:
|
||||
// max(0, x) = x - (x & (x >> 31)), where x := argc - 6
|
||||
// Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
|
||||
addi t0, a3, -6
|
||||
srai t1, t0, 31
|
||||
and t1, t0, t1
|
||||
sub t0, t0, t1
|
||||
|
||||
addi t0, t0, 1
|
||||
|
||||
slli t0, t0, 3
|
||||
sub sp, sp, t0
|
||||
|
||||
// Align the stack to 16 bytes
|
||||
andi sp, sp, -16
|
||||
|
||||
mv t0, a0
|
||||
mv t3, a3
|
||||
mv t4, a4
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
// Save frame pointer into exit_frame
|
||||
sd fp, 0(a5)
|
||||
#endif
|
||||
|
||||
// Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
|
||||
|
||||
sw a1, __gtid(fp)
|
||||
sw a2, __tid(fp)
|
||||
|
||||
addi a0, fp, __gtid
|
||||
addi a1, fp, __tid
|
||||
|
||||
beqz t3, .L_kmp_3
|
||||
ld a2, 0(t4)
|
||||
|
||||
addi t3, t3, -1
|
||||
beqz t3, .L_kmp_3
|
||||
ld a3, 8(t4)
|
||||
|
||||
addi t3, t3, -1
|
||||
beqz t3, .L_kmp_3
|
||||
ld a4, 16(t4)
|
||||
|
||||
addi t3, t3, -1
|
||||
beqz t3, .L_kmp_3
|
||||
ld a5, 24(t4)
|
||||
|
||||
addi t3, t3, -1
|
||||
beqz t3, .L_kmp_3
|
||||
ld a6, 32(t4)
|
||||
|
||||
addi t3, t3, -1
|
||||
beqz t3, .L_kmp_3
|
||||
ld a7, 40(t4)
|
||||
|
||||
// Prepare any additional argument passed through the stack
|
||||
addi t4, t4, 48
|
||||
mv t1, sp
|
||||
j .L_kmp_2
|
||||
.L_kmp_1:
|
||||
ld t2, 0(t4)
|
||||
sd t2, 0(t1)
|
||||
addi t4, t4, 8
|
||||
addi t1, t1, 8
|
||||
.L_kmp_2:
|
||||
addi t3, t3, -1
|
||||
bnez t3, .L_kmp_1
|
||||
|
||||
.L_kmp_3:
|
||||
// Call pkfn function
|
||||
jalr t0
|
||||
|
||||
// Restore stack and return
|
||||
|
||||
addi a0, zero, 1
|
||||
|
||||
addi sp, fp, -16
|
||||
ld fp, 0(sp)
|
||||
ld ra, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
.Lfunc_end0:
|
||||
.size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
|
||||
.cfi_endproc
|
||||
|
||||
// -- End __kmp_invoke_microtask
|
||||
|
||||
#endif /* KMP_ARCH_RISCV64 */
|
||||
|
||||
#if KMP_ARCH_ARM || KMP_ARCH_MIPS
|
||||
.data
|
||||
.comm .gomp_critical_user_,32,8
|
||||
@ -1535,7 +1741,7 @@ __kmp_unnamed_critical_addr:
|
||||
.size __kmp_unnamed_critical_addr,4
|
||||
#endif /* KMP_ARCH_ARM */
|
||||
|
||||
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
|
||||
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
|
||||
.data
|
||||
.comm .gomp_critical_user_,32,8
|
||||
.data
|
||||
@ -1544,7 +1750,8 @@ __kmp_unnamed_critical_addr:
|
||||
__kmp_unnamed_critical_addr:
|
||||
.8byte .gomp_critical_user_
|
||||
.size __kmp_unnamed_critical_addr,8
|
||||
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
|
||||
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
|
||||
KMP_ARCH_RISCV64 */
|
||||
|
||||
#if KMP_OS_LINUX
|
||||
# if KMP_ARCH_ARM
|
||||
|
@ -50,6 +50,9 @@
|
||||
#include <mach/mach.h>
|
||||
#include <sys/sysctl.h>
|
||||
#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/user.h>
|
||||
#include <pthread_np.h>
|
||||
#elif KMP_OS_NETBSD
|
||||
#include <sys/types.h>
|
||||
@ -97,7 +100,7 @@ static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
|
||||
#if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED)
|
||||
|
||||
/* Affinity support */
|
||||
|
||||
@ -119,16 +122,21 @@ void __kmp_affinity_bind_thread(int which) {
|
||||
void __kmp_affinity_determine_capable(const char *env_var) {
|
||||
// Check and see if the OS supports thread affinity.
|
||||
|
||||
#if KMP_OS_LINUX
|
||||
#define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
|
||||
#elif KMP_OS_FREEBSD
|
||||
#define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t))
|
||||
#endif
|
||||
|
||||
|
||||
#if KMP_OS_LINUX
|
||||
// If Linux* OS:
|
||||
// If the syscall fails or returns a suggestion for the size,
|
||||
// then we don't have to search for an appropriate size.
|
||||
int gCode;
|
||||
int sCode;
|
||||
unsigned char *buf;
|
||||
buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
|
||||
|
||||
// If Linux* OS:
|
||||
// If the syscall fails or returns a suggestion for the size,
|
||||
// then we don't have to search for an appropriate size.
|
||||
gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
|
||||
KA_TRACE(30, ("__kmp_affinity_determine_capable: "
|
||||
"initial getaffinity call returned %d errno = %d\n",
|
||||
@ -267,6 +275,23 @@ void __kmp_affinity_determine_capable(const char *env_var) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif KMP_OS_FREEBSD
|
||||
int gCode;
|
||||
unsigned char *buf;
|
||||
buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
|
||||
gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT, reinterpret_cast<cpuset_t *>(buf));
|
||||
KA_TRACE(30, ("__kmp_affinity_determine_capable: "
|
||||
"initial getaffinity call returned %d errno = %d\n",
|
||||
gCode, errno));
|
||||
if (gCode == 0) {
|
||||
KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
|
||||
KA_TRACE(10, ("__kmp_affinity_determine_capable: "
|
||||
"affinity supported (mask size %d)\n",
|
||||
(int)__kmp_affin_mask_size));
|
||||
KMP_INTERNAL_FREE(buf);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
// save uncaught error code
|
||||
// int error = errno;
|
||||
KMP_INTERNAL_FREE(buf);
|
||||
@ -802,6 +827,13 @@ void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
|
||||
and also gives the user the stack space they requested for all threads */
|
||||
stack_size += gtid * __kmp_stkoffset * 2;
|
||||
|
||||
#if defined(__ANDROID__) && __ANDROID_API__ < 19
|
||||
// Round the stack size to a multiple of the page size. Older versions of
|
||||
// Android (until KitKat) would fail pthread_attr_setstacksize with EINVAL
|
||||
// if the stack size was not a multiple of the page size.
|
||||
stack_size = (stack_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
|
||||
#endif
|
||||
|
||||
KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
|
||||
"__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
|
||||
gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
|
||||
@ -1972,7 +2004,7 @@ int __kmp_is_address_mapped(void *addr) {
|
||||
int found = 0;
|
||||
int rc;
|
||||
|
||||
#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_HURD
|
||||
#if KMP_OS_LINUX || KMP_OS_HURD
|
||||
|
||||
/* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the address
|
||||
ranges mapped into the address space. */
|
||||
@ -2010,6 +2042,44 @@ int __kmp_is_address_mapped(void *addr) {
|
||||
// Free resources.
|
||||
fclose(file);
|
||||
KMP_INTERNAL_FREE(name);
|
||||
#elif KMP_OS_FREEBSD
|
||||
char *buf;
|
||||
size_t lstsz;
|
||||
int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
|
||||
rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0);
|
||||
if (rc < 0)
|
||||
return 0;
|
||||
// lstsz now holds the byte size of the whole VM map entry list rather than
|
||||
// an entry count; pad it by a third before allocating the buffer.
|
||||
lstsz = lstsz * 4 / 3;
|
||||
buf = reinterpret_cast<char *>(kmpc_malloc(lstsz));
|
||||
rc = sysctl(mib, 4, buf, &lstsz, NULL, 0);
|
||||
if (rc < 0) {
|
||||
kmpc_free(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *lw = buf;
|
||||
char *up = buf + lstsz;
|
||||
|
||||
while (lw < up) {
|
||||
struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw);
|
||||
size_t cursz = cur->kve_structsize;
|
||||
if (cursz == 0)
|
||||
break;
|
||||
void *start = reinterpret_cast<void *>(cur->kve_start);
|
||||
void *end = reinterpret_cast<void *>(cur->kve_end);
|
||||
// Readable/Writable addresses within current map entry
|
||||
if ((addr >= start) && (addr < end)) {
|
||||
if ((cur->kve_protection & KVME_PROT_READ) != 0 &&
|
||||
(cur->kve_protection & KVME_PROT_WRITE) != 0) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
lw += cursz;
|
||||
}
|
||||
kmpc_free(buf);
|
||||
|
||||
#elif KMP_OS_DARWIN
|
||||
|
||||
@ -2331,7 +2401,8 @@ int __kmp_get_load_balance(int max) {
|
||||
#endif // USE_LOAD_BALANCE
|
||||
|
||||
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
|
||||
((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
|
||||
((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
|
||||
KMP_ARCH_PPC64 || KMP_ARCH_RISCV64)
|
||||
|
||||
// we really only need the case with 1 argument, because CLANG always builds
|
||||
// a struct of pointers to shared variables referenced in the outlined function
|
||||
@ -2415,10 +2486,6 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
|
||||
break;
|
||||
}
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
*exit_frame_ptr = 0;
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user