Merge ^/vendor/llvm-openmp/dist up to its last change, and resolve conflicts.

Dimitry Andric 2020-01-23 21:42:26 +00:00
commit 489b1cf2ec
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang1000-import/; revision=357059
24 changed files with 686 additions and 176 deletions


@@ -2181,10 +2181,9 @@ struct kmp_dephash_entry {
 typedef struct kmp_dephash {
   kmp_dephash_entry_t **buckets;
   size_t size;
-#ifdef KMP_DEBUG
+  size_t generation;
   kmp_uint32 nelements;
   kmp_uint32 nconflicts;
-#endif
 } kmp_dephash_t;
 
 typedef struct kmp_task_affinity_info {
@@ -3342,7 +3341,7 @@ extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
 extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
-#if KMP_OS_LINUX
+#if KMP_OS_LINUX || KMP_OS_FREEBSD
 extern int kmp_set_thread_affinity_mask_initial(void);
 #endif
 #endif /* KMP_AFFINITY_SUPPORTED */


@@ -1968,7 +1968,7 @@ static void __kmp_dispatch_set_hierarchy_values() {
   __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
       nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
   __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
   if (__kmp_mic_type >= mic3)
     __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
   else
@@ -1982,7 +1982,7 @@ static void __kmp_dispatch_set_hierarchy_values() {
   __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
   __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
       __kmp_nThreadsPerCore;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
   if (__kmp_mic_type >= mic3)
     __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
         2 * __kmp_nThreadsPerCore;


@@ -160,6 +160,7 @@ class KMPHwlocAffinity : public KMPAffinity {
 };
 #endif /* KMP_USE_HWLOC */
 
+#if KMP_OS_LINUX || KMP_OS_FREEBSD
 #if KMP_OS_LINUX
 /* On some of the older OS's that we build on, these constants aren't present
    in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
@@ -234,6 +235,10 @@ class KMPHwlocAffinity : public KMPAffinity {
 #endif /* __NR_sched_getaffinity */
 #error Unknown or unsupported architecture
 #endif /* KMP_ARCH_* */
+#elif KMP_OS_FREEBSD
+#include <pthread.h>
+#include <pthread_np.h>
+#endif
 class KMPNativeAffinity : public KMPAffinity {
   class Mask : public KMPAffinity::Mask {
     typedef unsigned char mask_t;
@@ -294,8 +299,13 @@ class KMPNativeAffinity : public KMPAffinity {
     int get_system_affinity(bool abort_on_error) override {
       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                   "Illegal get affinity operation when not capable");
+#if KMP_OS_LINUX
       int retval =
           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
+#elif KMP_OS_FREEBSD
+      int retval =
+          pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
+#endif
       if (retval >= 0) {
         return 0;
       }
@@ -308,8 +318,13 @@ class KMPNativeAffinity : public KMPAffinity {
     int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
+#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
+#elif KMP_OS_FREEBSD
+      int retval =
+          pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
+#endif
      if (retval >= 0) {
        return 0;
      }
@@ -347,7 +362,7 @@ class KMPNativeAffinity : public KMPAffinity {
   }
   api_type get_api_type() const override { return NATIVE_OS; }
 };
-#endif /* KMP_OS_LINUX */
+#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
 
 #if KMP_OS_WINDOWS
 class KMPNativeAffinity : public KMPAffinity {


@@ -545,7 +545,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
     if (ompt_enabled.ompt_callback_parallel_end) {
       ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
           &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
-          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
+          ompt_parallel_invoker_program | ompt_parallel_team,
+          OMPT_LOAD_RETURN_ADDRESS(global_tid));
     }
     __ompt_lw_taskteam_unlink(this_thr);
     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
@@ -676,7 +677,8 @@ void __kmpc_flush(ident_t *loc) {
 #endif // KMP_COMPILER_ICC
   }
 #endif // KMP_MIC
-#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
+#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
+       KMP_ARCH_RISCV64)
 // Nothing to see here move along
 #elif KMP_ARCH_PPC64
 // Nothing needed here (we have a real MB above).


@@ -633,5 +633,25 @@
     GOMP_loop_ull_doacross_guided_start
 #define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START \
     GOMP_loop_ull_doacross_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT \
+    GOMP_loop_nonmonotonic_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START \
+    GOMP_loop_nonmonotonic_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT \
+    GOMP_loop_nonmonotonic_guided_next
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START \
+    GOMP_loop_nonmonotonic_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT \
+    GOMP_loop_ull_nonmonotonic_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START \
+    GOMP_loop_ull_nonmonotonic_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT \
+    GOMP_loop_ull_nonmonotonic_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START \
+    GOMP_loop_ull_nonmonotonic_guided_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC \
+    GOMP_parallel_loop_nonmonotonic_dynamic
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED \
+    GOMP_parallel_loop_nonmonotonic_guided
 
 #endif /* KMP_FTN_OS_H */


@@ -431,7 +431,7 @@ std::atomic<int> __kmp_thread_pool_active_nth = ATOMIC_VAR_INIT(0);
 /* -------------------------------------------------
  * GLOBAL/ROOT STATE */
 KMP_ALIGN_CACHE
-kmp_global_t __kmp_global = {{0}};
+kmp_global_t __kmp_global;
 
 /* ----------------------------------------------- */
 /* GLOBAL SYNCHRONIZATION LOCKS */
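
The dropped initializer leans on a language guarantee rather than changing behavior: objects with static storage duration are zero-initialized before anything else runs, so the explicit = {{0}} on __kmp_global was redundant. A small sketch of that guarantee (hypothetical types, assuming trivially-constructible members as here):

#include <cassert>

struct Inner { int a; void *p; };
struct Outer { Inner in; double d; };

Outer g_plain;          // static storage duration: zero-initialized
Outer g_braced = {{0}}; // explicit form; ends up in the same state

int main() {
  assert(g_plain.in.a == 0 && g_plain.in.p == nullptr && g_plain.d == 0.0);
  assert(g_braced.in.a == 0 && g_braced.in.p == nullptr && g_braced.d == 0.0);
  return 0;
}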


@ -22,7 +22,7 @@ extern "C" {
#endif // __cplusplus #endif // __cplusplus
#define MKLOC(loc, routine) \ #define MKLOC(loc, routine) \
static ident_t(loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
#include "kmp_ftn_os.h" #include "kmp_ftn_os.h"
@ -622,10 +622,16 @@ LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),
kmp_sch_dynamic_chunked) kmp_sch_dynamic_chunked)
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START),
kmp_sch_dynamic_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {})
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START), LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),
kmp_sch_guided_chunked) kmp_sch_guided_chunked)
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START),
kmp_sch_guided_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})
LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
kmp_sch_runtime) kmp_sch_runtime)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
@ -892,6 +898,16 @@ LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),
kmp_sch_guided_chunked) kmp_sch_guided_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
LOOP_START_ULL(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START),
kmp_sch_dynamic_chunked)
LOOP_NEXT_ULL(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {})
LOOP_START_ULL(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START),
kmp_sch_guided_chunked)
LOOP_NEXT_ULL(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {})
LOOP_RUNTIME_START_ULL( LOOP_RUNTIME_START_ULL(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
@ -1487,6 +1503,12 @@ PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),
kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),
kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED),
kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC),
kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
@ -1942,6 +1964,26 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,
"GOMP_4.5"); "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45, KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,
"GOMP_4.5"); "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,
"GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,
"GOMP_4.5");
#endif // KMP_USE_VERSION_SYMBOLS #endif // KMP_USE_VERSION_SYMBOLS
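
These entry points fill out libomp's GOMP 4.5 compatibility surface: loops that GCC lowers to the GOMP_loop_nonmonotonic_* symbols now resolve against this runtime, and the tables above simply alias them to the ordinary dynamic/guided implementations (a monotonic schedule is a valid implementation of a nonmonotonic request, since the modifier only relaxes ordering guarantees). A sketch of source that exercises them when built with a sufficiently new GCC against this runtime (g++ -fopenmp nonmono.cpp, hypothetical file name):

#include <cstdio>

int main() {
  double sum = 0.0;
// With GCC this lowers to the GOMP_*_nonmonotonic_dynamic entry points,
// which the tables above map onto kmp_sch_dynamic_chunked.
#pragma omp parallel for schedule(nonmonotonic : dynamic, 4) reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += 0.5 * i;
  std::printf("%f\n", sum);
  return 0;
}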


@@ -2943,10 +2943,10 @@ static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
 #undef expand
 
 // Exposes only one set of jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0;
-int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
+void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
+int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
 
 // Jump tables for the indirect lock functions
 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
@@ -2993,10 +2993,10 @@ static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
 #undef expand
 
 // Exposes only one jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0;
-int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
+void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
+int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
+int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
+int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
 
 // Lock index table.
 kmp_indirect_lock_table_t __kmp_i_lock_table;


@@ -1122,18 +1122,18 @@ typedef struct {
 // Function tables for direct locks. Set/unset/test differentiate functions
 // with/without consistency checking.
 extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
-extern void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *);
-extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
+extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
+extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
+extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
+extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
 
 // Function tables for indirect locks. Set/unset/test differentiate functions
 // with/withuot consistency checking.
 extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
-extern void (*(*__kmp_indirect_destroy))(kmp_user_lock_p);
-extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
+extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
+extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
+extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
+extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
 
 // Extracts direct lock tag from a user lock pointer
 #define KMP_EXTRACT_D_TAG(l) \
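
The kmp_lock changes are a pure declarator cleanup: T (*(*p))(args) and T (**p)(args) both declare p as a pointer to a pointer to a function, so dropping the extra parentheses changes nothing but readability. A compile-time check of that equivalence (stand-in types, a sketch rather than libomp code):

#include <type_traits>

struct dyna_lock_demo; // stand-in for kmp_dyna_lock_t

typedef int (*(*old_spelling))(dyna_lock_demo *, int);
typedef int (**new_spelling)(dyna_lock_demo *, int);

static_assert(std::is_same<old_spelling, new_spelling>::value,
              "same type, simpler spelling");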


@@ -69,7 +69,7 @@
 #error Unknown compiler
 #endif
 
-#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_CNK
 #define KMP_AFFINITY_SUPPORTED 1
 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64
 #define KMP_GROUP_AFFINITY 1
@@ -165,7 +165,8 @@ typedef unsigned long long kmp_uint64;
 #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
 #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
-#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
+#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
+    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
 #else
 #error "Can't determine size_t printf format specifier."
@@ -840,7 +841,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
 #endif /* KMP_OS_WINDOWS */
 
 #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
-    KMP_ARCH_MIPS64
+    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
 #define KMP_MB() __sync_synchronize()
 #endif


@@ -98,6 +98,7 @@
 #define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_ELFv2 || KMP_ARCH_PPC64_ELFv1)
 #define KMP_ARCH_MIPS 0
 #define KMP_ARCH_MIPS64 0
+#define KMP_ARCH_RISCV64 0
 
 #if KMP_OS_WINDOWS
 #if defined(_M_AMD64) || defined(__x86_64)
@@ -135,6 +136,9 @@
 #undef KMP_ARCH_MIPS
 #define KMP_ARCH_MIPS 1
 #endif
+#elif defined __riscv && __riscv_xlen == 64
+#undef KMP_ARCH_RISCV64
+#define KMP_ARCH_RISCV64 1
 #endif
 #endif
@@ -199,7 +203,7 @@
 // TODO: Fixme - This is clever, but really fugly
 #if (1 != \
      KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \
-         KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64)
+         KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + KMP_ARCH_RISCV64)
 #error Unknown or unsupported architecture
 #endif


@@ -1190,8 +1190,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
       ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
           &(parent_task_info->task_data), &(parent_task_info->frame),
-          &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
-          codeptr);
+          &ompt_parallel_data, team_size,
+          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
     }
   }
 #endif // OMPT_SUPPORT
@@ -1481,9 +1481,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         int team_size = master_set_numthreads
                             ? master_set_numthreads
                             : get__nproc_2(parent_team, master_tid);
+        int flags = OMPT_INVOKER(call_context) |
+                    ((microtask == (microtask_t)__kmp_teams_master)
+                         ? ompt_parallel_league
+                         : ompt_parallel_team);
         ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
-            parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
-            OMPT_INVOKER(call_context), return_address);
+            parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
+            return_address);
       }
       master_th->th.ompt_thread_info.state = ompt_state_overhead;
     }
@@ -1512,19 +1516,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       // AC: we are in serialized parallel
       __kmpc_serialized_parallel(loc, gtid);
       KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
-      // AC: need this in order enquiry functions work
-      // correctly, will restore at join time
-      parent_team->t.t_serialized--;
 
 #if OMPT_SUPPORT
       void *dummy;
-      void **exit_runtime_p;
+      void **exit_frame_p;
 
       ompt_lw_taskteam_t lw_taskteam;
 
       if (ompt_enabled.enabled) {
         __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                 &ompt_parallel_data, return_address);
-        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
+        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
 
         __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
         // don't use lw_taskteam after linking. content was swaped
@@ -1532,19 +1534,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         /* OMPT implicit task begin */
         implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
         if (ompt_enabled.ompt_callback_implicit_task) {
-          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
-              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
-              implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
           OMPT_CUR_TASK_INFO(master_th)
               ->thread_num = __kmp_tid_from_gtid(gtid);
+          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+              implicit_task_data, 1,
+              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
         }
 
         /* OMPT state */
         master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
       } else {
-        exit_runtime_p = &dummy;
+        exit_frame_p = &dummy;
       }
 #endif
 
+      // AC: need to decrement t_serialized for enquiry functions to work
+      // correctly, will restore at join time
+      parent_team->t.t_serialized--;
+
       {
         KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
@@ -1552,26 +1558,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
 #if OMPT_SUPPORT
                                ,
-                               exit_runtime_p
+                               exit_frame_p
 #endif
                                );
       }
 
 #if OMPT_SUPPORT
-      *exit_runtime_p = NULL;
       if (ompt_enabled.enabled) {
+        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
-              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
+        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
-              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
-              OMPT_INVOKER(call_context), return_address);
+              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
+              OMPT_INVOKER(call_context) | ompt_parallel_team,
+              return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
@@ -1586,6 +1593,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       parent_team->t.t_level++;
       parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
 
+#if OMPT_SUPPORT
+      if (ompt_enabled.enabled) {
+        ompt_lw_taskteam_t lw_taskteam;
+        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+                                &ompt_parallel_data, return_address);
+        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
+      }
+#endif
+
       /* Change number of threads in the team if requested */
       if (master_set_numthreads) { // The parallel has num_threads clause
         if (master_set_numthreads < master_th->th.th_teams_size.nth) {
@@ -1714,7 +1730,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
 #if OMPT_SUPPORT
         void *dummy;
-        void **exit_runtime_p;
+        void **exit_frame_p;
         ompt_task_info_t *task_info;
 
         ompt_lw_taskteam_t lw_taskteam;
@@ -1727,19 +1743,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           // don't use lw_taskteam after linking. content was swaped
 
           task_info = OMPT_CUR_TASK_INFO(master_th);
-          exit_runtime_p = &(task_info->frame.exit_frame.ptr);
+          exit_frame_p = &(task_info->frame.exit_frame.ptr);
           if (ompt_enabled.ompt_callback_implicit_task) {
-            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
-                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
-                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
             OMPT_CUR_TASK_INFO(master_th)
                 ->thread_num = __kmp_tid_from_gtid(gtid);
+            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+                &(task_info->task_data), 1,
+                OMPT_CUR_TASK_INFO(master_th)->thread_num,
+                ompt_task_implicit);
           }
 
           /* OMPT state */
           master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
         } else {
-          exit_runtime_p = &dummy;
+          exit_frame_p = &dummy;
         }
 #endif
@@ -1750,25 +1768,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
                                  parent_team->t.t_argv
 #if OMPT_SUPPORT
                                  ,
-                                 exit_runtime_p
+                                 exit_frame_p
 #endif
                                  );
         }
 
 #if OMPT_SUPPORT
         if (ompt_enabled.enabled) {
-          exit_runtime_p = NULL;
+          *exit_frame_p = NULL;
           if (ompt_enabled.ompt_callback_implicit_task) {
             ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                 ompt_scope_end, NULL, &(task_info->task_data), 1,
-                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+                OMPT_CUR_TASK_INFO(master_th)->thread_num,
+                ompt_task_implicit);
           }
+          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
           __ompt_lw_taskteam_unlink(master_th);
           if (ompt_enabled.ompt_callback_parallel_end) {
             ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
-                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
-                OMPT_INVOKER(call_context), return_address);
+                &ompt_parallel_data, parent_task_data,
+                OMPT_INVOKER(call_context) | ompt_parallel_team,
+                return_address);
           }
           master_th->th.ompt_thread_info.state = ompt_state_overhead;
         }
@@ -1800,6 +1820,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         team->t.t_level--;
         // AC: call special invoker for outer "parallel" of teams construct
         invoker(gtid);
+#if OMPT_SUPPORT
+        if (ompt_enabled.enabled) {
+          ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
+          if (ompt_enabled.ompt_callback_implicit_task) {
+            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                ompt_scope_end, NULL, &(task_info->task_data), 0,
+                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
+          }
+          if (ompt_enabled.ompt_callback_parallel_end) {
+            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+                &ompt_parallel_data, parent_task_data,
+                OMPT_INVOKER(call_context) | ompt_parallel_league,
+                return_address);
+          }
+          master_th->th.ompt_thread_info.state = ompt_state_overhead;
+        }
+#endif
       } else {
         argv = args;
         for (i = argc - 1; i >= 0; --i)
@@ -1813,7 +1850,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
 #if OMPT_SUPPORT
       void *dummy;
-      void **exit_runtime_p;
+      void **exit_frame_p;
       ompt_task_info_t *task_info;
 
       ompt_lw_taskteam_t lw_taskteam;
@@ -1824,14 +1861,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
         // don't use lw_taskteam after linking. content was swaped
 
         task_info = OMPT_CUR_TASK_INFO(master_th);
-        exit_runtime_p = &(task_info->frame.exit_frame.ptr);
+        exit_frame_p = &(task_info->frame.exit_frame.ptr);
 
         /* OMPT implicit task begin */
         implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
         if (ompt_enabled.ompt_callback_implicit_task) {
           ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
               ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
-              implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
+              ompt_task_implicit);
           OMPT_CUR_TASK_INFO(master_th)
               ->thread_num = __kmp_tid_from_gtid(gtid);
         }
@@ -1839,7 +1877,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
         /* OMPT state */
         master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
       } else {
-        exit_runtime_p = &dummy;
+        exit_frame_p = &dummy;
       }
 #endif
@@ -1849,18 +1887,19 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         __kmp_invoke_microtask(microtask, gtid, 0, argc, args
 #if OMPT_SUPPORT
                                ,
-                               exit_runtime_p
+                               exit_frame_p
 #endif
                                );
       }
 
 #if OMPT_SUPPORT
       if (ompt_enabled.enabled) {
-        *exit_runtime_p = NULL;
+        *exit_frame_p = NULL;
         if (ompt_enabled.ompt_callback_implicit_task) {
           ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
               ompt_scope_end, NULL, &(task_info->task_data), 1,
-              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+              OMPT_CUR_TASK_INFO(master_th)->thread_num,
+              ompt_task_implicit);
         }
 
         ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
@@ -1868,7 +1907,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         if (ompt_enabled.ompt_callback_parallel_end) {
           ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
               &ompt_parallel_data, parent_task_data,
-              OMPT_INVOKER(call_context), return_address);
+              OMPT_INVOKER(call_context) | ompt_parallel_team,
+              return_address);
         }
         master_th->th.ompt_thread_info.state = ompt_state_overhead;
       }
@@ -2225,12 +2265,11 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
 
 static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                    kmp_team_t *team, ompt_data_t *parallel_data,
-                                   fork_context_e fork_context, void *codeptr) {
+                                   int flags, void *codeptr) {
   ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
   if (ompt_enabled.ompt_callback_parallel_end) {
     ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
-        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
-        codeptr);
+        parallel_data, &(task_info->task_data), flags, codeptr);
   }
 
   task_info->frame.enter_frame = ompt_data_none;
@@ -2263,6 +2302,7 @@ void __kmp_join_call(ident_t *loc, int gtid
   master_th->th.th_ident = loc;
 
 #if OMPT_SUPPORT
+  void *team_microtask = (void *)team->t.t_pkfn;
   if (ompt_enabled.enabled) {
     master_th->th.ompt_thread_info.state = ompt_state_overhead;
   }
@@ -2352,10 +2392,25 @@ void __kmp_join_call(ident_t *loc, int gtid
   if (master_th->th.th_teams_microtask && !exit_teams &&
       team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
       team->t.t_level == master_th->th.th_teams_level + 1) {
 // AC: We need to leave the team structure intact at the end of parallel
 // inside the teams construct, so that at the next parallel same (hot) team
 // works, only adjust nesting levels
+#if OMPT_SUPPORT
+    ompt_data_t ompt_parallel_data = ompt_data_none;
+    if (ompt_enabled.enabled) {
+      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+      if (ompt_enabled.ompt_callback_implicit_task) {
+        int ompt_team_size = team->t.t_nproc;
+        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
+            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
+      }
+      task_info->frame.exit_frame = ompt_data_none;
+      task_info->task_data = ompt_data_none;
+      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
+      __ompt_lw_taskteam_unlink(master_th);
+    }
+#endif
     /* Decrement our nested depth level */
     team->t.t_level--;
     team->t.t_active_level--;
@@ -2394,8 +2449,8 @@ void __kmp_join_call(ident_t *loc, int gtid
 
 #if OMPT_SUPPORT
     if (ompt_enabled.enabled) {
-      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
-                      codeptr);
+      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
+                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
     }
 #endif
@@ -2424,12 +2479,14 @@ void __kmp_join_call(ident_t *loc, int gtid
   if (ompt_enabled.enabled) {
     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
     if (ompt_enabled.ompt_callback_implicit_task) {
-      int ompt_team_size = team->t.t_nproc;
+      int flags = (team_microtask == (void *)__kmp_teams_master)
+                      ? ompt_task_initial
+                      : ompt_task_implicit;
+      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
           ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
-          OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
     }
 
     task_info->frame.exit_frame = ompt_data_none;
     task_info->task_data = ompt_data_none;
   }
@@ -2503,8 +2560,12 @@ void __kmp_join_call(ident_t *loc, int gtid
   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
 
 #if OMPT_SUPPORT
+  int flags =
+      OMPT_INVOKER(fork_context) |
+      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
+                                                      : ompt_parallel_team);
   if (ompt_enabled.enabled) {
-    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
-                    codeptr);
+    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
+                    codeptr);
   }
 #endif
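
The recurring theme in the hunks above is that the parallel-begin/end callbacks now carry ompt_parallel_team or ompt_parallel_league in their flags argument, so a tool can tell an ordinary parallel region from the outer league of a teams construct. A minimal OMPT tool sketch that observes the distinction (assumes the omp-tools.h shipped with this runtime; build as a shared library and point OMP_TOOL_LIBRARIES at it):

#include <cstdio>
#include <omp-tools.h>

static void on_parallel_begin(ompt_data_t *encountering_task_data,
                              const ompt_frame_t *encountering_task_frame,
                              ompt_data_t *parallel_data,
                              unsigned int requested_parallelism, int flags,
                              const void *codeptr_ra) {
  // ompt_parallel_league marks the outer "teams" region; everything else
  // reported here is an ordinary team.
  std::printf("parallel-begin: %s, requested=%u\n",
              (flags & ompt_parallel_league) ? "league" : "team",
              requested_parallelism);
}

static int on_initialize(ompt_function_lookup_t lookup, int initial_device_num,
                         ompt_data_t *tool_data) {
  ompt_set_callback_t set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  set_callback(ompt_callback_parallel_begin,
               (ompt_callback_t)on_parallel_begin);
  return 1; // nonzero: keep the tool active
}

static void on_finalize(ompt_data_t *tool_data) {}

extern "C" ompt_start_tool_result_t *
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
  static ompt_start_tool_result_t result = {on_initialize, on_finalize, {0}};
  return &result;
}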
@@ -4432,7 +4493,7 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
   KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
 }
 
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
 /* Sets full mask for thread and returns old mask, no changes to structures. */
 static void
 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
@@ -4980,7 +5041,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       __kmp_partition_places(team);
 #endif
     } else { // team->t.t_nproc < new_nproc
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
       kmp_affin_mask_t *old_mask;
       if (KMP_AFFINITY_CAPABLE()) {
         KMP_CPU_ALLOC(old_mask);
@@ -5029,7 +5090,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
         __kmp_reinitialize_team(team, new_icvs, NULL);
       }
 
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
       /* Temporarily set full mask for master thread before creation of
          workers. The reason is that workers inherit the affinity from master,
         so if a lot of workers are created on the single core quickly, they
@@ -5064,7 +5125,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
         }
       }
 
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
       if (KMP_AFFINITY_CAPABLE()) {
         /* Restore initial master thread's affinity mask */
         __kmp_set_system_affinity(old_mask, TRUE);
@@ -5600,7 +5661,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
 void *__kmp_launch_thread(kmp_info_t *this_thr) {
   int gtid = this_thr->th.th_info.ds.ds_gtid;
 /* void *stack_data;*/
-  kmp_team_t *(*volatile pteam);
+  kmp_team_t **volatile pteam;
 
   KMP_MB();
   KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
@@ -5618,18 +5679,15 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
     this_thr->th.ompt_thread_info.wait_id = 0;
     this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
+    this_thr->th.ompt_thread_info.parallel_flags = 0;
     if (ompt_enabled.ompt_callback_thread_begin) {
       ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
           ompt_thread_worker, thread_data);
     }
-  }
-#endif
-
-#if OMPT_SUPPORT
-  if (ompt_enabled.enabled) {
     this_thr->th.ompt_thread_info.state = ompt_state_idle;
   }
 #endif
+
   /* This is the place where threads wait for work */
   while (!TCR_4(__kmp_global.g.g_done)) {
     KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
@@ -5647,7 +5705,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
     }
 #endif
 
-    pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
+    pteam = &this_thr->th.th_team;
 
     /* have we been allocated? */
     if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
@@ -6956,16 +7014,16 @@ int __kmp_invoke_task_func(int gtid) {
 
 #if OMPT_SUPPORT
   void *dummy;
-  void **exit_runtime_p;
+  void **exit_frame_p;
   ompt_data_t *my_task_data;
   ompt_data_t *my_parallel_data;
   int ompt_team_size;
 
   if (ompt_enabled.enabled) {
-    exit_runtime_p = &(
+    exit_frame_p = &(
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
   } else {
-    exit_runtime_p = &dummy;
+    exit_frame_p = &dummy;
   }
 
   my_task_data =
@@ -6975,7 +7033,7 @@ int __kmp_invoke_task_func(int gtid) {
     ompt_team_size = team->t.t_nproc;
     ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
         ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
-        __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
     OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
   }
 #endif
@@ -6994,11 +7052,12 @@ int __kmp_invoke_task_func(int gtid) {
                               tid, (int)team->t.t_argc, (void **)team->t.t_argv
 #if OMPT_SUPPORT
                               ,
-                              exit_runtime_p
+                              exit_frame_p
 #endif
                               );
 #if OMPT_SUPPORT
-  *exit_runtime_p = NULL;
+  *exit_frame_p = NULL;
+  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
 #endif
 
 #if KMP_STATS_ENABLED
@@ -7077,7 +7136,22 @@ int __kmp_invoke_teams_master(int gtid) {
                    (void *)__kmp_teams_master);
 #endif
   __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
+#if OMPT_SUPPORT
+  int tid = __kmp_tid_from_gtid(gtid);
+  ompt_data_t *task_data =
+      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
+  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
+  if (ompt_enabled.ompt_callback_implicit_task) {
+    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
+        ompt_task_initial);
+    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
+  }
+#endif
   __kmp_teams_master(gtid);
+#if OMPT_SUPPORT
+  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
+#endif
   __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
   return 1;
 }
@@ -7118,19 +7192,32 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
   thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
 
   // Remember the number of threads for inner parallel regions
+  if (!TCR_4(__kmp_init_middle))
+    __kmp_middle_initialize(); // get internal globals calculated
+  KMP_DEBUG_ASSERT(__kmp_avail_proc);
+  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
   if (num_threads == 0) {
-    if (!TCR_4(__kmp_init_middle))
-      __kmp_middle_initialize(); // get __kmp_avail_proc calculated
     num_threads = __kmp_avail_proc / num_teams;
+    // adjust num_threads w/o warning as it is not user setting
+    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
+    // no thread_limit clause specified - do not change thread-limit-var ICV
+    if (num_threads > __kmp_dflt_team_nth) {
+      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
+    }
+    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
+      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
+    } // prevent team size to exceed thread-limit-var
     if (num_teams * num_threads > __kmp_teams_max_nth) {
-      // adjust num_threads w/o warning as it is not user setting
       num_threads = __kmp_teams_max_nth / num_teams;
     }
   } else {
     // This thread will be the master of the league masters
     // Store new thread limit; old limit is saved in th_cg_roots list
     thr->th.th_current_task->td_icvs.thread_limit = num_threads;
+    // num_threads = min(num_threads, nthreads-var)
+    if (num_threads > __kmp_dflt_team_nth) {
+      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
+    }
     if (num_teams * num_threads > __kmp_teams_max_nth) {
       int new_threads = __kmp_teams_max_nth / num_teams;
       if (!__kmp_reserve_warn) { // user asked for too many threads
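
The new logic in __kmp_push_num_teams clamps a defaulted num_threads in three steps: divide the available processors among the teams, honor the nthreads-var and thread-limit-var ICVs, then enforce the global __kmp_teams_max_nth cap. A worked sketch of the arithmetic (hypothetical helper and values, not libomp API):

static int clamp_team_threads(int avail_proc, int num_teams, int nthreads_var,
                              int thread_limit, int teams_max_nth) {
  int n = avail_proc / num_teams; // start from an even split
  if (n > nthreads_var)
    n = nthreads_var; // honor nthreads-var ICV
  if (n > thread_limit)
    n = thread_limit; // honor thread-limit-var ICV
  if (num_teams * n > teams_max_nth)
    n = teams_max_nth / num_teams; // global cap on the whole league
  return n;
}
// e.g. clamp_team_threads(64, 4, 8, 6, 16) == 4:
// 64/4 = 16 -> 8 (nthreads-var) -> 6 (thread-limit) -> 16/4 = 4 (cap)
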
@@ -8023,7 +8110,8 @@ __kmp_determine_reduction_method(
 
   int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
 
-#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
+#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
+    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
 
 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
     KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD


@@ -164,7 +164,7 @@ void *kmp_aligned_malloc(size_t sz, size_t a) {
 #if KMP_OS_WINDOWS
   res = _aligned_malloc(sz, a);
 #else
-  if (err = posix_memalign(&res, a, sz)) {
+  if ((err = posix_memalign(&res, a, sz))) {
     errno = err; // can be EINVAL or ENOMEM
     res = NULL;
   }
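
The doubled parentheses are the conventional way to tell GCC/Clang's -Wparentheses that an assignment used as a condition is intentional rather than a mistyped ==. A sketch of the idiom (hypothetical wrapper, assuming a POSIX system where posix_memalign is visible through <cstdlib>):

#include <cerrno>
#include <cstdlib>

void *demo_aligned_malloc(std::size_t sz, std::size_t a) {
  void *res = nullptr;
  int err;
  // Assignment-as-condition is deliberate, hence the extra parentheses.
  if ((err = posix_memalign(&res, a, sz))) {
    errno = err; // posix_memalign reports errors via its return value
    return nullptr;
  }
  return res;
}
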
@@ -277,7 +277,7 @@ void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) {
 
 kmp_proc_bind_t __kmps_get_proc_bind(void) {
   i;
-  return 0;
+  return proc_bind_false;
 } // __kmps_get_proc_bind
 
 double __kmps_get_wtime(void) {


@@ -54,12 +54,64 @@ static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
 enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
 
+size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
+const size_t MAX_GEN = 8;
+
 static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
   // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
   // m_num_sets );
   return ((addr >> 6) ^ (addr >> 2)) % hsize;
 }
 
+static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
+                                           kmp_dephash_t *current_dephash) {
+  kmp_dephash_t *h;
+
+  size_t gen = current_dephash->generation + 1;
+  if (gen >= MAX_GEN)
+    return current_dephash;
+  size_t new_size = sizes[gen];
+
+  kmp_int32 size_to_allocate =
+      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
+
+#if USE_FAST_MEMORY
+  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
+#else
+  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
+#endif
+
+  h->size = new_size;
+  h->nelements = current_dephash->nelements;
+  h->buckets = (kmp_dephash_entry **)(h + 1);
+  h->generation = gen;
+
+  // insert existing elements in the new table
+  for (size_t i = 0; i < current_dephash->size; i++) {
+    kmp_dephash_entry_t *next;
+    for (kmp_dephash_entry_t *entry = current_dephash->buckets[i]; entry; entry = next) {
+      next = entry->next_in_bucket;
+      // Compute the new hash using the new size, and insert the entry in
+      // the new bucket.
+      kmp_int32 new_bucket = __kmp_dephash_hash(entry->addr, h->size);
+      if (entry->next_in_bucket) {
+        h->nconflicts++;
+      }
+      entry->next_in_bucket = h->buckets[new_bucket];
+      h->buckets[new_bucket] = entry;
+    }
+  }
+
+  // Free old hash table
+#if USE_FAST_MEMORY
+  __kmp_fast_free(thread, current_dephash);
+#else
+  __kmp_thread_free(thread, current_dephash);
+#endif
+
+  return h;
+}
+
 static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                            kmp_taskdata_t *current_task) {
   kmp_dephash_t *h;
@@ -81,10 +133,9 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
 #endif
   h->size = h_size;
 
-#ifdef KMP_DEBUG
+  h->generation = 0;
   h->nelements = 0;
   h->nconflicts = 0;
-#endif
   h->buckets = (kmp_dephash_entry **)(h + 1);
 
   for (size_t i = 0; i < h_size; i++)
@@ -97,7 +148,13 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
 #define ENTRY_LAST_MTXS 1
 
 static kmp_dephash_entry *
-__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
+__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t **hash, kmp_intptr_t addr) {
+  kmp_dephash_t *h = *hash;
+  if (h->nelements != 0
+      && h->nconflicts/h->size >= 1) {
+    *hash = __kmp_dephash_extend(thread, h);
+    h = *hash;
+  }
   kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
 
   kmp_dephash_entry_t *entry;
@@ -122,11 +179,9 @@ __kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t **hash, kmp_intptr_t addr)
     entry->mtx_lock = NULL;
     entry->next_in_bucket = h->buckets[bucket];
     h->buckets[bucket] = entry;
-#ifdef KMP_DEBUG
     h->nelements++;
     if (entry->next_in_bucket)
       h->nconflicts++;
-#endif
   }
   return entry;
 }
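
With the bookkeeping now maintained unconditionally, __kmp_dephash_find can grow the table once the recorded conflicts average one per bucket, and __kmp_dephash_extend walks a fixed schedule of roughly doubling prime sizes, giving up after MAX_GEN generations. The trigger condensed into one predicate (hypothetical helper, not libomp API):

#include <cstddef>

// Mirrors the check in __kmp_dephash_find plus the generation cap applied
// inside __kmp_dephash_extend.
static bool dephash_should_extend(std::size_t nelements,
                                  std::size_t nconflicts, std::size_t size,
                                  std::size_t generation,
                                  std::size_t max_gen) {
  if (generation + 1 >= max_gen)
    return false; // table stays at its current size; chains just get longer
  return nelements != 0 && nconflicts / size >= 1;
}
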
@@ -232,7 +287,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
 
 template <bool filter>
 static inline kmp_int32
-__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
+__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
                    bool dep_barrier, kmp_int32 ndeps,
                    kmp_depend_info_t *dep_list, kmp_task_t *task) {
   KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
@@ -352,7 +407,7 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
 
 // returns true if the task has any outstanding dependence
 static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
-                             kmp_task_t *task, kmp_dephash_t *hash,
+                             kmp_task_t *task, kmp_dephash_t **hash,
                              bool dep_barrier, kmp_int32 ndeps,
                              kmp_depend_info_t *dep_list,
                              kmp_int32 ndeps_noalias,
@@ -552,7 +607,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
     __kmp_init_node(node);
     new_taskdata->td_depnode = node;
 
-    if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
+    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                          NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                          noalias_dep_list)) {
       KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
@@ -633,7 +688,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
   kmp_depnode_t node = {0};
   __kmp_init_node(&node);
 
-  if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash,
+  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                         DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
     KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "


@@ -140,8 +140,11 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
 #endif
   if (!KMP_MASTER_TID(ds_tid)) {
     if (ompt_enabled.ompt_callback_implicit_task) {
+      int flags = this_thr->th.ompt_thread_info.parallel_flags;
+      flags = (flags & ompt_parallel_league) ? ompt_task_initial
+                                             : ompt_task_implicit;
       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
-          ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
+          ompt_scope_end, NULL, tId, 0, ds_tid, flags);
     }
     // return to idle state
     this_thr->th.ompt_thread_info.state = ompt_state_idle;


@@ -430,10 +430,8 @@ OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which,
 #define ompt_event_macro(event_name, callback_type, event_id) \
   case event_name: \
-    if (ompt_event_implementation_status(event_name)) { \
-      ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \
-      ompt_enabled.event_name = (callback != 0); \
-    } \
+    ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \
+    ompt_enabled.event_name = (callback != 0); \
     if (callback) \
       return ompt_event_implementation_status(event_name); \
     else \
@@ -456,16 +454,15 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
   switch (which) {
 #define ompt_event_macro(event_name, callback_type, event_id) \
-  case event_name: \
-    if (ompt_event_implementation_status(event_name)) { \
-      ompt_callback_t mycb = \
-          (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \
-      if (ompt_enabled.event_name && mycb) { \
-        *callback = mycb; \
-        return ompt_get_callback_success; \
-      } \
+  case event_name: { \
+    ompt_callback_t mycb = \
+        (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \
+    if (ompt_enabled.event_name && mycb) { \
+      *callback = mycb; \
+      return ompt_get_callback_success; \
     } \
-    return ompt_get_callback_failure;
+    return ompt_get_callback_failure; \
+  }

     FOREACH_OMPT_EVENT(ompt_event_macro)
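With the implementation-status gating removed above, ompt_set_callback now unconditionally records the callback and flips the enabled bit, and ompt_get_callback reports a callback only when it is both set and enabled. A minimal tool exercising this path might look as follows (a sketch against the standard omp-tools.h interface; error handling omitted):

    #include <omp-tools.h>
    #include <stdio.h>

    // Matches ompt_callback_implicit_task_t; 'flags' is where the runtime now
    // passes ompt_task_initial vs. ompt_task_implicit (see the hunk above that
    // derives it from parallel_flags).
    static void on_implicit_task(ompt_scope_endpoint_t endpoint,
                                 ompt_data_t *parallel_data,
                                 ompt_data_t *task_data,
                                 unsigned int actual_parallelism,
                                 unsigned int index, int flags) {
      printf("implicit task %s, flags=%d\n",
             endpoint == ompt_scope_begin ? "begin" : "end", flags);
    }

    static int tool_init(ompt_function_lookup_t lookup, int initial_device_num,
                         ompt_data_t *tool_data) {
      ompt_set_callback_t set_cb =
          (ompt_set_callback_t)lookup("ompt_set_callback");
      set_cb(ompt_callback_implicit_task, (ompt_callback_t)on_implicit_task);
      return 1; // nonzero keeps the tool active
    }

    static void tool_finalize(ompt_data_t *tool_data) {}

    ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                              const char *runtime_version) {
      static ompt_start_tool_result_t result = {tool_init, tool_finalize, {0}};
      return &result;
    }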


@@ -81,6 +81,7 @@ typedef struct {
   ompt_state_t state;
   ompt_wait_id_t wait_id;
   int ompt_task_yielded;
+  int parallel_flags; // information for the last parallel region invoked
   void *idle_frame;
 } ompt_thread_info_t;


@@ -269,10 +269,11 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
 }
 void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
-                             int on_heap) {
+                             int on_heap, bool always) {
   ompt_lw_taskteam_t *link_lwt = lwt;
-  if (thr->th.th_team->t.t_serialized >
-      1) { // we already have a team, so link the new team and swap values
+  if (always ||
+      thr->th.th_team->t.t_serialized >
+          1) { // we already have a team, so link the new team and swap values
     if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap
       link_lwt =
           (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));


@@ -26,7 +26,7 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
                              int gtid, ompt_data_t *ompt_pid, void *codeptr);
 void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
-                             int on_heap);
+                             int on_heap, bool always = false);
 void __ompt_lw_taskteam_unlink(kmp_info_t *thr);


@@ -161,6 +161,10 @@
 #  define ITT_ARCH_MIPS64 6
 #endif /* ITT_ARCH_MIPS64 */
+#ifndef ITT_ARCH_RISCV64
+#  define ITT_ARCH_RISCV64 7
+#endif /* ITT_ARCH_RISCV64 */
+
 #ifndef ITT_ARCH
 #  if defined _M_IX86 || defined __i386__
 #    define ITT_ARCH ITT_ARCH_IA32
@@ -178,6 +182,8 @@
 #    define ITT_ARCH ITT_ARCH_MIPS
 #  elif defined __mips__ && defined __mips64
 #    define ITT_ARCH ITT_ARCH_MIPS64
+#  elif defined __riscv && __riscv_xlen == 64
+#    define ITT_ARCH ITT_ARCH_RISCV64
 #  endif
 #endif
@@ -330,7 +336,9 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
                       : "memory");
     return result;
 }
-#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64 || ITT_ARCH==ITT_ARCH_MIPS || ITT_ARCH==ITT_ARCH_MIPS64
+#elif ITT_ARCH == ITT_ARCH_ARM || ITT_ARCH == ITT_ARCH_PPC64 || \
+    ITT_ARCH == ITT_ARCH_AARCH64 || ITT_ARCH == ITT_ARCH_MIPS || \
+    ITT_ARCH == ITT_ARCH_MIPS64 || ITT_ARCH == ITT_ARCH_RISCV64
 #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
 #endif /* ITT_ARCH==ITT_ARCH_IA64 */
 #ifndef ITT_SIMPLE_INIT


@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 #include "kmp_config.h"
+#include "kmp_os.h"
 #include "ittnotify_config.h"
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -226,8 +227,6 @@ static __itt_api_info api_list[] = {
 #pragma warning(pop)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-static const char dll_path[PATH_MAX] = { 0 };
-
 /* static part descriptor which handles. all notification api attributes. */
 __itt_global _N_(_ittapi_global) = {
     ITT_MAGIC,                  /* identification info */
@@ -238,7 +237,7 @@ __itt_global _N_(_ittapi_global) = {
     MUTEX_INITIALIZER,          /* mutex */
     NULL,                       /* dynamic library handle */
     NULL,                       /* error_handler */
-    (const char**)&dll_path,    /* dll_path_ptr */
+    NULL,                       /* dll_path_ptr */
     (__itt_api_info*)&api_list, /* api_list_ptr */
     NULL,                       /* next __itt_global */
     NULL,                       /* thread_list */
@@ -1098,6 +1097,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou
             switch (lib_version) {
             case 0:
                 groups = __itt_group_legacy;
+                KMP_FALLTHROUGH();
             case 1:
                 /* Fill all pointers from dynamic library */
                 for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
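KMP_FALLTHROUGH() above annotates the intentional drop from case 0 into case 1 so implicit-fallthrough warnings stay quiet; the kmp_os.h include added in the first hunk of this file is what makes the macro visible here. The macro is conventionally defined along these lines (a sketch, not the verbatim kmp_os.h text):

    #if defined(__cplusplus) && __cplusplus >= 201703L
    #define KMP_FALLTHROUGH() [[fallthrough]]
    #elif defined(__clang__) || defined(__GNUC__)
    #define KMP_FALLTHROUGH() __attribute__((fallthrough))
    #else
    #define KMP_FALLTHROUGH() ((void)0)
    #endif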


@@ -495,13 +495,21 @@ __kmp_unnamed_critical_addr:
 # endif /* !KMP_ASM_INTRINS */
 //------------------------------------------------------------------------
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
 // int
-// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & gtid, argv[0], ... );
-//    return 1;
+// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
+//                         int gtid, int tid,
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                         ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//   return 1;
 // }
 // -- Begin __kmp_invoke_microtask
@@ -991,14 +999,21 @@ KMP_LABEL(invoke_3):
 # endif /* !KMP_ASM_INTRINS */
 //------------------------------------------------------------------------
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
 // int
 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
 //                         int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & tid, argv[0], ... );
-//    return 1;
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                         ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//   return 1;
 // }
 //
 // note: at call to pkfn must have %rsp 128-byte aligned for compiler
@@ -1192,15 +1207,27 @@ KMP_LABEL(kmp_1_exit):
 #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
 //------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
 // int
 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
 //                         int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & tid, argv[0], ... );
-//    return 1;
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                         ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//
+//   // FIXME: This is done at call-site and can be removed here.
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = 0;
+// #endif
+//
+//   return 1;
 // }
 //
 // parameters:
@@ -1306,15 +1333,27 @@ KMP_LABEL(kmp_1):
 #if KMP_ARCH_PPC64
 //------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
 // int
 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
 //                         int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & tid, argv[0], ... );
-//    return 1;
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                         ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//
+//   // FIXME: This is done at call-site and can be removed here.
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = 0;
+// #endif
+//
+//   return 1;
 // }
 //
 // parameters:
@@ -1524,6 +1563,173 @@ __kmp_invoke_microtask:
 #endif /* KMP_ARCH_PPC64 */
+#if KMP_ARCH_RISCV64
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)(int *gtid, int *tid, ...);
+//
+// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
+//                            void *p_argv[]
+// #if OMPT_SUPPORT
+//                            ,
+//                            void **exit_frame_ptr
+// #endif
+//                            ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)(&gtid, &tid, argv[0], ...);
+//
+//   return 1;
+// }
+//
+// Parameters:
+//   a0: pkfn
+//   a1: gtid
+//   a2: tid
+//   a3: argc
+//   a4: p_argv
+//   a5: exit_frame_ptr
+//
+// Locals:
+//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
+//   __tid: tid param pushed on stack so can pass &tid to pkfn
+//
+// Temp. registers:
+//
+//   t0: used to calculate the dynamic stack size / used to hold pkfn address
+//   t1: used as temporary for stack placement calculation
+//   t2: used as temporary for stack arguments
+//   t3: used as temporary for number of remaining pkfn parms
+//   t4: used to traverse p_argv array
+//
+// return: a0 (always 1/TRUE)
+//
+__gtid = -20
+__tid = -24
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+    .text
+    .globl __kmp_invoke_microtask
+    .p2align 1
+    .type __kmp_invoke_microtask,@function
+__kmp_invoke_microtask:
+    .cfi_startproc
+
+    // First, save ra and fp
+    addi sp, sp, -16
+    sd ra, 8(sp)
+    sd fp, 0(sp)
+    addi fp, sp, 16
+    .cfi_def_cfa fp, 0
+    .cfi_offset ra, -8
+    .cfi_offset fp, -16
+
+    // Compute the dynamic stack size:
+    //
+    // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
+    //   reference
+    // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
+    //   function by register. Given that we have 8 of such registers (a[0-7])
+    //   and two + 'argc' arguments (consider &gtid and &tid), we need to
+    //   reserve max(0, argc - 6)*8 extra bytes
+    //
+    // The total number of bytes is then max(0, argc - 6)*8 + 8
+
+    // Compute max(0, argc - 6) using the following bithack:
+    // max(0, x) = x - (x & (x >> 31)), where x := argc - 6
+    // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+    addi t0, a3, -6
+    srai t1, t0, 31
+    and t1, t0, t1
+    sub t0, t0, t1
+
+    addi t0, t0, 1
+    slli t0, t0, 3
+    sub sp, sp, t0
+
+    // Align the stack to 16 bytes
+    andi sp, sp, -16
+
+    mv t0, a0
+    mv t3, a3
+    mv t4, a4
+
+#if OMPT_SUPPORT
+    // Save frame pointer into exit_frame
+    sd fp, 0(a5)
+#endif
+
+    // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
+
+    sw a1, __gtid(fp)
+    sw a2, __tid(fp)
+
+    addi a0, fp, __gtid
+    addi a1, fp, __tid
+
+    beqz t3, .L_kmp_3
+    ld a2, 0(t4)
+
+    addi t3, t3, -1
+    beqz t3, .L_kmp_3
+    ld a3, 8(t4)
+
+    addi t3, t3, -1
+    beqz t3, .L_kmp_3
+    ld a4, 16(t4)
+
+    addi t3, t3, -1
+    beqz t3, .L_kmp_3
+    ld a5, 24(t4)
+
+    addi t3, t3, -1
+    beqz t3, .L_kmp_3
+    ld a6, 32(t4)
+
+    addi t3, t3, -1
+    beqz t3, .L_kmp_3
+    ld a7, 40(t4)
+
+    // Prepare any additional argument passed through the stack
+    addi t4, t4, 48
+    mv t1, sp
+    j .L_kmp_2
+.L_kmp_1:
+    ld t2, 0(t4)
+    sd t2, 0(t1)
+    addi t4, t4, 8
+    addi t1, t1, 8
+.L_kmp_2:
+    addi t3, t3, -1
+    bnez t3, .L_kmp_1
+.L_kmp_3:
+    // Call pkfn function
+    jalr t0
+
+    // Restore stack and return
+    addi a0, zero, 1
+    addi sp, fp, -16
+    ld fp, 0(sp)
+    ld ra, 8(sp)
+    addi sp, sp, 16
+    ret
+.Lfunc_end0:
+    .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
+    .cfi_endproc
+
+// -- End __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_RISCV64 */
 #if KMP_ARCH_ARM || KMP_ARCH_MIPS
     .data
     .comm .gomp_critical_user_,32,8
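A note on the stack-size arithmetic in the RISC-V prologue above: after a0/a1 are taken by &gtid and &tid, six argument registers (a2-a7) remain, so only max(0, argc - 6) pointers spill to the stack, and the branch-free max comes from the cited bithack. The same computation in C, assuming 32-bit int to match the srai-by-31 (a sketch for illustration):

    #include <assert.h>

    // max(0, x) = x - (x & (x >> 31)): for negative x the arithmetic shift
    // yields all ones, the AND reproduces x, and the subtraction gives 0;
    // for x >= 0 the mask is 0 and x passes through unchanged.
    static int max0(int x) { return x - (x & (x >> 31)); }

    static int riscv_extra_stack_bytes(int argc) {
      return max0(argc - 6) * 8 + 8; // spilled args + 8 bytes for gtid/tid
    }

    int main(void) {
      assert(max0(-4) == 0 && max0(5) == 5);
      assert(riscv_extra_stack_bytes(2) == 8);  // everything fits in registers
      assert(riscv_extra_stack_bytes(9) == 32); // three arguments spill
      return 0;
    }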
@@ -1535,7 +1741,7 @@ __kmp_unnamed_critical_addr:
     .size __kmp_unnamed_critical_addr,4
 #endif /* KMP_ARCH_ARM */
-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
+#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
     .data
     .comm .gomp_critical_user_,32,8
     .data
@@ -1544,7 +1750,8 @@ __kmp_unnamed_critical_addr:
 __kmp_unnamed_critical_addr:
     .8byte .gomp_critical_user_
     .size __kmp_unnamed_critical_addr,8
-#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
+#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
+          KMP_ARCH_RISCV64 */
 #if KMP_OS_LINUX
 # if KMP_ARCH_ARM


@@ -50,6 +50,9 @@
 #include <mach/mach.h>
 #include <sys/sysctl.h>
 #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/user.h>
 #include <pthread_np.h>
 #elif KMP_OS_NETBSD
 #include <sys/types.h>
@@ -97,7 +100,7 @@ static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
 }
 #endif
-#if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
+#if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED)
 /* Affinity support */
@@ -119,16 +122,21 @@ void __kmp_affinity_bind_thread(int which) {
 void __kmp_affinity_determine_capable(const char *env_var) {
   // Check and see if the OS supports thread affinity.
+#if KMP_OS_LINUX
 #define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
+#elif KMP_OS_FREEBSD
+#define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t))
+#endif
+#if KMP_OS_LINUX
+  // If Linux* OS:
+  // If the syscall fails or returns a suggestion for the size,
+  // then we don't have to search for an appropriate size.
   int gCode;
   int sCode;
   unsigned char *buf;
   buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
-  // If Linux* OS:
-  // If the syscall fails or returns a suggestion for the size,
-  // then we don't have to search for an appropriate size.
   gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
   KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                 "initial getaffinity call returned %d errno = %d\n",
@@ -267,6 +275,23 @@ void __kmp_affinity_determine_capable(const char *env_var) {
       }
     }
   }
+#elif KMP_OS_FREEBSD
+  int gCode;
+  unsigned char *buf;
+  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
+  gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT,
+                                 reinterpret_cast<cpuset_t *>(buf));
+  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
+                "initial getaffinity call returned %d errno = %d\n",
+                gCode, errno));
+  if (gCode == 0) {
+    KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
+    KA_TRACE(10, ("__kmp_affinity_determine_capable: "
+                  "affinity supported (mask size %d)\n",
+                  (int)__kmp_affin_mask_size));
+    KMP_INTERNAL_FREE(buf);
+    return;
+  }
+#endif
   // save uncaught error code
   // int error = errno;
   KMP_INTERNAL_FREE(buf);
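Unlike the Linux path, which has to probe syscall buffer sizes, the FreeBSD path added above can rely on the fixed-size cpuset_t, so a single successful pthread_getaffinity_np call settles the capability question. A standalone sketch of the same probe (FreeBSD only; link with -lpthread):

    #include <sys/param.h>
    #include <sys/cpuset.h>
    #include <pthread.h>
    #include <pthread_np.h>
    #include <stdio.h>

    int main(void) {
      cpuset_t mask;
      // 0 on success; the mask size is simply sizeof(cpuset_t).
      int rc = pthread_getaffinity_np(pthread_self(), sizeof(cpuset_t), &mask);
      if (rc == 0)
        printf("affinity supported, mask size %zu bytes\n", sizeof(cpuset_t));
      else
        printf("affinity probe failed, rc=%d\n", rc);
      return 0;
    }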
@@ -802,6 +827,13 @@ void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
      and also gives the user the stack space they requested for all threads */
   stack_size += gtid * __kmp_stkoffset * 2;
+#if defined(__ANDROID__) && __ANDROID_API__ < 19
+  // Round the stack size to a multiple of the page size. Older versions of
+  // Android (until KitKat) would fail pthread_attr_setstacksize with EINVAL
+  // if the stack size was not a multiple of the page size.
+  stack_size = (stack_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+#endif
   KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
                 "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
                 gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
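The rounding expression above is the standard power-of-two round-up: adding PAGE_SIZE - 1 and masking off the low bits moves any non-multiple up to the next page boundary while leaving exact multiples alone. A worked check (assuming 4 KiB pages for the example):

    #include <assert.h>
    #include <stddef.h>

    #define EXAMPLE_PAGE_SIZE 4096u // assumption for illustration

    static size_t round_up_to_page(size_t n) {
      return (n + EXAMPLE_PAGE_SIZE - 1) & ~(size_t)(EXAMPLE_PAGE_SIZE - 1);
    }

    int main(void) {
      assert(round_up_to_page(4096) == 4096); // already aligned: unchanged
      assert(round_up_to_page(4097) == 8192); // one byte over: next page
      assert(round_up_to_page(1) == 4096);
      return 0;
    }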
@@ -1972,7 +2004,7 @@ int __kmp_is_address_mapped(void *addr) {
   int found = 0;
   int rc;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_HURD
 /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the address
    ranges mapped into the address space. */
@@ -2010,6 +2042,44 @@ int __kmp_is_address_mapped(void *addr) {
   // Free resources.
   fclose(file);
   KMP_INTERNAL_FREE(name);
+
+#elif KMP_OS_FREEBSD
+
+  char *buf;
+  size_t lstsz;
+  int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
+  rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0);
+  if (rc < 0)
+    return 0;
+  // We pass from number of vm entry's semantic
+  // to size of whole entry map list.
+  lstsz = lstsz * 4 / 3;
+  buf = reinterpret_cast<char *>(kmpc_malloc(lstsz));
+  rc = sysctl(mib, 4, buf, &lstsz, NULL, 0);
+  if (rc < 0) {
+    kmpc_free(buf);
+    return 0;
+  }
+
+  char *lw = buf;
+  char *up = buf + lstsz;
+
+  while (lw < up) {
+    struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw);
+    size_t cursz = cur->kve_structsize;
+    if (cursz == 0)
+      break;
+    void *start = reinterpret_cast<void *>(cur->kve_start);
+    void *end = reinterpret_cast<void *>(cur->kve_end);
+    // Readable/Writable addresses within current map entry
+    if ((addr >= start) && (addr < end)) {
+      if ((cur->kve_protection & KVME_PROT_READ) != 0 &&
+          (cur->kve_protection & KVME_PROT_WRITE) != 0) {
+        found = 1;
+        break;
+      }
+    }
+    lw += cursz;
+  }
+  kmpc_free(buf);
+
 #elif KMP_OS_DARWIN
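The FreeBSD branch above walks the kernel's VM map through sysctl(KERN_PROC_VMMAP) rather than parsing a /proc text file: entries are variable-length, so the loop advances by kve_structsize, and the buffer is padded by a third, presumably to absorb growth of the map between the sizing call and the data call. A trimmed standalone version of the lookup (FreeBSD only, a sketch):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <sys/user.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    // Returns 1 if 'addr' lies in a readable+writable mapping of this process.
    static int address_mapped_rw(void *addr) {
      int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
      size_t len;
      if (sysctl(mib, 4, NULL, &len, NULL, 0) < 0)
        return 0;
      len = len * 4 / 3; // slack in case the map grows between the two calls
      char *buf = malloc(len);
      if (buf == NULL || sysctl(mib, 4, buf, &len, NULL, 0) < 0) {
        free(buf);
        return 0;
      }
      int found = 0;
      for (char *p = buf; p < buf + len;) {
        struct kinfo_vmentry *e = (struct kinfo_vmentry *)p;
        if (e->kve_structsize == 0)
          break;
        if ((uintptr_t)addr >= e->kve_start && (uintptr_t)addr < e->kve_end) {
          found = (e->kve_protection & KVME_PROT_READ) &&
                  (e->kve_protection & KVME_PROT_WRITE);
          break;
        }
        p += e->kve_structsize; // entries are variable-length
      }
      free(buf);
      return found;
    }

    int main(void) {
      int probe;
      printf("stack address mapped rw: %d\n", address_mapped_rw(&probe));
      return 0;
    }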
@@ -2331,7 +2401,8 @@ int __kmp_get_load_balance(int max) {
 #endif // USE_LOAD_BALANCE
 #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
-      ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
+      ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
+      KMP_ARCH_PPC64 || KMP_ARCH_RISCV64)
 // we really only need the case with 1 argument, because CLANG always build
 // a struct of pointers to shared variables referenced in the outlined function
@@ -2415,10 +2486,6 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
     break;
   }
-#if OMPT_SUPPORT
-  *exit_frame_ptr = 0;
-#endif
-
   return 1;
 }