diff --git a/contrib/llvm-project/openmp/runtime/src/kmp.h b/contrib/llvm-project/openmp/runtime/src/kmp.h
index fdb9dbb51981..23eebe673126 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp.h
+++ b/contrib/llvm-project/openmp/runtime/src/kmp.h
@@ -2181,10 +2181,9 @@ struct kmp_dephash_entry {
 typedef struct kmp_dephash {
   kmp_dephash_entry_t **buckets;
   size_t size;
-#ifdef KMP_DEBUG
+  size_t generation;
   kmp_uint32 nelements;
   kmp_uint32 nconflicts;
-#endif
 } kmp_dephash_t;
 
 typedef struct kmp_task_affinity_info {
@@ -3342,7 +3341,7 @@ extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
 extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
-#if KMP_OS_LINUX
+#if KMP_OS_LINUX || KMP_OS_FREEBSD
 extern int kmp_set_thread_affinity_mask_initial(void);
 #endif
 #endif /* KMP_AFFINITY_SUPPORTED */
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_affinity.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_affinity.cpp
index 372c300d44eb..4c7ed3181197 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_affinity.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_affinity.cpp
@@ -1968,7 +1968,7 @@ static void __kmp_dispatch_set_hierarchy_values() {
   __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
       nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
   __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
   if (__kmp_mic_type >= mic3)
     __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
   else
@@ -1982,7 +1982,7 @@ static void __kmp_dispatch_set_hierarchy_values() {
   __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
   __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
       __kmp_nThreadsPerCore;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
   if (__kmp_mic_type >= mic3)
     __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
         2 * __kmp_nThreadsPerCore;
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h b/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h
index c00ad3645661..f270bb6dbb8d 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h
@@ -160,6 +160,7 @@ class KMPHwlocAffinity : public KMPAffinity {
 };
 #endif /* KMP_USE_HWLOC */
 
+#if KMP_OS_LINUX || KMP_OS_FREEBSD
 #if KMP_OS_LINUX
 /* On some of the older OS's that we build on, these constants aren't present
    in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
@@ -234,6 +235,10 @@ class KMPHwlocAffinity : public KMPAffinity {
 #endif /* __NR_sched_getaffinity */
 #error Unknown or unsupported architecture
 #endif /* KMP_ARCH_* */
+#elif KMP_OS_FREEBSD
+#include <pthread.h>
+#include <pthread_np.h>
+#endif
 class KMPNativeAffinity : public KMPAffinity {
   class Mask : public KMPAffinity::Mask {
     typedef unsigned char mask_t;
@@ -294,8 +299,13 @@ class KMPNativeAffinity : public KMPAffinity {
     int get_system_affinity(bool abort_on_error) override {
       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                   "Illegal get affinity operation when not capable");
+#if KMP_OS_LINUX
       int retval =
           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
+#elif KMP_OS_FREEBSD
+      int retval =
+          pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
+#endif
       if (retval >= 0) {
         return 0;
       }
@@ -308,8 +318,13 @@ class KMPNativeAffinity : public KMPAffinity {
     int set_system_affinity(bool abort_on_error) const override {
       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                   "Illegal get affinity operation when not capable");
+#if KMP_OS_LINUX
       int retval =
           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
+#elif KMP_OS_FREEBSD
+      int retval =
+          pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
+#endif
       if (retval >= 0) {
         return 0;
       }
@@ -347,7 +362,7 @@ class KMPNativeAffinity : public KMPAffinity {
   }
   api_type get_api_type() const override { return NATIVE_OS; }
 };
-#endif /* KMP_OS_LINUX */
+#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
 
 #if KMP_OS_WINDOWS
 class KMPNativeAffinity : public KMPAffinity {
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
index c778c97022f5..d39bf9af4334 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
@@ -545,7 +545,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
     if (ompt_enabled.ompt_callback_parallel_end) {
       ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
           &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
-          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
+          ompt_parallel_invoker_program | ompt_parallel_team,
+          OMPT_LOAD_RETURN_ADDRESS(global_tid));
     }
     __ompt_lw_taskteam_unlink(this_thr);
     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
@@ -676,7 +677,8 @@ void __kmpc_flush(ident_t *loc) {
 #endif // KMP_COMPILER_ICC
   }
 #endif // KMP_MIC
-#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
+#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
+       KMP_ARCH_RISCV64)
 // Nothing to see here move along
 #elif KMP_ARCH_PPC64
 // Nothing needed here (we have a real MB above).
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_ftn_os.h b/contrib/llvm-project/openmp/runtime/src/kmp_ftn_os.h
index 856479cbe212..41cafab12537 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_ftn_os.h
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_ftn_os.h
@@ -633,5 +633,25 @@
   GOMP_loop_ull_doacross_guided_start
 #define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START                      \
   GOMP_loop_ull_doacross_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT                       \
+  GOMP_loop_nonmonotonic_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START                      \
+  GOMP_loop_nonmonotonic_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT                        \
+  GOMP_loop_nonmonotonic_guided_next
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START                       \
+  GOMP_loop_nonmonotonic_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT                   \
+  GOMP_loop_ull_nonmonotonic_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START                  \
+  GOMP_loop_ull_nonmonotonic_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT                    \
+  GOMP_loop_ull_nonmonotonic_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START                   \
+  GOMP_loop_ull_nonmonotonic_guided_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC                   \
+  GOMP_parallel_loop_nonmonotonic_dynamic
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED                    \
+  GOMP_parallel_loop_nonmonotonic_guided
 
 #endif /* KMP_FTN_OS_H */
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_global.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_global.cpp
index 1ec73b82e99c..6e636dc394ea 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_global.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_global.cpp
@@ -431,7 +431,7 @@ std::atomic<int> __kmp_thread_pool_active_nth = ATOMIC_VAR_INIT(0);
 /* -------------------------------------------------
  * GLOBAL/ROOT STATE */
 KMP_ALIGN_CACHE
-kmp_global_t __kmp_global = {{0}};
+kmp_global_t __kmp_global;
 
 /* ----------------------------------------------- */
 /* GLOBAL SYNCHRONIZATION LOCKS */
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_gsupport.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_gsupport.cpp
index d41e027eb2b0..10841d265958 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_gsupport.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_gsupport.cpp
@@ -22,7 +22,7 @@ extern "C" {
 #endif // __cplusplus
 
 #define MKLOC(loc, routine)                                                    \
-  static ident_t(loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
+  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
 
 #include "kmp_ftn_os.h"
 
@@ -622,10 +622,16 @@ LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),
            kmp_sch_dynamic_chunked)
+LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START),
+           kmp_sch_dynamic_chunked)
 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
+LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {})
 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),
            kmp_sch_guided_chunked)
+LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START),
+           kmp_sch_guided_chunked)
 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
+LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})
 LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
                    kmp_sch_runtime)
 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
@@ -892,6 +898,16 @@ LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),
                kmp_sch_guided_chunked)
 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
+LOOP_START_ULL(
+    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START),
+               kmp_sch_dynamic_chunked)
+LOOP_NEXT_ULL(
+    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {})
+LOOP_START_ULL(
+    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START),
+               kmp_sch_guided_chunked)
+LOOP_NEXT_ULL(
+    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {})
 LOOP_RUNTIME_START_ULL(
     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
@@ -1487,6 +1503,12 @@ PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),
               kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),
               kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP(
+    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED),
+              kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP(
+    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC),
+              kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
               kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
@@ -1942,6 +1964,26 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,
                    "GOMP_4.5");
 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,
                    "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,
+                   "GOMP_4.5");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,
+                   "GOMP_4.5");
 
 #endif // KMP_USE_VERSION_SYMBOLS
 
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp
index 78d63c67852d..2cc9e08278c4 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp
@@ -2943,10 +2943,10 @@ static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
 #undef expand
 
 // Exposes only one set of jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0;
-int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
+void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
+int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
 
 // Jump tables for the indirect lock functions
 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
@@ -2993,10 +2993,10 @@ static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
 #undef expand
 
 // Exposes only one jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0;
-int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
+void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
+int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
+int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
+int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
 
 // Lock index table.
 kmp_indirect_lock_table_t __kmp_i_lock_table;
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_lock.h b/contrib/llvm-project/openmp/runtime/src/kmp_lock.h
index ccd84eb821d9..9ad86a51657d 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_lock.h
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_lock.h
@@ -1122,18 +1122,18 @@ typedef struct {
 // Function tables for direct locks. Set/unset/test differentiate functions
 // with/without consistency checking.
 extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
-extern void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *);
-extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
+extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
+extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
+extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
+extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
 
 // Function tables for indirect locks. Set/unset/test differentiate functions
 // with/withuot consistency checking.
 extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
-extern void (*(*__kmp_indirect_destroy))(kmp_user_lock_p);
-extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
+extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
+extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
+extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
+extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
 
 // Extracts direct lock tag from a user lock pointer
 #define KMP_EXTRACT_D_TAG(l)                                                   \
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_os.h b/contrib/llvm-project/openmp/runtime/src/kmp_os.h
index c4c7bcf6cc57..cd942a9c4430 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_os.h
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_os.h
@@ -69,7 +69,7 @@
 #error Unknown compiler
 #endif
 
-#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_CNK
 #define KMP_AFFINITY_SUPPORTED 1
 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64
 #define KMP_GROUP_AFFINITY 1
@@ -165,7 +165,8 @@ typedef unsigned long long kmp_uint64;
 
 #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
 #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
-#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
+#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                 \
+    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
 #else
 #error "Can't determine size_t printf format specifier."
@@ -840,7 +841,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
 #endif /* KMP_OS_WINDOWS */
 
 #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS ||     \
-    KMP_ARCH_MIPS64
+    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
 #define KMP_MB() __sync_synchronize()
 #endif
 
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_platform.h b/contrib/llvm-project/openmp/runtime/src/kmp_platform.h
index 93b22290585b..35e61a9cf3d0 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_platform.h
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_platform.h
@@ -98,6 +98,7 @@
 #define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_ELFv2 || KMP_ARCH_PPC64_ELFv1)
 #define KMP_ARCH_MIPS 0
 #define KMP_ARCH_MIPS64 0
+#define KMP_ARCH_RISCV64 0
 
 #if KMP_OS_WINDOWS
 #if defined(_M_AMD64) || defined(__x86_64)
@@ -135,6 +136,9 @@
 #undef KMP_ARCH_MIPS
 #define KMP_ARCH_MIPS 1
 #endif
+#elif defined __riscv && __riscv_xlen == 64
+#undef KMP_ARCH_RISCV64
+#define KMP_ARCH_RISCV64 1
 #endif
 #endif
 
@@ -199,7 +203,7 @@
 // TODO: Fixme - This is clever, but really fugly
 #if (1 !=                                                                      \
      KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 +          \
-         KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64)
+     KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + KMP_ARCH_RISCV64)
 #error Unknown or unsupported architecture
 #endif
 
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_runtime.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_runtime.cpp
index 7f6c149c792e..dd6e0ff70193 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_runtime.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_runtime.cpp
@@ -1190,8 +1190,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
 
       ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
           &(parent_task_info->task_data), &(parent_task_info->frame),
-          &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
-          codeptr);
+          &ompt_parallel_data, team_size,
+          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
     }
   }
 #endif // OMPT_SUPPORT
@@ -1481,9 +1481,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         int team_size = master_set_numthreads
                             ? master_set_numthreads
                             : get__nproc_2(parent_team, master_tid);
+        int flags = OMPT_INVOKER(call_context) |
+                    ((microtask == (microtask_t)__kmp_teams_master)
+                         ? ompt_parallel_league
+                         : ompt_parallel_team);
         ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
-            parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
-            OMPT_INVOKER(call_context), return_address);
+            parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
+            return_address);
       }
       master_th->th.ompt_thread_info.state = ompt_state_overhead;
     }
@@ -1512,19 +1516,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         // AC: we are in serialized parallel
         __kmpc_serialized_parallel(loc, gtid);
         KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
-        // AC: need this in order enquiry functions work
-        // correctly, will restore at join time
-        parent_team->t.t_serialized--;
+
 #if OMPT_SUPPORT
         void *dummy;
-        void **exit_runtime_p;
+        void **exit_frame_p;
 
         ompt_lw_taskteam_t lw_taskteam;
 
         if (ompt_enabled.enabled) {
           __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                   &ompt_parallel_data, return_address);
-          exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
+          exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
 
           __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
           // don't use lw_taskteam after linking. content was swaped
@@ -1532,19 +1534,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           /* OMPT implicit task begin */
           implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
           if (ompt_enabled.ompt_callback_implicit_task) {
-            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
-                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
-                implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
             OMPT_CUR_TASK_INFO(master_th)
                 ->thread_num = __kmp_tid_from_gtid(gtid);
+            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+                implicit_task_data, 1,
+                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
           }
 
           /* OMPT state */
           master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
         } else {
-          exit_runtime_p = &dummy;
+          exit_frame_p = &dummy;
         }
 #endif
+        // AC: need to decrement t_serialized for enquiry functions to work
+        // correctly, will restore at join time
+        parent_team->t.t_serialized--;
 
         {
           KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
@@ -1552,26 +1558,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
 #if OMPT_SUPPORT
                                  ,
-                                 exit_runtime_p
+                                 exit_frame_p
 #endif
                                  );
         }
 
 #if OMPT_SUPPORT
-        *exit_runtime_p = NULL;
         if (ompt_enabled.enabled) {
+          *exit_frame_p = NULL;
           OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
           if (ompt_enabled.ompt_callback_implicit_task) {
             ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                 ompt_scope_end, NULL, implicit_task_data, 1,
-                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
           }
+          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
           __ompt_lw_taskteam_unlink(master_th);
-
           if (ompt_enabled.ompt_callback_parallel_end) {
             ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
-                OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
-                OMPT_INVOKER(call_context), return_address);
+                &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
+                OMPT_INVOKER(call_context) | ompt_parallel_team,
+                return_address);
           }
           master_th->th.ompt_thread_info.state = ompt_state_overhead;
         }
@@ -1586,6 +1593,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       parent_team->t.t_level++;
       parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
 
+#if OMPT_SUPPORT
+      if (ompt_enabled.enabled) {
+        ompt_lw_taskteam_t lw_taskteam;
+        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+                                &ompt_parallel_data, return_address);
+        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
+      }
+#endif
+
       /* Change number of threads in the team if requested */
       if (master_set_numthreads) { // The parallel has num_threads clause
         if (master_set_numthreads < master_th->th.th_teams_size.nth) {
@@ -1714,7 +1730,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
 #if OMPT_SUPPORT
           void *dummy;
-          void **exit_runtime_p;
+          void **exit_frame_p;
           ompt_task_info_t *task_info;
 
           ompt_lw_taskteam_t lw_taskteam;
@@ -1727,19 +1743,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
             // don't use lw_taskteam after linking. content was swaped
 
             task_info = OMPT_CUR_TASK_INFO(master_th);
-            exit_runtime_p = &(task_info->frame.exit_frame.ptr);
+            exit_frame_p = &(task_info->frame.exit_frame.ptr);
             if (ompt_enabled.ompt_callback_implicit_task) {
-              ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
-                  ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
-                  &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
               OMPT_CUR_TASK_INFO(master_th)
                   ->thread_num = __kmp_tid_from_gtid(gtid);
+              ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                  ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+                  &(task_info->task_data), 1,
+                  OMPT_CUR_TASK_INFO(master_th)->thread_num,
+                  ompt_task_implicit);
             }
 
             /* OMPT state */
             master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
           } else {
-            exit_runtime_p = &dummy;
+            exit_frame_p = &dummy;
           }
 #endif
 
@@ -1750,25 +1768,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
                                    parent_team->t.t_argv
 #if OMPT_SUPPORT
                                    ,
-                                   exit_runtime_p
+                                   exit_frame_p
 #endif
                                    );
           }
 
 #if OMPT_SUPPORT
           if (ompt_enabled.enabled) {
-            exit_runtime_p = NULL;
+            *exit_frame_p = NULL;
             if (ompt_enabled.ompt_callback_implicit_task) {
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_end, NULL, &(task_info->task_data), 1,
-                  OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+                  OMPT_CUR_TASK_INFO(master_th)->thread_num,
+                  ompt_task_implicit);
             }
-
+            ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
             __ompt_lw_taskteam_unlink(master_th);
             if (ompt_enabled.ompt_callback_parallel_end) {
               ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
-                  OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
-                  OMPT_INVOKER(call_context), return_address);
+                  &ompt_parallel_data, parent_task_data,
+                  OMPT_INVOKER(call_context) | ompt_parallel_team,
+                  return_address);
             }
             master_th->th.ompt_thread_info.state = ompt_state_overhead;
           }
@@ -1800,6 +1820,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           team->t.t_level--;
           // AC: call special invoker for outer "parallel" of teams construct
           invoker(gtid);
+#if OMPT_SUPPORT
+          if (ompt_enabled.enabled) {
+            ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
+            if (ompt_enabled.ompt_callback_implicit_task) {
+              ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                  ompt_scope_end, NULL, &(task_info->task_data), 0,
+                  OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
+            }
+            if (ompt_enabled.ompt_callback_parallel_end) {
+              ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+                  &ompt_parallel_data, parent_task_data,
+                  OMPT_INVOKER(call_context) | ompt_parallel_league,
+                  return_address);
+            }
+            master_th->th.ompt_thread_info.state = ompt_state_overhead;
+          }
+#endif
         } else {
           argv = args;
           for (i = argc - 1; i >= 0; --i)
@@ -1813,7 +1850,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
 #if OMPT_SUPPORT
           void *dummy;
-          void **exit_runtime_p;
+          void **exit_frame_p;
           ompt_task_info_t *task_info;
 
           ompt_lw_taskteam_t lw_taskteam;
@@ -1824,14 +1861,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
             __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
             // don't use lw_taskteam after linking. content was swaped
             task_info = OMPT_CUR_TASK_INFO(master_th);
-            exit_runtime_p = &(task_info->frame.exit_frame.ptr);
+            exit_frame_p = &(task_info->frame.exit_frame.ptr);
 
             /* OMPT implicit task begin */
             implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
             if (ompt_enabled.ompt_callback_implicit_task) {
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
-                  implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+                  implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
+                  ompt_task_implicit);
               OMPT_CUR_TASK_INFO(master_th)
                   ->thread_num = __kmp_tid_from_gtid(gtid);
             }
@@ -1839,7 +1877,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
             /* OMPT state */
             master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
           } else {
-            exit_runtime_p = &dummy;
+            exit_frame_p = &dummy;
           }
 #endif
 
@@ -1849,18 +1887,19 @@ int __kmp_fork_call(ident_t *loc, int gtid,
             __kmp_invoke_microtask(microtask, gtid, 0, argc, args
 #if OMPT_SUPPORT
                                    ,
-                                   exit_runtime_p
+                                   exit_frame_p
 #endif
                                    );
           }
 
 #if OMPT_SUPPORT
           if (ompt_enabled.enabled) {
-            *exit_runtime_p = NULL;
+            *exit_frame_p = NULL;
             if (ompt_enabled.ompt_callback_implicit_task) {
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_end, NULL, &(task_info->task_data), 1,
-                  OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+                  OMPT_CUR_TASK_INFO(master_th)->thread_num,
+                  ompt_task_implicit);
             }
 
             ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
@@ -1868,7 +1907,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
             if (ompt_enabled.ompt_callback_parallel_end) {
               ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                   &ompt_parallel_data, parent_task_data,
-                  OMPT_INVOKER(call_context), return_address);
+                  OMPT_INVOKER(call_context) | ompt_parallel_team,
+                  return_address);
             }
             master_th->th.ompt_thread_info.state = ompt_state_overhead;
           }
@@ -2225,12 +2265,11 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
 
 static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                    kmp_team_t *team, ompt_data_t *parallel_data,
-                                   fork_context_e fork_context, void *codeptr) {
+                                   int flags, void *codeptr) {
   ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
   if (ompt_enabled.ompt_callback_parallel_end) {
     ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
-        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
-        codeptr);
+        parallel_data, &(task_info->task_data), flags, codeptr);
   }
 
   task_info->frame.enter_frame = ompt_data_none;
@@ -2263,6 +2302,7 @@ void __kmp_join_call(ident_t *loc, int gtid
   master_th->th.th_ident = loc;
 
 #if OMPT_SUPPORT
+  void *team_microtask = (void *)team->t.t_pkfn;
   if (ompt_enabled.enabled) {
     master_th->th.ompt_thread_info.state = ompt_state_overhead;
   }
@@ -2352,10 +2392,25 @@ void __kmp_join_call(ident_t *loc, int gtid
   if (master_th->th.th_teams_microtask && !exit_teams &&
       team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
       team->t.t_level == master_th->th.th_teams_level + 1) {
-    // AC: We need to leave the team structure intact at the end of parallel
-    // inside the teams construct, so that at the next parallel same (hot) team
-    // works, only adjust nesting levels
-
+// AC: We need to leave the team structure intact at the end of parallel
+// inside the teams construct, so that at the next parallel same (hot) team
+// works, only adjust nesting levels
+#if OMPT_SUPPORT
+    ompt_data_t ompt_parallel_data = ompt_data_none;
+    if (ompt_enabled.enabled) {
+      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+      if (ompt_enabled.ompt_callback_implicit_task) {
+        int ompt_team_size = team->t.t_nproc;
+        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
+            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
+      }
+      task_info->frame.exit_frame = ompt_data_none;
+      task_info->task_data = ompt_data_none;
+      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
+      __ompt_lw_taskteam_unlink(master_th);
+    }
+#endif
     /* Decrement our nested depth level */
     team->t.t_level--;
     team->t.t_active_level--;
@@ -2394,8 +2449,8 @@ void __kmp_join_call(ident_t *loc, int gtid
 
 #if OMPT_SUPPORT
     if (ompt_enabled.enabled) {
-      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
-                      codeptr);
+      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
+                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
     }
 #endif
 
@@ -2424,12 +2479,14 @@ void __kmp_join_call(ident_t *loc, int gtid
   if (ompt_enabled.enabled) {
     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
     if (ompt_enabled.ompt_callback_implicit_task) {
-      int ompt_team_size = team->t.t_nproc;
+      int flags = (team_microtask == (void *)__kmp_teams_master)
+                      ? ompt_task_initial
+                      : ompt_task_implicit;
+      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
           ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
-          OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
     }
-
     task_info->frame.exit_frame = ompt_data_none;
     task_info->task_data = ompt_data_none;
   }
@@ -2503,8 +2560,12 @@ void __kmp_join_call(ident_t *loc, int gtid
   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
 
 #if OMPT_SUPPORT
+  int flags =
+      OMPT_INVOKER(fork_context) |
+      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
+                                                      : ompt_parallel_team);
   if (ompt_enabled.enabled) {
-    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                     codeptr);
   }
 #endif
@@ -4432,7 +4493,7 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
   KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
 }
 
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
 /* Sets full mask for thread and returns old mask, no changes to structures. */
 static void
 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
@@ -4980,7 +5041,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       __kmp_partition_places(team);
 #endif
     } else { // team->t.t_nproc < new_nproc
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
       kmp_affin_mask_t *old_mask;
       if (KMP_AFFINITY_CAPABLE()) {
         KMP_CPU_ALLOC(old_mask);
@@ -5029,7 +5090,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
           __kmp_reinitialize_team(team, new_icvs, NULL);
         }
 
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
         /* Temporarily set full mask for master thread before creation of
            workers. The reason is that workers inherit the affinity from master,
            so if a lot of workers are created on the single core quickly, they
@@ -5064,7 +5125,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
           }
         }
 
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
         if (KMP_AFFINITY_CAPABLE()) {
           /* Restore initial master thread's affinity mask */
           __kmp_set_system_affinity(old_mask, TRUE);
@@ -5600,7 +5661,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
 void *__kmp_launch_thread(kmp_info_t *this_thr) {
   int gtid = this_thr->th.th_info.ds.ds_gtid;
   /*    void                 *stack_data;*/
-  kmp_team_t *(*volatile pteam);
+  kmp_team_t **volatile pteam;
 
   KMP_MB();
   KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
@@ -5618,18 +5679,15 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
     this_thr->th.ompt_thread_info.wait_id = 0;
     this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
+    this_thr->th.ompt_thread_info.parallel_flags = 0;
     if (ompt_enabled.ompt_callback_thread_begin) {
       ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
           ompt_thread_worker, thread_data);
     }
-  }
-#endif
-
-#if OMPT_SUPPORT
-  if (ompt_enabled.enabled) {
     this_thr->th.ompt_thread_info.state = ompt_state_idle;
   }
 #endif
+
   /* This is the place where threads wait for work */
   while (!TCR_4(__kmp_global.g.g_done)) {
     KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
@@ -5647,7 +5705,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
     }
 #endif
 
-    pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
+    pteam = &this_thr->th.th_team;
 
     /* have we been allocated? */
     if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
@@ -6956,16 +7014,16 @@ int __kmp_invoke_task_func(int gtid) {
 
 #if OMPT_SUPPORT
   void *dummy;
-  void **exit_runtime_p;
+  void **exit_frame_p;
   ompt_data_t *my_task_data;
   ompt_data_t *my_parallel_data;
   int ompt_team_size;
 
   if (ompt_enabled.enabled) {
-    exit_runtime_p = &(
+    exit_frame_p = &(
         team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
   } else {
-    exit_runtime_p = &dummy;
+    exit_frame_p = &dummy;
   }
 
   my_task_data =
@@ -6975,7 +7033,7 @@ int __kmp_invoke_task_func(int gtid) {
     ompt_team_size = team->t.t_nproc;
     ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
         ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
-        __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
+        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
     OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
   }
 #endif
@@ -6994,11 +7052,12 @@ int __kmp_invoke_task_func(int gtid) {
                               tid, (int)team->t.t_argc, (void **)team->t.t_argv
 #if OMPT_SUPPORT
                               ,
-                              exit_runtime_p
+                              exit_frame_p
 #endif
                               );
 #if OMPT_SUPPORT
-  *exit_runtime_p = NULL;
+  *exit_frame_p = NULL;
+   this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
 #endif
 
 #if KMP_STATS_ENABLED
@@ -7077,7 +7136,22 @@ int __kmp_invoke_teams_master(int gtid) {
                      (void *)__kmp_teams_master);
 #endif
   __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
+#if OMPT_SUPPORT
+  int tid = __kmp_tid_from_gtid(gtid);
+  ompt_data_t *task_data =
+      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
+  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
+  if (ompt_enabled.ompt_callback_implicit_task) {
+    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
+        ompt_task_initial);
+    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
+  }
+#endif
   __kmp_teams_master(gtid);
+#if OMPT_SUPPORT
+  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
+#endif
   __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
   return 1;
 }
@@ -7118,19 +7192,32 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
   thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
 
   // Remember the number of threads for inner parallel regions
+  if (!TCR_4(__kmp_init_middle))
+    __kmp_middle_initialize(); // get internal globals calculated
+  KMP_DEBUG_ASSERT(__kmp_avail_proc);
+  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
   if (num_threads == 0) {
-    if (!TCR_4(__kmp_init_middle))
-      __kmp_middle_initialize(); // get __kmp_avail_proc calculated
     num_threads = __kmp_avail_proc / num_teams;
+    // adjust num_threads w/o warning as it is not user setting
+    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
+    // no thread_limit clause specified -  do not change thread-limit-var ICV
+    if (num_threads > __kmp_dflt_team_nth) {
+      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
+    }
+    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
+      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
+    } // prevent team size to exceed thread-limit-var
     if (num_teams * num_threads > __kmp_teams_max_nth) {
-      // adjust num_threads w/o warning as it is not user setting
       num_threads = __kmp_teams_max_nth / num_teams;
     }
   } else {
     // This thread will be the master of the league masters
     // Store new thread limit; old limit is saved in th_cg_roots list
     thr->th.th_current_task->td_icvs.thread_limit = num_threads;
-
+    // num_threads = min(num_threads, nthreads-var)
+    if (num_threads > __kmp_dflt_team_nth) {
+      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
+    }
     if (num_teams * num_threads > __kmp_teams_max_nth) {
       int new_threads = __kmp_teams_max_nth / num_teams;
       if (!__kmp_reserve_warn) { // user asked for too many threads
@@ -8023,7 +8110,8 @@ __kmp_determine_reduction_method(
 
     int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
 
-#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
+#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
+    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
 
 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
     KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_stub.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_stub.cpp
index badbbde7c967..6b5041988d5c 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_stub.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_stub.cpp
@@ -164,7 +164,7 @@ void *kmp_aligned_malloc(size_t sz, size_t a) {
 #if KMP_OS_WINDOWS
   res = _aligned_malloc(sz, a);
 #else
-  if (err = posix_memalign(&res, a, sz)) {
+  if ((err = posix_memalign(&res, a, sz))) {
     errno = err; // can be EINVAL or ENOMEM
     res = NULL;
   }
@@ -277,7 +277,7 @@ void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) {
 
 kmp_proc_bind_t __kmps_get_proc_bind(void) {
   i;
-  return 0;
+  return proc_bind_false;
 } // __kmps_get_proc_bind
 
 double __kmps_get_wtime(void) {
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_taskdeps.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_taskdeps.cpp
index db79deac3127..f8aa51dd904a 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_taskdeps.cpp
@@ -54,12 +54,64 @@ static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
 
 enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
 
+size_t sizes[] = { 997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029 };
+const size_t MAX_GEN = 8;
+
 static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
   // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
   // m_num_sets );
   return ((addr >> 6) ^ (addr >> 2)) % hsize;
 }
 
+static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
+                                           kmp_dephash_t *current_dephash) {
+  kmp_dephash_t *h;
+
+  size_t gen = current_dephash->generation + 1;
+  if (gen >= MAX_GEN)
+    return current_dephash;
+  size_t new_size = sizes[gen];
+
+  kmp_int32 size_to_allocate =
+      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
+
+#if USE_FAST_MEMORY
+  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
+#else
+  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
+#endif
+
+  h->size = new_size;
+  h->nelements = current_dephash->nelements;
+  h->buckets = (kmp_dephash_entry **)(h + 1);
+  h->generation = gen;
+
+  // insert existing elements in the new table
+  for (size_t i = 0; i < current_dephash->size; i++) {
+    kmp_dephash_entry_t *next;
+    for (kmp_dephash_entry_t *entry = current_dephash->buckets[i]; entry; entry = next) {
+      next = entry->next_in_bucket;
+      // Compute the new hash using the new size, and insert the entry in
+      // the new bucket.
+      kmp_int32 new_bucket = __kmp_dephash_hash(entry->addr, h->size);
+      if (entry->next_in_bucket) {
+        h->nconflicts++;
+      }
+      entry->next_in_bucket = h->buckets[new_bucket];
+      h->buckets[new_bucket] = entry;
+    }
+  }
+
+  // Free old hash table
+#if USE_FAST_MEMORY
+  __kmp_fast_free(thread, current_dephash);
+#else
+  __kmp_thread_free(thread, current_dephash);
+#endif
+
+  return h;
+}
+
 static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                            kmp_taskdata_t *current_task) {
   kmp_dephash_t *h;
@@ -81,10 +133,9 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
 #endif
   h->size = h_size;
 
-#ifdef KMP_DEBUG
+  h->generation = 0;
   h->nelements = 0;
   h->nconflicts = 0;
-#endif
   h->buckets = (kmp_dephash_entry **)(h + 1);
 
   for (size_t i = 0; i < h_size; i++)
@@ -97,7 +148,13 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
 #define ENTRY_LAST_MTXS 1
 
 static kmp_dephash_entry *
-__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
+__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t **hash, kmp_intptr_t addr) {
+  kmp_dephash_t *h = *hash;
+  if (h->nelements != 0
+      && h->nconflicts/h->size >= 1) {
+    *hash = __kmp_dephash_extend(thread, h);
+    h = *hash;
+  }
   kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
 
   kmp_dephash_entry_t *entry;
@@ -122,11 +179,9 @@ __kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
     entry->mtx_lock = NULL;
     entry->next_in_bucket = h->buckets[bucket];
     h->buckets[bucket] = entry;
-#ifdef KMP_DEBUG
     h->nelements++;
     if (entry->next_in_bucket)
       h->nconflicts++;
-#endif
   }
   return entry;
 }
@@ -232,7 +287,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
 
 template <bool filter>
 static inline kmp_int32
-__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
+__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
                    bool dep_barrier, kmp_int32 ndeps,
                    kmp_depend_info_t *dep_list, kmp_task_t *task) {
   KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
@@ -352,7 +407,7 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
 
 // returns true if the task has any outstanding dependence
 static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
-                             kmp_task_t *task, kmp_dephash_t *hash,
+                             kmp_task_t *task, kmp_dephash_t **hash,
                              bool dep_barrier, kmp_int32 ndeps,
                              kmp_depend_info_t *dep_list,
                              kmp_int32 ndeps_noalias,
@@ -552,7 +607,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
     __kmp_init_node(node);
     new_taskdata->td_depnode = node;
 
-    if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
+    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                          NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                          noalias_dep_list)) {
       KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
@@ -633,7 +688,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
   kmp_depnode_t node = {0};
   __kmp_init_node(&node);
 
-  if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash,
+  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                         DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
     KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_taskq.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_taskq.cpp
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_wait_release.h b/contrib/llvm-project/openmp/runtime/src/kmp_wait_release.h
index bb6bdf5d8fa5..b235be3cf64a 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_wait_release.h
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_wait_release.h
@@ -140,8 +140,11 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
 #endif
     if (!KMP_MASTER_TID(ds_tid)) {
       if (ompt_enabled.ompt_callback_implicit_task) {
+        int flags = this_thr->th.ompt_thread_info.parallel_flags;
+        flags = (flags & ompt_parallel_league) ? ompt_task_initial
+                                               : ompt_task_implicit;
         ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
-            ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
+            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
       }
       // return to idle state
       this_thr->th.ompt_thread_info.state = ompt_state_idle;
diff --git a/contrib/llvm-project/openmp/runtime/src/ompt-general.cpp b/contrib/llvm-project/openmp/runtime/src/ompt-general.cpp
index 00bf606bb1d5..41b2827007b6 100644
--- a/contrib/llvm-project/openmp/runtime/src/ompt-general.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/ompt-general.cpp
@@ -430,10 +430,8 @@ OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which,
 
 #define ompt_event_macro(event_name, callback_type, event_id)                  \
   case event_name:                                                             \
-    if (ompt_event_implementation_status(event_name)) {                        \
-      ompt_callbacks.ompt_callback(event_name) = (callback_type)callback;      \
-      ompt_enabled.event_name = (callback != 0);                               \
-    }                                                                          \
+    ompt_callbacks.ompt_callback(event_name) = (callback_type)callback;        \
+    ompt_enabled.event_name = (callback != 0);                                 \
     if (callback)                                                              \
       return ompt_event_implementation_status(event_name);                     \
     else                                                                       \
@@ -456,16 +454,15 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
   switch (which) {
 
 #define ompt_event_macro(event_name, callback_type, event_id)                  \
-  case event_name:                                                             \
-    if (ompt_event_implementation_status(event_name)) {                        \
-      ompt_callback_t mycb =                                                   \
-          (ompt_callback_t)ompt_callbacks.ompt_callback(event_name);           \
-      if (ompt_enabled.event_name && mycb) {                                   \
-        *callback = mycb;                                                      \
-        return ompt_get_callback_success;                                      \
-      }                                                                        \
+  case event_name: {                                                           \
+    ompt_callback_t mycb =                                                     \
+        (ompt_callback_t)ompt_callbacks.ompt_callback(event_name);             \
+    if (ompt_enabled.event_name && mycb) {                                     \
+      *callback = mycb;                                                        \
+      return ompt_get_callback_success;                                        \
     }                                                                          \
-    return ompt_get_callback_failure;
+    return ompt_get_callback_failure;                                          \
+  }
 
     FOREACH_OMPT_EVENT(ompt_event_macro)
 
diff --git a/contrib/llvm-project/openmp/runtime/src/ompt-internal.h b/contrib/llvm-project/openmp/runtime/src/ompt-internal.h
index 5a6beaf88cae..958b5943af38 100644
--- a/contrib/llvm-project/openmp/runtime/src/ompt-internal.h
+++ b/contrib/llvm-project/openmp/runtime/src/ompt-internal.h
@@ -81,6 +81,7 @@ typedef struct {
   ompt_state_t state;
   ompt_wait_id_t wait_id;
   int ompt_task_yielded;
+  int parallel_flags; // information for the last parallel region invoked
   void *idle_frame;
 } ompt_thread_info_t;
 
diff --git a/contrib/llvm-project/openmp/runtime/src/ompt-specific.cpp b/contrib/llvm-project/openmp/runtime/src/ompt-specific.cpp
index 63153d274efb..7fb81bb7d1a0 100644
--- a/contrib/llvm-project/openmp/runtime/src/ompt-specific.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/ompt-specific.cpp
@@ -269,10 +269,11 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
 }
 
 void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
-                             int on_heap) {
+                             int on_heap, bool always) {
   ompt_lw_taskteam_t *link_lwt = lwt;
-  if (thr->th.th_team->t.t_serialized >
-      1) { // we already have a team, so link the new team and swap values
+  if (always ||
+      thr->th.th_team->t.t_serialized >
+          1) { // we already have a team, so link the new team and swap values
     if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap
       link_lwt =
           (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
diff --git a/contrib/llvm-project/openmp/runtime/src/ompt-specific.h b/contrib/llvm-project/openmp/runtime/src/ompt-specific.h
index 86fd928d0378..47d8a1669846 100644
--- a/contrib/llvm-project/openmp/runtime/src/ompt-specific.h
+++ b/contrib/llvm-project/openmp/runtime/src/ompt-specific.h
@@ -26,7 +26,7 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
                              int gtid, ompt_data_t *ompt_pid, void *codeptr);
 
 void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
-                             int on_heap);
+                             int on_heap, bool always = false);
 
 void __ompt_lw_taskteam_unlink(kmp_info_t *thr);
 
diff --git a/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index cc494cb4db43..f231e70d181f 100644
--- a/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -161,6 +161,10 @@
 #  define ITT_ARCH_MIPS64  6
 #endif /* ITT_ARCH_MIPS64 */
 
+#ifndef ITT_ARCH_RISCV64
+#  define ITT_ARCH_RISCV64  7
+#endif /* ITT_ARCH_RISCV64 */
+
 #ifndef ITT_ARCH
 #  if defined _M_IX86 || defined __i386__
 #    define ITT_ARCH ITT_ARCH_IA32
@@ -178,6 +182,8 @@
 #    define ITT_ARCH ITT_ARCH_MIPS
 #  elif defined __mips__ && defined __mips64
 #    define ITT_ARCH ITT_ARCH_MIPS64
+#  elif defined __riscv && __riscv_xlen == 64
+#    define ITT_ARCH ITT_ARCH_RISCV64
 #  endif
 #endif
 
@@ -330,7 +336,9 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
                           : "memory");
     return result;
 }
-#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64 || ITT_ARCH==ITT_ARCH_MIPS ||  ITT_ARCH==ITT_ARCH_MIPS64
+#elif ITT_ARCH == ITT_ARCH_ARM || ITT_ARCH == ITT_ARCH_PPC64 ||                \
+    ITT_ARCH == ITT_ARCH_AARCH64 || ITT_ARCH == ITT_ARCH_MIPS ||               \
+    ITT_ARCH == ITT_ARCH_MIPS64 || ITT_ARCH == ITT_ARCH_RISCV64
 #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
 #endif /* ITT_ARCH==ITT_ARCH_IA64 */
 #ifndef ITT_SIMPLE_INIT
diff --git a/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
similarity index 99%
rename from contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
rename to contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
index a2a73ada2e0c..c48b3f420bb7 100644
--- a/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
+++ b/contrib/llvm-project/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "kmp_config.h"
+#include "kmp_os.h"
 #include "ittnotify_config.h"
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -226,8 +227,6 @@ static __itt_api_info api_list[] = {
 #pragma warning(pop)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 
-static const char dll_path[PATH_MAX] = { 0 };
-
 /* static part descriptor which handles. all notification api attributes. */
 __itt_global _N_(_ittapi_global) = {
     ITT_MAGIC,                                     /* identification info */
@@ -238,7 +237,7 @@ __itt_global _N_(_ittapi_global) = {
     MUTEX_INITIALIZER,                             /* mutex */
     NULL,                                          /* dynamic library handle */
     NULL,                                          /* error_handler */
-    (const char**)&dll_path,                       /* dll_path_ptr */
+    NULL,                                          /* dll_path_ptr */
     (__itt_api_info*)&api_list,                    /* api_list_ptr */
     NULL,                                          /* next __itt_global */
     NULL,                                          /* thread_list */
@@ -1098,6 +1097,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou
                         switch (lib_version) {
                         case 0:
                             groups = __itt_group_legacy;
+                            KMP_FALLTHROUGH();
                         case 1:
                             /* Fill all pointers from dynamic library */
                             for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
diff --git a/contrib/llvm-project/openmp/runtime/src/z_Linux_asm.S b/contrib/llvm-project/openmp/runtime/src/z_Linux_asm.S
index c451b3aa30d8..8090ff759fe1 100644
--- a/contrib/llvm-project/openmp/runtime/src/z_Linux_asm.S
+++ b/contrib/llvm-project/openmp/runtime/src/z_Linux_asm.S
@@ -495,13 +495,21 @@ __kmp_unnamed_critical_addr:
 # endif /* !KMP_ASM_INTRINS */
 
 //------------------------------------------------------------------------
-// typedef void	(*microtask_t)( int *gtid, int *tid, ... );
-//
 // int
-// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & gtid, argv[0], ... );
-//    return 1;
+// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
+//                         int gtid, int tid,
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                       ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//   return 1;
 // }
 
 // -- Begin __kmp_invoke_microtask
@@ -991,14 +999,21 @@ KMP_LABEL(invoke_3):
 # endif /* !KMP_ASM_INTRINS */
 
 //------------------------------------------------------------------------
-// typedef void	(*microtask_t)( int *gtid, int *tid, ... );
-//
 // int
 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
-//		           int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & tid, argv[0], ... );
-//    return 1;
+//                         int gtid, int tid,
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                       ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//   return 1;
 // }
 //
 // note: at call to pkfn must have %rsp 128-byte aligned for compiler
@@ -1192,15 +1207,27 @@ KMP_LABEL(kmp_1_exit):
 #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
 
 //------------------------------------------------------------------------
-//
-// typedef void	(*microtask_t)( int *gtid, int *tid, ... );
-//
 // int
 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
-//		           int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & tid, argv[0], ... );
-//    return 1;
+//                         int gtid, int tid,
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                       ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//
+// // FIXME: This is done at call-site and can be removed here.
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = 0;
+// #endif
+//
+//   return 1;
 // }
 //
 // parameters:
@@ -1306,15 +1333,27 @@ KMP_LABEL(kmp_1):
 #if KMP_ARCH_PPC64
 
 //------------------------------------------------------------------------
-//
-// typedef void	(*microtask_t)( int *gtid, int *tid, ... );
-//
 // int
 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
-//		           int gtid, int tid,
-//                         int argc, void *p_argv[] ) {
-//    (*pkfn)( & gtid, & tid, argv[0], ... );
-//    return 1;
+//                         int gtid, int tid,
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                       ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, argv[0], ... );
+//
+// // FIXME: This is done at call-site and can be removed here.
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = 0;
+// #endif
+//
+//   return 1;
 // }
 //
 // parameters:
@@ -1524,6 +1563,173 @@ __kmp_invoke_microtask:
 
 #endif /* KMP_ARCH_PPC64 */
 
+#if KMP_ARCH_RISCV64
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)(int *gtid, int *tid, ...);
+//
+// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
+//                            void *p_argv[]
+// #if OMPT_SUPPORT
+//                            ,
+//                            void **exit_frame_ptr
+// #endif
+//                            ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)(&gtid, &tid, argv[0], ...);
+//
+//   return 1;
+// }
+//
+// Parameters:
+//   a0: pkfn
+//   a1: gtid
+//   a2: tid
+//   a3: argc
+//   a4: p_argv
+//   a5: exit_frame_ptr
+//
+// Locals:
+//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
+//   __tid: tid param pushed on stack so can pass &tid to pkfn
+//
+// Temp. registers:
+//
+//  t0: used to calculate the dynamic stack size / used to hold pkfn address
+//  t1: used as temporary for stack placement calculation
+//  t2: used as temporary for stack arguments
+//  t3: used as temporary for number of remaining pkfn parms
+//  t4: used to traverse p_argv array
+//
+// return: a0 (always 1/TRUE)
+//
+
+__gtid = -20
+__tid = -24
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+	.text
+	.globl	__kmp_invoke_microtask
+	.p2align	1
+	.type	__kmp_invoke_microtask,@function
+__kmp_invoke_microtask:
+	.cfi_startproc
+
+	// First, save ra and fp
+	addi	sp, sp, -16
+	sd	ra, 8(sp)
+	sd	fp, 0(sp)
+	addi	fp, sp, 16
+	.cfi_def_cfa	fp, 0
+	.cfi_offset	ra, -8
+	.cfi_offset	fp, -16
+
+	// Compute the dynamic stack size:
+	//
+	// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
+	//   reference
+	// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
+	//   function by register. Given that we have 8 of such registers (a[0-7])
+	//   and two + 'argc' arguments (consider &gtid and &tid), we need to
+	//   reserve max(0, argc - 6)*8 extra bytes
+	//
+	// The total number of bytes is then max(0, argc - 6)*8 + 8
+
+	// Compute max(0, argc - 6) using the following bithack:
+	// max(0, x) = x - (x & (x >> 31)), where x := argc - 6
+	// Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+	addi	t0, a3, -6
+	srai	t1, t0, 31
+	and	t1, t0, t1
+	sub	t0, t0, t1
+
+	addi	t0, t0, 1
+
+	slli	t0, t0, 3
+	sub	sp, sp, t0
+
+	// Align the stack to 16 bytes
+	andi	sp, sp, -16
+
+	mv	t0, a0
+	mv	t3, a3
+	mv	t4, a4
+
+#if OMPT_SUPPORT
+	// Save frame pointer into exit_frame
+	sd	fp, 0(a5)
+#endif
+
+	// Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
+
+	sw	a1, __gtid(fp)
+	sw	a2, __tid(fp)
+
+	addi	a0, fp, __gtid
+	addi	a1, fp, __tid
+
+	beqz	t3, .L_kmp_3
+	ld	a2, 0(t4)
+
+	addi	t3, t3, -1
+	beqz	t3, .L_kmp_3
+	ld	a3, 8(t4)
+
+	addi	t3, t3, -1
+	beqz	t3, .L_kmp_3
+	ld	a4, 16(t4)
+
+	addi	t3, t3, -1
+	beqz	t3, .L_kmp_3
+	ld	a5, 24(t4)
+
+	addi	t3, t3, -1
+	beqz	t3, .L_kmp_3
+	ld	a6, 32(t4)
+
+	addi	t3, t3, -1
+	beqz	t3, .L_kmp_3
+	ld	a7, 40(t4)
+
+	// Prepare any additional argument passed through the stack
+	addi	t4, t4, 48
+	mv	t1, sp
+	j .L_kmp_2
+.L_kmp_1:
+	ld	t2, 0(t4)
+	sd	t2, 0(t1)
+	addi	t4, t4, 8
+	addi	t1, t1, 8
+.L_kmp_2:
+	addi	t3, t3, -1
+	bnez	t3, .L_kmp_1
+
+.L_kmp_3:
+	// Call pkfn function
+	jalr	t0
+
+	// Restore stack and return
+
+	addi	a0, zero, 1
+
+	addi	sp, fp, -16
+	ld	fp, 0(sp)
+	ld	ra, 8(sp)
+	addi	sp, sp, 16
+	ret
+.Lfunc_end0:
+	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
+	.cfi_endproc
+
+// -- End  __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_RISCV64 */
+
 #if KMP_ARCH_ARM || KMP_ARCH_MIPS
     .data
     .comm .gomp_critical_user_,32,8
@@ -1535,7 +1741,7 @@ __kmp_unnamed_critical_addr:
     .size __kmp_unnamed_critical_addr,4
 #endif /* KMP_ARCH_ARM */
 
-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
+#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
     .data
     .comm .gomp_critical_user_,32,8
     .data
@@ -1544,7 +1750,8 @@ __kmp_unnamed_critical_addr:
 __kmp_unnamed_critical_addr:
     .8byte .gomp_critical_user_
     .size __kmp_unnamed_critical_addr,8
-#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
+#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
+          KMP_ARCH_RISCV64 */
 
 #if KMP_OS_LINUX
 # if KMP_ARCH_ARM
diff --git a/contrib/llvm-project/openmp/runtime/src/z_Linux_util.cpp b/contrib/llvm-project/openmp/runtime/src/z_Linux_util.cpp
index 1983fc2b9834..0ee12927e4bf 100644
--- a/contrib/llvm-project/openmp/runtime/src/z_Linux_util.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/z_Linux_util.cpp
@@ -50,6 +50,9 @@
 #include <mach/mach.h>
 #include <sys/sysctl.h>
 #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/user.h>
 #include <pthread_np.h>
 #elif KMP_OS_NETBSD
 #include <sys/types.h>
@@ -97,7 +100,7 @@ static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
 }
 #endif
 
-#if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
+#if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED)
 
 /* Affinity support */
 
@@ -119,16 +122,21 @@ void __kmp_affinity_bind_thread(int which) {
 void __kmp_affinity_determine_capable(const char *env_var) {
 // Check and see if the OS supports thread affinity.
 
+#if KMP_OS_LINUX
 #define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
+#elif KMP_OS_FREEBSD
+#define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t))
+#endif
 
+
+#if KMP_OS_LINUX
+  // If Linux* OS:
+  // If the syscall fails or returns a suggestion for the size,
+  // then we don't have to search for an appropriate size.
   int gCode;
   int sCode;
   unsigned char *buf;
   buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
-
-  // If Linux* OS:
-  // If the syscall fails or returns a suggestion for the size,
-  // then we don't have to search for an appropriate size.
   gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
   KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                 "initial getaffinity call returned %d errno = %d\n",
@@ -267,6 +275,23 @@ void __kmp_affinity_determine_capable(const char *env_var) {
       }
     }
   }
+#elif KMP_OS_FREEBSD
+  int gCode;
+  unsigned char *buf;
+  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
+  gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT, reinterpret_cast<cpuset_t *>(buf));
+  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
+                "initial getaffinity call returned %d errno = %d\n",
+                gCode, errno));
+  if (gCode == 0) {
+    KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
+    KA_TRACE(10, ("__kmp_affinity_determine_capable: "
+                  "affinity supported (mask size %d)\n"<
+		  (int)__kmp_affin_mask_size));
+    KMP_INTERNAL_FREE(buf);
+    return;
+  }
+#endif
   // save uncaught error code
   // int error = errno;
   KMP_INTERNAL_FREE(buf);
@@ -802,6 +827,13 @@ void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
      and also gives the user the stack space they requested for all threads */
   stack_size += gtid * __kmp_stkoffset * 2;
 
+#if defined(__ANDROID__) && __ANDROID_API__ < 19
+    // Round the stack size to a multiple of the page size. Older versions of
+    // Android (until KitKat) would fail pthread_attr_setstacksize with EINVAL
+    // if the stack size was not a multiple of the page size.
+    stack_size = (stack_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+#endif
+
   KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
                 "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
                 gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
@@ -1972,7 +2004,7 @@ int __kmp_is_address_mapped(void *addr) {
   int found = 0;
   int rc;
 
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_HURD
 
   /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the address
      ranges mapped into the address space. */
@@ -2010,6 +2042,44 @@ int __kmp_is_address_mapped(void *addr) {
   // Free resources.
   fclose(file);
   KMP_INTERNAL_FREE(name);
+#elif KMP_OS_FREEBSD
+  char *buf;
+  size_t lstsz;
+  int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
+  rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0);
+  if (rc < 0)
+     return 0;
+  // We pass from number of vm entry's semantic
+  // to size of whole entry map list.
+  lstsz = lstsz * 4 / 3;
+  buf = reinterpret_cast<char *>(kmpc_malloc(lstsz));
+  rc = sysctl(mib, 4, buf, &lstsz, NULL, 0);
+  if (rc < 0) {
+     kmpc_free(buf);
+     return 0;
+  }
+
+  char *lw = buf;
+  char *up = buf + lstsz;
+
+  while (lw < up) {
+      struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw);
+      size_t cursz = cur->kve_structsize;
+      if (cursz == 0)
+          break;
+      void *start = reinterpret_cast<void *>(cur->kve_start);
+      void *end = reinterpret_cast<void *>(cur->kve_end);
+      // Readable/Writable addresses within current map entry
+      if ((addr >= start) && (addr < end)) {
+          if ((cur->kve_protection & KVME_PROT_READ) != 0 &&
+              (cur->kve_protection & KVME_PROT_WRITE) != 0) {
+              found = 1;
+              break;
+          }
+      }
+      lw += cursz;
+  }
+  kmpc_free(buf);
 
 #elif KMP_OS_DARWIN
 
@@ -2331,7 +2401,8 @@ int __kmp_get_load_balance(int max) {
 #endif // USE_LOAD_BALANCE
 
 #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC ||                            \
-      ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
+      ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) ||                 \
+      KMP_ARCH_PPC64 || KMP_ARCH_RISCV64)
 
 // we really only need the case with 1 argument, because CLANG always build
 // a struct of pointers to shared variables referenced in the outlined function
@@ -2415,10 +2486,6 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
     break;
   }
 
-#if OMPT_SUPPORT
-  *exit_frame_ptr = 0;
-#endif
-
   return 1;
 }