diff --git a/contrib/compiler-rt/lib/builtins/arm/comparesf2.S b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S index e8095650e4fb..a5b7e36487d6 100644 --- a/contrib/compiler-rt/lib/builtins/arm/comparesf2.S +++ b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S @@ -283,7 +283,7 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2) END_COMPILERRT_FUNCTION(__unordsf2) #if defined(COMPILER_RT_ARMHF_TARGET) -DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum): +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum) vmov s0, r0 vmov s1, r1 b SYMBOL_NAME(__unordsf2) diff --git a/contrib/compiler-rt/lib/xray/xray_arm.cc b/contrib/compiler-rt/lib/xray/xray_arm.cc index d89322e833e5..f5e2cd2a93c2 100644 --- a/contrib/compiler-rt/lib/xray/xray_arm.cc +++ b/contrib/compiler-rt/lib/xray/xray_arm.cc @@ -19,6 +19,8 @@ #include #include +extern "C" void __clear_cache(void* start, void* end); + namespace __xray { uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { @@ -116,8 +118,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, // B #20 uint32_t *FirstAddress = reinterpret_cast(Sled.Address); + uint32_t *CurAddress = FirstAddress + 1; if (Enable) { - uint32_t *CurAddress = FirstAddress + 1; CurAddress = Write32bitLoadR0(CurAddress, reinterpret_cast(FuncId)); CurAddress = @@ -125,6 +127,7 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); CurAddress++; *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); + CurAddress++; std::atomic_store_explicit( reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); @@ -133,6 +136,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); } + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); return true; } diff --git a/contrib/libc++/include/__config b/contrib/libc++/include/__config index ddfab31f7fac..340c573a2a40 100644 --- a/contrib/libc++/include/__config +++ b/contrib/libc++/include/__config @@ -403,15 +403,6 @@ namespace std { #define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow"))) #endif -// A constexpr version of __builtin_memcmp was added in clang 4.0 -#if __has_builtin(__builtin_memcmp) -# ifdef __apple_build_version__ -// No shipping version of Apple's clang has constexpr __builtin_memcmp -# elif __clang_major__ > 3 -# define _LIBCPP_BUILTIN_MEMCMP_ISCONSTEXPR -# endif -#endif - #elif defined(_LIBCPP_COMPILER_GCC) #define _ALIGNAS(x) __attribute__((__aligned__(x))) diff --git a/contrib/libc++/include/__string b/contrib/libc++/include/__string index c85b874f054b..75608cea71d9 100644 --- a/contrib/libc++/include/__string +++ b/contrib/libc++/include/__string @@ -243,7 +243,7 @@ char_traits::compare(const char_type* __s1, const char_type* __s2, size_t { if (__n == 0) return 0; -#ifdef _LIBCPP_BUILTIN_MEMCMP_ISCONSTEXPR +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_memcmp(__s1, __s2, __n); #elif _LIBCPP_STD_VER <= 14 return memcmp(__s1, __s2, __n); @@ -265,7 +265,9 @@ char_traits::find(const char_type* __s, size_t __n, const char_type& __a) { if (__n == 0) return NULL; -#if _LIBCPP_STD_VER <= 14 +#if __has_feature(cxx_constexpr_string_builtins) + return __builtin_char_memchr(__s, to_int_type(__a), __n); +#elif _LIBCPP_STD_VER <= 14 return (const char_type*) memchr(__s, to_int_type(__a), __n); #else for (; __n; --__n) @@ -331,7 +333,7 @@ 
char_traits::compare(const char_type* __s1, const char_type* __s2, size { if (__n == 0) return 0; -#if __has_builtin(__builtin_wmemcmp) +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_wmemcmp(__s1, __s2, __n); #elif _LIBCPP_STD_VER <= 14 return wmemcmp(__s1, __s2, __n); @@ -351,7 +353,7 @@ inline _LIBCPP_CONSTEXPR_AFTER_CXX14 size_t char_traits::length(const char_type* __s) _NOEXCEPT { -#if __has_builtin(__builtin_wcslen) +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_wcslen(__s); #elif _LIBCPP_STD_VER <= 14 return wcslen(__s); @@ -369,7 +371,7 @@ char_traits::find(const char_type* __s, size_t __n, const char_type& __ { if (__n == 0) return NULL; -#if __has_builtin(__builtin_wmemchr) +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_wmemchr(__s, __a, __n); #elif _LIBCPP_STD_VER <= 14 return wmemchr(__s, __a, __n); diff --git a/contrib/libc++/include/__threading_support b/contrib/libc++/include/__threading_support index a672a9f80eda..817a2cfb934f 100644 --- a/contrib/libc++/include/__threading_support +++ b/contrib/libc++/include/__threading_support @@ -40,6 +40,12 @@ #define _LIBCPP_THREAD_ABI_VISIBILITY inline _LIBCPP_INLINE_VISIBILITY #endif +#if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(no_thread_safety_analysis) +#define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) +#else +#define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS +#endif + _LIBCPP_BEGIN_NAMESPACE_STD #if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) @@ -98,25 +104,25 @@ typedef DWORD __libcpp_tls_key; _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t *__m); _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_trylock(__libcpp_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t *__m); _LIBCPP_THREAD_ABI_VISIBILITY @@ -129,10 +135,10 @@ int __libcpp_condvar_signal(__libcpp_condvar_t* __cv); _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m, timespec *__ts); diff --git a/contrib/libc++/include/deque b/contrib/libc++/include/deque index 0454162a8f73..92801540f2e7 100644 --- a/contrib/libc++/include/deque +++ b/contrib/libc++/include/deque @@ -110,8 +110,8 @@ public: void push_front(value_type&& v); void push_back(const value_type& 
v); void push_back(value_type&& v); - template reference emplace_front(Args&&... args); - template reference emplace_back(Args&&... args); + template reference emplace_front(Args&&... args); // reference in C++17 + template reference emplace_back(Args&&... args); // reference in C++17 template iterator emplace(const_iterator p, Args&&... args); iterator insert(const_iterator p, const value_type& v); iterator insert(const_iterator p, value_type&& v); @@ -1342,8 +1342,13 @@ public: void push_back(const value_type& __v); #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES #ifndef _LIBCPP_HAS_NO_VARIADICS +#if _LIBCPP_STD_VER > 14 template reference emplace_front(_Args&&... __args); - template reference emplace_back(_Args&&... __args); + template reference emplace_back (_Args&&... __args); +#else + template void emplace_front(_Args&&... __args); + template void emplace_back (_Args&&... __args); +#endif template iterator emplace(const_iterator __p, _Args&&... __args); #endif // _LIBCPP_HAS_NO_VARIADICS void push_front(value_type&& __v); @@ -1822,7 +1827,11 @@ deque<_Tp, _Allocator>::push_back(value_type&& __v) template template +#if _LIBCPP_STD_VER > 14 typename deque<_Tp, _Allocator>::reference +#else +void +#endif deque<_Tp, _Allocator>::emplace_back(_Args&&... __args) { allocator_type& __a = __base::__alloc(); @@ -1832,7 +1841,9 @@ deque<_Tp, _Allocator>::emplace_back(_Args&&... __args) __alloc_traits::construct(__a, _VSTD::addressof(*__base::end()), _VSTD::forward<_Args>(__args)...); ++__base::size(); +#if _LIBCPP_STD_VER > 14 return *--__base::end(); +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS @@ -1870,7 +1881,11 @@ deque<_Tp, _Allocator>::push_front(value_type&& __v) template template +#if _LIBCPP_STD_VER > 14 typename deque<_Tp, _Allocator>::reference +#else +void +#endif deque<_Tp, _Allocator>::emplace_front(_Args&&... __args) { allocator_type& __a = __base::__alloc(); @@ -1880,7 +1895,9 @@ deque<_Tp, _Allocator>::emplace_front(_Args&&... __args) __alloc_traits::construct(__a, _VSTD::addressof(*--__base::begin()), _VSTD::forward<_Args>(__args)...); --__base::__start_; ++__base::size(); +#if _LIBCPP_STD_VER > 14 return *__base::begin(); +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS diff --git a/contrib/libc++/include/forward_list b/contrib/libc++/include/forward_list index ce9a4b1bca90..879f2d3ce087 100644 --- a/contrib/libc++/include/forward_list +++ b/contrib/libc++/include/forward_list @@ -87,7 +87,7 @@ public: reference front(); const_reference front() const; - template reference emplace_front(Args&&... args); + template reference emplace_front(Args&&... args); // reference in C++17 void push_front(const value_type& v); void push_front(value_type&& v); @@ -747,7 +747,11 @@ public: #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES #ifndef _LIBCPP_HAS_NO_VARIADICS +#if _LIBCPP_STD_VER > 14 template reference emplace_front(_Args&&... __args); +#else + template void emplace_front(_Args&&... __args); +#endif #endif void push_front(value_type&& __v); #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES @@ -1103,7 +1107,11 @@ forward_list<_Tp, _Alloc>::assign(initializer_list __il) template template +#if _LIBCPP_STD_VER > 14 typename forward_list<_Tp, _Alloc>::reference +#else +void +#endif forward_list<_Tp, _Alloc>::emplace_front(_Args&&... __args) { __node_allocator& __a = base::__alloc(); @@ -1113,7 +1121,9 @@ forward_list<_Tp, _Alloc>::emplace_front(_Args&&... 
__args) _VSTD::forward<_Args>(__args)...); __h->__next_ = base::__before_begin()->__next_; base::__before_begin()->__next_ = __h.release(); +#if _LIBCPP_STD_VER > 14 return base::__before_begin()->__next_->__value_; +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS diff --git a/contrib/libc++/include/list b/contrib/libc++/include/list index 76c50a156093..fa148db516bc 100644 --- a/contrib/libc++/include/list +++ b/contrib/libc++/include/list @@ -93,10 +93,10 @@ public: size_type max_size() const noexcept; template - reference emplace_front(Args&&... args); + reference emplace_front(Args&&... args); // reference in C++17 void pop_front(); template - reference emplace_back(Args&&... args); + reference emplace_back(Args&&... args); // reference in C++17 void pop_back(); void push_front(const value_type& x); void push_front(value_type&& x); @@ -969,9 +969,17 @@ public: void push_back(value_type&& __x); #ifndef _LIBCPP_HAS_NO_VARIADICS template +#if _LIBCPP_STD_VER > 14 reference emplace_front(_Args&&... __args); +#else + void emplace_front(_Args&&... __args); +#endif template +#if _LIBCPP_STD_VER > 14 reference emplace_back(_Args&&... __args); +#else + void emplace_back(_Args&&... __args); +#endif template iterator emplace(const_iterator __p, _Args&&... __args); #endif // _LIBCPP_HAS_NO_VARIADICS @@ -1600,7 +1608,11 @@ list<_Tp, _Alloc>::push_back(value_type&& __x) template template +#if _LIBCPP_STD_VER > 14 typename list<_Tp, _Alloc>::reference +#else +void +#endif list<_Tp, _Alloc>::emplace_front(_Args&&... __args) { __node_allocator& __na = base::__node_alloc(); @@ -1609,12 +1621,20 @@ list<_Tp, _Alloc>::emplace_front(_Args&&... __args) __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), _VSTD::forward<_Args>(__args)...); __link_nodes_at_front(__hold.get()->__as_link(), __hold.get()->__as_link()); ++base::__sz(); +#if _LIBCPP_STD_VER > 14 return __hold.release()->__value_; +#else + __hold.release(); +#endif } template template +#if _LIBCPP_STD_VER > 14 typename list<_Tp, _Alloc>::reference +#else +void +#endif list<_Tp, _Alloc>::emplace_back(_Args&&... __args) { __node_allocator& __na = base::__node_alloc(); @@ -1624,7 +1644,11 @@ list<_Tp, _Alloc>::emplace_back(_Args&&... __args) __link_pointer __nl = __hold->__as_link(); __link_nodes_at_back(__nl, __nl); ++base::__sz(); +#if _LIBCPP_STD_VER > 14 return __hold.release()->__value_; +#else + __hold.release(); +#endif } template diff --git a/contrib/libc++/include/queue b/contrib/libc++/include/queue index 271c203e1d24..57d420c7406c 100644 --- a/contrib/libc++/include/queue +++ b/contrib/libc++/include/queue @@ -63,7 +63,7 @@ public: void push(const value_type& v); void push(value_type&& v); - template reference emplace(Args&&... args); + template reference emplace(Args&&... args); // reference in C++17 void pop(); void swap(queue& q) noexcept(is_nothrow_swappable_v) @@ -292,8 +292,13 @@ public: #ifndef _LIBCPP_HAS_NO_VARIADICS template _LIBCPP_INLINE_VISIBILITY +#if _LIBCPP_STD_VER > 14 reference emplace(_Args&&... __args) { return c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#else + void emplace(_Args&&... 
__args) + { c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#endif #endif // _LIBCPP_HAS_NO_VARIADICS #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES _LIBCPP_INLINE_VISIBILITY diff --git a/contrib/libc++/include/stack b/contrib/libc++/include/stack index 228864e4709b..c797ea5ec6e9 100644 --- a/contrib/libc++/include/stack +++ b/contrib/libc++/include/stack @@ -55,7 +55,7 @@ public: void push(const value_type& x); void push(value_type&& x); - template reference emplace(Args&&... args); + template reference emplace(Args&&... args); // reference in C++17 void pop(); void swap(stack& c) noexcept(is_nothrow_swappable_v) @@ -199,8 +199,13 @@ public: #ifndef _LIBCPP_HAS_NO_VARIADICS template _LIBCPP_INLINE_VISIBILITY +#if _LIBCPP_STD_VER > 14 reference emplace(_Args&&... __args) { return c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#else + void emplace(_Args&&... __args) + { c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#endif #endif // _LIBCPP_HAS_NO_VARIADICS #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES _LIBCPP_INLINE_VISIBILITY diff --git a/contrib/libc++/include/vector b/contrib/libc++/include/vector index 4dd0e527ff57..ded057b10c8b 100644 --- a/contrib/libc++/include/vector +++ b/contrib/libc++/include/vector @@ -99,7 +99,7 @@ public: void push_back(const value_type& x); void push_back(value_type&& x); template - reference emplace_back(Args&&... args); + reference emplace_back(Args&&... args); // reference in C++17 void pop_back(); template iterator emplace(const_iterator position, Args&&... args); @@ -218,7 +218,7 @@ public: const_reference back() const; void push_back(const value_type& x); - template reference emplace_back(Args&&... args); // C++14 + template reference emplace_back(Args&&... args); // C++14; reference in C++17 void pop_back(); template iterator emplace(const_iterator position, Args&&... args); // C++14 @@ -679,7 +679,11 @@ public: #ifndef _LIBCPP_HAS_NO_VARIADICS template _LIBCPP_INLINE_VISIBILITY +#if _LIBCPP_STD_VER > 14 reference emplace_back(_Args&&... __args); +#else + void emplace_back(_Args&&... __args); +#endif #endif // _LIBCPP_HAS_NO_VARIADICS #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES _LIBCPP_INLINE_VISIBILITY @@ -1625,7 +1629,11 @@ vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) template template inline +#if _LIBCPP_STD_VER > 14 typename vector<_Tp, _Allocator>::reference +#else +void +#endif vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) { if (this->__end_ < this->__end_cap()) @@ -1639,7 +1647,9 @@ vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) } else __emplace_back_slow_path(_VSTD::forward<_Args>(__args)...); +#if _LIBCPP_STD_VER > 14 return this->back(); +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS @@ -2336,9 +2346,16 @@ public: void push_back(const value_type& __x); #if _LIBCPP_STD_VER > 11 template - _LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args) { +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args) +#else + _LIBCPP_INLINE_VISIBILITY void emplace_back(_Args&&... __args) +#endif + { push_back ( value_type ( _VSTD::forward<_Args>(__args)... 
)); +#if _LIBCPP_STD_VER > 14 return this->back(); +#endif } #endif diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 07d5b5ea40dc..dcec3860f2ca 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -100,6 +100,10 @@ def int_amdgcn_dispatch_id : GCCBuiltin<"__builtin_amdgcn_dispatch_id">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; +def int_amdgcn_implicit_buffer_ptr : + GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, + Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Instruction Intrinsics //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index e8d86bdbca5b..c8d057949493 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1191,14 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return MayAlias; AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, - AAMDNodes(), V2, V2Size, V2AAInfo, - nullptr, UnderlyingV2); + AAMDNodes(), V2, MemoryLocation::UnknownSize, + V2AAInfo, nullptr, UnderlyingV2); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values - // cannot alias per GEP semantics: "A pointer value formed from a - // getelementptr instruction is associated with the addresses associated - // with the first operand of the getelementptr". + // cannot alias per GEP semantics: "Any memory access must be done through + // a pointer value associated with an address range of the memory access, + // otherwise the behavior is undefined.". return R; // If the max search depth is reached the result is undefined diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index a44b92effc7e..145b9c50367a 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -919,7 +919,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // If this isn't a LocalAsMetadata record, we're dropping it. This used // to be legal, but there's no upgrade path. 
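The deque, forward_list, list, queue, stack and vector hunks above all apply the same C++17 library change: emplace_front/emplace_back/emplace return a reference to the newly constructed element when _LIBCPP_STD_VER > 14, and keep returning void in C++11/14 mode. A minimal usage sketch of what the new return value buys (ordinary client code, not libc++ internals):

    #include <vector>

    int main() {
      std::vector<int> v;
    #if __cplusplus > 201402L
      // C++17: emplace_back returns a reference to the element it constructed.
      int &r = v.emplace_back(41);
      ++r;               // v[0] == 42, no second lookup needed
    #else
      // C++11/14: emplace_back returns void, so re-read the element via back().
      v.emplace_back(41);
      ++v.back();
    #endif
      return v[0] == 42 ? 0 : 1;
    }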
auto dropRecord = [&] { - MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++); + MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo); + NextMetadataNo++; }; if (Record.size() != 2) { dropRecord(); @@ -934,7 +935,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_OLD_NODE: { @@ -959,7 +961,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( } else Elts.push_back(nullptr); } - MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++); + MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_VALUE: { @@ -972,7 +975,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_DISTINCT_NODE: @@ -985,7 +989,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Elts.push_back(getMDOrNull(ID)); MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) : MDNode::get(Context, Elts), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_LOCATION: { @@ -999,7 +1004,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Metadata *InlinedAt = getMDOrNull(Record[4]); MetadataList.assignValue( GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GENERIC_DEBUG: { @@ -1019,7 +1025,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( DwarfOps.push_back(getMDOrNull(Record[I])); MetadataList.assignValue( GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_SUBRANGE: { @@ -1030,7 +1037,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DISubrange, (Context, Record[1], unrotateSign(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_ENUMERATOR: { @@ -1041,7 +1049,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]), getMDString(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_BASIC_TYPE: { @@ -1053,7 +1062,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIBasicType, (Context, Record[1], getMDString(Record[2]), Record[3], Record[4], Record[5])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_DERIVED_TYPE: { @@ -1069,7 +1079,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getDITypeRefOrNull(Record[5]), getDITypeRefOrNull(Record[6]), Record[7], Record[8], Record[9], Flags, getDITypeRefOrNull(Record[11]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_COMPOSITE_TYPE: { @@ -1134,7 +1145,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (!IsNotUsedInTypeRef && Identifier) MetadataList.addTypeRef(*Identifier, *cast(CT)); - MetadataList.assignValue(CT, NextMetadataNo++); + MetadataList.assignValue(CT, NextMetadataNo); + 
NextMetadataNo++; break; } case bitc::METADATA_SUBROUTINE_TYPE: { @@ -1151,7 +1163,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } @@ -1165,7 +1178,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDString(Record[4]), getMDString(Record[5]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } @@ -1181,7 +1195,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Record.size() == 3 ? DIFile::CSK_None : static_cast(Record[3]), Record.size() == 3 ? nullptr : getMDString(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_COMPILE_UNIT: { @@ -1200,7 +1215,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Record.size() <= 14 ? 0 : Record[14], Record.size() <= 16 ? true : Record[16]); - MetadataList.assignValue(CU, NextMetadataNo++); + MetadataList.assignValue(CU, NextMetadataNo); + NextMetadataNo++; // Move the Upgrade the list of subprograms. if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11])) @@ -1247,7 +1263,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[16 + Offset]), // declaration getMDOrNull(Record[17 + Offset]) // variables )); - MetadataList.assignValue(SP, NextMetadataNo++); + MetadataList.assignValue(SP, NextMetadataNo); + NextMetadataNo++; // Upgrade sp->function mapping to function->sp mapping. if (HasFn) { @@ -1272,7 +1289,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DILexicalBlock, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3], Record[4])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_LEXICAL_BLOCK_FILE: { @@ -1284,7 +1302,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DILexicalBlockFile, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_NAMESPACE: { @@ -1298,7 +1317,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), getMDString(Record[3]), Record[4], ExportSymbols)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_MACRO: { @@ -1310,7 +1330,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIMacro, (Context, Record[1], Record[2], getMDString(Record[3]), getMDString(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_MACRO_FILE: { @@ -1322,7 +1343,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIMacroFile, (Context, Record[1], Record[2], getMDOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_TEMPLATE_TYPE: { @@ -1333,7 +1355,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, (Context, getMDString(Record[1]), getDITypeRefOrNull(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_TEMPLATE_VALUE: { @@ -1346,7 +1369,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, 
Record[1], getMDString(Record[2]), getDITypeRefOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GLOBAL_VAR: { @@ -1364,7 +1388,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[4]), Record[5], getDITypeRefOrNull(Record[6]), Record[7], Record[8], getMDOrNull(Record[10]), Record[11])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; } else if (Version == 0) { // Upgrade old metadata, which stored a global variable reference or a // ConstantInt here. @@ -1396,7 +1421,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[10]), AlignInBits)); auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr); - MetadataList.assignValue(DGVE, NextMetadataNo++); + MetadataList.assignValue(DGVE, NextMetadataNo); + NextMetadataNo++; if (Attach) Attach->addDebugInfo(DGVE); } else @@ -1429,7 +1455,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag], getDITypeRefOrNull(Record[5 + HasTag]), Record[6 + HasTag], Flags, AlignInBits)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_EXPRESSION: { @@ -1446,7 +1473,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GLOBAL_VAR_EXPR: { @@ -1457,7 +1485,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_OBJC_PROPERTY: { @@ -1471,7 +1500,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[2]), Record[3], getMDString(Record[4]), getMDString(Record[5]), Record[6], getDITypeRefOrNull(Record[7]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_IMPORTED_ENTITY: { @@ -1484,7 +1514,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, Record[1], getMDOrNull(Record[2]), getDITypeRefOrNull(Record[3]), Record[4], getMDString(Record[5]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_STRING_OLD: { @@ -1494,13 +1525,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String); ++NumMDStringLoaded; Metadata *MD = MDString::get(Context, String); - MetadataList.assignValue(MD, NextMetadataNo++); + MetadataList.assignValue(MD, NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_STRINGS: { auto CreateNextMDString = [&](StringRef Str) { ++NumMDStringLoaded; - MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++); + MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo); + NextMetadataNo++; }; if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString)) return Err; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 27a9ac337f25..6906f67ebacb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3439,7 +3439,10 @@ SDValue 
DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector()) { + if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { + // Later code assumes the vector loads produced will be mergeable, so we + // must pad the final entry up to the previous width. Scalars are + // combined separately. SmallVector Loads; Loads.push_back(L); unsigned size = L->getValueSizeInBits(0); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index 740766b151bb..91c335fac32d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; -def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs", - "AvoidQuadLdStPairs", "true", - "Do not form quad load/store pair operations">; +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", @@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", "Samsung Exynos-M1 processors", - [FeatureAvoidQuadLdStPairs, + [FeatureSlowPaired128, FeatureCRC, FeatureCrypto, FeatureCustomCheapAsMoveHandling, @@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", "Samsung Exynos-M2/M3 processors", - [FeatureAvoidQuadLdStPairs, + [FeatureSlowPaired128, FeatureCRC, FeatureCrypto, FeatureCustomCheapAsMoveHandling, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5c8acba26aab..4c789926e3e4 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const { return false; // On some CPUs quad load/store pairs are slower than two single load/stores. 
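The long run of MetadataLoader.cpp hunks above is one mechanical change repeated for every record kind: the NextMetadataNo++ post-increment is hoisted out of the assignValue() call and performed as a separate statement afterwards. The diff does not state the motivation, but the rewritten form no longer depends on when the increment happens relative to the evaluation of the call's other operands, several of which (getMDOrNull(), GET_OR_DISTINCT(...)) can themselves trigger further metadata loading. A reduced, self-contained illustration of that ordering hazard; all names here are invented for the sketch:

    #include <cstdio>

    static int NextNo = 0;

    // Stand-in for getMDOrNull()/GET_OR_DISTINCT(): an operand whose
    // evaluation also observes the counter.
    static int observeCounter() { return NextNo; }

    static void assignValue(int Payload, int Slot) {
      std::printf("payload built at %d, assigned to slot %d\n", Payload, Slot);
    }

    int main() {
      // Risky form: in assignValue(observeCounter(), NextNo++) the read of
      // NextNo inside observeCounter() and the increment in the other argument
      // are not ordered with respect to each other, so Payload may or may not
      // see the bumped counter (and before C++17 this is undefined behavior).

      // The patch's form: evaluate everything first, then increment explicitly.
      assignValue(observeCounter(), NextNo);
      NextNo++;
      return 0;
    }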
- if (Subtarget.avoidQuadLdStPairs()) { + if (Subtarget.isPaired128Slow()) { switch (MI.getOpcode()) { default: break; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h index 73f63b8b9f67..a99340225082 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -79,7 +79,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool CustomAsCheapAsMove = false; bool UsePostRAScheduler = false; bool Misaligned128StoreIsSlow = false; - bool AvoidQuadLdStPairs = false; + bool Paired128IsSlow = false; bool UseAlternateSExtLoadCVTF32Pattern = false; bool HasArithmeticBccFusion = false; bool HasArithmeticCbzFusion = false; @@ -189,7 +189,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { } bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } - bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; } + bool isPaired128Slow() const { return Paired128IsSlow; } bool useAlternateSExtLoadCVTF32Pattern() const { return UseAlternateSExtLoadCVTF32Pattern; } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td index 0b2badff7ccf..13022009af16 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "Enable SI Machine Scheduler" >; +// Unless +-flat-for-global is specified, turn on FlatForGlobal for +// all OS-es on VI and newer hardware to avoid assertion failures due +// to missing ADDR64 variants of MUBUF instructions. +// FIXME: moveToVALU should be able to handle converting addr64 MUBUF +// instructions. 
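The AArch64 hunks around this point rename a tuning hook rather than change behavior, and they show the three places a SubtargetFeature lives: the TableGen definition ("slow-paired-128", which sets the field Paired128IsSlow), the mirror bool in AArch64Subtarget, and the accessor consulted by the load/store pairing code above. A compressed C++ view of how the last two pieces fit together (class bodies abbreviated, names taken from the patch, not the real headers):

    // The subtarget keeps a bool initialized from the feature string, and
    // isCandidateToMergeOrPair() queries the accessor to refuse forming
    // 128-bit LDP/STP on CPUs (e.g. Exynos-M1) where paired 128-bit memory
    // operations are slow.
    struct SubtargetSketch {
      bool Paired128IsSlow = false;        // set by "slow-paired-128"
      bool isPaired128Slow() const { return Paired128IsSlow; }
    };

    bool mayPair128BitLoadStore(const SubtargetSketch &ST) {
      if (ST.isPaired128Slow())
        return false;                      // keep the two single accesses
      return true;
    }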
+ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true", diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 4acd55eb6120..974e79fff3d7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget(); SIProgramInfo KernelInfo; - if (STM.isAmdCodeObjectV2()) { + if (STM.isAmdCodeObjectV2(*MF)) { getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } @@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const AMDGPUSubtarget &STM = MF->getSubtarget(); - if (MFI->isKernel() && STM.isAmdCodeObjectV2()) { + if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) { AMDGPUTargetStreamer *TS = static_cast(OutStreamer->getTargetStreamer()); SmallString<128> SymbolName; @@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, // FIXME: Should use getKernArgSize header.kernarg_segment_byte_size = - STM.getKernArgSegmentSize(MFI->getABIArgOffset()); + STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 2b4fc5397b18..5bf347e48650 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { unsigned Opc = (VT == MVT::f64) ? 
AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; - // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, - // omod - SDValue Ops[8]; - - SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); - SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); - SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); - CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e48c1943cb01..54caa2c5dfad 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, SDLoc SL(N); switch (Opc) { case ISD::FADD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) SDValue LHS = N0.getOperand(0); SDValue RHS = N0.getOperand(1); @@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, } case ISD::FMA: case ISD::FMAD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) SDValue LHS = N0.getOperand(0); SDValue MHS = N0.getOperand(1); @@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) NODE_NAME_CASE(KILL) + NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(SENDMSGHALT) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 69567aa5f713..f6adceac6f11 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -119,6 +119,16 @@ class AMDGPUTargetLowering : public TargetLowering { public: AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); + bool mayIgnoreSignedZero(SDValue Op) const { + if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only + return true; + + if (const auto *BO = dyn_cast(Op)) + return BO->Flags.hasNoSignedZeros(); + + return false; + } + bool isFAbsFree(EVT VT) const override; bool isFNegFree(EVT VT) const override; bool isTruncateFree(EVT Src, EVT Dest) const override; @@ -320,6 +330,7 @@ enum NodeType : unsigned { INTERP_P2, PC_ADD_REL_OFFSET, KILL, + DUMMY_CHAIN, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index f079c8d0c70c..d7fa28bdc001 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode< // This argument to this node is a dword address. 
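The mayIgnoreSignedZero() guard added above exists because distributing an fneg through fadd/fma is not value-preserving for signed zeros unless no-signed-zeros (or unsafe FP math) is in effect. A scalar counterexample, runnable as ordinary IEEE C++ without fast-math (this only models the folded expressions, not the SelectionDAG code):

    #include <cmath>
    #include <cstdio>

    int main() {
      // (fneg (fadd x, y)) vs (fadd (fneg x), (fneg y)) with x = +0.0, y = -0.0:
      double x = +0.0, y = -0.0;
      double folded   = -(x + y);      // -(+0.0)          == -0.0
      double expanded = (-x) + (-y);   // (-0.0) + (+0.0)  == +0.0
      std::printf("signbit folded=%d expanded=%d\n",
                  (int)std::signbit(folded), (int)std::signbit(expanded));
      return 0;
    }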
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; +// Force dependencies for vector trunc stores +def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; + def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 74851aedbb21..c35a67de1d7f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, ParseSubtargetFeatures(GPU, FullFS); + // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es + // on VI and newer hardware to avoid assertion failures due to missing ADDR64 + // variants of MUBUF instructions. + if (!hasAddr64() && !FS.contains("flat-for-global")) { + FlatForGlobal = true; + } + // FIXME: I don't think think Evergreen has any useful support for // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? @@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } -unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const { - unsigned ImplicitBytes = getImplicitArgNumBytes(); +unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF, + unsigned ExplicitArgBytes) const { + unsigned ImplicitBytes = getImplicitArgNumBytes(MF); if (ImplicitBytes == 0) return ExplicitArgBytes; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 51ba501bddd1..0e3cb7dc1f87 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -311,22 +311,31 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return EnableXNACK; } - bool isAmdCodeObjectV2() const { - return isAmdHsaOS() || isMesa3DOS(); + bool isMesaKernel(const MachineFunction &MF) const { + return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } + + // Covers VS/PS/CS graphics shaders + bool isMesaGfxShader(const MachineFunction &MF) const { + return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } + + bool isAmdCodeObjectV2(const MachineFunction &MF) const { + return isAmdHsaOS() || isMesaKernel(MF); } /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. - unsigned getExplicitKernelArgOffset() const { - return isAmdCodeObjectV2() ? 0 : 36; + unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { + return isAmdCodeObjectV2(MF) ? 0 : 36; } unsigned getAlignmentForImplicitArgPtr() const { return isAmdHsaOS() ? 
8 : 4; } - unsigned getImplicitArgNumBytes() const { - if (isMesa3DOS()) + unsigned getImplicitArgNumBytes(const MachineFunction &MF) const { + if (isMesaKernel(MF)) return 16; if (isAmdHsaOS() && isOpenCLEnv()) return 32; @@ -585,7 +594,7 @@ class SISubtarget final : public AMDGPUSubtarget { return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } - unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const; + unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const; /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index de7ce5cb9e47..77fee4356b65 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, llvm_unreachable("Unsupported private trunc store"); } - SDValue Chain = Store->getChain(); + SDValue OldChain = Store->getChain(); + bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN); + // Skip dummy + SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain; SDValue BasePtr = Store->getBasePtr(); SDValue Offset = Store->getOffset(); EVT MemVT = Store->getMemoryVT(); @@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, // Store dword // TODO: Can we be smarter about MachinePointerInfo? - return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + + // If we are part of expanded vector, make our neighbors depend on this store + if (VectorTrunc) { + // Make all other vector elements depend on this store + Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore); + DAG.ReplaceAllUsesOfValueWith(OldChain, Chain); + } + return NewStore; } SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Neither LOCAL nor PRIVATE can do vectors at the moment if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && VT.isVector()) { + if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + // Add an extra level of chain to isolate this vector + SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); + // TODO: can the chain be replaced without creating a new store? 
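The DUMMY_CHAIN node introduced above addresses an ordering problem in lowerPrivateTruncStore(): every scalarized element of a truncating private store is lowered to a read-modify-write of an entire dword (load dword, mask, shift, or, store dword), so the element stores of one vector have to be chained to one another or an earlier merge can be lost. A plain C++ model of that lost-update hazard (the helper below only mimics the dword RMW; it is not the DAG lowering itself):

    #include <cstdint>
    #include <cstdio>

    // One scalarized byte store = read-modify-write of the containing dword.
    static uint32_t rmwStoreByte(uint32_t Dword, unsigned Byte, uint8_t Val) {
      uint32_t Mask = 0xffu << (8 * Byte);
      return (Dword & ~Mask) | (uint32_t(Val) << (8 * Byte));
    }

    int main() {
      uint32_t Mem = 0;
      // Ordered (what the added chain enforces): the second RMW starts from
      // the result of the first.
      uint32_t Ordered = rmwStoreByte(rmwStoreByte(Mem, 0, 0x11), 1, 0x22);
      // Unordered: both RMWs start from the stale dword, so whichever store
      // lands last wins and the other byte is lost.
      uint32_t Unordered = rmwStoreByte(Mem, 1, 0x22);
      std::printf("ordered=0x%08x unordered=0x%08x\n",
                  (unsigned)Ordered, (unsigned)Unordered);
      return 0;
    }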
+ SDValue NewStore = DAG.getTruncStore( + NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), + MemVT, StoreNode->getAlignment(), + StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo()); + StoreNode = cast(NewStore); + } + return scalarizeVectorStore(StoreNode, DAG); } @@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Put the mask in correct place SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); - // Put the mask in correct place + // Put the value bits in correct place SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); @@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments( unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); - unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset(); + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); SDValue Arg = DAG.getLoad( diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td index 19795bdde647..9210e66b0fe7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; def MOV : R600_1OP <0x19, "MOV", []>; + +// This is a hack to get rid of DUMMY_CHAIN nodes. +// Most DUMMY_CHAINs should be eliminated during legalization, but undef +// values can sneak in some to selection. +let isPseudo = 1, isCodeGenOnly = 1 in { +def DUMMY_CHAIN : AMDGPUInst < + (outs), + (ins), + "DUMMY_CHAIN", + [(R600dummy_chain)] +>; +} // end let isPseudo = 1, isCodeGenOnly = 1 + + let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { class MOV_IMM : AMDGPUInst < diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index d0a69eafc58e..0b5715515880 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } @@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) { - assert(ST.isAmdCodeObjectV2()); + assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)); MRI.addLiveIn(PreloadedPrivateBufferReg); MBB.addLiveIn(PreloadedPrivateBufferReg); } @@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, bool CopyBuffer = ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister && + ST.isAmdCodeObjectV2(MF) && ScratchRsrcReg != PreloadedPrivateBufferReg; // This needs to be careful of the copying order to avoid overwriting one of @@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .addReg(PreloadedPrivateBufferReg, RegState::Kill); } - if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) { - assert(!ST.isAmdCodeObjectV2()); + if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || 
(PreloadedPrivateBufferReg == AMDGPU::NoRegister))) { + assert(!ST.isAmdCodeObjectV2(MF)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); // Use relocations to get the pointer, and setup the other bits manually. uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - BuildMI(MBB, I, DL, SMovB32, Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, SMovB32, Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + if (MFI->hasPrivateMemoryInputPtr()) { + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + + if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); + + BuildMI(MBB, I, DL, Mov64, Rsrc01) + .addReg(PreloadedPrivateBufferReg) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } else { + const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); + + PointerType *PtrTy = + PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), + AMDGPUAS::CONSTANT_ADDRESS); + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); + auto MMO = MF.getMachineMemOperand(PtrInfo, + MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable, + 0, 0); + BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) + .addReg(PreloadedPrivateBufferReg) + .addImm(0) // offset + .addImm(0) // glc + .addMemOperand(MMO) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } + } else { + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + + BuildMI(MBB, I, DL, SMovB32, Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + } BuildMI(MBB, I, DL, SMovB32, Rsrc2) .addImm(Rsrc23 & 0xffffffff) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9140fe6cd148..b98f9f400ee7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (!AMDGPU::isShader(CallConv)) { assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()); } else { - assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() && + assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() && !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && @@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments( !Info->hasWorkItemIDZ()); } + if (Info->hasPrivateMemoryInputPtr()) { + unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI); + MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass); + CCInfo.AllocateReg(PrivateMemoryPtrReg); + } + // FIXME: How should these inputs interact with inreg / custom SGPR inputs? 
if (Info->hasPrivateSegmentBuffer()) { unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI); @@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (VA.isMemLoc()) { VT = Ins[i].VT; EVT MemVT = VA.getLocVT(); - const unsigned Offset = Subtarget->getExplicitKernelArgOffset() + + const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. @@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (getTargetMachine().getOptLevel() == CodeGenOpt::None) HasStackObjects = true; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects) { // If we have stack objects, we unquestionably need the private buffer // resource. For the Code Object V2 ABI, this will be the first 4 user @@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // TODO: Should this propagate fast-math-flags? switch (IntrinsicID) { + case Intrinsic::amdgcn_implicit_buffer_ptr: { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + } case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { - if (!Subtarget->isAmdCodeObjectV2()) { + if (!Subtarget->isAmdCodeObjectV2(MF)) { DiagnosticInfoUnsupported BadIntrin( *MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index e911817c451d..ecd46b95ca6f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), - WorkItemIDZ(false) { + WorkItemIDZ(false), + PrivateMemoryInputPtr(false) { const SISubtarget &ST = MF.getSubtarget(); const Function *F = MF.getFunction(); @@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || MaySpill) PrivateSegmentWaveByteOffset = true; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F->hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; + } else if (ST.isMesaGfxShader(MF)) { + if (HasStackObjects || MaySpill) + PrivateMemoryInputPtr = true; } // We don't need to worry about accessing spills with flat instructions. 
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { return FlatScratchInitUserSGPR; } +unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) { + PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return PrivateMemoryPtrUserSGPR; +} + SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg ( MachineFunction *MF, unsigned FrameIndex, diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 3b4e233cd787..6fc8d18bceba 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned ScratchRSrcReg; unsigned ScratchWaveOffsetReg; + // Input registers for non-HSA ABI + unsigned PrivateMemoryPtrUserSGPR; + // Input registers setup for the HSA ABI. // User SGPRs in allocation order. unsigned PrivateSegmentBufferUserSGPR; @@ -163,6 +166,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { bool WorkItemIDY : 1; bool WorkItemIDZ : 1; + // Private memory buffer + // Compute directly in sgpr[0:1] + // Other shaders indirect 64-bits at sgpr[0:1] + bool PrivateMemoryInputPtr : 1; + MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; @@ -198,6 +206,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); unsigned addDispatchID(const SIRegisterInfo &TRI); unsigned addFlatScratchInit(const SIRegisterInfo &TRI); + unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI); // Add system SGPRs. 
unsigned addWorkGroupIDX() { @@ -302,6 +311,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return WorkItemIDZ; } + bool hasPrivateMemoryInputPtr() const { + return PrivateMemoryInputPtr; + } + unsigned getNumUserSGPRs() const { return NumUserSGPRs; } @@ -338,6 +351,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return QueuePtrUserSGPR; } + unsigned getPrivateMemoryPtrUserSGPR() const { + return PrivateMemoryPtrUserSGPR; + } + bool hasSpilledSGPRs() const { return HasSpilledSGPRs; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8c4b24a4504d..a1ed5e8441df 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - assert(ST.isAmdCodeObjectV2() && - "Non-CodeObjectV2 ABI currently uses relocations"); - assert(MFI->hasPrivateSegmentBuffer()); - return MFI->PrivateSegmentBufferUserSGPR; + if (ST.isAmdCodeObjectV2(MF)) { + assert(MFI->hasPrivateSegmentBuffer()); + return MFI->PrivateSegmentBufferUserSGPR; + } + assert(MFI->hasPrivateMemoryInputPtr()); + return MFI->PrivateMemoryPtrUserSGPR; case SIRegisterInfo::KERNARG_SEGMENT_PTR: assert(MFI->hasKernargSegmentPtr()); return MFI->KernargSegmentPtrUserSGPR; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 5efa64d25ce1..c2a4d4ba99b1 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -70,8 +70,10 @@ class VOP3_Profile : VOPProfile { } class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { + // v_div_scale_{f32|f64} do not support input modifiers. + let HasModifiers = 0; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); - let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"; + let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { @@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, AMDGPU def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { let SchedRW = [WriteFloatFMA, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } // Double precision division pre-scale. def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { let SchedRW = [WriteDouble, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 8ec9cb02813c..95db35ce8ffb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); + // Emit the XRay table for this function. + emitXRayTable(); + // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. 
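Two XRay-related pieces appear in this patch: xray_arm.cc (near the top) now flushes the instruction cache over the freshly patched sled through a declared __clear_cache(start, end), and ARMAsmPrinter::runOnMachineFunction() above now emits the per-function XRay table. A hedged sketch of the cache-maintenance pattern the runtime change relies on, written with the portable __builtin___clear_cache spelling rather than the libgcc/compiler-rt symbol:

    #include <cstdint>

    // After rewriting code in place (as patchSled() does), flush the modified
    // range so the ARM instruction cache observes the new bytes before the
    // patched code can be executed.
    void patchAndFlush(uint32_t *Begin, uint32_t *End, uint32_t Opcode) {
      for (uint32_t *P = Begin; P != End; ++P)
        *P = Opcode;                                // rewrite the instructions
      __builtin___clear_cache(reinterpret_cast<char *>(Begin),
                              reinterpret_cast<char *>(End));
    }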
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fb4c689bcb59..1606c1576465 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7571,11 +7571,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::MUL:          return LowerMUL(Op, DAG);
   case ISD::SDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ true);
     return LowerSDIV(Op, DAG);
   case ISD::UDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ false);
     return LowerUDIV(Op, DAG);
   case ISD::ADDC:
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index da96040d9996..08fe2bad281e 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31272,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
 }
 
-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
-                                        const X86Subtarget &Subtarget) {
-  if (!Subtarget.hasAVX512())
-    return false;
-
-  // FIXME: Scalar type may be supported if we move it to vector register.
-  if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
-    return false;
-
-  EVT SrcElVT = SrcVT.getScalarType();
-  EVT DstElVT = DstVT.getScalarType();
-  if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
-    return false;
-  if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
-    return false;
-  if (SrcVT.is512BitVector() || Subtarget.hasVLX())
-    return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
-  return false;
-}
-
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
-  if (Subtarget.hasSSE2())
-    // v16i16 -> v16i8
-    if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
-      return true;
-  if (Subtarget.hasSSE41())
-    // v8i32 -> v8i16
-    if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
-      return true;
-  return false;
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectUSatPattern(SDValue In, EVT VT) {
-  if (In.getOpcode() != ISD::UMIN)
-    return SDValue();
-
-  //Saturation with truncation. We truncate from InVT to VT.
-  assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
-         "Unexpected types for truncate operation");
-
-  APInt C;
-  if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
-    // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
-    // the element size of the destination type.
-    return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
-      SDValue();
-  }
-  return SDValue();
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// The types should allow to use VPMOVUS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
-                                       const X86Subtarget &Subtarget) {
-  if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-    return SDValue();
-  return detectUSatPattern(In, VT);
-}
-
-static SDValue
-combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
-                        const X86Subtarget &Subtarget) {
-  SDValue USatVal = detectUSatPattern(In, VT);
-  if (USatVal) {
-    if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-      return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
-    if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
-      SDValue Lo, Hi;
-      std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
-      return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
-    }
-  }
-  return SDValue();
-}
-
 /// This function detects the AVG pattern between vectors of unsigned i8/i16,
 /// which is c = (a + b + 1) / 2, and replace this operation with the efficient
 /// X86ISD::AVG instruction.
@@ -31925,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                         St->getPointerInfo(), St->getAlignment(),
                         St->getMemOperand()->getFlags());
 
-    if (SDValue Val =
-        detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
-      return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
-                             dl, Val, St->getBasePtr(),
-                             St->getMemoryVT(), St->getMemOperand(), DAG);
-
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     unsigned NumElems = VT.getVectorNumElements();
     assert(StVT != VT && "Cannot truncate to the same type");
@@ -32551,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
 
-  // Try to combine truncation with unsigned saturation.
-  if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
-    return Val;
-
   // The bitcast source is a direct mmx result.
   // Detect bitcasts between i32 to x86mmx
   if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
@@ -33790,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Try to synthesize horizontal adds from adds of shuffles.
+  // Try to synthesize horizontal subs from subs of shuffles.
   EVT VT = N->getValueType(0);
   if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
        (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
-      isHorizontalBinOp(Op0, Op1, true))
+      isHorizontalBinOp(Op0, Op1, false))
     return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
 
   return OptimizeConditionalInDecrement(N, DAG);
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6e30919246c7..7b0bddbbb831 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1436,6 +1436,14 @@ static bool canSinkInstructions(
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
         I->getType()->isTokenTy())
       return false;
+
+    // Conservatively return false if I is an inline-asm instruction. Sinking
+    // and merging inline-asm instructions can potentially create arguments
+    // that cannot satisfy the inline-asm constraints.
+    if (const auto *C = dyn_cast<CallInst>(I))
+      if (C->isInlineAsm())
+        return false;
+
     // Everything must have only one use too, apart from stores which
     // have no uses.
     if (!isa<StoreInst>(I) && !I->hasOneUse())
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
index d03c22af5b29..b531a66dbab3 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
@@ -9025,7 +9025,8 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
 
     // Variables that have initialization with side-effects are required.
     if (VD->getInit() && VD->getInit()->HasSideEffects(*this) &&
-        !VD->evaluateValue())
+        // We can get a value-dependent initializer during error recovery.
+        (VD->getInit()->isValueDependent() || !VD->evaluateValue()))
       return true;
 
     // Likewise, variables with tuple-like bindings are required if their
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index 137c69420ddf..e142a21b1e74 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -112,9 +112,8 @@ CodeGenFunction::~CodeGenFunction() {
   if (FirstBlockInfo)
     destroyBlockInfos(FirstBlockInfo);
 
-  if (getLangOpts().OpenMP) {
+  if (getLangOpts().OpenMP && CurFn)
     CGM.getOpenMPRuntime().functionFinished(*this);
-  }
 }
 
 CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T,
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
index 7d60aad3895d..3b4f3f81399d 100644
--- a/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
@@ -92,7 +92,13 @@ class IndexingDeclVisitor : public ConstDeclVisitor {
         Relations.emplace_back((unsigned)SymbolRole::RelationAccessorOf,
                                AssociatedProp);
 
-    if (!IndexCtx.handleDecl(D, (unsigned)SymbolRole::Dynamic, Relations))
+    // getLocation() returns beginning token of a method declaration, but for
+    // indexing purposes we want to point to the base name.
+    SourceLocation MethodLoc = D->getSelectorStartLoc();
+    if (MethodLoc.isInvalid())
+      MethodLoc = D->getLocation();
+
+    if (!IndexCtx.handleDecl(D, MethodLoc, (unsigned)SymbolRole::Dynamic, Relations))
       return false;
     IndexCtx.indexTypeSourceInfo(D->getReturnTypeSourceInfo(), D);
     bool hasIBActionAndFirst = D->hasAttr<IBActionAttr>();
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
index 45eff5ee6b62..b053c83c3f69 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
@@ -1684,7 +1684,7 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
     // If this is an incomplete array type, the actual type needs to
     // be calculated here.
     llvm::APSInt Zero(maxElements.getBitWidth(), maxElements.isUnsigned());
-    if (maxElements == Zero) {
+    if (maxElements == Zero && !Entity.isVariableLengthArrayNew()) {
       // Sizing an array implicitly to zero is not allowed by ISO C,
       // but is supported by GNU.
       SemaRef.Diag(IList->getLocStart(),
diff --git a/contrib/llvm/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/CMakeLists.txt
index 48ac5e038cdb..be424efbbd87 100644
--- a/contrib/llvm/tools/lld/CMakeLists.txt
+++ b/contrib/llvm/tools/lld/CMakeLists.txt
@@ -42,6 +42,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS})
   link_directories(${LLVM_LIBRARY_DIRS})
 
+  set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
   set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)
 
   find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
index 887ca12537ad..3cc235386b88 100644
--- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
@@ -918,12 +918,7 @@ const OutputSectionBase *LinkerScript::getSymbolSection(StringRef S) {
     return CurOutSec ? CurOutSec : (*OutputSections)[0];
   }
 
-  if (auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym))
-    return DR->Section ? DR->Section->OutSec : nullptr;
-  if (auto *DS = dyn_cast_or_null<DefinedSynthetic>(Sym))
-    return DS->Section;
-
-  return nullptr;
+  return SymbolTableSection<ELFT>::getOutputSection(Sym);
 }
 
 // Returns indices of ELF headers containing specific section, identified
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.h b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
index dfefb3821e75..f7e891ec3a66 100644
--- a/contrib/llvm/tools/lld/ELF/SyntheticSections.h
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
@@ -372,6 +372,8 @@ class SymbolTableSection final : public SyntheticSection {
 
   ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; }
 
+  static const OutputSectionBase *getOutputSection(SymbolBody *Sym);
+
   unsigned NumLocals = 0;
   StringTableSection<ELFT> &StrTabSec;
 
@@ -379,8 +381,6 @@ class SymbolTableSection final : public SyntheticSection {
   void writeLocalSymbols(uint8_t *&Buf);
   void writeGlobalSymbols(uint8_t *Buf);
 
-  const OutputSectionBase *getOutputSection(SymbolBody *Sym);
-
   // A vector of symbols and their string table offsets.
   std::vector<SymbolTableEntry> Symbols;
 };
diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc
index 60a86e8078b9..168c885c085e 100644
--- a/lib/clang/include/clang/Basic/Version.inc
+++ b/lib/clang/include/clang/Basic/Version.inc
@@ -8,4 +8,4 @@
 
 #define CLANG_VENDOR "FreeBSD "
 
-#define SVN_REVISION "292951"
+#define SVN_REVISION "293443"
diff --git a/lib/clang/include/lld/Config/Version.inc b/lib/clang/include/lld/Config/Version.inc
index 7f3314307fe9..6975ed0edddb 100644
--- a/lib/clang/include/lld/Config/Version.inc
+++ b/lib/clang/include/lld/Config/Version.inc
@@ -4,5 +4,5 @@
 #define LLD_VERSION_STRING "4.0.0"
 #define LLD_VERSION_MAJOR 4
 #define LLD_VERSION_MINOR 0
-#define LLD_REVISION_STRING "292951"
+#define LLD_REVISION_STRING "293443"
 #define LLD_REPOSITORY_STRING "FreeBSD"