diff --git a/contrib/compiler-rt/lib/builtins/arm/comparesf2.S b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S index e8095650e4fb..a5b7e36487d6 100644 --- a/contrib/compiler-rt/lib/builtins/arm/comparesf2.S +++ b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S @@ -283,7 +283,7 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2) END_COMPILERRT_FUNCTION(__unordsf2) #if defined(COMPILER_RT_ARMHF_TARGET) -DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum): +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum) vmov s0, r0 vmov s1, r1 b SYMBOL_NAME(__unordsf2) diff --git a/contrib/compiler-rt/lib/xray/xray_arm.cc b/contrib/compiler-rt/lib/xray/xray_arm.cc index d89322e833e5..f5e2cd2a93c2 100644 --- a/contrib/compiler-rt/lib/xray/xray_arm.cc +++ b/contrib/compiler-rt/lib/xray/xray_arm.cc @@ -19,6 +19,8 @@ #include #include +extern "C" void __clear_cache(void* start, void* end); + namespace __xray { uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { @@ -116,8 +118,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, // B #20 uint32_t *FirstAddress = reinterpret_cast(Sled.Address); + uint32_t *CurAddress = FirstAddress + 1; if (Enable) { - uint32_t *CurAddress = FirstAddress + 1; CurAddress = Write32bitLoadR0(CurAddress, reinterpret_cast(FuncId)); CurAddress = @@ -125,6 +127,7 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); CurAddress++; *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); + CurAddress++; std::atomic_store_explicit( reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); @@ -133,6 +136,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId, reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); } + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); return true; } diff --git a/contrib/libc++/include/__config b/contrib/libc++/include/__config index ddfab31f7fac..340c573a2a40 100644 --- a/contrib/libc++/include/__config +++ b/contrib/libc++/include/__config @@ -403,15 +403,6 @@ namespace std { #define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow"))) #endif -// A constexpr version of __builtin_memcmp was added in clang 4.0 -#if __has_builtin(__builtin_memcmp) -# ifdef __apple_build_version__ -// No shipping version of Apple's clang has constexpr __builtin_memcmp -# elif __clang_major__ > 3 -# define _LIBCPP_BUILTIN_MEMCMP_ISCONSTEXPR -# endif -#endif - #elif defined(_LIBCPP_COMPILER_GCC) #define _ALIGNAS(x) __attribute__((__aligned__(x))) diff --git a/contrib/libc++/include/__string b/contrib/libc++/include/__string index c85b874f054b..75608cea71d9 100644 --- a/contrib/libc++/include/__string +++ b/contrib/libc++/include/__string @@ -243,7 +243,7 @@ char_traits::compare(const char_type* __s1, const char_type* __s2, size_t { if (__n == 0) return 0; -#ifdef _LIBCPP_BUILTIN_MEMCMP_ISCONSTEXPR +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_memcmp(__s1, __s2, __n); #elif _LIBCPP_STD_VER <= 14 return memcmp(__s1, __s2, __n); @@ -265,7 +265,9 @@ char_traits::find(const char_type* __s, size_t __n, const char_type& __a) { if (__n == 0) return NULL; -#if _LIBCPP_STD_VER <= 14 +#if __has_feature(cxx_constexpr_string_builtins) + return __builtin_char_memchr(__s, to_int_type(__a), __n); +#elif _LIBCPP_STD_VER <= 14 return (const char_type*) memchr(__s, to_int_type(__a), __n); #else for (; __n; --__n) @@ -331,7 +333,7 @@ 
char_traits::compare(const char_type* __s1, const char_type* __s2, size { if (__n == 0) return 0; -#if __has_builtin(__builtin_wmemcmp) +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_wmemcmp(__s1, __s2, __n); #elif _LIBCPP_STD_VER <= 14 return wmemcmp(__s1, __s2, __n); @@ -351,7 +353,7 @@ inline _LIBCPP_CONSTEXPR_AFTER_CXX14 size_t char_traits::length(const char_type* __s) _NOEXCEPT { -#if __has_builtin(__builtin_wcslen) +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_wcslen(__s); #elif _LIBCPP_STD_VER <= 14 return wcslen(__s); @@ -369,7 +371,7 @@ char_traits::find(const char_type* __s, size_t __n, const char_type& __ { if (__n == 0) return NULL; -#if __has_builtin(__builtin_wmemchr) +#if __has_feature(cxx_constexpr_string_builtins) return __builtin_wmemchr(__s, __a, __n); #elif _LIBCPP_STD_VER <= 14 return wmemchr(__s, __a, __n); diff --git a/contrib/libc++/include/__threading_support b/contrib/libc++/include/__threading_support index a672a9f80eda..817a2cfb934f 100644 --- a/contrib/libc++/include/__threading_support +++ b/contrib/libc++/include/__threading_support @@ -40,6 +40,12 @@ #define _LIBCPP_THREAD_ABI_VISIBILITY inline _LIBCPP_INLINE_VISIBILITY #endif +#if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(no_thread_safety_analysis) +#define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) +#else +#define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS +#endif + _LIBCPP_BEGIN_NAMESPACE_STD #if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) @@ -98,25 +104,25 @@ typedef DWORD __libcpp_tls_key; _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t *__m); _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_trylock(__libcpp_mutex_t *__m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t *__m); _LIBCPP_THREAD_ABI_VISIBILITY @@ -129,10 +135,10 @@ int __libcpp_condvar_signal(__libcpp_condvar_t* __cv); _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m); -_LIBCPP_THREAD_ABI_VISIBILITY +_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m, timespec *__ts); diff --git a/contrib/libc++/include/deque b/contrib/libc++/include/deque index 0454162a8f73..92801540f2e7 100644 --- a/contrib/libc++/include/deque +++ b/contrib/libc++/include/deque @@ -110,8 +110,8 @@ public: void push_front(value_type&& v); void push_back(const value_type& 
v); void push_back(value_type&& v); - template reference emplace_front(Args&&... args); - template reference emplace_back(Args&&... args); + template reference emplace_front(Args&&... args); // reference in C++17 + template reference emplace_back(Args&&... args); // reference in C++17 template iterator emplace(const_iterator p, Args&&... args); iterator insert(const_iterator p, const value_type& v); iterator insert(const_iterator p, value_type&& v); @@ -1342,8 +1342,13 @@ public: void push_back(const value_type& __v); #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES #ifndef _LIBCPP_HAS_NO_VARIADICS +#if _LIBCPP_STD_VER > 14 template reference emplace_front(_Args&&... __args); - template reference emplace_back(_Args&&... __args); + template reference emplace_back (_Args&&... __args); +#else + template void emplace_front(_Args&&... __args); + template void emplace_back (_Args&&... __args); +#endif template iterator emplace(const_iterator __p, _Args&&... __args); #endif // _LIBCPP_HAS_NO_VARIADICS void push_front(value_type&& __v); @@ -1822,7 +1827,11 @@ deque<_Tp, _Allocator>::push_back(value_type&& __v) template template +#if _LIBCPP_STD_VER > 14 typename deque<_Tp, _Allocator>::reference +#else +void +#endif deque<_Tp, _Allocator>::emplace_back(_Args&&... __args) { allocator_type& __a = __base::__alloc(); @@ -1832,7 +1841,9 @@ deque<_Tp, _Allocator>::emplace_back(_Args&&... __args) __alloc_traits::construct(__a, _VSTD::addressof(*__base::end()), _VSTD::forward<_Args>(__args)...); ++__base::size(); +#if _LIBCPP_STD_VER > 14 return *--__base::end(); +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS @@ -1870,7 +1881,11 @@ deque<_Tp, _Allocator>::push_front(value_type&& __v) template template +#if _LIBCPP_STD_VER > 14 typename deque<_Tp, _Allocator>::reference +#else +void +#endif deque<_Tp, _Allocator>::emplace_front(_Args&&... __args) { allocator_type& __a = __base::__alloc(); @@ -1880,7 +1895,9 @@ deque<_Tp, _Allocator>::emplace_front(_Args&&... __args) __alloc_traits::construct(__a, _VSTD::addressof(*--__base::begin()), _VSTD::forward<_Args>(__args)...); --__base::__start_; ++__base::size(); +#if _LIBCPP_STD_VER > 14 return *__base::begin(); +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS diff --git a/contrib/libc++/include/forward_list b/contrib/libc++/include/forward_list index ce9a4b1bca90..879f2d3ce087 100644 --- a/contrib/libc++/include/forward_list +++ b/contrib/libc++/include/forward_list @@ -87,7 +87,7 @@ public: reference front(); const_reference front() const; - template reference emplace_front(Args&&... args); + template reference emplace_front(Args&&... args); // reference in C++17 void push_front(const value_type& v); void push_front(value_type&& v); @@ -747,7 +747,11 @@ public: #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES #ifndef _LIBCPP_HAS_NO_VARIADICS +#if _LIBCPP_STD_VER > 14 template reference emplace_front(_Args&&... __args); +#else + template void emplace_front(_Args&&... __args); +#endif #endif void push_front(value_type&& __v); #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES @@ -1103,7 +1107,11 @@ forward_list<_Tp, _Alloc>::assign(initializer_list __il) template template +#if _LIBCPP_STD_VER > 14 typename forward_list<_Tp, _Alloc>::reference +#else +void +#endif forward_list<_Tp, _Alloc>::emplace_front(_Args&&... __args) { __node_allocator& __a = base::__alloc(); @@ -1113,7 +1121,9 @@ forward_list<_Tp, _Alloc>::emplace_front(_Args&&... 
__args) _VSTD::forward<_Args>(__args)...); __h->__next_ = base::__before_begin()->__next_; base::__before_begin()->__next_ = __h.release(); +#if _LIBCPP_STD_VER > 14 return base::__before_begin()->__next_->__value_; +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS diff --git a/contrib/libc++/include/list b/contrib/libc++/include/list index 76c50a156093..fa148db516bc 100644 --- a/contrib/libc++/include/list +++ b/contrib/libc++/include/list @@ -93,10 +93,10 @@ public: size_type max_size() const noexcept; template - reference emplace_front(Args&&... args); + reference emplace_front(Args&&... args); // reference in C++17 void pop_front(); template - reference emplace_back(Args&&... args); + reference emplace_back(Args&&... args); // reference in C++17 void pop_back(); void push_front(const value_type& x); void push_front(value_type&& x); @@ -969,9 +969,17 @@ public: void push_back(value_type&& __x); #ifndef _LIBCPP_HAS_NO_VARIADICS template +#if _LIBCPP_STD_VER > 14 reference emplace_front(_Args&&... __args); +#else + void emplace_front(_Args&&... __args); +#endif template +#if _LIBCPP_STD_VER > 14 reference emplace_back(_Args&&... __args); +#else + void emplace_back(_Args&&... __args); +#endif template iterator emplace(const_iterator __p, _Args&&... __args); #endif // _LIBCPP_HAS_NO_VARIADICS @@ -1600,7 +1608,11 @@ list<_Tp, _Alloc>::push_back(value_type&& __x) template template +#if _LIBCPP_STD_VER > 14 typename list<_Tp, _Alloc>::reference +#else +void +#endif list<_Tp, _Alloc>::emplace_front(_Args&&... __args) { __node_allocator& __na = base::__node_alloc(); @@ -1609,12 +1621,20 @@ list<_Tp, _Alloc>::emplace_front(_Args&&... __args) __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), _VSTD::forward<_Args>(__args)...); __link_nodes_at_front(__hold.get()->__as_link(), __hold.get()->__as_link()); ++base::__sz(); +#if _LIBCPP_STD_VER > 14 return __hold.release()->__value_; +#else + __hold.release(); +#endif } template template +#if _LIBCPP_STD_VER > 14 typename list<_Tp, _Alloc>::reference +#else +void +#endif list<_Tp, _Alloc>::emplace_back(_Args&&... __args) { __node_allocator& __na = base::__node_alloc(); @@ -1624,7 +1644,11 @@ list<_Tp, _Alloc>::emplace_back(_Args&&... __args) __link_pointer __nl = __hold->__as_link(); __link_nodes_at_back(__nl, __nl); ++base::__sz(); +#if _LIBCPP_STD_VER > 14 return __hold.release()->__value_; +#else + __hold.release(); +#endif } template diff --git a/contrib/libc++/include/queue b/contrib/libc++/include/queue index 271c203e1d24..57d420c7406c 100644 --- a/contrib/libc++/include/queue +++ b/contrib/libc++/include/queue @@ -63,7 +63,7 @@ public: void push(const value_type& v); void push(value_type&& v); - template reference emplace(Args&&... args); + template reference emplace(Args&&... args); // reference in C++17 void pop(); void swap(queue& q) noexcept(is_nothrow_swappable_v) @@ -292,8 +292,13 @@ public: #ifndef _LIBCPP_HAS_NO_VARIADICS template _LIBCPP_INLINE_VISIBILITY +#if _LIBCPP_STD_VER > 14 reference emplace(_Args&&... __args) { return c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#else + void emplace(_Args&&... 
__args) + { c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#endif #endif // _LIBCPP_HAS_NO_VARIADICS #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES _LIBCPP_INLINE_VISIBILITY diff --git a/contrib/libc++/include/stack b/contrib/libc++/include/stack index 228864e4709b..c797ea5ec6e9 100644 --- a/contrib/libc++/include/stack +++ b/contrib/libc++/include/stack @@ -55,7 +55,7 @@ public: void push(const value_type& x); void push(value_type&& x); - template reference emplace(Args&&... args); + template reference emplace(Args&&... args); // reference in C++17 void pop(); void swap(stack& c) noexcept(is_nothrow_swappable_v) @@ -199,8 +199,13 @@ public: #ifndef _LIBCPP_HAS_NO_VARIADICS template _LIBCPP_INLINE_VISIBILITY +#if _LIBCPP_STD_VER > 14 reference emplace(_Args&&... __args) { return c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#else + void emplace(_Args&&... __args) + { c.emplace_back(_VSTD::forward<_Args>(__args)...);} +#endif #endif // _LIBCPP_HAS_NO_VARIADICS #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES _LIBCPP_INLINE_VISIBILITY diff --git a/contrib/libc++/include/vector b/contrib/libc++/include/vector index 4dd0e527ff57..ded057b10c8b 100644 --- a/contrib/libc++/include/vector +++ b/contrib/libc++/include/vector @@ -99,7 +99,7 @@ public: void push_back(const value_type& x); void push_back(value_type&& x); template - reference emplace_back(Args&&... args); + reference emplace_back(Args&&... args); // reference in C++17 void pop_back(); template iterator emplace(const_iterator position, Args&&... args); @@ -218,7 +218,7 @@ public: const_reference back() const; void push_back(const value_type& x); - template reference emplace_back(Args&&... args); // C++14 + template reference emplace_back(Args&&... args); // C++14; reference in C++17 void pop_back(); template iterator emplace(const_iterator position, Args&&... args); // C++14 @@ -679,7 +679,11 @@ public: #ifndef _LIBCPP_HAS_NO_VARIADICS template _LIBCPP_INLINE_VISIBILITY +#if _LIBCPP_STD_VER > 14 reference emplace_back(_Args&&... __args); +#else + void emplace_back(_Args&&... __args); +#endif #endif // _LIBCPP_HAS_NO_VARIADICS #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES _LIBCPP_INLINE_VISIBILITY @@ -1625,7 +1629,11 @@ vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) template template inline +#if _LIBCPP_STD_VER > 14 typename vector<_Tp, _Allocator>::reference +#else +void +#endif vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) { if (this->__end_ < this->__end_cap()) @@ -1639,7 +1647,9 @@ vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) } else __emplace_back_slow_path(_VSTD::forward<_Args>(__args)...); +#if _LIBCPP_STD_VER > 14 return this->back(); +#endif } #endif // _LIBCPP_HAS_NO_VARIADICS @@ -2336,9 +2346,16 @@ public: void push_back(const value_type& __x); #if _LIBCPP_STD_VER > 11 template - _LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args) { +#if _LIBCPP_STD_VER > 14 + _LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args) +#else + _LIBCPP_INLINE_VISIBILITY void emplace_back(_Args&&... __args) +#endif + { push_back ( value_type ( _VSTD::forward<_Args>(__args)... 
)); +#if _LIBCPP_STD_VER > 14 return this->back(); +#endif } #endif diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 07d5b5ea40dc..dcec3860f2ca 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -100,6 +100,10 @@ def int_amdgcn_dispatch_id : GCCBuiltin<"__builtin_amdgcn_dispatch_id">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; +def int_amdgcn_implicit_buffer_ptr : + GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, + Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Instruction Intrinsics //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index e8d86bdbca5b..c8d057949493 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1191,14 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return MayAlias; AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, - AAMDNodes(), V2, V2Size, V2AAInfo, - nullptr, UnderlyingV2); + AAMDNodes(), V2, MemoryLocation::UnknownSize, + V2AAInfo, nullptr, UnderlyingV2); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values - // cannot alias per GEP semantics: "A pointer value formed from a - // getelementptr instruction is associated with the addresses associated - // with the first operand of the getelementptr". + // cannot alias per GEP semantics: "Any memory access must be done through + // a pointer value associated with an address range of the memory access, + // otherwise the behavior is undefined.". return R; // If the max search depth is reached the result is undefined diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index a44b92effc7e..145b9c50367a 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -919,7 +919,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // If this isn't a LocalAsMetadata record, we're dropping it. This used // to be legal, but there's no upgrade path. 
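The deque, forward_list, list, queue, stack and vector hunks above all apply the same C++17 library change: emplace_front/emplace_back/emplace return a reference to the newly constructed element when _LIBCPP_STD_VER > 14, and keep returning void in C++11/14 mode. A minimal usage sketch of what the new return value buys (ordinary client code, not libc++ internals):

    #include <vector>

    int main() {
      std::vector<int> v;
    #if __cplusplus > 201402L
      // C++17: emplace_back returns a reference to the element it constructed.
      int &r = v.emplace_back(41);
      ++r;               // v[0] == 42, no second lookup needed
    #else
      // C++11/14: emplace_back returns void, so re-read the element via back().
      v.emplace_back(41);
      ++v.back();
    #endif
      return v[0] == 42 ? 0 : 1;
    }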
auto dropRecord = [&] { - MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++); + MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo); + NextMetadataNo++; }; if (Record.size() != 2) { dropRecord(); @@ -934,7 +935,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_OLD_NODE: { @@ -959,7 +961,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( } else Elts.push_back(nullptr); } - MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++); + MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_VALUE: { @@ -972,7 +975,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_DISTINCT_NODE: @@ -985,7 +989,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Elts.push_back(getMDOrNull(ID)); MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) : MDNode::get(Context, Elts), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_LOCATION: { @@ -999,7 +1004,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Metadata *InlinedAt = getMDOrNull(Record[4]); MetadataList.assignValue( GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GENERIC_DEBUG: { @@ -1019,7 +1025,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( DwarfOps.push_back(getMDOrNull(Record[I])); MetadataList.assignValue( GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_SUBRANGE: { @@ -1030,7 +1037,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DISubrange, (Context, Record[1], unrotateSign(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_ENUMERATOR: { @@ -1041,7 +1049,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]), getMDString(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_BASIC_TYPE: { @@ -1053,7 +1062,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIBasicType, (Context, Record[1], getMDString(Record[2]), Record[3], Record[4], Record[5])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_DERIVED_TYPE: { @@ -1069,7 +1079,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getDITypeRefOrNull(Record[5]), getDITypeRefOrNull(Record[6]), Record[7], Record[8], Record[9], Flags, getDITypeRefOrNull(Record[11]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_COMPOSITE_TYPE: { @@ -1134,7 +1145,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (!IsNotUsedInTypeRef && Identifier) MetadataList.addTypeRef(*Identifier, *cast(CT)); - MetadataList.assignValue(CT, NextMetadataNo++); + MetadataList.assignValue(CT, NextMetadataNo); + 
NextMetadataNo++; break; } case bitc::METADATA_SUBROUTINE_TYPE: { @@ -1151,7 +1163,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } @@ -1165,7 +1178,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDString(Record[4]), getMDString(Record[5]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } @@ -1181,7 +1195,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Record.size() == 3 ? DIFile::CSK_None : static_cast(Record[3]), Record.size() == 3 ? nullptr : getMDString(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_COMPILE_UNIT: { @@ -1200,7 +1215,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Record.size() <= 14 ? 0 : Record[14], Record.size() <= 16 ? true : Record[16]); - MetadataList.assignValue(CU, NextMetadataNo++); + MetadataList.assignValue(CU, NextMetadataNo); + NextMetadataNo++; // Move the Upgrade the list of subprograms. if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11])) @@ -1247,7 +1263,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[16 + Offset]), // declaration getMDOrNull(Record[17 + Offset]) // variables )); - MetadataList.assignValue(SP, NextMetadataNo++); + MetadataList.assignValue(SP, NextMetadataNo); + NextMetadataNo++; // Upgrade sp->function mapping to function->sp mapping. if (HasFn) { @@ -1272,7 +1289,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DILexicalBlock, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3], Record[4])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_LEXICAL_BLOCK_FILE: { @@ -1284,7 +1302,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DILexicalBlockFile, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_NAMESPACE: { @@ -1298,7 +1317,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), getMDString(Record[3]), Record[4], ExportSymbols)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_MACRO: { @@ -1310,7 +1330,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIMacro, (Context, Record[1], Record[2], getMDString(Record[3]), getMDString(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_MACRO_FILE: { @@ -1322,7 +1343,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIMacroFile, (Context, Record[1], Record[2], getMDOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_TEMPLATE_TYPE: { @@ -1333,7 +1355,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, (Context, getMDString(Record[1]), getDITypeRefOrNull(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_TEMPLATE_VALUE: { @@ -1346,7 +1369,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, 
Record[1], getMDString(Record[2]), getDITypeRefOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GLOBAL_VAR: { @@ -1364,7 +1388,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[4]), Record[5], getDITypeRefOrNull(Record[6]), Record[7], Record[8], getMDOrNull(Record[10]), Record[11])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; } else if (Version == 0) { // Upgrade old metadata, which stored a global variable reference or a // ConstantInt here. @@ -1396,7 +1421,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[10]), AlignInBits)); auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr); - MetadataList.assignValue(DGVE, NextMetadataNo++); + MetadataList.assignValue(DGVE, NextMetadataNo); + NextMetadataNo++; if (Attach) Attach->addDebugInfo(DGVE); } else @@ -1429,7 +1455,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag], getDITypeRefOrNull(Record[5 + HasTag]), Record[6 + HasTag], Flags, AlignInBits)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_EXPRESSION: { @@ -1446,7 +1473,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GLOBAL_VAR_EXPR: { @@ -1457,7 +1485,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_OBJC_PROPERTY: { @@ -1471,7 +1500,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[2]), Record[3], getMDString(Record[4]), getMDString(Record[5]), Record[6], getDITypeRefOrNull(Record[7]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_IMPORTED_ENTITY: { @@ -1484,7 +1514,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, Record[1], getMDOrNull(Record[2]), getDITypeRefOrNull(Record[3]), Record[4], getMDString(Record[5]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_STRING_OLD: { @@ -1494,13 +1525,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String); ++NumMDStringLoaded; Metadata *MD = MDString::get(Context, String); - MetadataList.assignValue(MD, NextMetadataNo++); + MetadataList.assignValue(MD, NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_STRINGS: { auto CreateNextMDString = [&](StringRef Str) { ++NumMDStringLoaded; - MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++); + MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo); + NextMetadataNo++; }; if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString)) return Err; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 27a9ac337f25..6906f67ebacb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3439,7 +3439,10 @@ SDValue 
DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector()) { + if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { + // Later code assumes the vector loads produced will be mergeable, so we + // must pad the final entry up to the previous width. Scalars are + // combined separately. SmallVector Loads; Loads.push_back(L); unsigned size = L->getValueSizeInBits(0); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index 740766b151bb..91c335fac32d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; -def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs", - "AvoidQuadLdStPairs", "true", - "Do not form quad load/store pair operations">; +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", @@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", "Samsung Exynos-M1 processors", - [FeatureAvoidQuadLdStPairs, + [FeatureSlowPaired128, FeatureCRC, FeatureCrypto, FeatureCustomCheapAsMoveHandling, @@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", "Samsung Exynos-M2/M3 processors", - [FeatureAvoidQuadLdStPairs, + [FeatureSlowPaired128, FeatureCRC, FeatureCrypto, FeatureCustomCheapAsMoveHandling, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5c8acba26aab..4c789926e3e4 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const { return false; // On some CPUs quad load/store pairs are slower than two single load/stores. 
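The long run of MetadataLoader.cpp hunks above is one mechanical change repeated for every record kind: the NextMetadataNo++ post-increment is hoisted out of the assignValue() call and performed as a separate statement afterwards. The diff does not state the motivation, but the rewritten form no longer depends on when the increment happens relative to the evaluation of the call's other operands, several of which (getMDOrNull(), GET_OR_DISTINCT(...)) can themselves trigger further metadata loading. A reduced, self-contained illustration of that ordering hazard; all names here are invented for the sketch:

    #include <cstdio>

    static int NextNo = 0;

    // Stand-in for getMDOrNull()/GET_OR_DISTINCT(): an operand whose
    // evaluation also observes the counter.
    static int observeCounter() { return NextNo; }

    static void assignValue(int Payload, int Slot) {
      std::printf("payload built at %d, assigned to slot %d\n", Payload, Slot);
    }

    int main() {
      // Risky form: in assignValue(observeCounter(), NextNo++) the read of
      // NextNo inside observeCounter() and the increment in the other argument
      // are not ordered with respect to each other, so Payload may or may not
      // see the bumped counter (and before C++17 this is undefined behavior).

      // The patch's form: evaluate everything first, then increment explicitly.
      assignValue(observeCounter(), NextNo);
      NextNo++;
      return 0;
    }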
- if (Subtarget.avoidQuadLdStPairs()) { + if (Subtarget.isPaired128Slow()) { switch (MI.getOpcode()) { default: break; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h index 73f63b8b9f67..a99340225082 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -79,7 +79,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool CustomAsCheapAsMove = false; bool UsePostRAScheduler = false; bool Misaligned128StoreIsSlow = false; - bool AvoidQuadLdStPairs = false; + bool Paired128IsSlow = false; bool UseAlternateSExtLoadCVTF32Pattern = false; bool HasArithmeticBccFusion = false; bool HasArithmeticCbzFusion = false; @@ -189,7 +189,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { } bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } - bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; } + bool isPaired128Slow() const { return Paired128IsSlow; } bool useAlternateSExtLoadCVTF32Pattern() const { return UseAlternateSExtLoadCVTF32Pattern; } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td index 0b2badff7ccf..13022009af16 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "Enable SI Machine Scheduler" >; +// Unless +-flat-for-global is specified, turn on FlatForGlobal for +// all OS-es on VI and newer hardware to avoid assertion failures due +// to missing ADDR64 variants of MUBUF instructions. +// FIXME: moveToVALU should be able to handle converting addr64 MUBUF +// instructions. 
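The AArch64 hunks around this point rename a tuning hook rather than change behavior, and they show the three places a SubtargetFeature lives: the TableGen definition ("slow-paired-128", which sets the field Paired128IsSlow), the mirror bool in AArch64Subtarget, and the accessor consulted by the load/store pairing code above. A compressed C++ view of how the last two pieces fit together (class bodies abbreviated, names taken from the patch, not the real headers):

    // The subtarget keeps a bool initialized from the feature string, and
    // isCandidateToMergeOrPair() queries the accessor to refuse forming
    // 128-bit LDP/STP on CPUs (e.g. Exynos-M1) where paired 128-bit memory
    // operations are slow.
    struct SubtargetSketch {
      bool Paired128IsSlow = false;        // set by "slow-paired-128"
      bool isPaired128Slow() const { return Paired128IsSlow; }
    };

    bool mayPair128BitLoadStore(const SubtargetSketch &ST) {
      if (ST.isPaired128Slow())
        return false;                      // keep the two single accesses
      return true;
    }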
+ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true", diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 4acd55eb6120..974e79fff3d7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget(); SIProgramInfo KernelInfo; - if (STM.isAmdCodeObjectV2()) { + if (STM.isAmdCodeObjectV2(*MF)) { getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } @@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const AMDGPUSubtarget &STM = MF->getSubtarget(); - if (MFI->isKernel() && STM.isAmdCodeObjectV2()) { + if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) { AMDGPUTargetStreamer *TS = static_cast(OutStreamer->getTargetStreamer()); SmallString<128> SymbolName; @@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, // FIXME: Should use getKernArgSize header.kernarg_segment_byte_size = - STM.getKernArgSegmentSize(MFI->getABIArgOffset()); + STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 2b4fc5397b18..5bf347e48650 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { unsigned Opc = (VT == MVT::f64) ? 
AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; - // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, - // omod - SDValue Ops[8]; - - SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); - SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); - SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); - CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e48c1943cb01..54caa2c5dfad 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, SDLoc SL(N); switch (Opc) { case ISD::FADD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) SDValue LHS = N0.getOperand(0); SDValue RHS = N0.getOperand(1); @@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, } case ISD::FMA: case ISD::FMAD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) SDValue LHS = N0.getOperand(0); SDValue MHS = N0.getOperand(1); @@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) NODE_NAME_CASE(KILL) + NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(SENDMSGHALT) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 69567aa5f713..f6adceac6f11 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -119,6 +119,16 @@ class AMDGPUTargetLowering : public TargetLowering { public: AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); + bool mayIgnoreSignedZero(SDValue Op) const { + if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only + return true; + + if (const auto *BO = dyn_cast(Op)) + return BO->Flags.hasNoSignedZeros(); + + return false; + } + bool isFAbsFree(EVT VT) const override; bool isFNegFree(EVT VT) const override; bool isTruncateFree(EVT Src, EVT Dest) const override; @@ -320,6 +330,7 @@ enum NodeType : unsigned { INTERP_P2, PC_ADD_REL_OFFSET, KILL, + DUMMY_CHAIN, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index f079c8d0c70c..d7fa28bdc001 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode< // This argument to this node is a dword address. 
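The mayIgnoreSignedZero() guard added above exists because distributing an fneg through fadd/fma is not value-preserving for signed zeros unless no-signed-zeros (or unsafe FP math) is in effect. A scalar counterexample, runnable as ordinary IEEE C++ without fast-math (this only models the folded expressions, not the SelectionDAG code):

    #include <cmath>
    #include <cstdio>

    int main() {
      // (fneg (fadd x, y)) vs (fadd (fneg x), (fneg y)) with x = +0.0, y = -0.0:
      double x = +0.0, y = -0.0;
      double folded   = -(x + y);      // -(+0.0)          == -0.0
      double expanded = (-x) + (-y);   // (-0.0) + (+0.0)  == +0.0
      std::printf("signbit folded=%d expanded=%d\n",
                  (int)std::signbit(folded), (int)std::signbit(expanded));
      return 0;
    }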
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; +// Force dependencies for vector trunc stores +def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; + def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 74851aedbb21..c35a67de1d7f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, ParseSubtargetFeatures(GPU, FullFS); + // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es + // on VI and newer hardware to avoid assertion failures due to missing ADDR64 + // variants of MUBUF instructions. + if (!hasAddr64() && !FS.contains("flat-for-global")) { + FlatForGlobal = true; + } + // FIXME: I don't think think Evergreen has any useful support for // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? @@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } -unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const { - unsigned ImplicitBytes = getImplicitArgNumBytes(); +unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF, + unsigned ExplicitArgBytes) const { + unsigned ImplicitBytes = getImplicitArgNumBytes(MF); if (ImplicitBytes == 0) return ExplicitArgBytes; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 51ba501bddd1..0e3cb7dc1f87 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -311,22 +311,31 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return EnableXNACK; } - bool isAmdCodeObjectV2() const { - return isAmdHsaOS() || isMesa3DOS(); + bool isMesaKernel(const MachineFunction &MF) const { + return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } + + // Covers VS/PS/CS graphics shaders + bool isMesaGfxShader(const MachineFunction &MF) const { + return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } + + bool isAmdCodeObjectV2(const MachineFunction &MF) const { + return isAmdHsaOS() || isMesaKernel(MF); } /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. - unsigned getExplicitKernelArgOffset() const { - return isAmdCodeObjectV2() ? 0 : 36; + unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { + return isAmdCodeObjectV2(MF) ? 0 : 36; } unsigned getAlignmentForImplicitArgPtr() const { return isAmdHsaOS() ? 
8 : 4; } - unsigned getImplicitArgNumBytes() const { - if (isMesa3DOS()) + unsigned getImplicitArgNumBytes(const MachineFunction &MF) const { + if (isMesaKernel(MF)) return 16; if (isAmdHsaOS() && isOpenCLEnv()) return 32; @@ -585,7 +594,7 @@ class SISubtarget final : public AMDGPUSubtarget { return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } - unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const; + unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const; /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index de7ce5cb9e47..77fee4356b65 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, llvm_unreachable("Unsupported private trunc store"); } - SDValue Chain = Store->getChain(); + SDValue OldChain = Store->getChain(); + bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN); + // Skip dummy + SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain; SDValue BasePtr = Store->getBasePtr(); SDValue Offset = Store->getOffset(); EVT MemVT = Store->getMemoryVT(); @@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, // Store dword // TODO: Can we be smarter about MachinePointerInfo? - return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + + // If we are part of expanded vector, make our neighbors depend on this store + if (VectorTrunc) { + // Make all other vector elements depend on this store + Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore); + DAG.ReplaceAllUsesOfValueWith(OldChain, Chain); + } + return NewStore; } SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Neither LOCAL nor PRIVATE can do vectors at the moment if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && VT.isVector()) { + if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + // Add an extra level of chain to isolate this vector + SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); + // TODO: can the chain be replaced without creating a new store? 
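The DUMMY_CHAIN node introduced above addresses an ordering problem in lowerPrivateTruncStore(): every scalarized element of a truncating private store is lowered to a read-modify-write of an entire dword (load dword, mask, shift, or, store dword), so the element stores of one vector have to be chained to one another or an earlier merge can be lost. A plain C++ model of that lost-update hazard (the helper below only mimics the dword RMW; it is not the DAG lowering itself):

    #include <cstdint>
    #include <cstdio>

    // One scalarized byte store = read-modify-write of the containing dword.
    static uint32_t rmwStoreByte(uint32_t Dword, unsigned Byte, uint8_t Val) {
      uint32_t Mask = 0xffu << (8 * Byte);
      return (Dword & ~Mask) | (uint32_t(Val) << (8 * Byte));
    }

    int main() {
      uint32_t Mem = 0;
      // Ordered (what the added chain enforces): the second RMW starts from
      // the result of the first.
      uint32_t Ordered = rmwStoreByte(rmwStoreByte(Mem, 0, 0x11), 1, 0x22);
      // Unordered: both RMWs start from the stale dword, so whichever store
      // lands last wins and the other byte is lost.
      uint32_t Unordered = rmwStoreByte(Mem, 1, 0x22);
      std::printf("ordered=0x%08x unordered=0x%08x\n",
                  (unsigned)Ordered, (unsigned)Unordered);
      return 0;
    }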
+ SDValue NewStore = DAG.getTruncStore( + NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), + MemVT, StoreNode->getAlignment(), + StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo()); + StoreNode = cast(NewStore); + } + return scalarizeVectorStore(StoreNode, DAG); } @@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Put the mask in correct place SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); - // Put the mask in correct place + // Put the value bits in correct place SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); @@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments( unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); - unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset(); + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); SDValue Arg = DAG.getLoad( diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td index 19795bdde647..9210e66b0fe7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; def MOV : R600_1OP <0x19, "MOV", []>; + +// This is a hack to get rid of DUMMY_CHAIN nodes. +// Most DUMMY_CHAINs should be eliminated during legalization, but undef +// values can sneak in some to selection. +let isPseudo = 1, isCodeGenOnly = 1 in { +def DUMMY_CHAIN : AMDGPUInst < + (outs), + (ins), + "DUMMY_CHAIN", + [(R600dummy_chain)] +>; +} // end let isPseudo = 1, isCodeGenOnly = 1 + + let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { class MOV_IMM : AMDGPUInst < diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index d0a69eafc58e..0b5715515880 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } @@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) { - assert(ST.isAmdCodeObjectV2()); + assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)); MRI.addLiveIn(PreloadedPrivateBufferReg); MBB.addLiveIn(PreloadedPrivateBufferReg); } @@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, bool CopyBuffer = ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister && + ST.isAmdCodeObjectV2(MF) && ScratchRsrcReg != PreloadedPrivateBufferReg; // This needs to be careful of the copying order to avoid overwriting one of @@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .addReg(PreloadedPrivateBufferReg, RegState::Kill); } - if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) { - assert(!ST.isAmdCodeObjectV2()); + if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || 
(PreloadedPrivateBufferReg == AMDGPU::NoRegister))) { + assert(!ST.isAmdCodeObjectV2(MF)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); // Use relocations to get the pointer, and setup the other bits manually. uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - BuildMI(MBB, I, DL, SMovB32, Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, SMovB32, Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + if (MFI->hasPrivateMemoryInputPtr()) { + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + + if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); + + BuildMI(MBB, I, DL, Mov64, Rsrc01) + .addReg(PreloadedPrivateBufferReg) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } else { + const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); + + PointerType *PtrTy = + PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), + AMDGPUAS::CONSTANT_ADDRESS); + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); + auto MMO = MF.getMachineMemOperand(PtrInfo, + MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable, + 0, 0); + BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) + .addReg(PreloadedPrivateBufferReg) + .addImm(0) // offset + .addImm(0) // glc + .addMemOperand(MMO) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } + } else { + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + + BuildMI(MBB, I, DL, SMovB32, Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + } BuildMI(MBB, I, DL, SMovB32, Rsrc2) .addImm(Rsrc23 & 0xffffffff) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9140fe6cd148..b98f9f400ee7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (!AMDGPU::isShader(CallConv)) { assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()); } else { - assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() && + assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() && !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && @@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments( !Info->hasWorkItemIDZ()); } + if (Info->hasPrivateMemoryInputPtr()) { + unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI); + MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass); + CCInfo.AllocateReg(PrivateMemoryPtrReg); + } + // FIXME: How should these inputs interact with inreg / custom SGPR inputs? 
if (Info->hasPrivateSegmentBuffer()) { unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI); @@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (VA.isMemLoc()) { VT = Ins[i].VT; EVT MemVT = VA.getLocVT(); - const unsigned Offset = Subtarget->getExplicitKernelArgOffset() + + const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. @@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (getTargetMachine().getOptLevel() == CodeGenOpt::None) HasStackObjects = true; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects) { // If we have stack objects, we unquestionably need the private buffer // resource. For the Code Object V2 ABI, this will be the first 4 user @@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // TODO: Should this propagate fast-math-flags? switch (IntrinsicID) { + case Intrinsic::amdgcn_implicit_buffer_ptr: { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + } case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { - if (!Subtarget->isAmdCodeObjectV2()) { + if (!Subtarget->isAmdCodeObjectV2(MF)) { DiagnosticInfoUnsupported BadIntrin( *MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index e911817c451d..ecd46b95ca6f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), - WorkItemIDZ(false) { + WorkItemIDZ(false), + PrivateMemoryInputPtr(false) { const SISubtarget &ST = MF.getSubtarget(); const Function *F = MF.getFunction(); @@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || MaySpill) PrivateSegmentWaveByteOffset = true; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F->hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; + } else if (ST.isMesaGfxShader(MF)) { + if (HasStackObjects || MaySpill) + PrivateMemoryInputPtr = true; } // We don't need to worry about accessing spills with flat instructions. 
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { return FlatScratchInitUserSGPR; } +unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) { + PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return PrivateMemoryPtrUserSGPR; +} + SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg ( MachineFunction *MF, unsigned FrameIndex, diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 3b4e233cd787..6fc8d18bceba 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned ScratchRSrcReg; unsigned ScratchWaveOffsetReg; + // Input registers for non-HSA ABI + unsigned PrivateMemoryPtrUserSGPR; + // Input registers setup for the HSA ABI. // User SGPRs in allocation order. unsigned PrivateSegmentBufferUserSGPR; @@ -163,6 +166,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { bool WorkItemIDY : 1; bool WorkItemIDZ : 1; + // Private memory buffer + // Compute directly in sgpr[0:1] + // Other shaders indirect 64-bits at sgpr[0:1] + bool PrivateMemoryInputPtr : 1; + MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; @@ -198,6 +206,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); unsigned addDispatchID(const SIRegisterInfo &TRI); unsigned addFlatScratchInit(const SIRegisterInfo &TRI); + unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI); // Add system SGPRs. 
unsigned addWorkGroupIDX() { @@ -302,6 +311,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return WorkItemIDZ; } + bool hasPrivateMemoryInputPtr() const { + return PrivateMemoryInputPtr; + } + unsigned getNumUserSGPRs() const { return NumUserSGPRs; } @@ -338,6 +351,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return QueuePtrUserSGPR; } + unsigned getPrivateMemoryPtrUserSGPR() const { + return PrivateMemoryPtrUserSGPR; + } + bool hasSpilledSGPRs() const { return HasSpilledSGPRs; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8c4b24a4504d..a1ed5e8441df 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - assert(ST.isAmdCodeObjectV2() && - "Non-CodeObjectV2 ABI currently uses relocations"); - assert(MFI->hasPrivateSegmentBuffer()); - return MFI->PrivateSegmentBufferUserSGPR; + if (ST.isAmdCodeObjectV2(MF)) { + assert(MFI->hasPrivateSegmentBuffer()); + return MFI->PrivateSegmentBufferUserSGPR; + } + assert(MFI->hasPrivateMemoryInputPtr()); + return MFI->PrivateMemoryPtrUserSGPR; case SIRegisterInfo::KERNARG_SEGMENT_PTR: assert(MFI->hasKernargSegmentPtr()); return MFI->KernargSegmentPtrUserSGPR; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 5efa64d25ce1..c2a4d4ba99b1 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -70,8 +70,10 @@ class VOP3_Profile : VOPProfile { } class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { + // v_div_scale_{f32|f64} do not support input modifiers. + let HasModifiers = 0; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); - let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"; + let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { @@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, AMDGPU def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { let SchedRW = [WriteFloatFMA, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } // Double precision division pre-scale. def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { let SchedRW = [WriteDouble, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 8ec9cb02813c..95db35ce8ffb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); + // Emit the XRay table for this function. + emitXRayTable(); + // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. 
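Two XRay-related pieces appear in this patch: xray_arm.cc (near the top) now flushes the instruction cache over the freshly patched sled through a declared __clear_cache(start, end), and ARMAsmPrinter::runOnMachineFunction() above now emits the per-function XRay table. A hedged sketch of the cache-maintenance pattern the runtime change relies on, written with the portable __builtin___clear_cache spelling rather than the libgcc/compiler-rt symbol:

    #include <cstdint>

    // After rewriting code in place (as patchSled() does), flush the modified
    // range so the ARM instruction cache observes the new bytes before the
    // patched code can be executed.
    void patchAndFlush(uint32_t *Begin, uint32_t *End, uint32_t Opcode) {
      for (uint32_t *P = Begin; P != End; ++P)
        *P = Opcode;                                // rewrite the instructions
      __builtin___clear_cache(reinterpret_cast<char *>(Begin),
                              reinterpret_cast<char *>(End));
    }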
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fb4c689bcb59..1606c1576465 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7571,11 +7571,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::MUL:          return LowerMUL(Op, DAG);
   case ISD::SDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ true);
     return LowerSDIV(Op, DAG);
   case ISD::UDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ false);
     return LowerUDIV(Op, DAG);
   case ISD::ADDC:
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index da96040d9996..08fe2bad281e 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31272,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
 }
 
-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
-                                        const X86Subtarget &Subtarget) {
-  if (!Subtarget.hasAVX512())
-    return false;
-
-  // FIXME: Scalar type may be supported if we move it to vector register.
-  if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
-    return false;
-
-  EVT SrcElVT = SrcVT.getScalarType();
-  EVT DstElVT = DstVT.getScalarType();
-  if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
-    return false;
-  if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
-    return false;
-  if (SrcVT.is512BitVector() || Subtarget.hasVLX())
-    return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
-  return false;
-}
-
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
-  if (Subtarget.hasSSE2())
-    // v16i16 -> v16i8
-    if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
-      return true;
-  if (Subtarget.hasSSE41())
-    // v8i32 -> v8i16
-    if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
-      return true;
-  return false;
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectUSatPattern(SDValue In, EVT VT) {
-  if (In.getOpcode() != ISD::UMIN)
-    return SDValue();
-
-  //Saturation with truncation. We truncate from InVT to VT.
-  assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
-         "Unexpected types for truncate operation");
-
-  APInt C;
-  if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
-    // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
-    // the element size of the destination type.
-    return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
-      SDValue();
-  }
-  return SDValue();
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// The types should allow to use VPMOVUS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
-                                       const X86Subtarget &Subtarget) {
-  if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-    return SDValue();
-  return detectUSatPattern(In, VT);
-}
-
-static SDValue
-combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
-                        const X86Subtarget &Subtarget) {
-  SDValue USatVal = detectUSatPattern(In, VT);
-  if (USatVal) {
-    if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-      return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
-    if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
-      SDValue Lo, Hi;
-      std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
-      return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
-    }
-  }
-  return SDValue();
-}
-
 /// This function detects the AVG pattern between vectors of unsigned i8/i16,
 /// which is c = (a + b + 1) / 2, and replace this operation with the efficient
 /// X86ISD::AVG instruction.
@@ -31925,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                         St->getPointerInfo(), St->getAlignment(),
                         St->getMemOperand()->getFlags());
 
-    if (SDValue Val =
-        detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
-      return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
-                             dl, Val, St->getBasePtr(),
-                             St->getMemoryVT(), St->getMemOperand(), DAG);
-
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     unsigned NumElems = VT.getVectorNumElements();
     assert(StVT != VT && "Cannot truncate to the same type");
@@ -32551,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
 
-  // Try to combine truncation with unsigned saturation.
-  if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
-    return Val;
-
   // The bitcast source is a direct mmx result.
   // Detect bitcasts between i32 to x86mmx
   if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
@@ -33790,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Try to synthesize horizontal adds from adds of shuffles.
+  // Try to synthesize horizontal subs from subs of shuffles.
   EVT VT = N->getValueType(0);
   if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
        (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
-      isHorizontalBinOp(Op0, Op1, true))
+      isHorizontalBinOp(Op0, Op1, false))
     return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
 
   return OptimizeConditionalInDecrement(N, DAG);
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6e30919246c7..7b0bddbbb831 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1436,6 +1436,14 @@ static bool canSinkInstructions(
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
         I->getType()->isTokenTy())
       return false;
+
+    // Conservatively return false if I is an inline-asm instruction. Sinking
+    // and merging inline-asm instructions can potentially create arguments
+    // that cannot satisfy the inline-asm constraints.
+    if (const auto *C = dyn_cast<CallInst>(I))
+      if (C->isInlineAsm())
+        return false;
+
     // Everything must have only one use too, apart from stores which
     // have no uses.
     if (!isa<StoreInst>(I) && !I->hasOneUse())
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
index d03c22af5b29..b531a66dbab3 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
@@ -9025,7 +9025,8 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
 
     // Variables that have initialization with side-effects are required.
     if (VD->getInit() && VD->getInit()->HasSideEffects(*this) &&
-        !VD->evaluateValue())
+        // We can get a value-dependent initializer during error recovery.
+        (VD->getInit()->isValueDependent() || !VD->evaluateValue()))
       return true;
 
     // Likewise, variables with tuple-like bindings are required if their
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index 137c69420ddf..e142a21b1e74 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -112,9 +112,8 @@ CodeGenFunction::~CodeGenFunction() {
   if (FirstBlockInfo)
     destroyBlockInfos(FirstBlockInfo);
 
-  if (getLangOpts().OpenMP) {
+  if (getLangOpts().OpenMP && CurFn)
     CGM.getOpenMPRuntime().functionFinished(*this);
-  }
 }
 
 CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T,
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
index 7d60aad3895d..3b4f3f81399d 100644
--- a/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
@@ -92,7 +92,13 @@ class IndexingDeclVisitor : public ConstDeclVisitor {
         Relations.emplace_back((unsigned)SymbolRole::RelationAccessorOf,
                                AssociatedProp);
 
-    if (!IndexCtx.handleDecl(D, (unsigned)SymbolRole::Dynamic, Relations))
+    // getLocation() returns beginning token of a method declaration, but for
+    // indexing purposes we want to point to the base name.
+    SourceLocation MethodLoc = D->getSelectorStartLoc();
+    if (MethodLoc.isInvalid())
+      MethodLoc = D->getLocation();
+
+    if (!IndexCtx.handleDecl(D, MethodLoc, (unsigned)SymbolRole::Dynamic, Relations))
       return false;
     IndexCtx.indexTypeSourceInfo(D->getReturnTypeSourceInfo(), D);
     bool hasIBActionAndFirst = D->hasAttr<IBActionAttr>();
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
index 45eff5ee6b62..b053c83c3f69 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
@@ -1684,7 +1684,7 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
     // If this is an incomplete array type, the actual type needs to
     // be calculated here.
     llvm::APSInt Zero(maxElements.getBitWidth(), maxElements.isUnsigned());
-    if (maxElements == Zero) {
+    if (maxElements == Zero && !Entity.isVariableLengthArrayNew()) {
       // Sizing an array implicitly to zero is not allowed by ISO C,
       // but is supported by GNU.
       SemaRef.Diag(IList->getLocStart(),
diff --git a/contrib/llvm/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/CMakeLists.txt
index 48ac5e038cdb..be424efbbd87 100644
--- a/contrib/llvm/tools/lld/CMakeLists.txt
+++ b/contrib/llvm/tools/lld/CMakeLists.txt
@@ -42,6 +42,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS})
   link_directories(${LLVM_LIBRARY_DIRS})
 
+  set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
   set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)
 
   find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
index 887ca12537ad..3cc235386b88 100644
--- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
@@ -918,12 +918,7 @@ const OutputSectionBase *LinkerScript::getSymbolSection(StringRef S) {
     return CurOutSec ? CurOutSec : (*OutputSections)[0];
   }
 
-  if (auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym))
-    return DR->Section ? DR->Section->OutSec : nullptr;
-  if (auto *DS = dyn_cast_or_null<DefinedSynthetic>(Sym))
-    return DS->Section;
-
-  return nullptr;
+  return SymbolTableSection<ELFT>::getOutputSection(Sym);
 }
 
 // Returns indices of ELF headers containing specific section, identified
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.h b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
index dfefb3821e75..f7e891ec3a66 100644
--- a/contrib/llvm/tools/lld/ELF/SyntheticSections.h
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
@@ -372,6 +372,8 @@ class SymbolTableSection final : public SyntheticSection {
 
   ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; }
 
+  static const OutputSectionBase *getOutputSection(SymbolBody *Sym);
+
   unsigned NumLocals = 0;
   StringTableSection<ELFT> &StrTabSec;
 
@@ -379,8 +381,6 @@ class SymbolTableSection final : public SyntheticSection {
   void writeLocalSymbols(uint8_t *&Buf);
   void writeGlobalSymbols(uint8_t *Buf);
 
-  const OutputSectionBase *getOutputSection(SymbolBody *Sym);
-
   // A vector of symbols and their string table offsets.
   std::vector<SymbolTableEntry> Symbols;
 };
diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc
index 60a86e8078b9..168c885c085e 100644
--- a/lib/clang/include/clang/Basic/Version.inc
+++ b/lib/clang/include/clang/Basic/Version.inc
@@ -8,4 +8,4 @@
 
 #define CLANG_VENDOR "FreeBSD "
 
-#define SVN_REVISION "292951"
+#define SVN_REVISION "293443"
diff --git a/lib/clang/include/lld/Config/Version.inc b/lib/clang/include/lld/Config/Version.inc
index 7f3314307fe9..6975ed0edddb 100644
--- a/lib/clang/include/lld/Config/Version.inc
+++ b/lib/clang/include/lld/Config/Version.inc
@@ -4,5 +4,5 @@
 #define LLD_VERSION_STRING "4.0.0"
 #define LLD_VERSION_MAJOR 4
 #define LLD_VERSION_MINOR 0
-#define LLD_REVISION_STRING "292951"
+#define LLD_REVISION_STRING "293443"
 #define LLD_REPOSITORY_STRING "FreeBSD"