Merge llvm, clang, compiler-rt, libc++, lld and lldb release_40 branch
r293443, and update build glue.
Dimitry Andric 2017-01-29 21:56:47 +00:00
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang400-import/; revision=312967
47 changed files with 434 additions and 245 deletions


@@ -283,7 +283,7 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2)
 END_COMPILERRT_FUNCTION(__unordsf2)
 
 #if defined(COMPILER_RT_ARMHF_TARGET)
-DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum):
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum)
     vmov s0, r0
     vmov s1, r1
     b SYMBOL_NAME(__unordsf2)
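The hunk above only drops a stray trailing colon: assuming DEFINE_COMPILERRT_FUNCTION already ends with the label terminator (as the compiler-rt assembly macros appear to), writing another colon at the call site produces a doubled colon that the assembler rejects. A hypothetical reduction, not the real macro:

    // Hypothetical macro standing in for the compiler-rt one: if the body
    // already supplies the ':' after the symbol name ...
    #define DEFINE_FUNC(name) .globl name; name:
    // ... then "DEFINE_FUNC(__aeabi_fcmpum):" expands to "__aeabi_fcmpum::",
    // which is not a valid label, while "DEFINE_FUNC(__aeabi_fcmpum)" is.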


@@ -19,6 +19,8 @@
 #include <atomic>
 #include <cassert>
 
+extern "C" void __clear_cache(void* start, void* end);
+
 namespace __xray {
 
 uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
@@ -116,8 +118,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
   // B #20
   uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+  uint32_t *CurAddress = FirstAddress + 1;
   if (Enable) {
-    uint32_t *CurAddress = FirstAddress + 1;
     CurAddress =
         Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
     CurAddress =
@@ -125,6 +127,7 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
     *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
     CurAddress++;
     *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
+    CurAddress++;
     std::atomic_store_explicit(
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
@@ -133,6 +136,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
   }
+  __clear_cache(reinterpret_cast<char*>(FirstAddress),
+                reinterpret_cast<char*>(CurAddress));
   return true;
 }
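Two things happen in this XRay sled patcher: CurAddress is hoisted out of the Enable branch so it names the end of the patched region on both paths, and the newly declared __clear_cache is called so the instruction cache observes the rewritten code. A minimal sketch of that patch-then-flush pattern, with assumed helper names rather than the XRay API:

    #include <cstdint>

    extern "C" void __clear_cache(void* start, void* end); // toolchain runtime helper

    // Overwrite a range of instruction words, then flush so the I-cache on
    // ARM does not keep executing the stale bytes.
    void patch_words(uint32_t *begin, uint32_t *end, uint32_t opcode) {
      for (uint32_t *p = begin; p != end; ++p)
        *p = opcode;                                // writes land in the D-cache
      __clear_cache(reinterpret_cast<char*>(begin),
                    reinterpret_cast<char*>(end));  // make the I-cache coherent
    }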


@@ -403,15 +403,6 @@ namespace std {
 #define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow")))
 #endif
 
-// A constexpr version of __builtin_memcmp was added in clang 4.0
-#if __has_builtin(__builtin_memcmp)
-# ifdef __apple_build_version__
-// No shipping version of Apple's clang has constexpr __builtin_memcmp
-# elif __clang_major__ > 3
-#  define _LIBCPP_BUILTIN_MEMCMP_ISCONSTEXPR
-# endif
-#endif
-
 #elif defined(_LIBCPP_COMPILER_GCC)
 
 #define _ALIGNAS(x) __attribute__((__aligned__(x)))
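The removed block probed __builtin_memcmp and then second-guessed the answer per vendor; clang 4.0 instead advertises all of the constant-evaluable string builtins behind one feature test, which the char_traits hunks below switch to. A sketch of the probe, using the standard __has_feature idiom rather than anything libc++-specific:

    // Fall back cleanly on compilers without __has_feature.
    #ifndef __has_feature
    #define __has_feature(x) 0
    #endif

    #if __has_feature(cxx_constexpr_string_builtins)
    // __builtin_memcmp, __builtin_wmemcmp, __builtin_wcslen, ... are usable
    // in constant expressions here; no per-vendor version checks needed.
    #endif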


@@ -243,7 +243,7 @@ char_traits<char>::compare(const char_type* __s1, const char_type* __s2, size_t
 {
     if (__n == 0)
         return 0;
-#ifdef _LIBCPP_BUILTIN_MEMCMP_ISCONSTEXPR
+#if __has_feature(cxx_constexpr_string_builtins)
     return __builtin_memcmp(__s1, __s2, __n);
 #elif _LIBCPP_STD_VER <= 14
     return memcmp(__s1, __s2, __n);
@@ -265,7 +265,9 @@ char_traits<char>::find(const char_type* __s, size_t __n, const char_type& __a)
 {
     if (__n == 0)
         return NULL;
-#if _LIBCPP_STD_VER <= 14
+#if __has_feature(cxx_constexpr_string_builtins)
+    return __builtin_char_memchr(__s, to_int_type(__a), __n);
+#elif _LIBCPP_STD_VER <= 14
     return (const char_type*) memchr(__s, to_int_type(__a), __n);
 #else
     for (; __n; --__n)
@@ -331,7 +333,7 @@ char_traits<wchar_t>::compare(const char_type* __s1, const char_type* __s2, size
 {
     if (__n == 0)
         return 0;
-#if __has_builtin(__builtin_wmemcmp)
+#if __has_feature(cxx_constexpr_string_builtins)
     return __builtin_wmemcmp(__s1, __s2, __n);
 #elif _LIBCPP_STD_VER <= 14
     return wmemcmp(__s1, __s2, __n);
@@ -351,7 +353,7 @@ inline _LIBCPP_CONSTEXPR_AFTER_CXX14
 size_t
 char_traits<wchar_t>::length(const char_type* __s) _NOEXCEPT
 {
-#if __has_builtin(__builtin_wcslen)
+#if __has_feature(cxx_constexpr_string_builtins)
     return __builtin_wcslen(__s);
 #elif _LIBCPP_STD_VER <= 14
     return wcslen(__s);
@@ -369,7 +371,7 @@ char_traits<wchar_t>::find(const char_type* __s, size_t __n, const char_type& __
 {
     if (__n == 0)
         return NULL;
-#if __has_builtin(__builtin_wmemchr)
+#if __has_feature(cxx_constexpr_string_builtins)
     return __builtin_wmemchr(__s, __a, __n);
 #elif _LIBCPP_STD_VER <= 14
     return wmemchr(__s, __a, __n);
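With the feature test in place, these char_traits members become genuinely usable in constant expressions in C++17, which is what makes comparisons such as string_view's evaluable at compile time. A small illustration, assuming a C++17 compiler that reports the feature:

    #include <string_view>

    // char_traits<char>::compare is _LIBCPP_CONSTEXPR_AFTER_CXX14; with the
    // builtin available it can run at compile time, so this is valid.
    constexpr std::string_view lo = "abc";
    constexpr std::string_view hi = "abd";
    static_assert(lo < hi, "compared via constexpr __builtin_memcmp");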


@@ -40,6 +40,12 @@
 #define _LIBCPP_THREAD_ABI_VISIBILITY inline _LIBCPP_INLINE_VISIBILITY
 #endif
 
+#if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(no_thread_safety_analysis)
+#define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
+#else
+#define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
+#endif
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
@@ -98,25 +104,25 @@ typedef DWORD __libcpp_tls_key;
 _LIBCPP_THREAD_ABI_VISIBILITY
 int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t *__m);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t *__m);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t *__m);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t *__m);
 
 _LIBCPP_THREAD_ABI_VISIBILITY
 int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t *__m);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_mutex_lock(__libcpp_mutex_t *__m);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_mutex_trylock(__libcpp_mutex_t *__m);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_mutex_unlock(__libcpp_mutex_t *__m);
 
 _LIBCPP_THREAD_ABI_VISIBILITY
@@ -129,10 +135,10 @@ int __libcpp_condvar_signal(__libcpp_condvar_t* __cv);
 _LIBCPP_THREAD_ABI_VISIBILITY
 int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m);
 
-_LIBCPP_THREAD_ABI_VISIBILITY
+_LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m,
                                timespec *__ts);
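The new macro opts these lock and condition-variable shims out of Clang's -Wthread-safety analysis, which cannot see through this ABI boundary and would otherwise report spurious capability warnings at call sites. A small sketch of the attribute in isolation, with illustrative names rather than libc++'s:

    // A capability type and a wrapper that releases it without the
    // annotations the analysis needs; the attribute silences the warnings.
    struct __attribute__((capability("mutex"))) Mtx {
      void lock() __attribute__((acquire_capability()));
      void unlock() __attribute__((release_capability()));
    };

    __attribute__((no_thread_safety_analysis))
    void unlock_opaque(Mtx &m) {
      m.unlock(); // not flagged: analysis is disabled for this function
    }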


@@ -110,8 +110,8 @@ public:
     void push_front(value_type&& v);
     void push_back(const value_type& v);
     void push_back(value_type&& v);
-    template <class... Args> reference emplace_front(Args&&... args);
-    template <class... Args> reference emplace_back(Args&&... args);
+    template <class... Args> reference emplace_front(Args&&... args); // reference in C++17
+    template <class... Args> reference emplace_back(Args&&... args);  // reference in C++17
     template <class... Args> iterator emplace(const_iterator p, Args&&... args);
     iterator insert(const_iterator p, const value_type& v);
     iterator insert(const_iterator p, value_type&& v);
@@ -1342,8 +1342,13 @@ public:
     void push_back(const value_type& __v);
 #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES
 #ifndef _LIBCPP_HAS_NO_VARIADICS
+#if _LIBCPP_STD_VER > 14
     template <class... _Args> reference emplace_front(_Args&&... __args);
-    template <class... _Args> reference emplace_back(_Args&&... __args);
+    template <class... _Args> reference emplace_back (_Args&&... __args);
+#else
+    template <class... _Args> void      emplace_front(_Args&&... __args);
+    template <class... _Args> void      emplace_back (_Args&&... __args);
+#endif
     template <class... _Args> iterator emplace(const_iterator __p, _Args&&... __args);
 #endif  // _LIBCPP_HAS_NO_VARIADICS
     void push_front(value_type&& __v);
@@ -1822,7 +1827,11 @@ deque<_Tp, _Allocator>::push_back(value_type&& __v)
 
 template <class _Tp, class _Allocator>
 template <class... _Args>
+#if _LIBCPP_STD_VER > 14
 typename deque<_Tp, _Allocator>::reference
+#else
+void
+#endif
 deque<_Tp, _Allocator>::emplace_back(_Args&&... __args)
 {
     allocator_type& __a = __base::__alloc();
@@ -1832,7 +1841,9 @@ deque<_Tp, _Allocator>::emplace_back(_Args&&... __args)
         __alloc_traits::construct(__a, _VSTD::addressof(*__base::end()),
                                   _VSTD::forward<_Args>(__args)...);
     ++__base::size();
+#if _LIBCPP_STD_VER > 14
     return *--__base::end();
+#endif
 }
 
 #endif  // _LIBCPP_HAS_NO_VARIADICS
@@ -1870,7 +1881,11 @@ deque<_Tp, _Allocator>::push_front(value_type&& __v)
 
 template <class _Tp, class _Allocator>
 template <class... _Args>
+#if _LIBCPP_STD_VER > 14
 typename deque<_Tp, _Allocator>::reference
+#else
+void
+#endif
 deque<_Tp, _Allocator>::emplace_front(_Args&&... __args)
 {
     allocator_type& __a = __base::__alloc();
@@ -1880,7 +1895,9 @@ deque<_Tp, _Allocator>::emplace_front(_Args&&... __args)
     __alloc_traits::construct(__a, _VSTD::addressof(*--__base::begin()), _VSTD::forward<_Args>(__args)...);
     --__base::__start_;
     ++__base::size();
+#if _LIBCPP_STD_VER > 14
     return *__base::begin();
+#endif
 }
 
 #endif  // _LIBCPP_HAS_NO_VARIADICS
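This deque change, together with the matching forward_list, list, queue, stack, and vector hunks below, implements P0084R2: in C++17 the single-element emplace members return a reference to the created element, while older dialects keep returning void. A usage sketch:

    #include <deque>

    void p0084_demo() {
      std::deque<int> d;
    #if __cplusplus >= 201703L
      int &r = d.emplace_back(42);  // C++17: reference to the new element
      r += 1;
    #else
      d.emplace_back(42);           // C++14 and earlier: returns void
    #endif
    }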


@@ -87,7 +87,7 @@ public:
     reference       front();
     const_reference front() const;
 
-    template <class... Args> reference emplace_front(Args&&... args);
+    template <class... Args> reference emplace_front(Args&&... args);  // reference in C++17
     void push_front(const value_type& v);
     void push_front(value_type&& v);
@@ -747,7 +747,11 @@ public:
 
 #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES
 #ifndef _LIBCPP_HAS_NO_VARIADICS
+#if _LIBCPP_STD_VER > 14
     template <class... _Args> reference emplace_front(_Args&&... __args);
+#else
+    template <class... _Args> void      emplace_front(_Args&&... __args);
+#endif
 #endif
     void push_front(value_type&& __v);
 #endif  // _LIBCPP_HAS_NO_RVALUE_REFERENCES
@@ -1103,7 +1107,11 @@ forward_list<_Tp, _Alloc>::assign(initializer_list<value_type> __il)
 
 template <class _Tp, class _Alloc>
 template <class... _Args>
+#if _LIBCPP_STD_VER > 14
 typename forward_list<_Tp, _Alloc>::reference
+#else
+void
+#endif
 forward_list<_Tp, _Alloc>::emplace_front(_Args&&... __args)
 {
     __node_allocator& __a = base::__alloc();
@@ -1113,7 +1121,9 @@ forward_list<_Tp, _Alloc>::emplace_front(_Args&&... __args)
                              _VSTD::forward<_Args>(__args)...);
     __h->__next_ = base::__before_begin()->__next_;
     base::__before_begin()->__next_ = __h.release();
+#if _LIBCPP_STD_VER > 14
     return base::__before_begin()->__next_->__value_;
+#endif
 }
 
 #endif  // _LIBCPP_HAS_NO_VARIADICS


@@ -93,10 +93,10 @@ public:
     size_type max_size() const noexcept;
 
     template <class... Args>
-        reference emplace_front(Args&&... args);
+        reference emplace_front(Args&&... args); // reference in C++17
     void pop_front();
     template <class... Args>
-        reference emplace_back(Args&&... args);
+        reference emplace_back(Args&&... args);  // reference in C++17
     void pop_back();
     void push_front(const value_type& x);
     void push_front(value_type&& x);
@@ -969,9 +969,17 @@ public:
     void push_back(value_type&& __x);
 #ifndef _LIBCPP_HAS_NO_VARIADICS
     template <class... _Args>
+#if _LIBCPP_STD_VER > 14
        reference emplace_front(_Args&&... __args);
+#else
+       void      emplace_front(_Args&&... __args);
+#endif
     template <class... _Args>
+#if _LIBCPP_STD_VER > 14
        reference emplace_back(_Args&&... __args);
+#else
+       void      emplace_back(_Args&&... __args);
+#endif
     template <class... _Args>
         iterator emplace(const_iterator __p, _Args&&... __args);
 #endif  // _LIBCPP_HAS_NO_VARIADICS
@@ -1600,7 +1608,11 @@ list<_Tp, _Alloc>::push_back(value_type&& __x)
 
 template <class _Tp, class _Alloc>
 template <class... _Args>
+#if _LIBCPP_STD_VER > 14
 typename list<_Tp, _Alloc>::reference
+#else
+void
+#endif
 list<_Tp, _Alloc>::emplace_front(_Args&&... __args)
 {
     __node_allocator& __na = base::__node_alloc();
@@ -1609,12 +1621,20 @@ list<_Tp, _Alloc>::emplace_front(_Args&&... __args)
     __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), _VSTD::forward<_Args>(__args)...);
     __link_nodes_at_front(__hold.get()->__as_link(), __hold.get()->__as_link());
     ++base::__sz();
+#if _LIBCPP_STD_VER > 14
     return __hold.release()->__value_;
+#else
+    __hold.release();
+#endif
 }
 
 template <class _Tp, class _Alloc>
 template <class... _Args>
+#if _LIBCPP_STD_VER > 14
 typename list<_Tp, _Alloc>::reference
+#else
+void
+#endif
 list<_Tp, _Alloc>::emplace_back(_Args&&... __args)
 {
     __node_allocator& __na = base::__node_alloc();
@@ -1624,7 +1644,11 @@ list<_Tp, _Alloc>::emplace_back(_Args&&... __args)
     __link_pointer __nl = __hold->__as_link();
     __link_nodes_at_back(__nl, __nl);
     ++base::__sz();
+#if _LIBCPP_STD_VER > 14
     return __hold.release()->__value_;
+#else
+    __hold.release();
+#endif
 }
 
 template <class _Tp, class _Alloc>


@@ -63,7 +63,7 @@ public:
     void push(const value_type& v);
     void push(value_type&& v);
-    template <class... Args> reference emplace(Args&&... args);
+    template <class... Args> reference emplace(Args&&... args); // reference in C++17
     void pop();
 
     void swap(queue& q) noexcept(is_nothrow_swappable_v<Container>)
@@ -292,8 +292,13 @@ public:
 #ifndef _LIBCPP_HAS_NO_VARIADICS
     template <class... _Args>
         _LIBCPP_INLINE_VISIBILITY
+#if _LIBCPP_STD_VER > 14
         reference emplace(_Args&&... __args)
             { return c.emplace_back(_VSTD::forward<_Args>(__args)...);}
+#else
+        void      emplace(_Args&&... __args)
+            {        c.emplace_back(_VSTD::forward<_Args>(__args)...);}
+#endif
 #endif  // _LIBCPP_HAS_NO_VARIADICS
 #endif  // _LIBCPP_HAS_NO_RVALUE_REFERENCES
     _LIBCPP_INLINE_VISIBILITY


@@ -55,7 +55,7 @@ public:
     void push(const value_type& x);
     void push(value_type&& x);
-    template <class... Args> reference emplace(Args&&... args);
+    template <class... Args> reference emplace(Args&&... args); // reference in C++17
     void pop();
 
     void swap(stack& c) noexcept(is_nothrow_swappable_v<Container>)
@@ -199,8 +199,13 @@ public:
 #ifndef _LIBCPP_HAS_NO_VARIADICS
     template <class... _Args>
         _LIBCPP_INLINE_VISIBILITY
+#if _LIBCPP_STD_VER > 14
         reference emplace(_Args&&... __args)
             { return c.emplace_back(_VSTD::forward<_Args>(__args)...);}
+#else
+        void      emplace(_Args&&... __args)
+            {        c.emplace_back(_VSTD::forward<_Args>(__args)...);}
+#endif
 #endif  // _LIBCPP_HAS_NO_VARIADICS
 #endif  // _LIBCPP_HAS_NO_RVALUE_REFERENCES
     _LIBCPP_INLINE_VISIBILITY


@@ -99,7 +99,7 @@ public:
     void push_back(const value_type& x);
     void push_back(value_type&& x);
     template <class... Args>
-        reference emplace_back(Args&&... args);
+        reference emplace_back(Args&&... args); // reference in C++17
     void pop_back();
 
     template <class... Args> iterator emplace(const_iterator position, Args&&... args);
@@ -218,7 +218,7 @@ public:
     const_reference back() const;
 
     void push_back(const value_type& x);
-    template <class... Args> reference emplace_back(Args&&... args); // C++14
+    template <class... Args> reference emplace_back(Args&&... args); // C++14; reference in C++17
     void pop_back();
 
     template <class... Args> iterator emplace(const_iterator position, Args&&... args); // C++14
@@ -679,7 +679,11 @@ public:
 #ifndef _LIBCPP_HAS_NO_VARIADICS
     template <class... _Args>
         _LIBCPP_INLINE_VISIBILITY
+#if _LIBCPP_STD_VER > 14
         reference emplace_back(_Args&&... __args);
+#else
+        void      emplace_back(_Args&&... __args);
+#endif
 #endif  // _LIBCPP_HAS_NO_VARIADICS
 #endif  // _LIBCPP_HAS_NO_RVALUE_REFERENCES
     _LIBCPP_INLINE_VISIBILITY
@@ -1625,7 +1629,11 @@ vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args)
 template <class _Tp, class _Allocator>
 template <class... _Args>
 inline
+#if _LIBCPP_STD_VER > 14
 typename vector<_Tp, _Allocator>::reference
+#else
+void
+#endif
 vector<_Tp, _Allocator>::emplace_back(_Args&&... __args)
 {
     if (this->__end_ < this->__end_cap())
@@ -1639,7 +1647,9 @@ vector<_Tp, _Allocator>::emplace_back(_Args&&... __args)
     }
     else
         __emplace_back_slow_path(_VSTD::forward<_Args>(__args)...);
+#if _LIBCPP_STD_VER > 14
     return this->back();
+#endif
 }
 
 #endif  // _LIBCPP_HAS_NO_VARIADICS
@@ -2336,9 +2346,16 @@ public:
     void push_back(const value_type& __x);
 #if _LIBCPP_STD_VER > 11
     template <class... _Args>
-    _LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args) {
+#if _LIBCPP_STD_VER > 14
+    _LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args)
+#else
+    _LIBCPP_INLINE_VISIBILITY void      emplace_back(_Args&&... __args)
+#endif
+    {
         push_back ( value_type ( _VSTD::forward<_Args>(__args)... ));
+#if _LIBCPP_STD_VER > 14
         return this->back();
+#endif
     }
 #endif


@@ -100,6 +100,10 @@ def int_amdgcn_dispatch_id :
   GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
   Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
 
+def int_amdgcn_implicit_buffer_ptr :
+  GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction Intrinsics
 //===----------------------------------------------------------------------===//


@@ -1191,14 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       return MayAlias;
 
     AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
-                               AAMDNodes(), V2, V2Size, V2AAInfo,
-                               nullptr, UnderlyingV2);
+                               AAMDNodes(), V2, MemoryLocation::UnknownSize,
+                               V2AAInfo, nullptr, UnderlyingV2);
     if (R != MustAlias)
       // If V2 may alias GEP base pointer, conservatively returns MayAlias.
      // If V2 is known not to alias GEP base pointer, then the two values
-      // cannot alias per GEP semantics: "A pointer value formed from a
-      // getelementptr instruction is associated with the addresses associated
-      // with the first operand of the getelementptr".
+      // cannot alias per GEP semantics: "Any memory access must be done through
+      // a pointer value associated with an address range of the memory access,
+      // otherwise the behavior is undefined.".
      return R;
 
     // If the max search depth is reached the result is undefined
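The comment swap tracks a LangRef rewording, and the aliasCheck call now passes UnknownSize for V2 as well, since the query is about the underlying objects rather than the sized accesses. The rule the code relies on is unchanged: an access through a pointer derived from an object must stay within that object's range. Roughly, in C++ terms:

    void gep_rule_sketch() {
      int a[4];
      int b[4];
      int *p = a + 1;  // pointer formed from a; the IR analogue is a GEP of a
      *p = 1;          // must address a's range, so it can never alias b
      (void)b;
    }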


@@ -919,7 +919,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     // If this isn't a LocalAsMetadata record, we're dropping it. This used
     // to be legal, but there's no upgrade path.
     auto dropRecord = [&] {
-      MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++);
+      MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo);
+      NextMetadataNo++;
     };
     if (Record.size() != 2) {
       dropRecord();
@@ -934,7 +935,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(
         LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_OLD_NODE: {
@@ -959,7 +961,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
       } else
        Elts.push_back(nullptr);
     }
-    MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++);
+    MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_VALUE: {
@@ -972,7 +975,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(
         ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_DISTINCT_NODE:
@@ -985,7 +989,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
       Elts.push_back(getMDOrNull(ID));
     MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
                                         : MDNode::get(Context, Elts),
-                             NextMetadataNo++);
+                             NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_LOCATION: {
@@ -999,7 +1004,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     Metadata *InlinedAt = getMDOrNull(Record[4]);
     MetadataList.assignValue(
         GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_GENERIC_DEBUG: {
@@ -1019,7 +1025,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
       DwarfOps.push_back(getMDOrNull(Record[I]));
     MetadataList.assignValue(
         GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_SUBRANGE: {
@@ -1030,7 +1037,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(
         GET_OR_DISTINCT(DISubrange,
                         (Context, Record[1], unrotateSign(Record[2]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_ENUMERATOR: {
@@ -1041,7 +1049,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(
         GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]),
                                        getMDString(Record[2]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_BASIC_TYPE: {
@@ -1053,7 +1062,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
         GET_OR_DISTINCT(DIBasicType,
                         (Context, Record[1], getMDString(Record[2]), Record[3],
                          Record[4], Record[5])),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_DERIVED_TYPE: {
@@ -1069,7 +1079,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                          getDITypeRefOrNull(Record[5]),
                          getDITypeRefOrNull(Record[6]), Record[7], Record[8],
                          Record[9], Flags, getDITypeRefOrNull(Record[11]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_COMPOSITE_TYPE: {
@@ -1134,7 +1145,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     if (!IsNotUsedInTypeRef && Identifier)
       MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
 
-    MetadataList.assignValue(CT, NextMetadataNo++);
+    MetadataList.assignValue(CT, NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_SUBROUTINE_TYPE: {
@@ -1151,7 +1163,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(
         GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
@@ -1165,7 +1178,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                         (Context, getMDOrNull(Record[1]),
                          getMDString(Record[2]), getMDString(Record[3]),
                          getMDString(Record[4]), getMDString(Record[5]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
@@ -1181,7 +1195,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                         Record.size() == 3 ? DIFile::CSK_None
                                            : static_cast<DIFile::ChecksumKind>(Record[3]),
                         Record.size() == 3 ? nullptr : getMDString(Record[4]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_COMPILE_UNIT: {
@@ -1200,7 +1215,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
         Record.size() <= 14 ? 0 : Record[14],
         Record.size() <= 16 ? true : Record[16]);
-    MetadataList.assignValue(CU, NextMetadataNo++);
+    MetadataList.assignValue(CU, NextMetadataNo);
+    NextMetadataNo++;
 
     // Move the Upgrade the list of subprograms.
     if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11]))
@@ -1247,7 +1263,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
         getMDOrNull(Record[16 + Offset]), // declaration
        getMDOrNull(Record[17 + Offset])  // variables
        ));
-    MetadataList.assignValue(SP, NextMetadataNo++);
+    MetadataList.assignValue(SP, NextMetadataNo);
+    NextMetadataNo++;
 
     // Upgrade sp->function mapping to function->sp mapping.
     if (HasFn) {
@@ -1272,7 +1289,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
         GET_OR_DISTINCT(DILexicalBlock,
                         (Context, getMDOrNull(Record[1]),
                          getMDOrNull(Record[2]), Record[3], Record[4])),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_LEXICAL_BLOCK_FILE: {
@@ -1284,7 +1302,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
         GET_OR_DISTINCT(DILexicalBlockFile,
                         (Context, getMDOrNull(Record[1]),
                          getMDOrNull(Record[2]), Record[3])),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_NAMESPACE: {
@@ -1298,7 +1317,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                         (Context, getMDOrNull(Record[1]),
                          getMDOrNull(Record[2]), getMDString(Record[3]),
                          Record[4], ExportSymbols)),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_MACRO: {
@@ -1310,7 +1330,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
         GET_OR_DISTINCT(DIMacro,
                         (Context, Record[1], Record[2], getMDString(Record[3]),
                          getMDString(Record[4]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_MACRO_FILE: {
@@ -1322,7 +1343,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
         GET_OR_DISTINCT(DIMacroFile,
                         (Context, Record[1], Record[2], getMDOrNull(Record[3]),
                          getMDOrNull(Record[4]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_TEMPLATE_TYPE: {
@@ -1333,7 +1355,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
                                              (Context, getMDString(Record[1]),
                                               getDITypeRefOrNull(Record[2]))),
-                             NextMetadataNo++);
+                             NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_TEMPLATE_VALUE: {
@@ -1346,7 +1369,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                         (Context, Record[1], getMDString(Record[2]),
                          getDITypeRefOrNull(Record[3]),
                          getMDOrNull(Record[4]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_GLOBAL_VAR: {
@@ -1364,7 +1388,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                           getMDOrNull(Record[4]), Record[5],
                           getDITypeRefOrNull(Record[6]), Record[7], Record[8],
                           getMDOrNull(Record[10]), Record[11])),
-          NextMetadataNo++);
+          NextMetadataNo);
+      NextMetadataNo++;
     } else if (Version == 0) {
       // Upgrade old metadata, which stored a global variable reference or a
       // ConstantInt here.
@@ -1396,7 +1421,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
           getMDOrNull(Record[10]), AlignInBits));
       auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr);
-      MetadataList.assignValue(DGVE, NextMetadataNo++);
+      MetadataList.assignValue(DGVE, NextMetadataNo);
+      NextMetadataNo++;
       if (Attach)
         Attach->addDebugInfo(DGVE);
     } else
@@ -1429,7 +1455,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                         getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
                         getDITypeRefOrNull(Record[5 + HasTag]),
                         Record[6 + HasTag], Flags, AlignInBits)),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_EXPRESSION: {
@@ -1446,7 +1473,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(
         GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_GLOBAL_VAR_EXPR: {
@@ -1457,7 +1485,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression,
                                              (Context, getMDOrNull(Record[1]),
                                               getMDOrNull(Record[2]))),
-                             NextMetadataNo++);
+                             NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_OBJC_PROPERTY: {
@@ -1471,7 +1500,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                         getMDOrNull(Record[2]), Record[3],
                         getMDString(Record[4]), getMDString(Record[5]),
                         Record[6], getDITypeRefOrNull(Record[7]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_IMPORTED_ENTITY: {
@@ -1484,7 +1514,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
                         (Context, Record[1], getMDOrNull(Record[2]),
                          getDITypeRefOrNull(Record[3]), Record[4],
                          getMDString(Record[5]))),
-        NextMetadataNo++);
+        NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_STRING_OLD: {
@@ -1494,13 +1525,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
     ++NumMDStringLoaded;
     Metadata *MD = MDString::get(Context, String);
-    MetadataList.assignValue(MD, NextMetadataNo++);
+    MetadataList.assignValue(MD, NextMetadataNo);
+    NextMetadataNo++;
     break;
   }
   case bitc::METADATA_STRINGS: {
     auto CreateNextMDString = [&](StringRef Str) {
       ++NumMDStringLoaded;
-      MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++);
+      MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo);
+      NextMetadataNo++;
     };
     if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString))
      return Err;
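Every hunk in this file is the same mechanical fix: NextMetadataNo++ used to be passed as an argument in a call whose other arguments (getValueFwdRef, the GET_OR_DISTINCT expansions, and so on) may themselves read or allocate metadata slots, and C++ leaves the evaluation order of function arguments unspecified. Splitting the increment into its own statement pins the order. A reduced sketch of the hazard, with hypothetical helpers:

    #include <cstdio>

    static int next_no = 0;

    int observe() { return next_no; }     // stands in for getValueFwdRef & co.
    void assign(int slot, int dep) { std::printf("%d %d\n", slot, dep); }

    void racy()  { assign(next_no++, observe()); } // observe() may run before
                                                   // or after the increment
    void fixed() {
      assign(next_no, observe());                  // both see the same value
      next_no++;
    }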


@@ -3439,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
                           LD->getPointerInfo().getWithOffset(Offset),
                           MinAlign(Align, Increment), MMOFlags, AAInfo);
     LdChain.push_back(L.getValue(1));
-    if (L->getValueType(0).isVector()) {
+    if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
+      // Later code assumes the vector loads produced will be mergeable, so we
+      // must pad the final entry up to the previous width. Scalars are
+      // combined separately.
       SmallVector<SDValue, 16> Loads;
       Loads.push_back(L);
       unsigned size = L->getValueSizeInBits(0);


@@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
 def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
     "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
 
-def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
-    "AvoidQuadLdStPairs", "true",
-    "Do not form quad load/store pair operations">;
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+    "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
 
 def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
     "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
@@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
 def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M1 processors",
-                                    [FeatureAvoidQuadLdStPairs,
+                                    [FeatureSlowPaired128,
                                      FeatureCRC,
                                      FeatureCrypto,
                                      FeatureCustomCheapAsMoveHandling,
@@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
 def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M2/M3 processors",
-                                    [FeatureAvoidQuadLdStPairs,
+                                    [FeatureSlowPaired128,
                                      FeatureCRC,
                                      FeatureCrypto,
                                      FeatureCustomCheapAsMoveHandling,


@@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
     return false;
 
   // On some CPUs quad load/store pairs are slower than two single load/stores.
-  if (Subtarget.avoidQuadLdStPairs()) {
+  if (Subtarget.isPaired128Slow()) {
     switch (MI.getOpcode()) {
     default:
       break;


@@ -79,7 +79,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   bool CustomAsCheapAsMove = false;
   bool UsePostRAScheduler = false;
   bool Misaligned128StoreIsSlow = false;
-  bool AvoidQuadLdStPairs = false;
+  bool Paired128IsSlow = false;
   bool UseAlternateSExtLoadCVTF32Pattern = false;
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
@@ -189,7 +189,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   }
   bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
   bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
-  bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
+  bool isPaired128Slow() const { return Paired128IsSlow; }
   bool useAlternateSExtLoadCVTF32Pattern() const {
     return UseAlternateSExtLoadCVTF32Pattern;
   }


@@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
   "Enable SI Machine Scheduler"
 >;
 
+// Unless +-flat-for-global is specified, turn on FlatForGlobal for
+// all OS-es on VI and newer hardware to avoid assertion failures due
+// to missing ADDR64 variants of MUBUF instructions.
+// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
+// instructions.
+
 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
   "FlatForGlobal",
   "true",


@@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
-  if (STM.isAmdCodeObjectV2()) {
+  if (STM.isAmdCodeObjectV2(*MF)) {
     getSIProgramInfo(KernelInfo, *MF);
     EmitAmdKernelCodeT(*MF, KernelInfo);
   }
@@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
 void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
-  if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
+  if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
     AMDGPUTargetStreamer *TS =
         static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
     SmallString<128> SymbolName;
@@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
   // FIXME: Should use getKernArgSize
   header.kernarg_segment_byte_size =
-      STM.getKernArgSegmentSize(MFI->getABIArgOffset());
+      STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
   header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;


@@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
   unsigned Opc
     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
 
-  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
-  // omod
-  SDValue Ops[8];
-
-  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
-  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
-  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
-  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 }
 
 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,


@@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   SDLoc SL(N);
   switch (Opc) {
   case ISD::FADD: {
+    if (!mayIgnoreSignedZero(N0))
+      return SDValue();
+
     // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
     SDValue LHS = N0.getOperand(0);
     SDValue RHS = N0.getOperand(1);
@@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   }
   case ISD::FMA:
   case ISD::FMAD: {
+    if (!mayIgnoreSignedZero(N0))
+      return SDValue();
+
     // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
     SDValue LHS = N0.getOperand(0);
     SDValue MHS = N0.getOperand(1);
@@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
   NODE_NAME_CASE(KILL)
+  NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
   NODE_NAME_CASE(SENDMSGHALT)
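The new mayIgnoreSignedZero guard (declared in the header hunk below) is needed because pushing an fneg through fadd or fma is not value-preserving for IEEE signed zeros. A concrete counterexample:

    #include <cassert>
    #include <cmath>

    void signed_zero_counterexample() {
      double x = +0.0, y = -0.0;
      double folded      = -(x + y);     // -(+0.0) == -0.0
      double distributed = (-x) + (-y);  // (-0.0) + (+0.0) == +0.0
      assert(std::signbit(folded) != std::signbit(distributed));
    }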


@@ -119,6 +119,16 @@ class AMDGPUTargetLowering : public TargetLowering {
 public:
   AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
 
+  bool mayIgnoreSignedZero(SDValue Op) const {
+    if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+      return true;
+
+    if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
+      return BO->Flags.hasNoSignedZeros();
+
+    return false;
+  }
+
   bool isFAbsFree(EVT VT) const override;
   bool isFNegFree(EVT VT) const override;
   bool isTruncateFree(EVT Src, EVT Dest) const override;
@@ -320,6 +330,7 @@ enum NodeType : unsigned {
   INTERP_P2,
   PC_ADD_REL_OFFSET,
   KILL,
+  DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,


@@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
 // This argument to this node is a dword address.
 def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
 def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
 def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;


@@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
   ParseSubtargetFeatures(GPU, FullFS);
 
+  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+  // variants of MUBUF instructions.
+  if (!hasAddr64() && !FS.contains("flat-for-global")) {
+    FlatForGlobal = true;
+  }
+
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
@@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }
 
-unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
-  unsigned ImplicitBytes = getImplicitArgNumBytes();
+unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
+                                            unsigned ExplicitArgBytes) const {
+  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
   if (ImplicitBytes == 0)
     return ExplicitArgBytes;


@@ -311,22 +311,31 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
     return EnableXNACK;
   }
 
-  bool isAmdCodeObjectV2() const {
-    return isAmdHsaOS() || isMesa3DOS();
+  bool isMesaKernel(const MachineFunction &MF) const {
+    return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
+
+  // Covers VS/PS/CS graphics shaders
+  bool isMesaGfxShader(const MachineFunction &MF) const {
+    return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
+
+  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
+    return isAmdHsaOS() || isMesaKernel(MF);
   }
 
   /// \brief Returns the offset in bytes from the start of the input buffer
   ///        of the first explicit kernel argument.
-  unsigned getExplicitKernelArgOffset() const {
-    return isAmdCodeObjectV2() ? 0 : 36;
+  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+    return isAmdCodeObjectV2(MF) ? 0 : 36;
   }
 
   unsigned getAlignmentForImplicitArgPtr() const {
     return isAmdHsaOS() ? 8 : 4;
   }
 
-  unsigned getImplicitArgNumBytes() const {
-    if (isMesa3DOS())
+  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
+    if (isMesaKernel(MF))
       return 16;
     if (isAmdHsaOS() && isOpenCLEnv())
       return 32;
@@ -585,7 +594,7 @@ class SISubtarget final : public AMDGPUSubtarget {
     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
   }
 
-  unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+  unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
 
   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;


@@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
     llvm_unreachable("Unsupported private trunc store");
   }

-  SDValue Chain = Store->getChain();
+  SDValue OldChain = Store->getChain();
+  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+  // Skip dummy
+  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
   SDValue BasePtr = Store->getBasePtr();
   SDValue Offset = Store->getOffset();
   EVT MemVT = Store->getMemoryVT();
@@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
   // Store dword
   // TODO: Can we be smarter about MachinePointerInfo?
-  return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+  // If we are part of expanded vector, make our neighbors depend on this store
+  if (VectorTrunc) {
+    // Make all other vector elements depend on this store
+    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+  }
+  return NewStore;
 }

 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // Neither LOCAL nor PRIVATE can do vectors at the moment
   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
       VT.isVector()) {
+    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+      // Add an extra level of chain to isolate this vector
+      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+      // TODO: can the chain be replaced without creating a new store?
+      SDValue NewStore = DAG.getTruncStore(
+          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
+          MemVT, StoreNode->getAlignment(),
+          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+      StoreNode = cast<StoreSDNode>(NewStore);
+    }
     return scalarizeVectorStore(StoreNode, DAG);
   }
@@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // Put the mask in correct place
   SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

-  // Put the mask in correct place
+  // Put the value bits in correct place
   SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
   SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
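The mask-and-shift sequence above emulates a sub-dword store on hardware that can only write whole 32-bit words, and it is also why the DUMMY_CHAIN serialization earlier in this file matters: two unordered read-modify-write stores into the same word can clobber each other. A scalar C++ model of the emulation (illustrative, not the DAG code):

#include <cstdint>
#include <cstdio>

// Store one byte into memory that is only word-addressable: read the
// containing dword, clear the target byte, insert the value, write back.
void storeByteRMW(uint32_t *Mem, unsigned ByteAddr, uint8_t Val) {
  unsigned Shift = (ByteAddr % 4) * 8;        // put the mask in place
  uint32_t Word = Mem[ByteAddr / 4];          // read the whole dword
  Word &= ~(uint32_t(0xff) << Shift);         // clear the destination byte
  Word |= uint32_t(Val) << Shift;             // put the value bits in place
  Mem[ByteAddr / 4] = Word;                   // write the dword back
}

int main() {
  uint32_t Mem[1] = {0};
  storeByteRMW(Mem, 0, 0xaa);
  storeByteRMW(Mem, 1, 0xbb);                 // must not race with the first
  std::printf("0x%08x\n", Mem[0]);            // prints: 0x0000bbaa
}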
@@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();

-    unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
+    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
     SDValue Arg = DAG.getLoad(

@@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;

 def MOV : R600_1OP <0x19, "MOV", []>;

+// This is a hack to get rid of DUMMY_CHAIN nodes.
+// Most DUMMY_CHAINs should be eliminated during legalization, but undef
+// values can sneak in some to selection.
+let isPseudo = 1, isCodeGenOnly = 1 in {
+def DUMMY_CHAIN : AMDGPUInst <
+  (outs),
+  (ins),
+  "DUMMY_CHAIN",
+  [(R600dummy_chain)]
+>;
+} // end let isPseudo = 1, isCodeGenOnly = 1
+
 let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {

 class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <

@@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
     PreloadedPrivateBufferReg = TRI->getPreloadedValue(
       MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
   }
@@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   }

   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
-    assert(ST.isAmdCodeObjectV2());
+    assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
     MRI.addLiveIn(PreloadedPrivateBufferReg);
     MBB.addLiveIn(PreloadedPrivateBufferReg);
   }
@@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   bool CopyBuffer = ResourceRegUsed &&
     PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
+    ST.isAmdCodeObjectV2(MF) &&
     ScratchRsrcReg != PreloadedPrivateBufferReg;

   // This needs to be careful of the copying order to avoid overwriting one of
@@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
   }

-  if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
-    assert(!ST.isAmdCodeObjectV2());
+  if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
+    assert(!ST.isAmdCodeObjectV2(MF));
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

-    unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
-    unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

     // Use relocations to get the pointer, and setup the other bits manually.
     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-    BuildMI(MBB, I, DL, SMovB32, Rsrc0)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD0")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-    BuildMI(MBB, I, DL, SMovB32, Rsrc1)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD1")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+    if (MFI->hasPrivateMemoryInputPtr()) {
+      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+
+      if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
+        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
+
+        BuildMI(MBB, I, DL, Mov64, Rsrc01)
+          .addReg(PreloadedPrivateBufferReg)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      } else {
+        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+
+        PointerType *PtrTy =
+          PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+                           AMDGPUAS::CONSTANT_ADDRESS);
+        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+        auto MMO = MF.getMachineMemOperand(PtrInfo,
+                                           MachineMemOperand::MOLoad |
+                                           MachineMemOperand::MOInvariant |
+                                           MachineMemOperand::MODereferenceable,
+                                           0, 0);
+        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
+          .addReg(PreloadedPrivateBufferReg)
+          .addImm(0) // offset
+          .addImm(0) // glc
+          .addMemOperand(MMO)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      }
+    } else {
+      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+    }

     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
       .addImm(Rsrc23 & 0xffffffff)
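For orientation, the four dwords assembled above form the 128-bit scratch buffer resource descriptor. A rough sketch of what each piece holds; the real V# bit layout is hardware-defined, so this struct is a mnemonic, not an authoritative encoding:

#include <cstdint>

struct ScratchRsrcDescSketch {
  uint64_t BaseAddr; // rsrc0..1: SCRATCH_RSRC_DWORD0/1 relocations, a copied
                     // SGPR pair, or the result of the S_LOAD_DWORDX2 above
  uint32_t Rsrc2;    // low word of getScratchRsrcWords23()
  uint32_t Rsrc3;    // high word: stride/swizzle/format configuration
};

int main() { return sizeof(ScratchRsrcDescSketch) == 16 ? 0 : 1; }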

@@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments(
   if (!AMDGPU::isShader(CallConv)) {
     assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
   } else {
-    assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+    assert(!Info->hasDispatchPtr() &&
            !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
            !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
            !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
@@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments(
            !Info->hasWorkItemIDZ());
   }

+  if (Info->hasPrivateMemoryInputPtr()) {
+    unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
+    MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(PrivateMemoryPtrReg);
+  }
+
   // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
   if (Info->hasPrivateSegmentBuffer()) {
     unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
@@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments(
     if (VA.isMemLoc()) {
       VT = Ins[i].VT;
       EVT MemVT = VA.getLocVT();
-      const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+      const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
                               VA.getLocMemOffset();
       // The first 36 bytes of the input buffer contains information about
       // thread group and global sizes.
@@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments(
   if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
     HasStackObjects = true;

-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF)) {
     if (HasStackObjects) {
       // If we have stack objects, we unquestionably need the private buffer
       // resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   // TODO: Should this propagate fast-math-flags?

   switch (IntrinsicID) {
+  case Intrinsic::amdgcn_implicit_buffer_ptr: {
+    unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+  }
   case Intrinsic::amdgcn_dispatch_ptr:
   case Intrinsic::amdgcn_queue_ptr: {
-    if (!Subtarget->isAmdCodeObjectV2()) {
+    if (!Subtarget->isAmdCodeObjectV2(MF)) {
       DiagnosticInfoUnsupported BadIntrin(
           *MF.getFunction(), "unsupported hsa intrinsic without hsa target",
           DL.getDebugLoc());

@@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     PrivateSegmentWaveByteOffset(false),
     WorkItemIDX(false),
     WorkItemIDY(false),
-    WorkItemIDZ(false) {
+    WorkItemIDZ(false),
+    PrivateMemoryInputPtr(false) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   const Function *F = MF.getFunction();
@@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   if (HasStackObjects || MaySpill)
     PrivateSegmentWaveByteOffset = true;

-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF)) {
     if (HasStackObjects || MaySpill)
       PrivateSegmentBuffer = true;
@@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     if (F->hasFnAttribute("amdgpu-dispatch-id"))
       DispatchID = true;
+  } else if (ST.isMesaGfxShader(MF)) {
+    if (HasStackObjects || MaySpill)
+      PrivateMemoryInputPtr = true;
   }

   // We don't need to worry about accessing spills with flat instructions.
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
   return FlatScratchInitUserSGPR;
 }

+unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
+  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+  NumUserSGPRs += 2;
+  return PrivateMemoryPtrUserSGPR;
+}
+
 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
   MachineFunction *MF,
   unsigned FrameIndex,

@@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   unsigned ScratchRSrcReg;
   unsigned ScratchWaveOffsetReg;

+  // Input registers for non-HSA ABI
+  unsigned PrivateMemoryPtrUserSGPR;
+
   // Input registers setup for the HSA ABI.
   // User SGPRs in allocation order.
   unsigned PrivateSegmentBufferUserSGPR;
@@ -163,6 +166,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   bool WorkItemIDY : 1;
   bool WorkItemIDZ : 1;

+  // Private memory buffer
+  // Compute directly in sgpr[0:1]
+  // Other shaders indirect 64-bits at sgpr[0:1]
+  bool PrivateMemoryInputPtr : 1;
+
   MCPhysReg getNextUserSGPR() const {
     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -198,6 +206,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
   unsigned addDispatchID(const SIRegisterInfo &TRI);
   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+  unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);

   // Add system SGPRs.
   unsigned addWorkGroupIDX() {
@@ -302,6 +311,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
     return WorkItemIDZ;
   }

+  bool hasPrivateMemoryInputPtr() const {
+    return PrivateMemoryInputPtr;
+  }
+
   unsigned getNumUserSGPRs() const {
     return NumUserSGPRs;
   }
@@ -338,6 +351,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
     return QueuePtrUserSGPR;
   }

+  unsigned getPrivateMemoryPtrUserSGPR() const {
+    return PrivateMemoryPtrUserSGPR;
+  }
+
   bool hasSpilledSGPRs() const {
     return HasSpilledSGPRs;
   }

@@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
   case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
     return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
   case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
-    assert(ST.isAmdCodeObjectV2() &&
-           "Non-CodeObjectV2 ABI currently uses relocations");
-    assert(MFI->hasPrivateSegmentBuffer());
-    return MFI->PrivateSegmentBufferUserSGPR;
+    if (ST.isAmdCodeObjectV2(MF)) {
+      assert(MFI->hasPrivateSegmentBuffer());
+      return MFI->PrivateSegmentBufferUserSGPR;
+    }
+    assert(MFI->hasPrivateMemoryInputPtr());
+    return MFI->PrivateMemoryPtrUserSGPR;
   case SIRegisterInfo::KERNARG_SEGMENT_PTR:
     assert(MFI->hasKernargSegmentPtr());
     return MFI->KernargSegmentPtrUserSGPR;

@@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
 }

 class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+  // v_div_scale_{f32|f64} do not support input modifiers.
+  let HasModifiers = 0;
   let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-  let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
+  let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
 }

 def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
@@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
 def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
   let SchedRW = [WriteFloatFMA, WriteSALU];
   let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
 }

 // Double precision division pre-scale.
 def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
   let SchedRW = [WriteDouble, WriteSALU];
   let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
 }

 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;

@@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Emit the rest of the function body.
   EmitFunctionBody();

+  // Emit the XRay table for this function.
+  emitXRayTable();
+
   // If we need V4T thumb mode Register Indirect Jump pads, emit them.
   // These are created per function, rather than per TU, since it's
   // relatively easy to exceed the thumb branch range within a TU.

@@ -7571,11 +7571,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::MUL: return LowerMUL(Op, DAG);
   case ISD::SDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ true);
     return LowerSDIV(Op, DAG);
   case ISD::UDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ false);
     return LowerUDIV(Op, DAG);
   case ISD::ADDC:
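The new !isVector() guard keeps vector divisions away from the Windows runtime helpers, which divide one scalar pair per call. A hedged model of the distinction (the helper name is illustrative, not the actual __rt_sdiv ABI):

#include <cstdio>

// A scalar runtime call can implement a single i32 division...
int rtSdivModel(int N, int D) { return N / D; }

// ...but a v4i32 division has no single-call equivalent and must be
// expanded element by element instead.
void sdivV4Model(const int *N, const int *D, int *Out) {
  for (int I = 0; I < 4; ++I)
    Out[I] = rtSdivModel(N[I], D[I]);
}

int main() {
  int N[4] = {8, 9, 10, 11}, D[4] = {2, 3, 5, 11}, Out[4];
  sdivV4Model(N, D, Out);
  std::printf("%d %d %d %d\n", Out[0], Out[1], Out[2], Out[3]); // 4 3 2 1
}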

@@ -31272,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
 }

-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
-                                        const X86Subtarget &Subtarget) {
-  if (!Subtarget.hasAVX512())
-    return false;
-
-  // FIXME: Scalar type may be supported if we move it to vector register.
-  if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
-    return false;
-
-  EVT SrcElVT = SrcVT.getScalarType();
-  EVT DstElVT = DstVT.getScalarType();
-  if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
-    return false;
-  if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
-    return false;
-  if (SrcVT.is512BitVector() || Subtarget.hasVLX())
-    return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
-  return false;
-}
-
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
-  if (Subtarget.hasSSE2())
-    // v16i16 -> v16i8
-    if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
-      return true;
-  if (Subtarget.hasSSE41())
-    // v8i32 -> v8i16
-    if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
-      return true;
-  return false;
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectUSatPattern(SDValue In, EVT VT) {
-  if (In.getOpcode() != ISD::UMIN)
-    return SDValue();
-
-  //Saturation with truncation. We truncate from InVT to VT.
-  assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
-         "Unexpected types for truncate operation");
-
-  APInt C;
-  if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
-    // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
-    // the element size of the destination type.
-    return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
-      SDValue();
-  }
-  return SDValue();
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// The types should allow to use VPMOVUS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
-                                       const X86Subtarget &Subtarget) {
-  if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-    return SDValue();
-  return detectUSatPattern(In, VT);
-}
-
-static SDValue
-combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
-                        const X86Subtarget &Subtarget) {
-  SDValue USatVal = detectUSatPattern(In, VT);
-  if (USatVal) {
-    if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-      return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
-    if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
-      SDValue Lo, Hi;
-      std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
-      return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
-    }
-  }
-  return SDValue();
-}
-
 /// This function detects the AVG pattern between vectors of unsigned i8/i16,
 /// which is c = (a + b + 1) / 2, and replace this operation with the efficient
 /// X86ISD::AVG instruction.
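The deleted combine recognized truncate(umin(x, unsigned-max-of-destination)) and rewrote it as a saturating pack (VPMOVUS* on AVX-512, PACKUS on SSE); with the revert, such truncations take the generic path again. A scalar model of the pattern that was being matched:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// truncate(umin(X, 0xff)) is exactly an unsigned-saturating truncate to u8.
uint8_t truncUSat8(uint16_t X) {
  return uint8_t(std::min<uint16_t>(X, 0xff));
}

int main() {
  std::printf("%u %u\n", truncUSat8(300), truncUSat8(42)); // prints: 255 42
}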
@@ -31925,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                         St->getPointerInfo(), St->getAlignment(),
                         St->getMemOperand()->getFlags());

-    if (SDValue Val =
-        detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
-      return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
-                             dl, Val, St->getBasePtr(),
-                             St->getMemoryVT(), St->getMemOperand(), DAG);
-
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     unsigned NumElems = VT.getVectorNumElements();
     assert(StVT != VT && "Cannot truncate to the same type");
@@ -32551,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;

-  // Try to combine truncation with unsigned saturation.
-  if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
-    return Val;
-
   // The bitcast source is a direct mmx result.
   // Detect bitcasts between i32 to x86mmx
   if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
@@ -33790,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
     }
   }

-  // Try to synthesize horizontal adds from adds of shuffles.
+  // Try to synthesize horizontal subs from subs of shuffles.
   EVT VT = N->getValueType(0);
   if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
        (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
-      isHorizontalBinOp(Op0, Op1, true))
+      isHorizontalBinOp(Op0, Op1, false))
     return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);

   return OptimizeConditionalInDecrement(N, DAG);
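The second fix passes false for the IsCommutative flag, since subtraction, unlike addition, cares which shuffled operand comes first. For reference, the pairwise semantics of the horizontal sub being synthesized, modeled on v4i32:

#include <array>
#include <cstdio>

// hsub subtracts adjacent lanes: two pairs from A, then two pairs from B.
std::array<int, 4> hsubV4i32(const std::array<int, 4> &A,
                             const std::array<int, 4> &B) {
  return {A[0] - A[1], A[2] - A[3], B[0] - B[1], B[2] - B[3]};
}

int main() {
  auto R = hsubV4i32({5, 2, 9, 4}, {7, 1, 8, 8});
  std::printf("%d %d %d %d\n", R[0], R[1], R[2], R[3]); // prints: 3 5 6 0
}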

@@ -1436,6 +1436,14 @@ static bool canSinkInstructions(
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
         I->getType()->isTokenTy())
       return false;

+    // Conservatively return false if I is an inline-asm instruction. Sinking
+    // and merging inline-asm instructions can potentially create arguments
+    // that cannot satisfy the inline-asm constraints.
+    if (const auto *C = dyn_cast<CallInst>(I))
+      if (C->isInlineAsm())
+        return false;
+
     // Everything must have only one use too, apart from stores which
     // have no uses.
     if (!isa<StoreInst>(I) && !I->hasOneUse())
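A sketch of the hazard being avoided (GNU inline asm, x86 flavor; illustrative only): each branch below satisfies its "i" constraint with a literal, but if the two calls were sunk and merged into one, the operand would have to flow through a phi and would no longer be a compile-time immediate.

int pick(int C) {
  int R;
  if (C)
    asm("movl %1, %0" : "=r"(R) : "i"(1)); // immediate constraint satisfied
  else
    asm("movl %1, %0" : "=r"(R) : "i"(2)); // merged form would need a phi
  return R;
}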

@@ -9025,7 +9025,8 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
     // Variables that have initialization with side-effects are required.
     if (VD->getInit() && VD->getInit()->HasSideEffects(*this) &&
-        !VD->evaluateValue())
+        // We can get a value-dependent initializer during error recovery.
+        (VD->getInit()->isValueDependent() || !VD->evaluateValue()))
       return true;

     // Likewise, variables with tuple-like bindings are required if their

@@ -112,9 +112,8 @@ CodeGenFunction::~CodeGenFunction() {
   if (FirstBlockInfo)
     destroyBlockInfos(FirstBlockInfo);

-  if (getLangOpts().OpenMP) {
+  if (getLangOpts().OpenMP && CurFn)
     CGM.getOpenMPRuntime().functionFinished(*this);
-  }
 }

 CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T,

@@ -92,7 +92,13 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
           Relations.emplace_back((unsigned)SymbolRole::RelationAccessorOf,
                                  AssociatedProp);

-    if (!IndexCtx.handleDecl(D, (unsigned)SymbolRole::Dynamic, Relations))
+    // getLocation() returns beginning token of a method declaration, but for
+    // indexing purposes we want to point to the base name.
+    SourceLocation MethodLoc = D->getSelectorStartLoc();
+    if (MethodLoc.isInvalid())
+      MethodLoc = D->getLocation();
+
+    if (!IndexCtx.handleDecl(D, MethodLoc, (unsigned)SymbolRole::Dynamic, Relations))
       return false;
     IndexCtx.indexTypeSourceInfo(D->getReturnTypeSourceInfo(), D);
     bool hasIBActionAndFirst = D->hasAttr<IBActionAttr>();

@@ -1684,7 +1684,7 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
   // If this is an incomplete array type, the actual type needs to
   // be calculated here.
   llvm::APSInt Zero(maxElements.getBitWidth(), maxElements.isUnsigned());
-  if (maxElements == Zero) {
+  if (maxElements == Zero && !Entity.isVariableLengthArrayNew()) {
     // Sizing an array implicitly to zero is not allowed by ISO C,
     // but is supported by GNU.
     SemaRef.Diag(IList->getLocStart(),
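The added isVariableLengthArrayNew() test separates two ways of arriving at zero elements here; hypothetical snippets for the two cases:

// Still diagnosed: sizing an array implicitly to zero, a GNU extension.
// int a[] = {};

// No longer diagnosed: an array-new whose runtime bound may be zero.
int *makeArray(unsigned N) { return new int[N]{}; }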

@@ -42,6 +42,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS})
   link_directories(${LLVM_LIBRARY_DIRS})

+  set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
   set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)

   find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)

@@ -918,12 +918,7 @@ const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) {
     return CurOutSec ? CurOutSec : (*OutputSections)[0];
   }

-  if (auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym))
-    return DR->Section ? DR->Section->OutSec : nullptr;
-  if (auto *DS = dyn_cast_or_null<DefinedSynthetic>(Sym))
-    return DS->Section;
-  return nullptr;
+  return SymbolTableSection<ELFT>::getOutputSection(Sym);
 }

 // Returns indices of ELF headers containing specific section, identified

@@ -372,6 +372,8 @@ class SymbolTableSection final : public SyntheticSection<ELFT> {
   ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; }

+  static const OutputSectionBase *getOutputSection(SymbolBody *Sym);
+
   unsigned NumLocals = 0;
   StringTableSection<ELFT> &StrTabSec;
@@ -379,8 +381,6 @@ class SymbolTableSection final : public SyntheticSection<ELFT> {
   void writeLocalSymbols(uint8_t *&Buf);
   void writeGlobalSymbols(uint8_t *Buf);

-  const OutputSectionBase *getOutputSection(SymbolBody *Sym);
-
   // A vector of symbols and their string table offsets.
   std::vector<SymbolTableEntry> Symbols;
 };

@@ -8,4 +8,4 @@
 #define CLANG_VENDOR "FreeBSD "

-#define SVN_REVISION "292951"
+#define SVN_REVISION "293443"

@@ -4,5 +4,5 @@
 #define LLD_VERSION_STRING "4.0.0"
 #define LLD_VERSION_MAJOR 4
 #define LLD_VERSION_MINOR 0
-#define LLD_REVISION_STRING "292951"
+#define LLD_REVISION_STRING "293443"
 #define LLD_REPOSITORY_STRING "FreeBSD"